diff options
Diffstat (limited to 'drivers')
302 files changed, 21895 insertions, 9198 deletions
diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 3d56ebcda720..6a94aa6a22c2 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -45,6 +45,8 @@ static const struct imx_weim_devtype imx51_weim_devtype = { .cs_stride = 0x18, }; +#define MAX_CS_REGS_COUNT 6 + static const struct of_device_id weim_id_table[] = { /* i.MX1/21 */ { .compatible = "fsl,imx1-weim", .data = &imx1_weim_devtype, }, @@ -112,9 +114,12 @@ err: static int __init weim_timing_setup(struct device_node *np, void __iomem *base, const struct imx_weim_devtype *devtype) { - u32 cs_idx, value[devtype->cs_regs_count]; + u32 cs_idx, value[MAX_CS_REGS_COUNT]; int i, ret; + if (WARN_ON(devtype->cs_regs_count > MAX_CS_REGS_COUNT)) + return -EINVAL; + /* get the CS index from this child node's "reg" property. */ ret = of_property_read_u32(np, "reg", &cs_idx); if (ret) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 54edaec1e608..bf6519cf64bc 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -758,8 +758,13 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, cur_msec = jiffies_to_msecs(get_jiffies_64()); - spin_lock(&gpstates->gpstate_lock); freq_data.pstate_id = idx_to_pstate(new_index); + if (!gpstates) { + freq_data.gpstate_id = freq_data.pstate_id; + goto no_gpstate; + } + + spin_lock(&gpstates->gpstate_lock); if (!gpstates->last_sampled_time) { gpstate_idx = new_index; @@ -809,6 +814,7 @@ gpstates_done: spin_unlock(&gpstates->gpstate_lock); +no_gpstate: /* * Use smp_call_function to send IPI and execute the * mtspr on target CPU. We could do that without IPI @@ -843,6 +849,13 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) kernfs_put(kn); } + policy->freq_table = powernv_freqs; + policy->fast_switch_possible = true; + + if (pvr_version_is(PVR_POWER9)) + return 0; + + /* Initialise Gpstate ramp-down timer only on POWER8 */ gpstates = kzalloc(sizeof(*gpstates), GFP_KERNEL); if (!gpstates) return -ENOMEM; @@ -857,8 +870,6 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) msecs_to_jiffies(GPSTATE_TIMER_INTERVAL); spin_lock_init(&gpstates->gpstate_lock); - policy->freq_table = powernv_freqs; - policy->fast_switch_possible = true; return 0; } @@ -998,7 +1009,8 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); - del_timer_sync(&gpstates->timer); + if (gpstates) + del_timer_sync(&gpstates->timer); } static unsigned int powernv_fast_switch(struct cpufreq_policy *policy, diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index d29e4f041efe..84b1ebe212b3 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -242,6 +242,7 @@ static inline void add_powernv_state(int index, const char *name, powernv_states[index].target_residency = target_residency; powernv_states[index].exit_latency = exit_latency; powernv_states[index].enter = idle_fn; + /* For power8 and below psscr_* will be 0 */ stop_psscr_table[index].val = psscr_val; stop_psscr_table[index].mask = psscr_mask; } @@ -263,186 +264,80 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len, extern u32 pnv_get_supported_cpuidle_states(void); static int powernv_add_idle_states(void) { - struct device_node *power_mgt; int nr_idle_states = 1; /* Snooze */ - int dt_idle_states, count; - u32 latency_ns[CPUIDLE_STATE_MAX]; - u32 residency_ns[CPUIDLE_STATE_MAX]; - u32 flags[CPUIDLE_STATE_MAX]; - u64 psscr_val[CPUIDLE_STATE_MAX]; - u64 psscr_mask[CPUIDLE_STATE_MAX]; - const char *names[CPUIDLE_STATE_MAX]; + int dt_idle_states; u32 has_stop_states = 0; - int i, rc; + int i; u32 supported_flags = pnv_get_supported_cpuidle_states(); /* Currently we have snooze statically defined */ - - power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); - if (!power_mgt) { - pr_warn("opal: PowerMgmt Node not found\n"); - goto out; - } - - /* Read values of any property to determine the num of idle states */ - dt_idle_states = of_property_count_u32_elems(power_mgt, "ibm,cpu-idle-state-flags"); - if (dt_idle_states < 0) { - pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + if (nr_pnv_idle_states <= 0) { + pr_warn("cpuidle-powernv : Only Snooze is available\n"); goto out; } - count = of_property_count_u32_elems(power_mgt, - "ibm,cpu-idle-state-latencies-ns"); - - if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states, - "ibm,cpu-idle-state-latencies-ns", - count) != 0) - goto out; - - count = of_property_count_strings(power_mgt, - "ibm,cpu-idle-state-names"); - if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states, - "ibm,cpu-idle-state-names", - count) != 0) - goto out; + /* TODO: Count only states which are eligible for cpuidle */ + dt_idle_states = nr_pnv_idle_states; /* * Since snooze is used as first idle state, max idle states allowed is * CPUIDLE_STATE_MAX -1 */ - if (dt_idle_states > CPUIDLE_STATE_MAX - 1) { + if (nr_pnv_idle_states > CPUIDLE_STATE_MAX - 1) { pr_warn("cpuidle-powernv: discovered idle states more than allowed"); dt_idle_states = CPUIDLE_STATE_MAX - 1; } - if (of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { - pr_warn("cpuidle-powernv : missing ibm,cpu-idle-state-flags in DT\n"); - goto out; - } - - if (of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-latencies-ns", latency_ns, - dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); - goto out; - } - if (of_property_read_string_array(power_mgt, - "ibm,cpu-idle-state-names", names, dt_idle_states) < 0) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); - goto out; - } - /* * If the idle states use stop instruction, probe for psscr values * and psscr mask which are necessary to specify required stop level. */ - has_stop_states = (flags[0] & + has_stop_states = (pnv_idle_states[0].flags & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP)); - if (has_stop_states) { - count = of_property_count_u64_elems(power_mgt, - "ibm,cpu-idle-state-psscr"); - if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", - dt_idle_states, - "ibm,cpu-idle-state-psscr", - count) != 0) - goto out; - - count = of_property_count_u64_elems(power_mgt, - "ibm,cpu-idle-state-psscr-mask"); - if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", - dt_idle_states, - "ibm,cpu-idle-state-psscr-mask", - count) != 0) - goto out; - - if (of_property_read_u64_array(power_mgt, - "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); - goto out; - } - - if (of_property_read_u64_array(power_mgt, - "ibm,cpu-idle-state-psscr-mask", - psscr_mask, dt_idle_states)) { - pr_warn("cpuidle-powernv:Missing ibm,cpu-idle-state-psscr-mask in DT\n"); - goto out; - } - } - - count = of_property_count_u32_elems(power_mgt, - "ibm,cpu-idle-state-residency-ns"); - - if (count < 0) { - rc = count; - } else if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", - dt_idle_states, - "ibm,cpu-idle-state-residency-ns", - count) != 0) { - goto out; - } else { - rc = of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-residency-ns", - residency_ns, dt_idle_states); - } for (i = 0; i < dt_idle_states; i++) { unsigned int exit_latency, target_residency; bool stops_timebase = false; + struct pnv_idle_states_t *state = &pnv_idle_states[i]; /* * Skip the platform idle state whose flag isn't in * the supported_cpuidle_states flag mask. */ - if ((flags[i] & supported_flags) != flags[i]) + if ((state->flags & supported_flags) != state->flags) continue; /* * If an idle state has exit latency beyond * POWERNV_THRESHOLD_LATENCY_NS then don't use it * in cpu-idle. */ - if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS) + if (state->latency_ns > POWERNV_THRESHOLD_LATENCY_NS) continue; /* * Firmware passes residency and latency values in ns. * cpuidle expects it in us. */ - exit_latency = DIV_ROUND_UP(latency_ns[i], 1000); - if (!rc) - target_residency = DIV_ROUND_UP(residency_ns[i], 1000); - else - target_residency = 0; - - if (has_stop_states) { - int err = validate_psscr_val_mask(&psscr_val[i], - &psscr_mask[i], - flags[i]); - if (err) { - report_invalid_psscr_val(psscr_val[i], err); + exit_latency = DIV_ROUND_UP(state->latency_ns, 1000); + target_residency = DIV_ROUND_UP(state->residency_ns, 1000); + + if (has_stop_states && !(state->valid)) continue; - } - } - if (flags[i] & OPAL_PM_TIMEBASE_STOP) + if (state->flags & OPAL_PM_TIMEBASE_STOP) stops_timebase = true; - /* - * For nap and fastsleep, use default target_residency - * values if f/w does not expose it. - */ - if (flags[i] & OPAL_PM_NAP_ENABLED) { - if (!rc) - target_residency = 100; + if (state->flags & OPAL_PM_NAP_ENABLED) { /* Add NAP state */ add_powernv_state(nr_idle_states, "Nap", CPUIDLE_FLAG_NONE, nap_loop, target_residency, exit_latency, 0, 0); } else if (has_stop_states && !stops_timebase) { - add_powernv_state(nr_idle_states, names[i], + add_powernv_state(nr_idle_states, state->name, CPUIDLE_FLAG_NONE, stop_loop, target_residency, exit_latency, - psscr_val[i], psscr_mask[i]); + state->psscr_val, + state->psscr_mask); } /* @@ -450,20 +345,19 @@ static int powernv_add_idle_states(void) * within this config dependency check. */ #ifdef CONFIG_TICK_ONESHOT - else if (flags[i] & OPAL_PM_SLEEP_ENABLED || - flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { - if (!rc) - target_residency = 300000; + else if (state->flags & OPAL_PM_SLEEP_ENABLED || + state->flags & OPAL_PM_SLEEP_ENABLED_ER1) { /* Add FASTSLEEP state */ add_powernv_state(nr_idle_states, "FastSleep", CPUIDLE_FLAG_TIMER_STOP, fastsleep_loop, target_residency, exit_latency, 0, 0); } else if (has_stop_states && stops_timebase) { - add_powernv_state(nr_idle_states, names[i], + add_powernv_state(nr_idle_states, state->name, CPUIDLE_FLAG_TIMER_STOP, stop_loop, target_residency, exit_latency, - psscr_val[i], psscr_mask[i]); + state->psscr_val, + state->psscr_mask); } #endif else diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c index 36afd6d8753c..c68df7e8bee1 100644 --- a/drivers/crypto/nx/nx-842-powernv.c +++ b/drivers/crypto/nx/nx-842-powernv.c @@ -24,6 +24,8 @@ #include <asm/icswx.h> #include <asm/vas.h> #include <asm/reg.h> +#include <asm/opal-api.h> +#include <asm/opal.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); @@ -753,7 +755,7 @@ static int nx842_open_percpu_txwins(void) } static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, - int vasid) + int vasid, int *ct) { struct vas_window *rxwin = NULL; struct vas_rx_win_attr rxattr; @@ -837,6 +839,15 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, coproc->vas.id = vasid; nx842_add_coprocs_list(coproc, chip_id); + /* + * (lpid, pid, tid) combination has to be unique for each + * coprocessor instance in the system. So to make it + * unique, skiboot uses coprocessor type such as 842 or + * GZIP for pid and provides this value to kernel in pid + * device-tree property. + */ + *ct = pid; + return 0; err_out: @@ -850,6 +861,7 @@ static int __init nx842_powernv_probe_vas(struct device_node *pn) struct device_node *dn; int chip_id, vasid, ret = 0; int nx_fifo_found = 0; + int uninitialized_var(ct); chip_id = of_get_ibm_chip_id(pn); if (chip_id < 0) { @@ -865,7 +877,7 @@ static int __init nx842_powernv_probe_vas(struct device_node *pn) for_each_child_of_node(pn, dn) { if (of_device_is_compatible(dn, "ibm,p9-nx-842")) { - ret = vas_cfg_coproc_info(dn, chip_id, vasid); + ret = vas_cfg_coproc_info(dn, chip_id, vasid, &ct); if (ret) { of_node_put(dn); return ret; @@ -876,9 +888,22 @@ static int __init nx842_powernv_probe_vas(struct device_node *pn) if (!nx_fifo_found) { pr_err("NX842 FIFO nodes are missing\n"); - ret = -EINVAL; + return -EINVAL; } + /* + * Initialize NX instance for both high and normal priority FIFOs. + */ + if (opal_check_token(OPAL_NX_COPROC_INIT)) { + ret = opal_nx_coproc_init(chip_id, ct); + if (ret) { + pr_err("Failed to initialize NX for chip(%d): %d\n", + chip_id, ret); + ret = opal_error_code(ret); + } + } else + pr_warn("Firmware doesn't support NX initialization\n"); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9f1a5bd39ae8..5b39d1399630 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -131,6 +131,11 @@ psp_cmd_submit_buf(struct psp_context *psp, msleep(1); } + if (ucode) { + ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; + ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; + } + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 08e38579af24..bdc472b6e641 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -194,6 +194,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_UVD, AMDGPU_UCODE_ID_VCE, + AMDGPU_UCODE_ID_VCN, AMDGPU_UCODE_ID_MAXIMUM, }; @@ -226,6 +227,9 @@ struct amdgpu_firmware_info { void *kaddr; /* ucode_size_bytes */ uint32_t ucode_size; + /* starting tmr mc address */ + uint32_t tmr_mc_addr_lo; + uint32_t tmr_mc_addr_hi; }; void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 632fa5980ff4..e5a6db6beab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -122,8 +122,6 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work); int amdgpu_uvd_sw_init(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - struct drm_sched_rq *rq; unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; @@ -266,13 +264,6 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) } } - ring = &adev->uvd.inst[0].ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD kernel entity.\n"); - return r; - } for (i = 0; i < adev->uvd.max_handles; ++i) { atomic_set(&adev->uvd.handles[i], 0); adev->uvd.filp[i] = NULL; @@ -311,7 +302,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { if (adev->uvd.harvest_config & (1 << j)) continue; - kfree(adev->uvd.inst[j].saved_bo); + kvfree(adev->uvd.inst[j].saved_bo); amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo, &adev->uvd.inst[j].gpu_addr, @@ -327,6 +318,29 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_uvd_entity_init - init entity + * + * @adev: amdgpu_device pointer + * + */ +int amdgpu_uvd_entity_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + struct drm_sched_rq *rq; + int r; + + ring = &adev->uvd.inst[0].ring; + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL); + if (r) { + DRM_ERROR("Failed setting up UVD kernel entity.\n"); + return r; + } + + return 0; +} + int amdgpu_uvd_suspend(struct amdgpu_device *adev) { unsigned size; @@ -354,7 +368,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo); ptr = adev->uvd.inst[j].cpu_addr; - adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL); + adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL); if (!adev->uvd.inst[j].saved_bo) return -ENOMEM; @@ -380,7 +394,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) if (adev->uvd.inst[i].saved_bo != NULL) { memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); - kfree(adev->uvd.inst[i].saved_bo); + kvfree(adev->uvd.inst[i].saved_bo); adev->uvd.inst[i].saved_bo = NULL; } else { const struct common_firmware_header *hdr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 33c5f806f925..a3ab1a41060f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -69,6 +69,7 @@ struct amdgpu_uvd { int amdgpu_uvd_sw_init(struct amdgpu_device *adev); int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); +int amdgpu_uvd_entity_init(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); int amdgpu_uvd_resume(struct amdgpu_device *adev); int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b6ab4f5350c8..0cc5190f4f36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -90,8 +90,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work); */ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) { - struct amdgpu_ring *ring; - struct drm_sched_rq *rq; const char *fw_name; const struct common_firmware_header *hdr; unsigned ucode_version, version_major, version_minor, binary_id; @@ -188,14 +186,6 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return r; } - ring = &adev->vce.ring[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCE run queue.\n"); - return r; - } - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { atomic_set(&adev->vce.handles[i], 0); adev->vce.filp[i] = NULL; @@ -236,6 +226,29 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) } /** + * amdgpu_vce_entity_init - init entity + * + * @adev: amdgpu_device pointer + * + */ +int amdgpu_vce_entity_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + struct drm_sched_rq *rq; + int r; + + ring = &adev->vce.ring[0]; + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up VCE run queue.\n"); + return r; + } + + return 0; +} + +/** * amdgpu_vce_suspend - unpin VCE fw memory * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 71781267ee4c..a1f209eed4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -55,6 +55,7 @@ struct amdgpu_vce { int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); +int amdgpu_vce_entity_init(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 798648a19710..fd654a4406db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -111,9 +111,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) version_major, version_minor, family_id); } - bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) - + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE + bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE + AMDGPU_VCN_SESSION_SIZE * 40; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); @@ -129,7 +130,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { int i; - kfree(adev->vcn.saved_bo); + kvfree(adev->vcn.saved_bo); amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, &adev->vcn.gpu_addr, @@ -160,7 +161,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) size = amdgpu_bo_size(adev->vcn.vcpu_bo); ptr = adev->vcn.cpu_addr; - adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL); + adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL); if (!adev->vcn.saved_bo) return -ENOMEM; @@ -182,18 +183,20 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) if (adev->vcn.saved_bo != NULL) { memcpy_toio(ptr, adev->vcn.saved_bo, size); - kfree(adev->vcn.saved_bo); + kvfree(adev->vcn.saved_bo); adev->vcn.saved_bo = NULL; } else { const struct common_firmware_header *hdr; unsigned offset; hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } memset_io(ptr, 0, size); } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 0ff136d02d9b..02be34e72ed9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -88,6 +88,9 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * case AMDGPU_UCODE_ID_VCE: *type = GFX_FW_TYPE_VCE; break; + case AMDGPU_UCODE_ID_VCN: + *type = GFX_FW_TYPE_VCN; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 6fed3d7797a8..8a926d1df939 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -123,6 +123,10 @@ static int uvd_v4_2_sw_init(void *handle) ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); + if (r) + return r; + + r = amdgpu_uvd_entity_init(adev); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index aeaa1ca46a99..50248059412e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -120,6 +120,10 @@ static int uvd_v5_0_sw_init(void *handle) ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); + if (r) + return r; + + r = amdgpu_uvd_entity_init(adev); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 598dbeaba636..6ae82cc2e55e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -440,6 +440,8 @@ static int uvd_v6_0_sw_init(void *handle) } } + r = amdgpu_uvd_entity_init(adev); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 5fab3560a71d..9b7f8469bc5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -410,6 +410,7 @@ static int uvd_v7_0_early_init(void *handle) static int uvd_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; + int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -478,6 +479,10 @@ static int uvd_v7_0_sw_init(void *handle) } } + r = amdgpu_uvd_entity_init(adev); + if (r) + return r; + r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index d48e877b682e..7eaa54ba016b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -439,6 +439,8 @@ static int vce_v2_0_sw_init(void *handle) return r; } + r = amdgpu_vce_entity_init(adev); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index cc6ce6cc03f4..c8390f9adfd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -448,6 +448,8 @@ static int vce_v3_0_sw_init(void *handle) return r; } + r = amdgpu_vce_entity_init(adev); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 65f8860169e9..2e4d1b5f6243 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -419,6 +419,7 @@ static int vce_v4_0_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; + unsigned size; int r, i; @@ -438,7 +439,7 @@ static int vce_v4_0_sw_init(void *handle) const struct common_firmware_header *hdr; unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); - adev->vce.saved_bo = kmalloc(size, GFP_KERNEL); + adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL); if (!adev->vce.saved_bo) return -ENOMEM; @@ -474,6 +475,11 @@ static int vce_v4_0_sw_init(void *handle) return r; } + + r = amdgpu_vce_entity_init(adev); + if (r) + return r; + r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; @@ -490,7 +496,7 @@ static int vce_v4_0_sw_fini(void *handle) amdgpu_virt_free_mm_table(adev); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - kfree(adev->vce.saved_bo); + kvfree(adev->vce.saved_bo); adev->vce.saved_bo = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 2ce91a748c40..072371ef5975 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -100,6 +100,16 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + DRM_INFO("PSP loading VCN firmware\n"); + } + r = amdgpu_vcn_resume(adev); if (r) return r; @@ -265,26 +275,38 @@ static int vcn_v1_0_resume(void *handle) static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) { uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + uint32_t offset; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->vcn.gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + offset = size; + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + size)); + lower_32_bits(adev->vcn.gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + size)); + upper_32_bits(adev->vcn.gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); + lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); + upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40)); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 52f2c01349e3..9bfb040352e9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -98,10 +98,16 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name, */ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc) { - struct dm_crtc_state *crtc_state = to_dm_crtc_state(crtc->state); - struct dc_stream_state *stream_state = crtc_state->stream; + struct dm_crtc_state *crtc_state; + struct dc_stream_state *stream_state; uint32_t crcs[3]; + if (crtc == NULL) + return; + + crtc_state = to_dm_crtc_state(crtc->state); + stream_state = crtc_state->stream; + /* Early return if CRC capture is not enabled. */ if (!crtc_state->crc_enabled) return; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index a38e7ad36a7e..326b3e99b7e4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1812,6 +1812,8 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) bool is_vga_mode = (stream->timing.h_addressable == 640) && (stream->timing.v_addressable == 480); + if (stream->phy_pix_clk == 0) + stream->phy_pix_clk = stream->timing.pix_clk_khz; if (stream->phy_pix_clk > 340000) is_over_340mhz = true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4ca41d6e3bcf..1644f2a946b0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -268,24 +268,30 @@ bool resource_construct( return true; } +static int find_matching_clock_source( + const struct resource_pool *pool, + struct clock_source *clock_source) +{ + + int i; + for (i = 0; i < pool->clk_src_count; i++) { + if (pool->clock_sources[i] == clock_source) + return i; + } + return -1; +} void resource_unreference_clock_source( struct resource_context *res_ctx, const struct resource_pool *pool, struct clock_source *clock_source) { - int i; - - for (i = 0; i < pool->clk_src_count; i++) { - if (pool->clock_sources[i] != clock_source) - continue; + int i = find_matching_clock_source(pool, clock_source); + if (i > -1) res_ctx->clock_source_ref_count[i]--; - break; - } - if (pool->dp_clock_source == clock_source) res_ctx->dp_clock_source_ref_count--; } @@ -295,19 +301,31 @@ void resource_reference_clock_source( const struct resource_pool *pool, struct clock_source *clock_source) { - int i; - for (i = 0; i < pool->clk_src_count; i++) { - if (pool->clock_sources[i] != clock_source) - continue; + int i = find_matching_clock_source(pool, clock_source); + if (i > -1) res_ctx->clock_source_ref_count[i]++; - break; - } if (pool->dp_clock_source == clock_source) res_ctx->dp_clock_source_ref_count++; } +int resource_get_clock_source_reference( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct clock_source *clock_source) +{ + int i = find_matching_clock_source(pool, clock_source); + + if (i > -1) + return res_ctx->clock_source_ref_count[i]; + + if (pool->dp_clock_source == clock_source) + return res_ctx->dp_clock_source_ref_count; + + return -1; +} + bool resource_are_streams_timing_synchronizable( struct dc_stream_state *stream1, struct dc_stream_state *stream2) @@ -372,11 +390,11 @@ static bool is_sharable_clk_src( return false; if (dc_is_hdmi_signal(pipe_with_clk_src->stream->signal) - && dc_is_dvi_signal(pipe->stream->signal)) + && dc_is_dual_link_signal(pipe->stream->signal)) return false; if (dc_is_hdmi_signal(pipe->stream->signal) - && dc_is_dvi_signal(pipe_with_clk_src->stream->signal)) + && dc_is_dual_link_signal(pipe_with_clk_src->stream->signal)) return false; if (!resource_are_streams_timing_synchronizable( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 752b3d62e793..eff7d22d78fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -930,7 +930,7 @@ void dce110_link_encoder_enable_tmds_output( enum bp_result result; /* Enable the PHY */ - + cntl.connector_obj_id = enc110->base.connector; cntl.action = TRANSMITTER_CONTROL_ENABLE; cntl.engine_id = enc->preferred_engine; cntl.transmitter = enc110->base.transmitter; @@ -972,7 +972,7 @@ void dce110_link_encoder_enable_dp_output( * We need to set number of lanes manually. */ configure_encoder(enc110, link_settings); - + cntl.connector_obj_id = enc110->base.connector; cntl.action = TRANSMITTER_CONTROL_ENABLE; cntl.engine_id = enc->preferred_engine; cntl.transmitter = enc110->base.transmitter; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 1d98e3678b04..5450d4d38e8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1908,7 +1908,9 @@ static void dce110_reset_hw_ctx_wrap( pipe_ctx_old->plane_res.mi->funcs->free_mem_input( pipe_ctx_old->plane_res.mi, dc->current_state->stream_count); - if (old_clk) + if (old_clk && 0 == resource_get_clock_source_reference(&context->res_ctx, + dc->res_pool, + old_clk)) old_clk->funcs->cs_power_down(old_clk); dc->hwss.disable_plane(dc, pipe_ctx_old); diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 2ea490f8482e..04b866f0fa1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -772,7 +772,7 @@ void dce120_tg_set_blank(struct timing_generator *tg, CRTC_REG_SET( CRTC0_CRTC_DOUBLE_BUFFER_CONTROL, - CRTC_BLANK_DATA_DOUBLE_BUFFER_EN, 0); + CRTC_BLANK_DATA_DOUBLE_BUFFER_EN, 1); if (enable_blanking) CRTC_REG_SET(CRTC0_CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1); diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index e92facbd038f..5b321008b0b5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -103,6 +103,11 @@ void resource_reference_clock_source( const struct resource_pool *pool, struct clock_source *clock_source); +int resource_get_clock_source_reference( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct clock_source *clock_source); + bool resource_are_streams_timing_synchronizable( struct dc_stream_state *stream1, struct dc_stream_state *stream2); diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c index 3f7396caad48..5d2f0d548469 100644 --- a/drivers/gpu/drm/i2c/tda9950.c +++ b/drivers/gpu/drm/i2c/tda9950.c @@ -76,9 +76,12 @@ struct tda9950_priv { static int tda9950_write_range(struct i2c_client *client, u8 addr, u8 *p, int cnt) { struct i2c_msg msg; - u8 buf[cnt + 1]; + u8 buf[CEC_MAX_MSG_SIZE + 3]; int ret; + if (WARN_ON(cnt > sizeof(buf) - 1)) + return -EINVAL; + buf[0] = addr; memcpy(buf + 1, p, cnt); diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 380eeb2a0e83..fe754022e356 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -131,7 +131,7 @@ void intel_vgpu_write_fence(struct intel_vgpu *vgpu, assert_rpm_wakelock_held(dev_priv); - if (WARN_ON(fence > vgpu_fence_sz(vgpu))) + if (WARN_ON(fence >= vgpu_fence_sz(vgpu))) return; reg = vgpu->fence.regs[fence]; diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 45e89b1e0481..a614db310ea2 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -874,7 +874,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { gvt_vgpu_err("%s access to non-render register (%x)\n", cmd, offset); - return 0; + return -EBADRQC; } if (is_shadowed_mmio(offset)) { diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 712f9d14e720..46c8b720e336 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -176,6 +176,7 @@ static const struct intel_gvt_ops intel_gvt_ops = { .emulate_mmio_write = intel_vgpu_emulate_mmio_write, .vgpu_create = intel_gvt_create_vgpu, .vgpu_destroy = intel_gvt_destroy_vgpu, + .vgpu_release = intel_gvt_release_vgpu, .vgpu_reset = intel_gvt_reset_vgpu, .vgpu_activate = intel_gvt_activate_vgpu, .vgpu_deactivate = intel_gvt_deactivate_vgpu, @@ -315,6 +316,11 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) if (WARN_ON(!gvt)) return; + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); + intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); + intel_gvt_cleanup_vgpu_type_groups(gvt); + intel_gvt_clean_vgpu_types(gvt); + intel_gvt_debugfs_clean(gvt); clean_service_thread(gvt); intel_gvt_clean_cmd_parser(gvt); @@ -322,17 +328,10 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) intel_gvt_clean_workload_scheduler(gvt); intel_gvt_clean_gtt(gvt); intel_gvt_clean_irq(gvt); - intel_gvt_clean_mmio_info(gvt); intel_gvt_free_firmware(gvt); - - intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); - intel_gvt_cleanup_vgpu_type_groups(gvt); - intel_gvt_clean_vgpu_types(gvt); - + intel_gvt_clean_mmio_info(gvt); idr_destroy(&gvt->vgpu_idr); - intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); - kfree(dev_priv->gvt); dev_priv->gvt = NULL; } diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 9a9671522774..31f6cdbe5c42 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -486,6 +486,7 @@ void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu); struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_type *type); void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_release_vgpu(struct intel_vgpu *vgpu); void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, unsigned int engine_mask); void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu); @@ -563,7 +564,8 @@ struct intel_gvt_ops { unsigned int); struct intel_vgpu *(*vgpu_create)(struct intel_gvt *, struct intel_vgpu_type *); - void (*vgpu_destroy)(struct intel_vgpu *); + void (*vgpu_destroy)(struct intel_vgpu *vgpu); + void (*vgpu_release)(struct intel_vgpu *vgpu); void (*vgpu_reset)(struct intel_vgpu *); void (*vgpu_activate)(struct intel_vgpu *); void (*vgpu_deactivate)(struct intel_vgpu *); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 4d2f53ae9f0f..a45f46d8537f 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -43,6 +43,8 @@ #include <linux/mdev.h> #include <linux/debugfs.h> +#include <linux/nospec.h> + #include "i915_drv.h" #include "gvt.h" @@ -187,14 +189,14 @@ static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, /* Setup DMA mapping. */ *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL); - ret = dma_mapping_error(dev, *dma_addr); - if (ret) { + if (dma_mapping_error(dev, *dma_addr)) { gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n", page_to_pfn(page), ret); gvt_unpin_guest_page(vgpu, gfn, size); + return -ENOMEM; } - return ret; + return 0; } static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, @@ -666,7 +668,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1)) return; - intel_gvt_ops->vgpu_deactivate(vgpu); + intel_gvt_ops->vgpu_release(vgpu); ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); @@ -1139,7 +1141,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct vfio_region_info info; struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; - int i, ret; + unsigned int i; + int ret; struct vfio_region_info_cap_sparse_mmap *sparse = NULL; size_t size; int nr_areas = 1; @@ -1224,6 +1227,10 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) return -EINVAL; + info.index = + array_index_nospec(info.index, + VFIO_PCI_NUM_REGIONS + + vgpu->vdev.num_regions); i = info.index - VFIO_PCI_NUM_REGIONS; @@ -1250,11 +1257,13 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, &sparse->header, sizeof(*sparse) + (sparse->nr_areas * sizeof(*sparse->areas))); - kfree(sparse); - if (ret) + if (ret) { + kfree(sparse); return ret; + } break; default: + kfree(sparse); return -EINVAL; } } @@ -1270,6 +1279,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, sizeof(info), caps.buf, caps.size)) { kfree(caps.buf); + kfree(sparse); return -EFAULT; } info.cap_offset = sizeof(info); @@ -1278,6 +1288,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, kfree(caps.buf); } + kfree(sparse); return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { @@ -1615,7 +1626,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev) kvmgt_protect_table_init(info); gvt_cache_init(vgpu); - mutex_init(&vgpu->dmabuf_lock); init_completion(&vgpu->vblank_done); info->track_node.track_write = kvmgt_page_track_write; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index b0e566956b8d..43aa058e29fc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -784,7 +784,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload) kunmap(page); } -static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) +void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, + unsigned long engine_mask) { struct intel_vgpu_submission *s = &vgpu->submission; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -879,7 +880,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * cleaned up during the resetting process later, so doing * the workload clean up here doesn't have any impact. **/ - clean_workloads(vgpu, ENGINE_MASK(ring_id)); + intel_vgpu_clean_workloads(vgpu, ENGINE_MASK(ring_id)); } workload->complete(workload); @@ -1081,7 +1082,7 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, if (!s->active) return; - clean_workloads(vgpu, engine_mask); + intel_vgpu_clean_workloads(vgpu, engine_mask); s->ops->reset(vgpu, engine_mask); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 21eddab4a9cd..ca5529d0e48e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -158,4 +158,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload); +void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, + unsigned long engine_mask); + #endif diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index f6fa916517c3..a4e8e3cf74fd 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -222,7 +222,7 @@ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) * @vgpu: virtual GPU * * This function is called when user wants to deactivate a virtual GPU. - * All virtual GPU runtime information will be destroyed. + * The virtual GPU will be stopped. * */ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) @@ -238,12 +238,30 @@ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) } intel_vgpu_stop_schedule(vgpu); - intel_vgpu_dmabuf_cleanup(vgpu); mutex_unlock(&vgpu->vgpu_lock); } /** + * intel_gvt_release_vgpu - release a virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to release a virtual GPU. + * The virtual GPU will be stopped and all runtime information will be + * destroyed. + * + */ +void intel_gvt_release_vgpu(struct intel_vgpu *vgpu) +{ + intel_gvt_deactivate_vgpu(vgpu); + + mutex_lock(&vgpu->vgpu_lock); + intel_vgpu_clean_workloads(vgpu, ALL_ENGINES); + intel_vgpu_dmabuf_cleanup(vgpu); + mutex_unlock(&vgpu->vgpu_lock); +} + +/** * intel_gvt_destroy_vgpu - destroy a virtual GPU * @vgpu: virtual GPU * @@ -361,6 +379,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; mutex_init(&vgpu->vgpu_lock); + mutex_init(&vgpu->dmabuf_lock); INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL); idr_init(&vgpu->object_idr); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 91e7483228e1..08ec7446282e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9201,6 +9201,7 @@ enum skl_power_gate { #define TRANS_MSA_10_BPC (2 << 5) #define TRANS_MSA_12_BPC (3 << 5) #define TRANS_MSA_16_BPC (4 << 5) +#define TRANS_MSA_CEA_RANGE (1 << 3) /* LCPLL Control */ #define LCPLL_CTL _MMIO(0x130040) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 39d66f8493fa..8761513f3532 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1685,6 +1685,10 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) WARN_ON(transcoder_is_dsi(cpu_transcoder)); temp = TRANS_MSA_SYNC_CLK; + + if (crtc_state->limited_color_range) + temp |= TRANS_MSA_CEA_RANGE; + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 33faad3197fe..6a8f27d0a742 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -387,8 +387,18 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) mmio = RING_HWS_PGA(engine->mmio_base); } - if (INTEL_GEN(dev_priv) >= 6) - I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); + if (INTEL_GEN(dev_priv) >= 6) { + u32 mask = ~0u; + + /* + * Keep the render interrupt unmasked as this papers over + * lost interrupts following a reset. + */ + if (engine->id == RCS) + mask &= ~BIT(0); + + I915_WRITE(RING_HWSTAM(engine->mmio_base), mask); + } I915_WRITE(mmio, engine->status_page.ggtt_offset); POSTING_READ(mmio); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index b892ca8396e8..50b39aa4ffb8 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -359,8 +359,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) } /* Note callers must have acquired the PUNIT->PMIC bus, before calling this. */ -static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, - bool restore) +static unsigned int +intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv) { unsigned long irqflags; struct intel_uncore_forcewake_domain *domain; @@ -412,20 +412,11 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, dev_priv->uncore.funcs.force_wake_put(dev_priv, fw); fw_domains_reset(dev_priv, dev_priv->uncore.fw_domains); - - if (restore) { /* If reset with a user forcewake, try to restore */ - if (fw) - dev_priv->uncore.funcs.force_wake_get(dev_priv, fw); - - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) - dev_priv->uncore.fifo_count = - fifo_free_entries(dev_priv); - } - - if (!restore) - assert_forcewakes_inactive(dev_priv); + assert_forcewakes_inactive(dev_priv); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + + return fw; /* track the lost user forcewake domains */ } static u64 gen9_edram_size(struct drm_i915_private *dev_priv) @@ -534,7 +525,7 @@ check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) } static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, - bool restore_forcewake) + unsigned int restore_forcewake) { /* clear out unclaimed reg detection bit */ if (check_for_unclaimed_mmio(dev_priv)) @@ -549,7 +540,17 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, } iosf_mbi_punit_acquire(); - intel_uncore_forcewake_reset(dev_priv, restore_forcewake); + intel_uncore_forcewake_reset(dev_priv); + if (restore_forcewake) { + spin_lock_irq(&dev_priv->uncore.lock); + dev_priv->uncore.funcs.force_wake_get(dev_priv, + restore_forcewake); + + if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) + dev_priv->uncore.fifo_count = + fifo_free_entries(dev_priv); + spin_unlock_irq(&dev_priv->uncore.lock); + } iosf_mbi_punit_release(); } @@ -558,13 +559,18 @@ void intel_uncore_suspend(struct drm_i915_private *dev_priv) iosf_mbi_punit_acquire(); iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( &dev_priv->uncore.pmic_bus_access_nb); - intel_uncore_forcewake_reset(dev_priv, false); + dev_priv->uncore.fw_domains_saved = + intel_uncore_forcewake_reset(dev_priv); iosf_mbi_punit_release(); } void intel_uncore_resume_early(struct drm_i915_private *dev_priv) { - __intel_uncore_early_sanitize(dev_priv, true); + unsigned int restore_forcewake; + + restore_forcewake = fetch_and_zero(&dev_priv->uncore.fw_domains_saved); + __intel_uncore_early_sanitize(dev_priv, restore_forcewake); + iosf_mbi_register_pmic_bus_access_notifier( &dev_priv->uncore.pmic_bus_access_nb); i915_check_and_clear_faults(dev_priv); @@ -1545,7 +1551,7 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) intel_uncore_edram_detect(dev_priv); intel_uncore_fw_domains_init(dev_priv); - __intel_uncore_early_sanitize(dev_priv, false); + __intel_uncore_early_sanitize(dev_priv, 0); dev_priv->uncore.unclaimed_mmio_check = 1; dev_priv->uncore.pmic_bus_access_nb.notifier_call = @@ -1632,7 +1638,7 @@ void intel_uncore_fini(struct drm_i915_private *dev_priv) iosf_mbi_punit_acquire(); iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( &dev_priv->uncore.pmic_bus_access_nb); - intel_uncore_forcewake_reset(dev_priv, false); + intel_uncore_forcewake_reset(dev_priv); iosf_mbi_punit_release(); } diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 2fbe93178fb2..e5e157d288de 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -104,6 +104,7 @@ struct intel_uncore { enum forcewake_domains fw_domains; enum forcewake_domains fw_domains_active; + enum forcewake_domains fw_domains_saved; /* user domains saved for S3 */ u32 fw_set; u32 fw_clear; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index c69cbd5aed52..ba4f322d56b8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -499,6 +499,19 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, return err == expected; } +static void disable_retire_worker(struct drm_i915_private *i915) +{ + mutex_lock(&i915->drm.struct_mutex); + if (!i915->gt.active_requests++) { + intel_runtime_pm_get(i915); + i915_gem_unpark(i915); + intel_runtime_pm_put(i915); + } + mutex_unlock(&i915->drm.struct_mutex); + cancel_delayed_work_sync(&i915->gt.retire_work); + cancel_delayed_work_sync(&i915->gt.idle_work); +} + static int igt_mmap_offset_exhaustion(void *arg) { struct drm_i915_private *i915 = arg; @@ -509,12 +522,7 @@ static int igt_mmap_offset_exhaustion(void *arg) int loop, err; /* Disable background reaper */ - mutex_lock(&i915->drm.struct_mutex); - if (!i915->gt.active_requests++) - i915_gem_unpark(i915); - mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.retire_work); - cancel_delayed_work_sync(&i915->gt.idle_work); + disable_retire_worker(i915); GEM_BUG_ON(!i915->gt.awake); /* Trim the device mmap space to only a page */ diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 47bc5b2ddb56..81d9d31042a9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -160,7 +160,7 @@ static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_pri i915_reg_t reg = { offset }; iosf_mbi_punit_acquire(); - intel_uncore_forcewake_reset(dev_priv, false); + intel_uncore_forcewake_reset(dev_priv); iosf_mbi_punit_release(); check_for_unclaimed_mmio(dev_priv); diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 7c773e003663..261fa79d456d 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -11,6 +11,9 @@ msm-y := \ adreno/a5xx_gpu.o \ adreno/a5xx_power.o \ adreno/a5xx_preempt.o \ + adreno/a6xx_gpu.o \ + adreno/a6xx_gmu.o \ + adreno/a6xx_hfi.o \ hdmi/hdmi.o \ hdmi/hdmi_audio.o \ hdmi/hdmi_bridge.o \ diff --git a/drivers/gpu/drm/msm/adreno/a2xx.xml.h b/drivers/gpu/drm/msm/adreno/a2xx.xml.h index 644374c7b3e0..4bff0a740c7d 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a2xx.xml.h @@ -8,17 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42393 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 101627 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -84,13 +86,12 @@ enum a2xx_sq_surfaceformat { FMT_5_5_5_1 = 13, FMT_8_8_8_8_A = 14, FMT_4_4_4_4 = 15, - FMT_10_11_11 = 16, - FMT_11_11_10 = 17, + FMT_8_8_8 = 16, FMT_DXT1 = 18, FMT_DXT2_3 = 19, FMT_DXT4_5 = 20, + FMT_10_10_10_2 = 21, FMT_24_8 = 22, - FMT_24_8_FLOAT = 23, FMT_16 = 24, FMT_16_16 = 25, FMT_16_16_16_16 = 26, @@ -106,29 +107,23 @@ enum a2xx_sq_surfaceformat { FMT_32_FLOAT = 36, FMT_32_32_FLOAT = 37, FMT_32_32_32_32_FLOAT = 38, - FMT_32_AS_8 = 39, - FMT_32_AS_8_8 = 40, - FMT_16_MPEG = 41, - FMT_16_16_MPEG = 42, - FMT_8_INTERLACED = 43, - FMT_32_AS_8_INTERLACED = 44, - FMT_32_AS_8_8_INTERLACED = 45, - FMT_16_INTERLACED = 46, - FMT_16_MPEG_INTERLACED = 47, - FMT_16_16_MPEG_INTERLACED = 48, + FMT_ATI_TC_RGB = 39, + FMT_ATI_TC_RGBA = 40, + FMT_ATI_TC_555_565_RGB = 41, + FMT_ATI_TC_555_565_RGBA = 42, + FMT_ATI_TC_RGBA_INTERP = 43, + FMT_ATI_TC_555_565_RGBA_INTERP = 44, + FMT_ETC1_RGBA_INTERP = 46, + FMT_ETC1_RGB = 47, + FMT_ETC1_RGBA = 48, FMT_DXN = 49, - FMT_8_8_8_8_AS_16_16_16_16 = 50, - FMT_DXT1_AS_16_16_16_16 = 51, - FMT_DXT2_3_AS_16_16_16_16 = 52, - FMT_DXT4_5_AS_16_16_16_16 = 53, + FMT_2_3_3 = 51, FMT_2_10_10_10_AS_16_16_16_16 = 54, - FMT_10_11_11_AS_16_16_16_16 = 55, - FMT_11_11_10_AS_16_16_16_16 = 56, + FMT_10_10_10_2_AS_16_16_16_16 = 55, FMT_32_32_32_FLOAT = 57, FMT_DXT3A = 58, FMT_DXT5A = 59, FMT_CTX1 = 60, - FMT_DXT3A_AS_1_1_1_1 = 61, }; enum a2xx_sq_ps_vtx_mode { diff --git a/drivers/gpu/drm/msm/adreno/a3xx.xml.h b/drivers/gpu/drm/msm/adreno/a3xx.xml.h index 663a73216926..645a19aef399 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a3xx.xml.h @@ -8,17 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42393 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 101627 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/adreno/a4xx.xml.h b/drivers/gpu/drm/msm/adreno/a4xx.xml.h index 1a14f4a40b9c..19565e87aa7b 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a4xx.xml.h @@ -8,17 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42393 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 101627 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -263,12 +265,6 @@ enum a4xx_depth_format { DEPTH4_32 = 3, }; -enum a4xx_tess_spacing { - EQUAL_SPACING = 0, - ODD_SPACING = 2, - EVEN_SPACING = 3, -}; - enum a4xx_ccu_perfcounter_select { CCU_BUSY_CYCLES = 0, CCU_RB_DEPTH_RETURN_STALL = 2, @@ -3544,12 +3540,13 @@ static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) { return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; } -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00 +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 #define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_VS_CONTROL_REG_SSBO_ENABLE 0x00008000 #define A4XX_HLSQ_VS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 @@ -3571,12 +3568,13 @@ static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) { return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; } -#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00 +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 #define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_FS_CONTROL_REG_SSBO_ENABLE 0x00008000 #define A4XX_HLSQ_FS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 @@ -3598,12 +3596,13 @@ static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(uint32_t val) { return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK; } -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00 +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 #define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_HS_CONTROL_REG_SSBO_ENABLE 0x00008000 #define A4XX_HLSQ_HS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 @@ -3625,12 +3624,13 @@ static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(uint32_t val) { return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK; } -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00 +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 #define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_DS_CONTROL_REG_SSBO_ENABLE 0x00008000 #define A4XX_HLSQ_DS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 @@ -3652,12 +3652,13 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(uint32_t val) { return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK; } -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00 +#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 #define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_GS_CONTROL_REG_SSBO_ENABLE 0x00008000 #define A4XX_HLSQ_GS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 @@ -3672,23 +3673,103 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK; } -#define REG_A4XX_HLSQ_CS_CONTROL 0x000023ca +#define REG_A4XX_HLSQ_CS_CONTROL_REG 0x000023ca +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_CS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK; +} #define REG_A4XX_HLSQ_CL_NDRANGE_0 0x000023cd +#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK; +} #define REG_A4XX_HLSQ_CL_NDRANGE_1 0x000023ce +#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_1_SIZE_X(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT) & A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK; +} #define REG_A4XX_HLSQ_CL_NDRANGE_2 0x000023cf #define REG_A4XX_HLSQ_CL_NDRANGE_3 0x000023d0 +#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT) & A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK; +} #define REG_A4XX_HLSQ_CL_NDRANGE_4 0x000023d1 #define REG_A4XX_HLSQ_CL_NDRANGE_5 0x000023d2 +#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT) & A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK; +} #define REG_A4XX_HLSQ_CL_NDRANGE_6 0x000023d3 #define REG_A4XX_HLSQ_CL_CONTROL_0 0x000023d4 +#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK 0x000000ff +#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK; +} +#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK 0xff000000 +#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK; +} #define REG_A4XX_HLSQ_CL_CONTROL_1 0x000023d5 @@ -4087,5 +4168,71 @@ static inline uint32_t A4XX_TEX_CONST_4_BASE(uint32_t val) #define REG_A4XX_TEX_CONST_7 0x00000007 +#define REG_A4XX_SSBO_0_0 0x00000000 +#define A4XX_SSBO_0_0_BASE__MASK 0xffffffe0 +#define A4XX_SSBO_0_0_BASE__SHIFT 5 +static inline uint32_t A4XX_SSBO_0_0_BASE(uint32_t val) +{ + return ((val >> 5) << A4XX_SSBO_0_0_BASE__SHIFT) & A4XX_SSBO_0_0_BASE__MASK; +} + +#define REG_A4XX_SSBO_0_1 0x00000001 +#define A4XX_SSBO_0_1_PITCH__MASK 0x003fffff +#define A4XX_SSBO_0_1_PITCH__SHIFT 0 +static inline uint32_t A4XX_SSBO_0_1_PITCH(uint32_t val) +{ + return ((val) << A4XX_SSBO_0_1_PITCH__SHIFT) & A4XX_SSBO_0_1_PITCH__MASK; +} + +#define REG_A4XX_SSBO_0_2 0x00000002 +#define A4XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 +#define A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 +static inline uint32_t A4XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 12) << A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A4XX_SSBO_0_2_ARRAY_PITCH__MASK; +} + +#define REG_A4XX_SSBO_0_3 0x00000003 +#define A4XX_SSBO_0_3_CPP__MASK 0x0000003f +#define A4XX_SSBO_0_3_CPP__SHIFT 0 +static inline uint32_t A4XX_SSBO_0_3_CPP(uint32_t val) +{ + return ((val) << A4XX_SSBO_0_3_CPP__SHIFT) & A4XX_SSBO_0_3_CPP__MASK; +} + +#define REG_A4XX_SSBO_1_0 0x00000000 +#define A4XX_SSBO_1_0_CPP__MASK 0x0000001f +#define A4XX_SSBO_1_0_CPP__SHIFT 0 +static inline uint32_t A4XX_SSBO_1_0_CPP(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_0_CPP__SHIFT) & A4XX_SSBO_1_0_CPP__MASK; +} +#define A4XX_SSBO_1_0_FMT__MASK 0x0000ff00 +#define A4XX_SSBO_1_0_FMT__SHIFT 8 +static inline uint32_t A4XX_SSBO_1_0_FMT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_SSBO_1_0_FMT__SHIFT) & A4XX_SSBO_1_0_FMT__MASK; +} +#define A4XX_SSBO_1_0_WIDTH__MASK 0xffff0000 +#define A4XX_SSBO_1_0_WIDTH__SHIFT 16 +static inline uint32_t A4XX_SSBO_1_0_WIDTH(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_0_WIDTH__SHIFT) & A4XX_SSBO_1_0_WIDTH__MASK; +} + +#define REG_A4XX_SSBO_1_1 0x00000001 +#define A4XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff +#define A4XX_SSBO_1_1_HEIGHT__SHIFT 0 +static inline uint32_t A4XX_SSBO_1_1_HEIGHT(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_1_HEIGHT__SHIFT) & A4XX_SSBO_1_1_HEIGHT__MASK; +} +#define A4XX_SSBO_1_1_DEPTH__MASK 0xffff0000 +#define A4XX_SSBO_1_1_DEPTH__SHIFT 16 +static inline uint32_t A4XX_SSBO_1_1_DEPTH(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_1_DEPTH__SHIFT) & A4XX_SSBO_1_1_DEPTH__MASK; +} + #endif /* A4XX_XML */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx.xml.h b/drivers/gpu/drm/msm/adreno/a5xx.xml.h index e0e6711f4f78..182d37ff3794 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a5xx.xml.h @@ -8,17 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42393 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 101627 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -119,6 +121,11 @@ enum a5xx_vtx_fmt { VFMT5_8_8_8_8_SNORM = 50, VFMT5_8_8_8_8_UINT = 51, VFMT5_8_8_8_8_SINT = 52, + VFMT5_10_10_10_2_UNORM = 54, + VFMT5_10_10_10_2_SNORM = 57, + VFMT5_10_10_10_2_UINT = 58, + VFMT5_10_10_10_2_SINT = 59, + VFMT5_11_11_10_FLOAT = 66, VFMT5_16_16_UNORM = 67, VFMT5_16_16_SNORM = 68, VFMT5_16_16_FLOAT = 69, @@ -204,14 +211,45 @@ enum a5xx_tex_fmt { TFMT5_32_32_FLOAT = 103, TFMT5_32_32_UINT = 104, TFMT5_32_32_SINT = 105, + TFMT5_32_32_32_UINT = 114, + TFMT5_32_32_32_SINT = 115, + TFMT5_32_32_32_FLOAT = 116, TFMT5_32_32_32_32_FLOAT = 130, TFMT5_32_32_32_32_UINT = 131, TFMT5_32_32_32_32_SINT = 132, TFMT5_X8Z24_UNORM = 160, + TFMT5_ETC2_RG11_UNORM = 171, + TFMT5_ETC2_RG11_SNORM = 172, + TFMT5_ETC2_R11_UNORM = 173, + TFMT5_ETC2_R11_SNORM = 174, + TFMT5_ETC1 = 175, + TFMT5_ETC2_RGB8 = 176, + TFMT5_ETC2_RGBA8 = 177, + TFMT5_ETC2_RGB8A1 = 178, + TFMT5_DXT1 = 179, + TFMT5_DXT3 = 180, + TFMT5_DXT5 = 181, TFMT5_RGTC1_UNORM = 183, TFMT5_RGTC1_SNORM = 184, TFMT5_RGTC2_UNORM = 187, TFMT5_RGTC2_SNORM = 188, + TFMT5_BPTC_UFLOAT = 190, + TFMT5_BPTC_FLOAT = 191, + TFMT5_BPTC = 192, + TFMT5_ASTC_4x4 = 193, + TFMT5_ASTC_5x4 = 194, + TFMT5_ASTC_5x5 = 195, + TFMT5_ASTC_6x5 = 196, + TFMT5_ASTC_6x6 = 197, + TFMT5_ASTC_8x5 = 198, + TFMT5_ASTC_8x6 = 199, + TFMT5_ASTC_8x8 = 200, + TFMT5_ASTC_10x5 = 201, + TFMT5_ASTC_10x6 = 202, + TFMT5_ASTC_10x8 = 203, + TFMT5_ASTC_10x10 = 204, + TFMT5_ASTC_12x10 = 205, + TFMT5_ASTC_12x12 = 206, }; enum a5xx_tex_fetchsize { @@ -239,7 +277,7 @@ enum a5xx_blit_buf { BLIT_MRT6 = 6, BLIT_MRT7 = 7, BLIT_ZS = 8, - BLIT_Z32 = 9, + BLIT_S = 9, }; enum a5xx_cp_perfcounter_select { @@ -899,6 +937,12 @@ enum a5xx_tex_type { #define REG_A5XX_CP_DRAW_STATE_DATA 0x0000080c +#define REG_A5XX_CP_ME_NRT_ADDR_LO 0x0000080d + +#define REG_A5XX_CP_ME_NRT_ADDR_HI 0x0000080e + +#define REG_A5XX_CP_ME_NRT_DATA 0x00000810 + #define REG_A5XX_CP_CRASH_SCRIPT_BASE_LO 0x00000817 #define REG_A5XX_CP_CRASH_SCRIPT_BASE_HI 0x00000818 @@ -2072,9 +2116,17 @@ static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) #define REG_A5XX_PC_MODE_CNTL 0x00000d02 -#define REG_A5XX_UNKNOWN_0D08 0x00000d08 +#define REG_A5XX_PC_INDEX_BUF_LO 0x00000d04 + +#define REG_A5XX_PC_INDEX_BUF_HI 0x00000d05 + +#define REG_A5XX_PC_START_INDEX 0x00000d06 -#define REG_A5XX_UNKNOWN_0D09 0x00000d09 +#define REG_A5XX_PC_MAX_INDEX 0x00000d07 + +#define REG_A5XX_PC_TESSFACTOR_ADDR_LO 0x00000d08 + +#define REG_A5XX_PC_TESSFACTOR_ADDR_HI 0x00000d09 #define REG_A5XX_PC_PERFCTR_PC_SEL_0 0x00000d10 @@ -2327,6 +2379,14 @@ static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) #define REG_A5XX_VBIF_PERF_CNT_EN3 0x000030c3 +#define REG_A5XX_VBIF_PERF_CNT_CLR0 0x000030c8 + +#define REG_A5XX_VBIF_PERF_CNT_CLR1 0x000030c9 + +#define REG_A5XX_VBIF_PERF_CNT_CLR2 0x000030ca + +#define REG_A5XX_VBIF_PERF_CNT_CLR3 0x000030cb + #define REG_A5XX_VBIF_PERF_CNT_SEL0 0x000030d0 #define REG_A5XX_VBIF_PERF_CNT_SEL1 0x000030d1 @@ -2590,6 +2650,7 @@ static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) #define REG_A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0x0000c557 #define REG_A5XX_GRAS_CL_CNTL 0x0000e000 +#define A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z 0x00000040 #define REG_A5XX_UNKNOWN_E001 0x0000e001 @@ -2700,7 +2761,7 @@ static inline uint32_t A5XX_GRAS_SU_POINT_SIZE(float val) return ((((int32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_SIZE__SHIFT) & A5XX_GRAS_SU_POINT_SIZE__MASK; } -#define REG_A5XX_UNKNOWN_E093 0x0000e093 +#define REG_A5XX_GRAS_SU_LAYERED 0x0000e093 #define REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL 0x0000e094 #define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 @@ -2936,7 +2997,9 @@ static inline uint32_t A5XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val #define A5XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 #define REG_A5XX_RB_RENDER_CONTROL1 0x0000e145 +#define A5XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 #define A5XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 +#define A5XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000004 #define REG_A5XX_RB_FS_OUTPUT_CNTL 0x0000e146 #define A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f @@ -3002,6 +3065,13 @@ static inline uint32_t REG_A5XX_RB_MRT(uint32_t i0) { return 0x0000e150 + 0x7*i0 static inline uint32_t REG_A5XX_RB_MRT_CONTROL(uint32_t i0) { return 0x0000e150 + 0x7*i0; } #define A5XX_RB_MRT_CONTROL_BLEND 0x00000001 #define A5XX_RB_MRT_CONTROL_BLEND2 0x00000002 +#define A5XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 +#define A5XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 +#define A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 +static inline uint32_t A5XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A5XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} #define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 #define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 static inline uint32_t A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) @@ -3060,6 +3130,12 @@ static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a5xx_tile_mode { return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; } +#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00001800 +#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 11 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; +} #define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 #define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) @@ -3223,6 +3299,7 @@ static inline uint32_t A5XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) return ((val) << A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; } #define A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 +#define A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 #define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 #define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 static inline uint32_t A5XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) @@ -3369,7 +3446,25 @@ static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) return ((val) << A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; } -#define REG_A5XX_UNKNOWN_E1C7 0x0000e1c7 +#define REG_A5XX_RB_STENCILREFMASK_BF 0x0000e1c7 +#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} #define REG_A5XX_RB_WINDOW_OFFSET 0x0000e1d0 #define A5XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 @@ -3428,6 +3523,7 @@ static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_Y(uint32_t val) } #define REG_A5XX_RB_RESOLVE_CNTL_3 0x0000e213 +#define A5XX_RB_RESOLVE_CNTL_3_TILED 0x00000001 #define REG_A5XX_RB_BLIT_DST_LO 0x0000e214 @@ -3459,6 +3555,7 @@ static inline uint32_t A5XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) #define REG_A5XX_RB_CLEAR_CNTL 0x0000e21c #define A5XX_RB_CLEAR_CNTL_FAST_CLEAR 0x00000002 +#define A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE 0x00000004 #define A5XX_RB_CLEAR_CNTL_MASK__MASK 0x000000f0 #define A5XX_RB_CLEAR_CNTL_MASK__SHIFT 4 static inline uint32_t A5XX_RB_CLEAR_CNTL_MASK(uint32_t val) @@ -3627,22 +3724,69 @@ static inline uint32_t A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) { return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; } +#define A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART 0x00000100 +#define A5XX_PC_PRIMITIVE_CNTL_COUNT_PRIMITIVES 0x00000200 #define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST 0x00000400 #define REG_A5XX_PC_PRIM_VTX_CNTL 0x0000e385 #define A5XX_PC_PRIM_VTX_CNTL_PSIZE 0x00000800 #define REG_A5XX_PC_RASTER_CNTL 0x0000e388 +#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK; +} +#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK; +} +#define A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE 0x00000040 #define REG_A5XX_UNKNOWN_E389 0x0000e389 #define REG_A5XX_PC_RESTART_INDEX 0x0000e38c -#define REG_A5XX_UNKNOWN_E38D 0x0000e38d +#define REG_A5XX_PC_GS_LAYERED 0x0000e38d #define REG_A5XX_PC_GS_PARAM 0x0000e38e +#define A5XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff +#define A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 +static inline uint32_t A5XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) +{ + return ((val) << A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A5XX_PC_GS_PARAM_MAX_VERTICES__MASK; +} +#define A5XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 +#define A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 +static inline uint32_t A5XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) +{ + return ((val) << A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A5XX_PC_GS_PARAM_INVOCATIONS__MASK; +} +#define A5XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 +#define A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 +static inline uint32_t A5XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A5XX_PC_GS_PARAM_PRIMTYPE__MASK; +} #define REG_A5XX_PC_HS_PARAM 0x0000e38f +#define A5XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f +#define A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 +static inline uint32_t A5XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) +{ + return ((val) << A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A5XX_PC_HS_PARAM_VERTICES_OUT__MASK; +} +#define A5XX_PC_HS_PARAM_SPACING__MASK 0x00600000 +#define A5XX_PC_HS_PARAM_SPACING__SHIFT 21 +static inline uint32_t A5XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) +{ + return ((val) << A5XX_PC_HS_PARAM_SPACING__SHIFT) & A5XX_PC_HS_PARAM_SPACING__MASK; +} +#define A5XX_PC_HS_PARAM_CW 0x00800000 +#define A5XX_PC_HS_PARAM_CONNECTED 0x01000000 #define REG_A5XX_PC_POWER_CNTL 0x0000e3b0 @@ -3667,10 +3811,40 @@ static inline uint32_t A5XX_VFD_CONTROL_1_REGID4INST(uint32_t val) { return ((val) << A5XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A5XX_VFD_CONTROL_1_REGID4INST__MASK; } +#define A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 +#define A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK; +} #define REG_A5XX_VFD_CONTROL_2 0x0000e402 +#define A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff +#define A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK; +} #define REG_A5XX_VFD_CONTROL_3 0x0000e403 +#define A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 +#define A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK; +} +#define A5XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 +#define A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSX__MASK; +} +#define A5XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 +#define A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSY__MASK; +} #define REG_A5XX_VFD_CONTROL_4 0x0000e404 @@ -3700,12 +3874,18 @@ static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val) return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK; } #define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 -#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x3ff00000 +#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 #define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val) { return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK; } +#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 +#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 +static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A5XX_VFD_DECODE_INSTR_SWAP__MASK; +} #define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000 #define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000 @@ -3960,6 +4140,7 @@ static inline uint32_t A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) #define REG_A5XX_SP_BLEND_CNTL 0x0000e5c9 #define A5XX_SP_BLEND_CNTL_ENABLED 0x00000001 #define A5XX_SP_BLEND_CNTL_UNK8 0x00000100 +#define A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 #define REG_A5XX_SP_FS_OUTPUT_CNTL 0x0000e5ca #define A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f @@ -4001,16 +4182,12 @@ static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val) { return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; } +#define A5XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 +#define A5XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 #define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 #define REG_A5XX_UNKNOWN_E5DB 0x0000e5db -#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 - -#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 - -#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 - #define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 #define A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00000008 #define A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 3 @@ -4039,7 +4216,39 @@ static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) return ((val) << A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; } -#define REG_A5XX_UNKNOWN_E600 0x0000e600 +#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 + +#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 + +#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 + +#define REG_A5XX_SP_HS_CTRL_REG0 0x0000e600 +#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; +} #define REG_A5XX_UNKNOWN_E602 0x0000e602 @@ -4047,13 +4256,67 @@ static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) #define REG_A5XX_SP_HS_OBJ_START_HI 0x0000e604 +#define REG_A5XX_SP_DS_CTRL_REG0 0x0000e610 +#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; +} + #define REG_A5XX_UNKNOWN_E62B 0x0000e62b #define REG_A5XX_SP_DS_OBJ_START_LO 0x0000e62c #define REG_A5XX_SP_DS_OBJ_START_HI 0x0000e62d -#define REG_A5XX_UNKNOWN_E640 0x0000e640 +#define REG_A5XX_SP_GS_CTRL_REG0 0x0000e640 +#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_GS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; +} #define REG_A5XX_UNKNOWN_E65B 0x0000e65b @@ -4173,6 +4436,18 @@ static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) { return ((val) << A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; } +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; +} +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; +} #define REG_A5XX_HLSQ_CONTROL_3_REG 0x0000e787 #define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff @@ -4375,34 +4650,52 @@ static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) } #define REG_A5XX_HLSQ_CS_NDRANGE_1 0x0000e7b1 -#define A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_SIZE_X(uint32_t val) +#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) { - return ((val) << A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_SIZE_X__MASK; + return ((val) << A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; } #define REG_A5XX_HLSQ_CS_NDRANGE_2 0x0000e7b2 +#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; +} #define REG_A5XX_HLSQ_CS_NDRANGE_3 0x0000e7b3 -#define A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y(uint32_t val) +#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) { - return ((val) << A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_SIZE_Y__MASK; + return ((val) << A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; } #define REG_A5XX_HLSQ_CS_NDRANGE_4 0x0000e7b4 +#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; +} #define REG_A5XX_HLSQ_CS_NDRANGE_5 0x0000e7b5 -#define A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z(uint32_t val) +#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) { - return ((val) << A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_SIZE_Z__MASK; + return ((val) << A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; } #define REG_A5XX_HLSQ_CS_NDRANGE_6 0x0000e7b6 +#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; +} #define REG_A5XX_HLSQ_CS_CNTL_0 0x0000e7b7 #define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff @@ -4468,6 +4761,8 @@ static inline uint32_t A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) #define REG_A5XX_HLSQ_CS_INSTRLEN 0x0000e7dd +#define REG_A5XX_RB_2D_BLIT_CNTL 0x00002100 + #define REG_A5XX_RB_2D_SRC_SOLID_DW0 0x00002101 #define REG_A5XX_RB_2D_SRC_SOLID_DW1 0x00002102 @@ -4483,12 +4778,19 @@ static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) { return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK; } +#define A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK; +} #define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 #define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) { return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK; } +#define A5XX_RB_2D_SRC_INFO_FLAGS 0x00001000 #define REG_A5XX_RB_2D_SRC_LO 0x00002108 @@ -4515,12 +4817,19 @@ static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) { return ((val) << A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; } +#define A5XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_DST_INFO_TILE_MODE__MASK; +} #define A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 #define A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) { return ((val) << A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; } +#define A5XX_RB_2D_DST_INFO_FLAGS 0x00001000 #define REG_A5XX_RB_2D_DST_LO 0x00002111 @@ -4548,6 +4857,8 @@ static inline uint32_t A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(uint32_t val) #define REG_A5XX_RB_2D_DST_FLAGS_HI 0x00002144 +#define REG_A5XX_GRAS_2D_BLIT_CNTL 0x00002180 + #define REG_A5XX_GRAS_2D_SRC_INFO 0x00002181 #define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff #define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 @@ -4555,12 +4866,19 @@ static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt va { return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK; } +#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_GRAS_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK; +} #define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 #define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) { return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK; } +#define A5XX_GRAS_2D_SRC_INFO_FLAGS 0x00001000 #define REG_A5XX_GRAS_2D_DST_INFO 0x00002182 #define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff @@ -4569,12 +4887,19 @@ static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt va { return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK; } +#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK; +} #define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 #define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT 10 static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) { return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK; } +#define A5XX_GRAS_2D_DST_INFO_FLAGS 0x00001000 #define REG_A5XX_UNKNOWN_2100 0x00002100 @@ -4698,6 +5023,12 @@ static inline uint32_t A5XX_TEX_CONST_0_MIPLVLS(uint32_t val) { return ((val) << A5XX_TEX_CONST_0_MIPLVLS__SHIFT) & A5XX_TEX_CONST_0_MIPLVLS__MASK; } +#define A5XX_TEX_CONST_0_SAMPLES__MASK 0x00300000 +#define A5XX_TEX_CONST_0_SAMPLES__SHIFT 20 +static inline uint32_t A5XX_TEX_CONST_0_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TEX_CONST_0_SAMPLES__SHIFT) & A5XX_TEX_CONST_0_SAMPLES__MASK; +} #define A5XX_TEX_CONST_0_FMT__MASK 0x3fc00000 #define A5XX_TEX_CONST_0_FMT__SHIFT 22 static inline uint32_t A5XX_TEX_CONST_0_FMT(enum a5xx_tex_fmt val) @@ -4788,5 +5119,81 @@ static inline uint32_t A5XX_TEX_CONST_5_DEPTH(uint32_t val) #define REG_A5XX_TEX_CONST_11 0x0000000b +#define REG_A5XX_SSBO_0_0 0x00000000 +#define A5XX_SSBO_0_0_BASE_LO__MASK 0xffffffe0 +#define A5XX_SSBO_0_0_BASE_LO__SHIFT 5 +static inline uint32_t A5XX_SSBO_0_0_BASE_LO(uint32_t val) +{ + return ((val >> 5) << A5XX_SSBO_0_0_BASE_LO__SHIFT) & A5XX_SSBO_0_0_BASE_LO__MASK; +} + +#define REG_A5XX_SSBO_0_1 0x00000001 +#define A5XX_SSBO_0_1_PITCH__MASK 0x003fffff +#define A5XX_SSBO_0_1_PITCH__SHIFT 0 +static inline uint32_t A5XX_SSBO_0_1_PITCH(uint32_t val) +{ + return ((val) << A5XX_SSBO_0_1_PITCH__SHIFT) & A5XX_SSBO_0_1_PITCH__MASK; +} + +#define REG_A5XX_SSBO_0_2 0x00000002 +#define A5XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 +#define A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 +static inline uint32_t A5XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 12) << A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A5XX_SSBO_0_2_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_SSBO_0_3 0x00000003 +#define A5XX_SSBO_0_3_CPP__MASK 0x0000003f +#define A5XX_SSBO_0_3_CPP__SHIFT 0 +static inline uint32_t A5XX_SSBO_0_3_CPP(uint32_t val) +{ + return ((val) << A5XX_SSBO_0_3_CPP__SHIFT) & A5XX_SSBO_0_3_CPP__MASK; +} + +#define REG_A5XX_SSBO_1_0 0x00000000 +#define A5XX_SSBO_1_0_FMT__MASK 0x0000ff00 +#define A5XX_SSBO_1_0_FMT__SHIFT 8 +static inline uint32_t A5XX_SSBO_1_0_FMT(enum a5xx_tex_fmt val) +{ + return ((val) << A5XX_SSBO_1_0_FMT__SHIFT) & A5XX_SSBO_1_0_FMT__MASK; +} +#define A5XX_SSBO_1_0_WIDTH__MASK 0xffff0000 +#define A5XX_SSBO_1_0_WIDTH__SHIFT 16 +static inline uint32_t A5XX_SSBO_1_0_WIDTH(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_0_WIDTH__SHIFT) & A5XX_SSBO_1_0_WIDTH__MASK; +} + +#define REG_A5XX_SSBO_1_1 0x00000001 +#define A5XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff +#define A5XX_SSBO_1_1_HEIGHT__SHIFT 0 +static inline uint32_t A5XX_SSBO_1_1_HEIGHT(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_1_HEIGHT__SHIFT) & A5XX_SSBO_1_1_HEIGHT__MASK; +} +#define A5XX_SSBO_1_1_DEPTH__MASK 0xffff0000 +#define A5XX_SSBO_1_1_DEPTH__SHIFT 16 +static inline uint32_t A5XX_SSBO_1_1_DEPTH(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_1_DEPTH__SHIFT) & A5XX_SSBO_1_1_DEPTH__MASK; +} + +#define REG_A5XX_SSBO_2_0 0x00000000 +#define A5XX_SSBO_2_0_BASE_LO__MASK 0xffffffff +#define A5XX_SSBO_2_0_BASE_LO__SHIFT 0 +static inline uint32_t A5XX_SSBO_2_0_BASE_LO(uint32_t val) +{ + return ((val) << A5XX_SSBO_2_0_BASE_LO__SHIFT) & A5XX_SSBO_2_0_BASE_LO__MASK; +} + +#define REG_A5XX_SSBO_2_1 0x00000001 +#define A5XX_SSBO_2_1_BASE_HI__MASK 0xffffffff +#define A5XX_SSBO_2_1_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_SSBO_2_1_BASE_HI(uint32_t val) +{ + return ((val) << A5XX_SSBO_2_1_BASE_HI__SHIFT) & A5XX_SSBO_2_1_BASE_HI__MASK; +} + #endif /* A5XX_XML */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index bd84f71d27d8..ab1d9308c311 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -11,6 +11,7 @@ * */ +#include <linux/kernel.h> #include <linux/types.h> #include <linux/cpumask.h> #include <linux/qcom_scm.h> @@ -20,6 +21,7 @@ #include <linux/pm_opp.h> #include <linux/nvmem-consumer.h> #include <linux/iopoll.h> +#include <linux/slab.h> #include "msm_gem.h" #include "msm_mmu.h" #include "a5xx_gpu.h" @@ -92,12 +94,13 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, mem_region, mem_phys, mem_size, NULL); } else { - char newname[strlen("qcom/") + strlen(fwname) + 1]; + char *newname; - sprintf(newname, "qcom/%s", fwname); + newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname); ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID, mem_region, mem_phys, mem_size, NULL); + kfree(newname); } if (ret) goto out; diff --git a/drivers/gpu/drm/msm/adreno/a6xx.xml.h b/drivers/gpu/drm/msm/adreno/a6xx.xml.h new file mode 100644 index 000000000000..87eab51f7000 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx.xml.h @@ -0,0 +1,4562 @@ +#ifndef A6XX_XML +#define A6XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42393 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 101627 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark <robdclark@gmail.com> (robclark) +- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +enum a6xx_color_fmt { + RB6_A8_UNORM = 2, + RB6_R8_UNORM = 3, + RB6_R8_SNORM = 4, + RB6_R8_UINT = 5, + RB6_R8_SINT = 6, + RB6_R4G4B4A4_UNORM = 8, + RB6_R5G5B5A1_UNORM = 10, + RB6_R5G6B5_UNORM = 14, + RB6_R8G8_UNORM = 15, + RB6_R8G8_SNORM = 16, + RB6_R8G8_UINT = 17, + RB6_R8G8_SINT = 18, + RB6_R16_UNORM = 21, + RB6_R16_SNORM = 22, + RB6_R16_FLOAT = 23, + RB6_R16_UINT = 24, + RB6_R16_SINT = 25, + RB6_R8G8B8A8_UNORM = 48, + RB6_R8G8B8_UNORM = 49, + RB6_R8G8B8A8_SNORM = 50, + RB6_R8G8B8A8_UINT = 51, + RB6_R8G8B8A8_SINT = 52, + RB6_R10G10B10A2_UNORM = 55, + RB6_R10G10B10A2_UINT = 58, + RB6_R11G11B10_FLOAT = 66, + RB6_R16G16_UNORM = 67, + RB6_R16G16_SNORM = 68, + RB6_R16G16_FLOAT = 69, + RB6_R16G16_UINT = 70, + RB6_R16G16_SINT = 71, + RB6_R32_FLOAT = 74, + RB6_R32_UINT = 75, + RB6_R32_SINT = 76, + RB6_R16G16B16A16_UNORM = 96, + RB6_R16G16B16A16_SNORM = 97, + RB6_R16G16B16A16_FLOAT = 98, + RB6_R16G16B16A16_UINT = 99, + RB6_R16G16B16A16_SINT = 100, + RB6_R32G32_FLOAT = 103, + RB6_R32G32_UINT = 104, + RB6_R32G32_SINT = 105, + RB6_R32G32B32A32_FLOAT = 130, + RB6_R32G32B32A32_UINT = 131, + RB6_R32G32B32A32_SINT = 132, + RB6_X8Z24_UNORM = 160, +}; + +enum a6xx_tile_mode { + TILE6_LINEAR = 0, + TILE6_2 = 2, + TILE6_3 = 3, +}; + +enum a6xx_vtx_fmt { + VFMT6_8_UNORM = 3, + VFMT6_8_SNORM = 4, + VFMT6_8_UINT = 5, + VFMT6_8_SINT = 6, + VFMT6_8_8_UNORM = 15, + VFMT6_8_8_SNORM = 16, + VFMT6_8_8_UINT = 17, + VFMT6_8_8_SINT = 18, + VFMT6_16_UNORM = 21, + VFMT6_16_SNORM = 22, + VFMT6_16_FLOAT = 23, + VFMT6_16_UINT = 24, + VFMT6_16_SINT = 25, + VFMT6_8_8_8_UNORM = 33, + VFMT6_8_8_8_SNORM = 34, + VFMT6_8_8_8_UINT = 35, + VFMT6_8_8_8_SINT = 36, + VFMT6_8_8_8_8_UNORM = 48, + VFMT6_8_8_8_8_SNORM = 50, + VFMT6_8_8_8_8_UINT = 51, + VFMT6_8_8_8_8_SINT = 52, + VFMT6_10_10_10_2_UNORM = 54, + VFMT6_10_10_10_2_SNORM = 57, + VFMT6_10_10_10_2_UINT = 58, + VFMT6_10_10_10_2_SINT = 59, + VFMT6_11_11_10_FLOAT = 66, + VFMT6_16_16_UNORM = 67, + VFMT6_16_16_SNORM = 68, + VFMT6_16_16_FLOAT = 69, + VFMT6_16_16_UINT = 70, + VFMT6_16_16_SINT = 71, + VFMT6_32_UNORM = 72, + VFMT6_32_SNORM = 73, + VFMT6_32_FLOAT = 74, + VFMT6_32_UINT = 75, + VFMT6_32_SINT = 76, + VFMT6_32_FIXED = 77, + VFMT6_16_16_16_UNORM = 88, + VFMT6_16_16_16_SNORM = 89, + VFMT6_16_16_16_FLOAT = 90, + VFMT6_16_16_16_UINT = 91, + VFMT6_16_16_16_SINT = 92, + VFMT6_16_16_16_16_UNORM = 96, + VFMT6_16_16_16_16_SNORM = 97, + VFMT6_16_16_16_16_FLOAT = 98, + VFMT6_16_16_16_16_UINT = 99, + VFMT6_16_16_16_16_SINT = 100, + VFMT6_32_32_UNORM = 101, + VFMT6_32_32_SNORM = 102, + VFMT6_32_32_FLOAT = 103, + VFMT6_32_32_UINT = 104, + VFMT6_32_32_SINT = 105, + VFMT6_32_32_FIXED = 106, + VFMT6_32_32_32_UNORM = 112, + VFMT6_32_32_32_SNORM = 113, + VFMT6_32_32_32_UINT = 114, + VFMT6_32_32_32_SINT = 115, + VFMT6_32_32_32_FLOAT = 116, + VFMT6_32_32_32_FIXED = 117, + VFMT6_32_32_32_32_UNORM = 128, + VFMT6_32_32_32_32_SNORM = 129, + VFMT6_32_32_32_32_FLOAT = 130, + VFMT6_32_32_32_32_UINT = 131, + VFMT6_32_32_32_32_SINT = 132, + VFMT6_32_32_32_32_FIXED = 133, +}; + +enum a6xx_tex_fmt { + TFMT6_A8_UNORM = 2, + TFMT6_8_UNORM = 3, + TFMT6_8_SNORM = 4, + TFMT6_8_UINT = 5, + TFMT6_8_SINT = 6, + TFMT6_4_4_4_4_UNORM = 8, + TFMT6_5_5_5_1_UNORM = 10, + TFMT6_5_6_5_UNORM = 14, + TFMT6_8_8_UNORM = 15, + TFMT6_8_8_SNORM = 16, + TFMT6_8_8_UINT = 17, + TFMT6_8_8_SINT = 18, + TFMT6_L8_A8_UNORM = 19, + TFMT6_16_UNORM = 21, + TFMT6_16_SNORM = 22, + TFMT6_16_FLOAT = 23, + TFMT6_16_UINT = 24, + TFMT6_16_SINT = 25, + TFMT6_8_8_8_8_UNORM = 48, + TFMT6_8_8_8_UNORM = 49, + TFMT6_8_8_8_8_SNORM = 50, + TFMT6_8_8_8_8_UINT = 51, + TFMT6_8_8_8_8_SINT = 52, + TFMT6_9_9_9_E5_FLOAT = 53, + TFMT6_10_10_10_2_UNORM = 54, + TFMT6_10_10_10_2_UINT = 58, + TFMT6_11_11_10_FLOAT = 66, + TFMT6_16_16_UNORM = 67, + TFMT6_16_16_SNORM = 68, + TFMT6_16_16_FLOAT = 69, + TFMT6_16_16_UINT = 70, + TFMT6_16_16_SINT = 71, + TFMT6_32_FLOAT = 74, + TFMT6_32_UINT = 75, + TFMT6_32_SINT = 76, + TFMT6_16_16_16_16_UNORM = 96, + TFMT6_16_16_16_16_SNORM = 97, + TFMT6_16_16_16_16_FLOAT = 98, + TFMT6_16_16_16_16_UINT = 99, + TFMT6_16_16_16_16_SINT = 100, + TFMT6_32_32_FLOAT = 103, + TFMT6_32_32_UINT = 104, + TFMT6_32_32_SINT = 105, + TFMT6_32_32_32_UINT = 114, + TFMT6_32_32_32_SINT = 115, + TFMT6_32_32_32_FLOAT = 116, + TFMT6_32_32_32_32_FLOAT = 130, + TFMT6_32_32_32_32_UINT = 131, + TFMT6_32_32_32_32_SINT = 132, + TFMT6_X8Z24_UNORM = 160, + TFMT6_ETC2_RG11_UNORM = 171, + TFMT6_ETC2_RG11_SNORM = 172, + TFMT6_ETC2_R11_UNORM = 173, + TFMT6_ETC2_R11_SNORM = 174, + TFMT6_ETC1 = 175, + TFMT6_ETC2_RGB8 = 176, + TFMT6_ETC2_RGBA8 = 177, + TFMT6_ETC2_RGB8A1 = 178, + TFMT6_DXT1 = 179, + TFMT6_DXT3 = 180, + TFMT6_DXT5 = 181, + TFMT6_RGTC1_UNORM = 183, + TFMT6_RGTC1_SNORM = 184, + TFMT6_RGTC2_UNORM = 187, + TFMT6_RGTC2_SNORM = 188, + TFMT6_BPTC_UFLOAT = 190, + TFMT6_BPTC_FLOAT = 191, + TFMT6_BPTC = 192, + TFMT6_ASTC_4x4 = 193, + TFMT6_ASTC_5x4 = 194, + TFMT6_ASTC_5x5 = 195, + TFMT6_ASTC_6x5 = 196, + TFMT6_ASTC_6x6 = 197, + TFMT6_ASTC_8x5 = 198, + TFMT6_ASTC_8x6 = 199, + TFMT6_ASTC_8x8 = 200, + TFMT6_ASTC_10x5 = 201, + TFMT6_ASTC_10x6 = 202, + TFMT6_ASTC_10x8 = 203, + TFMT6_ASTC_10x10 = 204, + TFMT6_ASTC_12x10 = 205, + TFMT6_ASTC_12x12 = 206, +}; + +enum a6xx_tex_fetchsize { + TFETCH6_1_BYTE = 0, + TFETCH6_2_BYTE = 1, + TFETCH6_4_BYTE = 2, + TFETCH6_8_BYTE = 3, + TFETCH6_16_BYTE = 4, +}; + +enum a6xx_depth_format { + DEPTH6_NONE = 0, + DEPTH6_16 = 1, + DEPTH6_24_8 = 2, + DEPTH6_32 = 4, +}; + +enum a6xx_cp_perfcounter_select { + PERF_CP_ALWAYS_COUNT = 0, +}; + +enum a6xx_tex_filter { + A6XX_TEX_NEAREST = 0, + A6XX_TEX_LINEAR = 1, + A6XX_TEX_ANISO = 2, +}; + +enum a6xx_tex_clamp { + A6XX_TEX_REPEAT = 0, + A6XX_TEX_CLAMP_TO_EDGE = 1, + A6XX_TEX_MIRROR_REPEAT = 2, + A6XX_TEX_CLAMP_TO_BORDER = 3, + A6XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a6xx_tex_aniso { + A6XX_TEX_ANISO_1 = 0, + A6XX_TEX_ANISO_2 = 1, + A6XX_TEX_ANISO_4 = 2, + A6XX_TEX_ANISO_8 = 3, + A6XX_TEX_ANISO_16 = 4, +}; + +enum a6xx_tex_swiz { + A6XX_TEX_X = 0, + A6XX_TEX_Y = 1, + A6XX_TEX_Z = 2, + A6XX_TEX_W = 3, + A6XX_TEX_ZERO = 4, + A6XX_TEX_ONE = 5, +}; + +enum a6xx_tex_type { + A6XX_TEX_1D = 0, + A6XX_TEX_2D = 1, + A6XX_TEX_CUBE = 2, + A6XX_TEX_3D = 3, +}; + +#define A6XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 +#define A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR 0x00000002 +#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW 0x00000040 +#define A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 +#define A6XX_RBBM_INT_0_MASK_CP_SW 0x00000100 +#define A6XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 +#define A6XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 +#define A6XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 +#define A6XX_RBBM_INT_0_MASK_CP_RB 0x00008000 +#define A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 +#define A6XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 +#define A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 +#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT 0x00800000 +#define A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 +#define A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 +#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 +#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 +#define A6XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 +#define A6XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 +#define A6XX_CP_INT_CP_OPCODE_ERROR 0x00000001 +#define A6XX_CP_INT_CP_UCODE_ERROR 0x00000002 +#define A6XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 +#define A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 +#define A6XX_CP_INT_CP_AHB_ERROR 0x00000020 +#define A6XX_CP_INT_CP_VSD_PARITY_ERROR 0x00000040 +#define A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR 0x00000080 +#define REG_A6XX_CP_RB_BASE 0x00000800 + +#define REG_A6XX_CP_RB_BASE_HI 0x00000801 + +#define REG_A6XX_CP_RB_CNTL 0x00000802 + +#define REG_A6XX_CP_RB_RPTR_ADDR_LO 0x00000804 + +#define REG_A6XX_CP_RB_RPTR_ADDR_HI 0x00000805 + +#define REG_A6XX_CP_RB_RPTR 0x00000806 + +#define REG_A6XX_CP_RB_WPTR 0x00000807 + +#define REG_A6XX_CP_SQE_CNTL 0x00000808 + +#define REG_A6XX_CP_HW_FAULT 0x00000821 + +#define REG_A6XX_CP_INTERRUPT_STATUS 0x00000823 + +#define REG_A6XX_CP_PROTECT_STATUS 0x00000824 + +#define REG_A6XX_CP_SQE_INSTR_BASE_LO 0x00000830 + +#define REG_A6XX_CP_SQE_INSTR_BASE_HI 0x00000831 + +#define REG_A6XX_CP_MISC_CNTL 0x00000840 + +#define REG_A6XX_CP_ROQ_THRESHOLDS_1 0x000008c1 + +#define REG_A6XX_CP_ROQ_THRESHOLDS_2 0x000008c2 + +#define REG_A6XX_CP_MEM_POOL_SIZE 0x000008c3 + +#define REG_A6XX_CP_CHICKEN_DBG 0x00000841 + +#define REG_A6XX_CP_ADDR_MODE_CNTL 0x00000842 + +#define REG_A6XX_CP_DBG_ECO_CNTL 0x00000843 + +#define REG_A6XX_CP_PROTECT_CNTL 0x0000084f + +static inline uint32_t REG_A6XX_CP_SCRATCH(uint32_t i0) { return 0x00000883 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000883 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_PROTECT(uint32_t i0) { return 0x00000850 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000850 + 0x1*i0; } +#define A6XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0003ffff +#define A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A6XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A6XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A6XX_CP_PROTECT_REG_MASK_LEN__MASK 0x7ffc0000 +#define A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT 18 +static inline uint32_t A6XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A6XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A6XX_CP_PROTECT_REG_READ 0x80000000 + +#define REG_A6XX_CP_CONTEXT_SWITCH_CNTL 0x000008a0 + +#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x000008a1 + +#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x000008a2 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x000008a3 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x000008a4 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x000008a5 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x000008a6 + +#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x000008a7 + +#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x000008a8 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_0 0x000008d0 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_1 0x000008d1 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_2 0x000008d2 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_3 0x000008d3 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_4 0x000008d4 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_5 0x000008d5 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_6 0x000008d6 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_7 0x000008d7 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_8 0x000008d8 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_9 0x000008d9 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_10 0x000008da + +#define REG_A6XX_CP_PERFCTR_CP_SEL_11 0x000008db + +#define REG_A6XX_CP_PERFCTR_CP_SEL_12 0x000008dc + +#define REG_A6XX_CP_PERFCTR_CP_SEL_13 0x000008dd + +#define REG_A6XX_CP_CRASH_SCRIPT_BASE_LO 0x00000900 + +#define REG_A6XX_CP_CRASH_SCRIPT_BASE_HI 0x00000901 + +#define REG_A6XX_CP_CRASH_DUMP_CNTL 0x00000902 + +#define REG_A6XX_CP_CRASH_DUMP_STATUS 0x00000903 + +#define REG_A6XX_CP_SQE_STAT_ADDR 0x00000908 + +#define REG_A6XX_CP_SQE_STAT_DATA 0x00000909 + +#define REG_A6XX_CP_DRAW_STATE_ADDR 0x0000090a + +#define REG_A6XX_CP_DRAW_STATE_DATA 0x0000090b + +#define REG_A6XX_CP_ROQ_DBG_ADDR 0x0000090c + +#define REG_A6XX_CP_ROQ_DBG_DATA 0x0000090d + +#define REG_A6XX_CP_MEM_POOL_DBG_ADDR 0x0000090e + +#define REG_A6XX_CP_MEM_POOL_DBG_DATA 0x0000090f + +#define REG_A6XX_CP_SQE_UCODE_DBG_ADDR 0x00000910 + +#define REG_A6XX_CP_SQE_UCODE_DBG_DATA 0x00000911 + +#define REG_A6XX_CP_IB1_BASE 0x00000928 + +#define REG_A6XX_CP_IB1_BASE_HI 0x00000929 + +#define REG_A6XX_CP_IB1_REM_SIZE 0x0000092a + +#define REG_A6XX_CP_IB2_BASE 0x0000092b + +#define REG_A6XX_CP_IB2_BASE_HI 0x0000092c + +#define REG_A6XX_CP_IB2_REM_SIZE 0x0000092d + +#define REG_A6XX_CP_ALWAYS_ON_COUNTER_LO 0x00000980 + +#define REG_A6XX_CP_ALWAYS_ON_COUNTER_HI 0x00000981 + +#define REG_A6XX_CP_AHB_CNTL 0x0000098d + +#define REG_A6XX_CP_APERTURE_CNTL_HOST 0x00000a00 + +#define REG_A6XX_CP_APERTURE_CNTL_CD 0x00000a03 + +#define REG_A6XX_VSC_ADDR_MODE_CNTL 0x00000c01 + +#define REG_A6XX_RBBM_INT_0_STATUS 0x00000201 + +#define REG_A6XX_RBBM_STATUS 0x00000210 +#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x00800000 +#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x00400000 +#define A6XX_RBBM_STATUS_HLSQ_BUSY 0x00200000 +#define A6XX_RBBM_STATUS_VSC_BUSY 0x00100000 +#define A6XX_RBBM_STATUS_TPL1_BUSY 0x00080000 +#define A6XX_RBBM_STATUS_SP_BUSY 0x00040000 +#define A6XX_RBBM_STATUS_UCHE_BUSY 0x00020000 +#define A6XX_RBBM_STATUS_VPC_BUSY 0x00010000 +#define A6XX_RBBM_STATUS_VFD_BUSY 0x00008000 +#define A6XX_RBBM_STATUS_TESS_BUSY 0x00004000 +#define A6XX_RBBM_STATUS_PC_VSD_BUSY 0x00002000 +#define A6XX_RBBM_STATUS_PC_DCALL_BUSY 0x00001000 +#define A6XX_RBBM_STATUS_COM_DCOM_BUSY 0x00000800 +#define A6XX_RBBM_STATUS_LRZ_BUSY 0x00000400 +#define A6XX_RBBM_STATUS_A2D_BUSY 0x00000200 +#define A6XX_RBBM_STATUS_CCU_BUSY 0x00000100 +#define A6XX_RBBM_STATUS_RB_BUSY 0x00000080 +#define A6XX_RBBM_STATUS_RAS_BUSY 0x00000040 +#define A6XX_RBBM_STATUS_TSE_BUSY 0x00000020 +#define A6XX_RBBM_STATUS_VBIF_BUSY 0x00000010 +#define A6XX_RBBM_STATUS_GFX_DBGC_BUSY 0x00000008 +#define A6XX_RBBM_STATUS_CP_BUSY 0x00000004 +#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CP_MASTER 0x00000002 +#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER 0x00000001 + +#define REG_A6XX_RBBM_STATUS3 0x00000213 + +#define REG_A6XX_RBBM_VBIF_GX_RESET_STATUS 0x00000215 + +#define REG_A6XX_RBBM_PERFCTR_CP_0_LO 0x00000400 + +#define REG_A6XX_RBBM_PERFCTR_CP_0_HI 0x00000401 + +#define REG_A6XX_RBBM_PERFCTR_CP_1_LO 0x00000402 + +#define REG_A6XX_RBBM_PERFCTR_CP_1_HI 0x00000403 + +#define REG_A6XX_RBBM_PERFCTR_CP_2_LO 0x00000404 + +#define REG_A6XX_RBBM_PERFCTR_CP_2_HI 0x00000405 + +#define REG_A6XX_RBBM_PERFCTR_CP_3_LO 0x00000406 + +#define REG_A6XX_RBBM_PERFCTR_CP_3_HI 0x00000407 + +#define REG_A6XX_RBBM_PERFCTR_CP_4_LO 0x00000408 + +#define REG_A6XX_RBBM_PERFCTR_CP_4_HI 0x00000409 + +#define REG_A6XX_RBBM_PERFCTR_CP_5_LO 0x0000040a + +#define REG_A6XX_RBBM_PERFCTR_CP_5_HI 0x0000040b + +#define REG_A6XX_RBBM_PERFCTR_CP_6_LO 0x0000040c + +#define REG_A6XX_RBBM_PERFCTR_CP_6_HI 0x0000040d + +#define REG_A6XX_RBBM_PERFCTR_CP_7_LO 0x0000040e + +#define REG_A6XX_RBBM_PERFCTR_CP_7_HI 0x0000040f + +#define REG_A6XX_RBBM_PERFCTR_CP_8_LO 0x00000410 + +#define REG_A6XX_RBBM_PERFCTR_CP_8_HI 0x00000411 + +#define REG_A6XX_RBBM_PERFCTR_CP_9_LO 0x00000412 + +#define REG_A6XX_RBBM_PERFCTR_CP_9_HI 0x00000413 + +#define REG_A6XX_RBBM_PERFCTR_CP_10_LO 0x00000414 + +#define REG_A6XX_RBBM_PERFCTR_CP_10_HI 0x00000415 + +#define REG_A6XX_RBBM_PERFCTR_CP_11_LO 0x00000416 + +#define REG_A6XX_RBBM_PERFCTR_CP_11_HI 0x00000417 + +#define REG_A6XX_RBBM_PERFCTR_CP_12_LO 0x00000418 + +#define REG_A6XX_RBBM_PERFCTR_CP_12_HI 0x00000419 + +#define REG_A6XX_RBBM_PERFCTR_CP_13_LO 0x0000041a + +#define REG_A6XX_RBBM_PERFCTR_CP_13_HI 0x0000041b + +#define REG_A6XX_RBBM_PERFCTR_RBBM_0_LO 0x0000041c + +#define REG_A6XX_RBBM_PERFCTR_RBBM_0_HI 0x0000041d + +#define REG_A6XX_RBBM_PERFCTR_RBBM_1_LO 0x0000041e + +#define REG_A6XX_RBBM_PERFCTR_RBBM_1_HI 0x0000041f + +#define REG_A6XX_RBBM_PERFCTR_RBBM_2_LO 0x00000420 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_2_HI 0x00000421 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_3_LO 0x00000422 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_3_HI 0x00000423 + +#define REG_A6XX_RBBM_PERFCTR_PC_0_LO 0x00000424 + +#define REG_A6XX_RBBM_PERFCTR_PC_0_HI 0x00000425 + +#define REG_A6XX_RBBM_PERFCTR_PC_1_LO 0x00000426 + +#define REG_A6XX_RBBM_PERFCTR_PC_1_HI 0x00000427 + +#define REG_A6XX_RBBM_PERFCTR_PC_2_LO 0x00000428 + +#define REG_A6XX_RBBM_PERFCTR_PC_2_HI 0x00000429 + +#define REG_A6XX_RBBM_PERFCTR_PC_3_LO 0x0000042a + +#define REG_A6XX_RBBM_PERFCTR_PC_3_HI 0x0000042b + +#define REG_A6XX_RBBM_PERFCTR_PC_4_LO 0x0000042c + +#define REG_A6XX_RBBM_PERFCTR_PC_4_HI 0x0000042d + +#define REG_A6XX_RBBM_PERFCTR_PC_5_LO 0x0000042e + +#define REG_A6XX_RBBM_PERFCTR_PC_5_HI 0x0000042f + +#define REG_A6XX_RBBM_PERFCTR_PC_6_LO 0x00000430 + +#define REG_A6XX_RBBM_PERFCTR_PC_6_HI 0x00000431 + +#define REG_A6XX_RBBM_PERFCTR_PC_7_LO 0x00000432 + +#define REG_A6XX_RBBM_PERFCTR_PC_7_HI 0x00000433 + +#define REG_A6XX_RBBM_PERFCTR_VFD_0_LO 0x00000434 + +#define REG_A6XX_RBBM_PERFCTR_VFD_0_HI 0x00000435 + +#define REG_A6XX_RBBM_PERFCTR_VFD_1_LO 0x00000436 + +#define REG_A6XX_RBBM_PERFCTR_VFD_1_HI 0x00000437 + +#define REG_A6XX_RBBM_PERFCTR_VFD_2_LO 0x00000438 + +#define REG_A6XX_RBBM_PERFCTR_VFD_2_HI 0x00000439 + +#define REG_A6XX_RBBM_PERFCTR_VFD_3_LO 0x0000043a + +#define REG_A6XX_RBBM_PERFCTR_VFD_3_HI 0x0000043b + +#define REG_A6XX_RBBM_PERFCTR_VFD_4_LO 0x0000043c + +#define REG_A6XX_RBBM_PERFCTR_VFD_4_HI 0x0000043d + +#define REG_A6XX_RBBM_PERFCTR_VFD_5_LO 0x0000043e + +#define REG_A6XX_RBBM_PERFCTR_VFD_5_HI 0x0000043f + +#define REG_A6XX_RBBM_PERFCTR_VFD_6_LO 0x00000440 + +#define REG_A6XX_RBBM_PERFCTR_VFD_6_HI 0x00000441 + +#define REG_A6XX_RBBM_PERFCTR_VFD_7_LO 0x00000442 + +#define REG_A6XX_RBBM_PERFCTR_VFD_7_HI 0x00000443 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_LO 0x00000444 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_HI 0x00000445 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_LO 0x00000446 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_HI 0x00000447 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_LO 0x00000448 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_HI 0x00000449 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_LO 0x0000044a + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_HI 0x0000044b + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_LO 0x0000044c + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_HI 0x0000044d + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_LO 0x0000044e + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_HI 0x0000044f + +#define REG_A6XX_RBBM_PERFCTR_VPC_0_LO 0x00000450 + +#define REG_A6XX_RBBM_PERFCTR_VPC_0_HI 0x00000451 + +#define REG_A6XX_RBBM_PERFCTR_VPC_1_LO 0x00000452 + +#define REG_A6XX_RBBM_PERFCTR_VPC_1_HI 0x00000453 + +#define REG_A6XX_RBBM_PERFCTR_VPC_2_LO 0x00000454 + +#define REG_A6XX_RBBM_PERFCTR_VPC_2_HI 0x00000455 + +#define REG_A6XX_RBBM_PERFCTR_VPC_3_LO 0x00000456 + +#define REG_A6XX_RBBM_PERFCTR_VPC_3_HI 0x00000457 + +#define REG_A6XX_RBBM_PERFCTR_VPC_4_LO 0x00000458 + +#define REG_A6XX_RBBM_PERFCTR_VPC_4_HI 0x00000459 + +#define REG_A6XX_RBBM_PERFCTR_VPC_5_LO 0x0000045a + +#define REG_A6XX_RBBM_PERFCTR_VPC_5_HI 0x0000045b + +#define REG_A6XX_RBBM_PERFCTR_CCU_0_LO 0x0000045c + +#define REG_A6XX_RBBM_PERFCTR_CCU_0_HI 0x0000045d + +#define REG_A6XX_RBBM_PERFCTR_CCU_1_LO 0x0000045e + +#define REG_A6XX_RBBM_PERFCTR_CCU_1_HI 0x0000045f + +#define REG_A6XX_RBBM_PERFCTR_CCU_2_LO 0x00000460 + +#define REG_A6XX_RBBM_PERFCTR_CCU_2_HI 0x00000461 + +#define REG_A6XX_RBBM_PERFCTR_CCU_3_LO 0x00000462 + +#define REG_A6XX_RBBM_PERFCTR_CCU_3_HI 0x00000463 + +#define REG_A6XX_RBBM_PERFCTR_CCU_4_LO 0x00000464 + +#define REG_A6XX_RBBM_PERFCTR_CCU_4_HI 0x00000465 + +#define REG_A6XX_RBBM_PERFCTR_TSE_0_LO 0x00000466 + +#define REG_A6XX_RBBM_PERFCTR_TSE_0_HI 0x00000467 + +#define REG_A6XX_RBBM_PERFCTR_TSE_1_LO 0x00000468 + +#define REG_A6XX_RBBM_PERFCTR_TSE_1_HI 0x00000469 + +#define REG_A6XX_RBBM_PERFCTR_TSE_2_LO 0x0000046a + +#define REG_A6XX_RBBM_PERFCTR_CCU_4_HI 0x00000465 + +#define REG_A6XX_RBBM_PERFCTR_TSE_0_LO 0x00000466 + +#define REG_A6XX_RBBM_PERFCTR_TSE_0_HI 0x00000467 + +#define REG_A6XX_RBBM_PERFCTR_TSE_1_LO 0x00000468 + +#define REG_A6XX_RBBM_PERFCTR_TSE_1_HI 0x00000469 + +#define REG_A6XX_RBBM_PERFCTR_TSE_2_LO 0x0000046a + +#define REG_A6XX_RBBM_PERFCTR_TSE_2_HI 0x0000046b + +#define REG_A6XX_RBBM_PERFCTR_TSE_3_LO 0x0000046c + +#define REG_A6XX_RBBM_PERFCTR_TSE_3_HI 0x0000046d + +#define REG_A6XX_RBBM_PERFCTR_RAS_0_LO 0x0000046e + +#define REG_A6XX_RBBM_PERFCTR_RAS_0_HI 0x0000046f + +#define REG_A6XX_RBBM_PERFCTR_RAS_1_LO 0x00000470 + +#define REG_A6XX_RBBM_PERFCTR_RAS_1_HI 0x00000471 + +#define REG_A6XX_RBBM_PERFCTR_RAS_2_LO 0x00000472 + +#define REG_A6XX_RBBM_PERFCTR_RAS_2_HI 0x00000473 + +#define REG_A6XX_RBBM_PERFCTR_RAS_3_LO 0x00000474 + +#define REG_A6XX_RBBM_PERFCTR_RAS_3_HI 0x00000475 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_0_LO 0x00000476 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_0_HI 0x00000477 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_1_LO 0x00000478 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_1_HI 0x00000479 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_2_LO 0x0000047a + +#define REG_A6XX_RBBM_PERFCTR_UCHE_2_HI 0x0000047b + +#define REG_A6XX_RBBM_PERFCTR_UCHE_3_LO 0x0000047c + +#define REG_A6XX_RBBM_PERFCTR_UCHE_3_HI 0x0000047d + +#define REG_A6XX_RBBM_PERFCTR_UCHE_4_LO 0x0000047e + +#define REG_A6XX_RBBM_PERFCTR_UCHE_4_HI 0x0000047f + +#define REG_A6XX_RBBM_PERFCTR_UCHE_5_LO 0x00000480 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_5_HI 0x00000481 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_6_LO 0x00000482 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_6_HI 0x00000483 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_7_LO 0x00000484 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_7_HI 0x00000485 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_8_LO 0x00000486 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_8_HI 0x00000487 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_9_LO 0x00000488 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_9_HI 0x00000489 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_10_LO 0x0000048a + +#define REG_A6XX_RBBM_PERFCTR_UCHE_10_HI 0x0000048b + +#define REG_A6XX_RBBM_PERFCTR_UCHE_11_LO 0x0000048c + +#define REG_A6XX_RBBM_PERFCTR_UCHE_11_HI 0x0000048d + +#define REG_A6XX_RBBM_PERFCTR_TP_0_LO 0x0000048e + +#define REG_A6XX_RBBM_PERFCTR_TP_0_HI 0x0000048f + +#define REG_A6XX_RBBM_PERFCTR_TP_1_LO 0x00000490 + +#define REG_A6XX_RBBM_PERFCTR_TP_1_HI 0x00000491 + +#define REG_A6XX_RBBM_PERFCTR_TP_2_LO 0x00000492 + +#define REG_A6XX_RBBM_PERFCTR_TP_2_HI 0x00000493 + +#define REG_A6XX_RBBM_PERFCTR_TP_3_LO 0x00000494 + +#define REG_A6XX_RBBM_PERFCTR_TP_3_HI 0x00000495 + +#define REG_A6XX_RBBM_PERFCTR_TP_4_LO 0x00000496 + +#define REG_A6XX_RBBM_PERFCTR_TP_4_HI 0x00000497 + +#define REG_A6XX_RBBM_PERFCTR_TP_5_LO 0x00000498 + +#define REG_A6XX_RBBM_PERFCTR_TP_5_HI 0x00000499 + +#define REG_A6XX_RBBM_PERFCTR_TP_6_LO 0x0000049a + +#define REG_A6XX_RBBM_PERFCTR_TP_6_HI 0x0000049b + +#define REG_A6XX_RBBM_PERFCTR_TP_7_LO 0x0000049c + +#define REG_A6XX_RBBM_PERFCTR_TP_7_HI 0x0000049d + +#define REG_A6XX_RBBM_PERFCTR_TP_8_LO 0x0000049e + +#define REG_A6XX_RBBM_PERFCTR_TP_8_HI 0x0000049f + +#define REG_A6XX_RBBM_PERFCTR_TP_9_LO 0x000004a0 + +#define REG_A6XX_RBBM_PERFCTR_TP_9_HI 0x000004a1 + +#define REG_A6XX_RBBM_PERFCTR_TP_10_LO 0x000004a2 + +#define REG_A6XX_RBBM_PERFCTR_TP_10_HI 0x000004a3 + +#define REG_A6XX_RBBM_PERFCTR_TP_11_LO 0x000004a4 + +#define REG_A6XX_RBBM_PERFCTR_TP_11_HI 0x000004a5 + +#define REG_A6XX_RBBM_PERFCTR_SP_0_LO 0x000004a6 + +#define REG_A6XX_RBBM_PERFCTR_SP_0_HI 0x000004a7 + +#define REG_A6XX_RBBM_PERFCTR_SP_1_LO 0x000004a8 + +#define REG_A6XX_RBBM_PERFCTR_SP_1_HI 0x000004a9 + +#define REG_A6XX_RBBM_PERFCTR_SP_2_LO 0x000004aa + +#define REG_A6XX_RBBM_PERFCTR_SP_2_HI 0x000004ab + +#define REG_A6XX_RBBM_PERFCTR_SP_3_LO 0x000004ac + +#define REG_A6XX_RBBM_PERFCTR_SP_3_HI 0x000004ad + +#define REG_A6XX_RBBM_PERFCTR_SP_4_LO 0x000004ae + +#define REG_A6XX_RBBM_PERFCTR_SP_4_HI 0x000004af + +#define REG_A6XX_RBBM_PERFCTR_SP_5_LO 0x000004b0 + +#define REG_A6XX_RBBM_PERFCTR_SP_5_HI 0x000004b1 + +#define REG_A6XX_RBBM_PERFCTR_SP_6_LO 0x000004b2 + +#define REG_A6XX_RBBM_PERFCTR_SP_6_HI 0x000004b3 + +#define REG_A6XX_RBBM_PERFCTR_SP_7_LO 0x000004b4 + +#define REG_A6XX_RBBM_PERFCTR_SP_7_HI 0x000004b5 + +#define REG_A6XX_RBBM_PERFCTR_SP_8_LO 0x000004b6 + +#define REG_A6XX_RBBM_PERFCTR_SP_8_HI 0x000004b7 + +#define REG_A6XX_RBBM_PERFCTR_SP_9_LO 0x000004b8 + +#define REG_A6XX_RBBM_PERFCTR_SP_9_HI 0x000004b9 + +#define REG_A6XX_RBBM_PERFCTR_SP_10_LO 0x000004ba + +#define REG_A6XX_RBBM_PERFCTR_SP_10_HI 0x000004bb + +#define REG_A6XX_RBBM_PERFCTR_SP_11_LO 0x000004bc + +#define REG_A6XX_RBBM_PERFCTR_SP_11_HI 0x000004bd + +#define REG_A6XX_RBBM_PERFCTR_SP_12_LO 0x000004be + +#define REG_A6XX_RBBM_PERFCTR_SP_12_HI 0x000004bf + +#define REG_A6XX_RBBM_PERFCTR_SP_13_LO 0x000004c0 + +#define REG_A6XX_RBBM_PERFCTR_SP_13_HI 0x000004c1 + +#define REG_A6XX_RBBM_PERFCTR_SP_14_LO 0x000004c2 + +#define REG_A6XX_RBBM_PERFCTR_SP_14_HI 0x000004c3 + +#define REG_A6XX_RBBM_PERFCTR_SP_15_LO 0x000004c4 + +#define REG_A6XX_RBBM_PERFCTR_SP_15_HI 0x000004c5 + +#define REG_A6XX_RBBM_PERFCTR_SP_16_LO 0x000004c6 + +#define REG_A6XX_RBBM_PERFCTR_SP_16_HI 0x000004c7 + +#define REG_A6XX_RBBM_PERFCTR_SP_17_LO 0x000004c8 + +#define REG_A6XX_RBBM_PERFCTR_SP_17_HI 0x000004c9 + +#define REG_A6XX_RBBM_PERFCTR_SP_18_LO 0x000004ca + +#define REG_A6XX_RBBM_PERFCTR_SP_18_HI 0x000004cb + +#define REG_A6XX_RBBM_PERFCTR_SP_19_LO 0x000004cc + +#define REG_A6XX_RBBM_PERFCTR_SP_19_HI 0x000004cd + +#define REG_A6XX_RBBM_PERFCTR_SP_20_LO 0x000004ce + +#define REG_A6XX_RBBM_PERFCTR_SP_20_HI 0x000004cf + +#define REG_A6XX_RBBM_PERFCTR_SP_21_LO 0x000004d0 + +#define REG_A6XX_RBBM_PERFCTR_SP_21_HI 0x000004d1 + +#define REG_A6XX_RBBM_PERFCTR_SP_22_LO 0x000004d2 + +#define REG_A6XX_RBBM_PERFCTR_SP_22_HI 0x000004d3 + +#define REG_A6XX_RBBM_PERFCTR_SP_23_LO 0x000004d4 + +#define REG_A6XX_RBBM_PERFCTR_SP_23_HI 0x000004d5 + +#define REG_A6XX_RBBM_PERFCTR_RB_0_LO 0x000004d6 + +#define REG_A6XX_RBBM_PERFCTR_RB_0_HI 0x000004d7 + +#define REG_A6XX_RBBM_PERFCTR_RB_1_LO 0x000004d8 + +#define REG_A6XX_RBBM_PERFCTR_RB_1_HI 0x000004d9 + +#define REG_A6XX_RBBM_PERFCTR_RB_2_LO 0x000004da + +#define REG_A6XX_RBBM_PERFCTR_RB_2_HI 0x000004db + +#define REG_A6XX_RBBM_PERFCTR_RB_3_LO 0x000004dc + +#define REG_A6XX_RBBM_PERFCTR_RB_3_HI 0x000004dd + +#define REG_A6XX_RBBM_PERFCTR_RB_4_LO 0x000004de + +#define REG_A6XX_RBBM_PERFCTR_RB_4_HI 0x000004df + +#define REG_A6XX_RBBM_PERFCTR_RB_5_LO 0x000004e0 + +#define REG_A6XX_RBBM_PERFCTR_RB_5_HI 0x000004e1 + +#define REG_A6XX_RBBM_PERFCTR_RB_6_LO 0x000004e2 + +#define REG_A6XX_RBBM_PERFCTR_RB_6_HI 0x000004e3 + +#define REG_A6XX_RBBM_PERFCTR_RB_7_LO 0x000004e4 + +#define REG_A6XX_RBBM_PERFCTR_RB_7_HI 0x000004e5 + +#define REG_A6XX_RBBM_PERFCTR_VSC_0_LO 0x000004e6 + +#define REG_A6XX_RBBM_PERFCTR_VSC_0_HI 0x000004e7 + +#define REG_A6XX_RBBM_PERFCTR_VSC_1_LO 0x000004e8 + +#define REG_A6XX_RBBM_PERFCTR_VSC_1_HI 0x000004e9 + +#define REG_A6XX_RBBM_PERFCTR_LRZ_0_LO 0x000004ea + +#define REG_A6XX_RBBM_PERFCTR_LRZ_0_HI 0x000004eb + +#define REG_A6XX_RBBM_PERFCTR_LRZ_1_LO 0x000004ec + +#define REG_A6XX_RBBM_PERFCTR_LRZ_1_HI 0x000004ed + +#define REG_A6XX_RBBM_PERFCTR_LRZ_2_LO 0x000004ee + +#define REG_A6XX_RBBM_PERFCTR_LRZ_2_HI 0x000004ef + +#define REG_A6XX_RBBM_PERFCTR_LRZ_3_LO 0x000004f0 + +#define REG_A6XX_RBBM_PERFCTR_LRZ_3_HI 0x000004f1 + +#define REG_A6XX_RBBM_PERFCTR_CMP_0_LO 0x000004f2 + +#define REG_A6XX_RBBM_PERFCTR_CMP_0_HI 0x000004f3 + +#define REG_A6XX_RBBM_PERFCTR_CMP_1_LO 0x000004f4 + +#define REG_A6XX_RBBM_PERFCTR_CMP_1_HI 0x000004f5 + +#define REG_A6XX_RBBM_PERFCTR_CMP_2_LO 0x000004f6 + +#define REG_A6XX_RBBM_PERFCTR_CMP_2_HI 0x000004f7 + +#define REG_A6XX_RBBM_PERFCTR_CMP_3_LO 0x000004f8 + +#define REG_A6XX_RBBM_PERFCTR_CMP_3_HI 0x000004f9 + +#define REG_A6XX_RBBM_PERFCTR_CNTL 0x00000500 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD0 0x00000501 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD1 0x00000502 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD2 0x00000503 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD3 0x00000504 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000505 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000506 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000507 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000508 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000509 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000050a + +#define REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000050b + +#define REG_A6XX_RBBM_ISDB_CNT 0x00000533 + +#define REG_A6XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 + +#define REG_A6XX_RBBM_SECVID_TSB_CNTL 0x0000f803 + +#define REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 + +#define REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL 0x00000010 + +#define REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000001f + +#define REG_A6XX_RBBM_INT_CLEAR_CMD 0x00000037 + +#define REG_A6XX_RBBM_INT_0_MASK 0x00000038 + +#define REG_A6XX_RBBM_SP_HYST_CNT 0x00000042 + +#define REG_A6XX_RBBM_SW_RESET_CMD 0x00000043 + +#define REG_A6XX_RBBM_RAC_THRESHOLD_CNT 0x00000044 + +#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 + +#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 + +#define REG_A6XX_RBBM_CLOCK_CNTL 0x000000ae + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP0 0x000000b0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP1 0x000000b1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP2 0x000000b2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP3 0x000000b3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP0 0x000000b4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP1 0x000000b5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP2 0x000000b6 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP3 0x000000b7 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP0 0x000000b8 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP1 0x000000b9 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP2 0x000000ba + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP3 0x000000bb + +#define REG_A6XX_RBBM_CLOCK_HYST_SP0 0x000000bc + +#define REG_A6XX_RBBM_CLOCK_HYST_SP1 0x000000bd + +#define REG_A6XX_RBBM_CLOCK_HYST_SP2 0x000000be + +#define REG_A6XX_RBBM_CLOCK_HYST_SP3 0x000000bf + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP0 0x000000c0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP1 0x000000c1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP2 0x000000c2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP3 0x000000c3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP0 0x000000c4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP1 0x000000c5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP2 0x000000c6 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP3 0x000000c7 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP0 0x000000c8 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP1 0x000000c9 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP2 0x000000ca + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP3 0x000000cb + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP0 0x000000cc + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP1 0x000000cd + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP2 0x000000ce + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP3 0x000000cf + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP0 0x000000d0 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP1 0x000000d1 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP2 0x000000d2 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP3 0x000000d3 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP0 0x000000d4 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP1 0x000000d5 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP2 0x000000d6 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP3 0x000000d7 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP0 0x000000d8 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP1 0x000000d9 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP2 0x000000da + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP3 0x000000db + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP0 0x000000dc + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP1 0x000000dd + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP2 0x000000de + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP3 0x000000df + +#define REG_A6XX_RBBM_CLOCK_HYST_TP0 0x000000e0 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP1 0x000000e1 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP2 0x000000e2 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP3 0x000000e3 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP0 0x000000e4 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP1 0x000000e5 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP2 0x000000e6 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP3 0x000000e7 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP0 0x000000e8 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP1 0x000000e9 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP2 0x000000ea + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP3 0x000000eb + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP0 0x000000ec + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP1 0x000000ed + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP2 0x000000ee + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP3 0x000000ef + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB0 0x000000f0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB1 0x000000f1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB2 0x000000f2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB3 0x000000f3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB0 0x000000f4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB1 0x000000f5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB2 0x000000f6 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB3 0x000000f7 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU0 0x000000f8 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU1 0x000000f9 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU2 0x000000fa + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU3 0x000000fb + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000100 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000101 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000102 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000103 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RAC 0x00000104 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RAC 0x00000105 + +#define REG_A6XX_RBBM_CLOCK_DELAY_RAC 0x00000106 + +#define REG_A6XX_RBBM_CLOCK_HYST_RAC 0x00000107 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000108 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000109 + +#define REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000010a + +#define REG_A6XX_RBBM_CLOCK_CNTL_UCHE 0x0000010b + +#define REG_A6XX_RBBM_CLOCK_CNTL2_UCHE 0x0000010c + +#define REG_A6XX_RBBM_CLOCK_CNTL3_UCHE 0x0000010d + +#define REG_A6XX_RBBM_CLOCK_CNTL4_UCHE 0x0000010e + +#define REG_A6XX_RBBM_CLOCK_DELAY_UCHE 0x0000010f + +#define REG_A6XX_RBBM_CLOCK_HYST_UCHE 0x00000110 + +#define REG_A6XX_RBBM_CLOCK_MODE_VFD 0x00000111 + +#define REG_A6XX_RBBM_CLOCK_DELAY_VFD 0x00000112 + +#define REG_A6XX_RBBM_CLOCK_HYST_VFD 0x00000113 + +#define REG_A6XX_RBBM_CLOCK_MODE_GPC 0x00000114 + +#define REG_A6XX_RBBM_CLOCK_DELAY_GPC 0x00000115 + +#define REG_A6XX_RBBM_CLOCK_HYST_GPC 0x00000116 + +#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2 0x00000117 + +#define REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX 0x00000118 + +#define REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX 0x00000119 + +#define REG_A6XX_RBBM_CLOCK_HYST_GMU_GX 0x0000011a + +#define REG_A6XX_RBBM_CLOCK_MODE_HLSQ 0x0000011b + +#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ 0x0000011c + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_A 0x00000600 + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_B 0x00000601 + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_C 0x00000602 + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_D 0x00000603 +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK 0x000000ff +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK 0x0000ff00 +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLT 0x00000604 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLM 0x00000605 +#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0 0x00000608 + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1 0x00000609 + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2 0x0000060a + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3 0x0000060b + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0 0x0000060c + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1 0x0000060d + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2 0x0000060e + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3 0x0000060f + +#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000610 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000611 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000062f + +#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000630 + +#define REG_A6XX_VSC_PERFCTR_VSC_SEL_0 0x00000cd8 + +#define REG_A6XX_VSC_PERFCTR_VSC_SEL_1 0x00000cd9 + +#define REG_A6XX_GRAS_ADDR_MODE_CNTL 0x00008601 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_0 0x00008610 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_1 0x00008611 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_2 0x00008612 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_3 0x00008613 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_0 0x00008614 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_1 0x00008615 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_2 0x00008616 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_3 0x00008617 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_0 0x00008618 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_1 0x00008619 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_2 0x0000861a + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_3 0x0000861b + +#define REG_A6XX_RB_ADDR_MODE_CNTL 0x00008e05 + +#define REG_A6XX_RB_NC_MODE_CNTL 0x00008e08 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_0 0x00008e10 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_1 0x00008e11 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_2 0x00008e12 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_3 0x00008e13 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_4 0x00008e14 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_5 0x00008e15 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_6 0x00008e16 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_7 0x00008e17 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_0 0x00008e18 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_1 0x00008e19 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_2 0x00008e1a + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_3 0x00008e1b + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_4 0x00008e1c + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_0 0x00008e2c + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_1 0x00008e2d + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_2 0x00008e2e + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_3 0x00008e2f + +#define REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x00008e3d + +#define REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x00008e50 + +#define REG_A6XX_PC_DBG_ECO_CNTL 0x00009e00 + +#define REG_A6XX_PC_ADDR_MODE_CNTL 0x00009e01 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_0 0x00009e34 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_1 0x00009e35 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_2 0x00009e36 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_3 0x00009e37 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_4 0x00009e38 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_5 0x00009e39 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_6 0x00009e3a + +#define REG_A6XX_PC_PERFCTR_PC_SEL_7 0x00009e3b + +#define REG_A6XX_HLSQ_ADDR_MODE_CNTL 0x0000be05 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x0000be10 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x0000be11 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x0000be12 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x0000be13 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x0000be14 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x0000be15 + +#define REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000c800 + +#define REG_A6XX_HLSQ_DBG_READ_SEL 0x0000d000 + +#define REG_A6XX_VFD_ADDR_MODE_CNTL 0x0000a601 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_0 0x0000a610 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_1 0x0000a611 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_2 0x0000a612 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_3 0x0000a613 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_4 0x0000a614 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_5 0x0000a615 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_6 0x0000a616 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_7 0x0000a617 + +#define REG_A6XX_VPC_ADDR_MODE_CNTL 0x00009601 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_0 0x00009604 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_1 0x00009605 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_2 0x00009606 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_3 0x00009607 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_4 0x00009608 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_5 0x00009609 + +#define REG_A6XX_UCHE_ADDR_MODE_CNTL 0x00000e00 + +#define REG_A6XX_UCHE_MODE_CNTL 0x00000e01 + +#define REG_A6XX_UCHE_WRITE_RANGE_MAX_LO 0x00000e05 + +#define REG_A6XX_UCHE_WRITE_RANGE_MAX_HI 0x00000e06 + +#define REG_A6XX_UCHE_WRITE_THRU_BASE_LO 0x00000e07 + +#define REG_A6XX_UCHE_WRITE_THRU_BASE_HI 0x00000e08 + +#define REG_A6XX_UCHE_TRAP_BASE_LO 0x00000e09 + +#define REG_A6XX_UCHE_TRAP_BASE_HI 0x00000e0a + +#define REG_A6XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e0b + +#define REG_A6XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e0c + +#define REG_A6XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e0d + +#define REG_A6XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e0e + +#define REG_A6XX_UCHE_CACHE_WAYS 0x00000e17 + +#define REG_A6XX_UCHE_FILTER_CNTL 0x00000e18 + +#define REG_A6XX_UCHE_CLIENT_PF 0x00000e19 +#define A6XX_UCHE_CLIENT_PF_PERFSEL__MASK 0x000000ff +#define A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT 0 +static inline uint32_t A6XX_UCHE_CLIENT_PF_PERFSEL(uint32_t val) +{ + return ((val) << A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT) & A6XX_UCHE_CLIENT_PF_PERFSEL__MASK; +} + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e1c + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e1d + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e1e + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e1f + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e20 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e21 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e22 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e23 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_8 0x00000e24 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_9 0x00000e25 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_10 0x00000e26 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_11 0x00000e27 + +#define REG_A6XX_SP_ADDR_MODE_CNTL 0x0000ae01 + +#define REG_A6XX_SP_NC_MODE_CNTL 0x0000ae02 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_0 0x0000ae10 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_1 0x0000ae11 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_2 0x0000ae12 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_3 0x0000ae13 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_4 0x0000ae14 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_5 0x0000ae15 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_6 0x0000ae16 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_7 0x0000ae17 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_8 0x0000ae18 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_9 0x0000ae19 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_10 0x0000ae1a + +#define REG_A6XX_SP_PERFCTR_SP_SEL_11 0x0000ae1b + +#define REG_A6XX_SP_PERFCTR_SP_SEL_12 0x0000ae1c + +#define REG_A6XX_SP_PERFCTR_SP_SEL_13 0x0000ae1d + +#define REG_A6XX_SP_PERFCTR_SP_SEL_14 0x0000ae1e + +#define REG_A6XX_SP_PERFCTR_SP_SEL_15 0x0000ae1f + +#define REG_A6XX_SP_PERFCTR_SP_SEL_16 0x0000ae20 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_17 0x0000ae21 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_18 0x0000ae22 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_19 0x0000ae23 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_20 0x0000ae24 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_21 0x0000ae25 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_22 0x0000ae26 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_23 0x0000ae27 + +#define REG_A6XX_TPL1_ADDR_MODE_CNTL 0x0000b601 + +#define REG_A6XX_TPL1_NC_MODE_CNTL 0x0000b604 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_0 0x0000b610 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_1 0x0000b611 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_2 0x0000b612 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_3 0x0000b613 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_4 0x0000b614 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_5 0x0000b615 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_6 0x0000b616 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_7 0x0000b617 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_8 0x0000b618 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_9 0x0000b619 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_10 0x0000b61a + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_11 0x0000b61b + +#define REG_A6XX_VBIF_VERSION 0x00003000 + +#define REG_A6XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A6XX_VBIF_XIN_HALT_CTRL0 0x00003080 + +#define REG_A6XX_VBIF_XIN_HALT_CTRL1 0x00003081 + +#define REG_A6XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A6XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A6XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A6XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A6XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A6XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A6XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A6XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A6XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A 0x00018400 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B 0x00018401 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C 0x00018402 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D 0x00018403 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK 0x000000ff +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK 0x0000ff00 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT 0x00018404 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM 0x00018405 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 0x00018408 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 0x00018409 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 0x0001840a + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 0x0001840b + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 0x0001840c + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 0x0001840d + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 0x0001840e + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 0x0001840f + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x00018410 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x00018411 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0001842f + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00018430 + +#define REG_A6XX_PDC_GPU_ENABLE_PDC 0x00021140 + +#define REG_A6XX_PDC_GPU_SEQ_START_ADDR 0x00021148 + +#define REG_A6XX_PDC_GPU_TCS0_CONTROL 0x00021540 + +#define REG_A6XX_PDC_GPU_TCS0_CMD_ENABLE_BANK 0x00021541 + +#define REG_A6XX_PDC_GPU_TCS0_CMD_WAIT_FOR_CMPL_BANK 0x00021542 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_MSGID 0x00021543 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_ADDR 0x00021544 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_DATA 0x00021545 + +#define REG_A6XX_PDC_GPU_TCS1_CONTROL 0x00021572 + +#define REG_A6XX_PDC_GPU_TCS1_CMD_ENABLE_BANK 0x00021573 + +#define REG_A6XX_PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK 0x00021574 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID 0x00021575 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR 0x00021576 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_DATA 0x00021577 + +#define REG_A6XX_PDC_GPU_TCS2_CONTROL 0x000215a4 + +#define REG_A6XX_PDC_GPU_TCS2_CMD_ENABLE_BANK 0x000215a5 + +#define REG_A6XX_PDC_GPU_TCS2_CMD_WAIT_FOR_CMPL_BANK 0x000215a6 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_MSGID 0x000215a7 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_ADDR 0x000215a8 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_DATA 0x000215a9 + +#define REG_A6XX_PDC_GPU_TCS3_CONTROL 0x000215d6 + +#define REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK 0x000215d7 + +#define REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK 0x000215d8 + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID 0x000215d9 + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR 0x000215da + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_DATA 0x000215db + +#define REG_A6XX_PDC_GPU_SEQ_MEM_0 0x000a0000 + +#define REG_A6XX_X1_WINDOW_OFFSET 0x000088d4 +#define A6XX_X1_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_X1_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_X1_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_X1_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_X1_WINDOW_OFFSET_X__SHIFT) & A6XX_X1_WINDOW_OFFSET_X__MASK; +} +#define A6XX_X1_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_X1_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_X1_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_X1_WINDOW_OFFSET_Y__SHIFT) & A6XX_X1_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_X2_WINDOW_OFFSET 0x0000b4d1 +#define A6XX_X2_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_X2_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_X2_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_X2_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_X2_WINDOW_OFFSET_X__SHIFT) & A6XX_X2_WINDOW_OFFSET_X__MASK; +} +#define A6XX_X2_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_X2_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_X2_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_X2_WINDOW_OFFSET_Y__SHIFT) & A6XX_X2_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_X3_WINDOW_OFFSET 0x0000b307 +#define A6XX_X3_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_X3_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_X3_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_X3_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_X3_WINDOW_OFFSET_X__SHIFT) & A6XX_X3_WINDOW_OFFSET_X__MASK; +} +#define A6XX_X3_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_X3_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_X3_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_X3_WINDOW_OFFSET_Y__SHIFT) & A6XX_X3_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_X1_BIN_SIZE 0x000080a1 +#define A6XX_X1_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A6XX_X1_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A6XX_X1_BIN_SIZE_WIDTH(uint32_t val) +{ + return ((val >> 5) << A6XX_X1_BIN_SIZE_WIDTH__SHIFT) & A6XX_X1_BIN_SIZE_WIDTH__MASK; +} +#define A6XX_X1_BIN_SIZE_HEIGHT__MASK 0x0001ff00 +#define A6XX_X1_BIN_SIZE_HEIGHT__SHIFT 8 +static inline uint32_t A6XX_X1_BIN_SIZE_HEIGHT(uint32_t val) +{ + return ((val >> 4) << A6XX_X1_BIN_SIZE_HEIGHT__SHIFT) & A6XX_X1_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A6XX_X2_BIN_SIZE 0x00008800 +#define A6XX_X2_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A6XX_X2_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A6XX_X2_BIN_SIZE_WIDTH(uint32_t val) +{ + return ((val >> 5) << A6XX_X2_BIN_SIZE_WIDTH__SHIFT) & A6XX_X2_BIN_SIZE_WIDTH__MASK; +} +#define A6XX_X2_BIN_SIZE_HEIGHT__MASK 0x0001ff00 +#define A6XX_X2_BIN_SIZE_HEIGHT__SHIFT 8 +static inline uint32_t A6XX_X2_BIN_SIZE_HEIGHT(uint32_t val) +{ + return ((val >> 4) << A6XX_X2_BIN_SIZE_HEIGHT__SHIFT) & A6XX_X2_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A6XX_X3_BIN_SIZE 0x000088d3 +#define A6XX_X3_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A6XX_X3_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A6XX_X3_BIN_SIZE_WIDTH(uint32_t val) +{ + return ((val >> 5) << A6XX_X3_BIN_SIZE_WIDTH__SHIFT) & A6XX_X3_BIN_SIZE_WIDTH__MASK; +} +#define A6XX_X3_BIN_SIZE_HEIGHT__MASK 0x0001ff00 +#define A6XX_X3_BIN_SIZE_HEIGHT__SHIFT 8 +static inline uint32_t A6XX_X3_BIN_SIZE_HEIGHT(uint32_t val) +{ + return ((val >> 4) << A6XX_X3_BIN_SIZE_HEIGHT__SHIFT) & A6XX_X3_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A6XX_VSC_BIN_SIZE 0x00000c02 +#define A6XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A6XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A6XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + return ((val >> 5) << A6XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A6XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A6XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001ff00 +#define A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT 8 +static inline uint32_t A6XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + return ((val >> 4) << A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A6XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A6XX_VSC_SIZE_ADDRESS_LO 0x00000c03 + +#define REG_A6XX_VSC_SIZE_ADDRESS_HI 0x00000c04 + +#define REG_A6XX_VSC_BIN_COUNT 0x00000c06 +#define A6XX_VSC_BIN_COUNT_NX__MASK 0x000007fe +#define A6XX_VSC_BIN_COUNT_NX__SHIFT 1 +static inline uint32_t A6XX_VSC_BIN_COUNT_NX(uint32_t val) +{ + return ((val) << A6XX_VSC_BIN_COUNT_NX__SHIFT) & A6XX_VSC_BIN_COUNT_NX__MASK; +} +#define A6XX_VSC_BIN_COUNT_NY__MASK 0x001ff800 +#define A6XX_VSC_BIN_COUNT_NY__SHIFT 11 +static inline uint32_t A6XX_VSC_BIN_COUNT_NY(uint32_t val) +{ + return ((val) << A6XX_VSC_BIN_COUNT_NY__SHIFT) & A6XX_VSC_BIN_COUNT_NY__MASK; +} + +static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } +#define A6XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_W__MASK 0x03f00000 +#define A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_H__MASK 0xfc000000 +#define A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT 26 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +#define REG_A6XX_VSC_XXX_ADDRESS_LO 0x00000c30 + +#define REG_A6XX_VSC_XXX_ADDRESS_HI 0x00000c31 + +#define REG_A6XX_VSC_XXX_PITCH 0x00000c32 + +#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO 0x00000c34 + +#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_HI 0x00000c35 + +#define REG_A6XX_VSC_PIPE_DATA_PITCH 0x00000c36 + +static inline uint32_t REG_A6XX_VSC_SIZE(uint32_t i0) { return 0x00000c78 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VSC_SIZE_REG(uint32_t i0) { return 0x00000c78 + 0x1*i0; } + +#define REG_A6XX_UCHE_UNKNOWN_0E12 0x00000e12 + +#define REG_A6XX_GRAS_UNKNOWN_8001 0x00008001 + +#define REG_A6XX_GRAS_UNKNOWN_8004 0x00008004 + +#define REG_A6XX_GRAS_CNTL 0x00008005 +#define A6XX_GRAS_CNTL_VARYING 0x00000001 +#define A6XX_GRAS_CNTL_XCOORD 0x00000040 +#define A6XX_GRAS_CNTL_YCOORD 0x00000080 +#define A6XX_GRAS_CNTL_ZCOORD 0x00000100 +#define A6XX_GRAS_CNTL_WCOORD 0x00000200 + +#define REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x00008006 +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; +} +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_XOFFSET_0 0x00008010 +#define A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_XOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_XSCALE_0 0x00008011 +#define A6XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_XSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_XSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_YOFFSET_0 0x00008012 +#define A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_YOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_YSCALE_0 0x00008013 +#define A6XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_YSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_YSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_ZOFFSET_0 0x00008014 +#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_ZOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_ZSCALE_0 0x00008015 +#define A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_ZSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_SU_CNTL 0x00008090 +#define A6XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 +#define A6XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 +#define A6XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 +#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 +#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; +} +#define A6XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 +#define A6XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 + +#define REG_A6XX_GRAS_SU_POINT_MINMAX 0x00008091 +#define A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A6XX_GRAS_SU_POINT_SIZE 0x00008092 +#define A6XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A6XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_SIZE__SHIFT) & A6XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE 0x00008095 +#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_SCALE(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; +} + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET 0x00008096 +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x00008097 +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; +} + +#define REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO 0x00008098 +#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) +{ + return ((val) << A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A6XX_GRAS_UNKNOWN_8099 0x00008099 + +#define REG_A6XX_GRAS_UNKNOWN_809B 0x0000809b + +#define REG_A6XX_GRAS_RAS_MSAA_CNTL 0x000080a2 +#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_GRAS_DEST_MSAA_CNTL 0x000080a3 +#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_GRAS_UNKNOWN_80A4 0x000080a4 + +#define REG_A6XX_GRAS_UNKNOWN_80A5 0x000080a5 + +#define REG_A6XX_GRAS_UNKNOWN_80A6 0x000080a6 + +#define REG_A6XX_GRAS_UNKNOWN_80AF 0x000080af + +#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x000080b0 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; +} +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x000080b1 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; +} +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x000080d0 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; +} +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x000080d1 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; +} +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL 0x000080f0 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_BR 0x000080f1 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A6XX_GRAS_LRZ_CNTL 0x00008100 +#define A6XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 +#define A6XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 +#define A6XX_GRAS_LRZ_CNTL_GREATER 0x00000004 + +#define REG_A6XX_GRAS_2D_BLIT_INFO 0x00008102 +#define A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK; +} + +#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO 0x00008103 + +#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_HI 0x00008104 + +#define REG_A6XX_GRAS_LRZ_BUFFER_PITCH 0x00008105 +#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK 0x000007ff +#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT 0 +static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH(uint32_t val) +{ + return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK; +} +#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 +#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 +static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x00008106 + +#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x00008107 + +#define REG_A6XX_GRAS_2D_BLIT_CNTL 0x00008400 + +#define REG_A6XX_GRAS_2D_SRC_TL_X 0x00008401 +#define A6XX_GRAS_2D_SRC_TL_X_X__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_TL_X_X__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_TL_X_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_TL_X_X__SHIFT) & A6XX_GRAS_2D_SRC_TL_X_X__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_BR_X 0x00008402 +#define A6XX_GRAS_2D_SRC_BR_X_X__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_BR_X_X__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_BR_X_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_BR_X_X__SHIFT) & A6XX_GRAS_2D_SRC_BR_X_X__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_TL_Y 0x00008403 +#define A6XX_GRAS_2D_SRC_TL_Y_Y__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_TL_Y_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_TL_Y_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_BR_Y 0x00008404 +#define A6XX_GRAS_2D_SRC_BR_Y_Y__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_BR_Y_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_BR_Y_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_DST_TL 0x00008405 +#define A6XX_GRAS_2D_DST_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_2D_DST_TL_X__MASK 0x00007fff +#define A6XX_GRAS_2D_DST_TL_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_2D_DST_TL_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_TL_X__SHIFT) & A6XX_GRAS_2D_DST_TL_X__MASK; +} +#define A6XX_GRAS_2D_DST_TL_Y__MASK 0x7fff0000 +#define A6XX_GRAS_2D_DST_TL_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_2D_DST_TL_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_TL_Y__SHIFT) & A6XX_GRAS_2D_DST_TL_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_DST_BR 0x00008406 +#define A6XX_GRAS_2D_DST_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_2D_DST_BR_X__MASK 0x00007fff +#define A6XX_GRAS_2D_DST_BR_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_2D_DST_BR_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_BR_X__SHIFT) & A6XX_GRAS_2D_DST_BR_X__MASK; +} +#define A6XX_GRAS_2D_DST_BR_Y__MASK 0x7fff0000 +#define A6XX_GRAS_2D_DST_BR_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_2D_DST_BR_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_BR_Y__SHIFT) & A6XX_GRAS_2D_DST_BR_Y__MASK; +} + +#define REG_A6XX_GRAS_RESOLVE_CNTL_1 0x0000840a +#define A6XX_GRAS_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_RESOLVE_CNTL_1_X__MASK 0x00007fff +#define A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_X__MASK; +} +#define A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 +#define A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK; +} + +#define REG_A6XX_GRAS_RESOLVE_CNTL_2 0x0000840b +#define A6XX_GRAS_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_RESOLVE_CNTL_2_X__MASK 0x00007fff +#define A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_X__MASK; +} +#define A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 +#define A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK; +} + +#define REG_A6XX_GRAS_UNKNOWN_8600 0x00008600 + +#define REG_A6XX_RB_RAS_MSAA_CNTL 0x00008802 +#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_RB_DEST_MSAA_CNTL 0x00008803 +#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_RB_UNKNOWN_8804 0x00008804 + +#define REG_A6XX_RB_UNKNOWN_8805 0x00008805 + +#define REG_A6XX_RB_UNKNOWN_8806 0x00008806 + +#define REG_A6XX_RB_RENDER_CONTROL0 0x00008809 +#define A6XX_RB_RENDER_CONTROL0_VARYING 0x00000001 +#define A6XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 +#define A6XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 +#define A6XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 +#define A6XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 +#define A6XX_RB_RENDER_CONTROL0_UNK10 0x00000400 + +#define REG_A6XX_RB_RENDER_CONTROL1 0x0000880a +#define A6XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 +#define A6XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 +#define A6XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000008 + +#define REG_A6XX_RB_FS_OUTPUT_CNTL0 0x0000880b +#define A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z 0x00000002 + +#define REG_A6XX_RB_FS_OUTPUT_CNTL1 0x0000880c +#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f +#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT 0 +static inline uint32_t A6XX_RB_FS_OUTPUT_CNTL1_MRT(uint32_t val) +{ + return ((val) << A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK; +} + +#define REG_A6XX_RB_RENDER_COMPONENTS 0x0000880d +#define A6XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT1__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT7__MASK; +} + +#define REG_A6XX_RB_DITHER_CNTL 0x0000880e +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK 0x00000003 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT 0 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK 0x0000000c +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT 2 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK 0x00000030 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT 4 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK 0x000000c0 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT 6 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK 0x00000300 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT 8 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK 0x00000c00 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT 10 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK 0x00001000 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT 12 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK 0x0000c000 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT 14 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK; +} + +#define REG_A6XX_RB_SRGB_CNTL 0x0000880f +#define A6XX_RB_SRGB_CNTL_SRGB_MRT0 0x00000001 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT1 0x00000002 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT2 0x00000004 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT3 0x00000008 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT4 0x00000010 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT5 0x00000020 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT6 0x00000040 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT7 0x00000080 + +#define REG_A6XX_RB_UNKNOWN_8818 0x00008818 + +#define REG_A6XX_RB_UNKNOWN_8819 0x00008819 + +#define REG_A6XX_RB_UNKNOWN_881A 0x0000881a + +#define REG_A6XX_RB_UNKNOWN_881B 0x0000881b + +#define REG_A6XX_RB_UNKNOWN_881C 0x0000881c + +#define REG_A6XX_RB_UNKNOWN_881D 0x0000881d + +#define REG_A6XX_RB_UNKNOWN_881E 0x0000881e + +static inline uint32_t REG_A6XX_RB_MRT(uint32_t i0) { return 0x00008820 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_CONTROL(uint32_t i0) { return 0x00008820 + 0x8*i0; } +#define A6XX_RB_MRT_CONTROL_BLEND 0x00000001 +#define A6XX_RB_MRT_CONTROL_BLEND2 0x00000002 +#define A6XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 +#define A6XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 +#define A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 +static inline uint32_t A6XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A6XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 +#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 +static inline uint32_t A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x00008821 + 0x8*i0; } +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x00008822 + 0x8*i0; } +#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 +#define A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 +#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define A6XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00008000 + +static inline uint32_t REG_A6XX_RB_MRT_PITCH(uint32_t i0) { return 0x00008823 + 0x8*i0; } +#define A6XX_RB_MRT_PITCH__MASK 0xffffffff +#define A6XX_RB_MRT_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_MRT_PITCH__SHIFT) & A6XX_RB_MRT_PITCH__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x00008824 + 0x8*i0; } +#define A6XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_MRT_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_MRT_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_ARRAY_PITCH__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x00008825 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x00008826 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_BASE_GMEM(uint32_t i0) { return 0x00008827 + 0x8*i0; } + +#define REG_A6XX_RB_BLEND_RED_F32 0x00008860 +#define A6XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_RED_F32__SHIFT) & A6XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_GREEN_F32 0x00008861 +#define A6XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_GREEN_F32__SHIFT) & A6XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_BLUE_F32 0x00008862 +#define A6XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_BLUE_F32__SHIFT) & A6XX_RB_BLEND_BLUE_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_ALPHA_F32 0x00008863 +#define A6XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_ALPHA_F32__SHIFT) & A6XX_RB_BLEND_ALPHA_F32__MASK; +} + +#define REG_A6XX_RB_ALPHA_CONTROL 0x00008864 +#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff +#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 +static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) +{ + return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; +} +#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 +#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 +#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 +static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A6XX_RB_BLEND_CNTL 0x00008865 +#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff +#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) +{ + return ((val) << A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; +} +#define A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 +#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 +#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A6XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; +} + +#define REG_A6XX_RB_DEPTH_CNTL 0x00008871 +#define A6XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 +#define A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 +#define A6XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c +#define A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 +static inline uint32_t A6XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; +} +#define A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 + +#define REG_A6XX_RB_DEPTH_BUFFER_INFO 0x00008872 +#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) +{ + return ((val) << A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_PITCH 0x00008873 +#define A6XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff +#define A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_PITCH__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x00008874 +#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_LO 0x00008875 + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_HI 0x00008876 + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_GMEM 0x00008877 + +#define REG_A6XX_RB_UNKNOWN_8878 0x00008878 + +#define REG_A6XX_RB_UNKNOWN_8879 0x00008879 + +#define REG_A6XX_RB_STENCIL_CONTROL 0x00008880 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A6XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A6XX_RB_STENCIL_INFO 0x00008881 +#define A6XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 + +#define REG_A6XX_RB_STENCIL_BUFFER_PITCH 0x00008882 +#define A6XX_RB_STENCIL_BUFFER_PITCH__MASK 0xffffffff +#define A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_STENCIL_BUFFER_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_PITCH__MASK; +} + +#define REG_A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH 0x00008883 +#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_LO 0x00008884 + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_HI 0x00008885 + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_GMEM 0x00008886 + +#define REG_A6XX_RB_STENCILREF 0x00008887 +#define A6XX_RB_STENCILREF_REF__MASK 0x000000ff +#define A6XX_RB_STENCILREF_REF__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILREF_REF(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILREF_REF__SHIFT) & A6XX_RB_STENCILREF_REF__MASK; +} + +#define REG_A6XX_RB_STENCILMASK 0x00008888 +#define A6XX_RB_STENCILMASK_MASK__MASK 0x000000ff +#define A6XX_RB_STENCILMASK_MASK__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILMASK_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILMASK_MASK__SHIFT) & A6XX_RB_STENCILMASK_MASK__MASK; +} + +#define REG_A6XX_RB_STENCILWRMASK 0x00008889 +#define A6XX_RB_STENCILWRMASK_WRMASK__MASK 0x000000ff +#define A6XX_RB_STENCILWRMASK_WRMASK__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILWRMASK_WRMASK(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILWRMASK_WRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_WRMASK__MASK; +} + +#define REG_A6XX_RB_WINDOW_OFFSET 0x00008890 +#define A6XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_RB_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_RB_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET_X__SHIFT) & A6XX_RB_WINDOW_OFFSET_X__MASK; +} +#define A6XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_RB_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_RB_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_RB_SAMPLE_COUNT_CONTROL 0x00008891 +#define A6XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + +#define REG_A6XX_RB_UNKNOWN_88D0 0x000088d0 + +#define REG_A6XX_RB_BLIT_SCISSOR_TL 0x000088d1 +#define A6XX_RB_BLIT_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_BLIT_SCISSOR_TL_X__MASK 0x00007fff +#define A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_X__MASK; +} +#define A6XX_RB_BLIT_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_Y__MASK; +} + +#define REG_A6XX_RB_BLIT_SCISSOR_BR 0x000088d2 +#define A6XX_RB_BLIT_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_BLIT_SCISSOR_BR_X__MASK 0x00007fff +#define A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_X__MASK; +} +#define A6XX_RB_BLIT_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_Y__MASK; +} + +#define REG_A6XX_RB_BLIT_BASE_GMEM 0x000088d6 + +#define REG_A6XX_RB_BLIT_DST_INFO 0x000088d7 +#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK 0x00000003 +#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK; +} +#define A6XX_RB_BLIT_DST_INFO_FLAGS 0x00000004 +#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK 0x00007f80 +#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT 7 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK 0x00000060 +#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT 5 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK; +} + +#define REG_A6XX_RB_BLIT_DST_LO 0x000088d8 + +#define REG_A6XX_RB_BLIT_DST_HI 0x000088d9 + +#define REG_A6XX_RB_BLIT_DST_PITCH 0x000088da +#define A6XX_RB_BLIT_DST_PITCH__MASK 0xffffffff +#define A6XX_RB_BLIT_DST_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_BLIT_DST_PITCH__SHIFT) & A6XX_RB_BLIT_DST_PITCH__MASK; +} + +#define REG_A6XX_RB_BLIT_DST_ARRAY_PITCH 0x000088db +#define A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_BLIT_FLAG_DST_LO 0x000088dc + +#define REG_A6XX_RB_BLIT_FLAG_DST_HI 0x000088dd + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0 0x000088df + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW1 0x000088e0 + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW2 0x000088e1 + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW3 0x000088e2 + +#define REG_A6XX_RB_BLIT_INFO 0x000088e3 +#define A6XX_RB_BLIT_INFO_UNK0 0x00000001 +#define A6XX_RB_BLIT_INFO_FAST_CLEAR 0x00000002 +#define A6XX_RB_BLIT_INFO_INTEGER 0x00000004 +#define A6XX_RB_BLIT_INFO_UNK3 0x00000008 +#define A6XX_RB_BLIT_INFO_MASK__MASK 0x000000f0 +#define A6XX_RB_BLIT_INFO_MASK__SHIFT 4 +static inline uint32_t A6XX_RB_BLIT_INFO_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_INFO_MASK__SHIFT) & A6XX_RB_BLIT_INFO_MASK__MASK; +} + +#define REG_A6XX_RB_UNKNOWN_88F0 0x000088f0 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x00008900 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x00008901 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x00008902 + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x00008903 + 0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x00008903 + 0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x00008904 + 0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x00008905 + 0x3*i0; } +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK 0x000007ff +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(uint32_t val) +{ + return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK; +} +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 +static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO 0x00008927 + +#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_HI 0x00008928 + +#define REG_A6XX_RB_2D_BLIT_CNTL 0x00008c00 +#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 +#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 +static inline uint32_t A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK; +} + +#define REG_A6XX_RB_2D_DST_INFO 0x00008c17 +#define A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_RB_2D_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_2D_DST_INFO_TILE_MODE__MASK; +} +#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; +} +#define A6XX_RB_2D_DST_INFO_FLAGS 0x00001000 + +#define REG_A6XX_RB_2D_DST_LO 0x00008c18 + +#define REG_A6XX_RB_2D_DST_HI 0x00008c19 + +#define REG_A6XX_RB_2D_DST_SIZE 0x00008c1a +#define A6XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff +#define A6XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_2D_DST_SIZE_PITCH(uint32_t val) +{ + return ((val >> 6) << A6XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A6XX_RB_2D_DST_SIZE_PITCH__MASK; +} + +#define REG_A6XX_RB_2D_DST_FLAGS_LO 0x00008c20 + +#define REG_A6XX_RB_2D_DST_FLAGS_HI 0x00008c21 + +#define REG_A6XX_RB_2D_SRC_SOLID_C0 0x00008c2c + +#define REG_A6XX_RB_2D_SRC_SOLID_C1 0x00008c2d + +#define REG_A6XX_RB_2D_SRC_SOLID_C2 0x00008c2e + +#define REG_A6XX_RB_2D_SRC_SOLID_C3 0x00008c2f + +#define REG_A6XX_RB_UNKNOWN_8E01 0x00008e01 + +#define REG_A6XX_RB_CCU_CNTL 0x00008e07 + +#define REG_A6XX_VPC_UNKNOWN_9101 0x00009101 + +#define REG_A6XX_VPC_GS_SIV_CNTL 0x00009104 + +#define REG_A6XX_VPC_UNKNOWN_9108 0x00009108 + +static inline uint32_t REG_A6XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00009200 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00009200 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00009208 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00009208 + 0x1*i0; } + +#define REG_A6XX_VPC_UNKNOWN_9210 0x00009210 + +#define REG_A6XX_VPC_UNKNOWN_9211 0x00009211 + +static inline uint32_t REG_A6XX_VPC_VAR(uint32_t i0) { return 0x00009212 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x00009212 + 0x1*i0; } + +#define REG_A6XX_VPC_SO_CNTL 0x00009216 +#define A6XX_VPC_SO_CNTL_ENABLE 0x00010000 + +#define REG_A6XX_VPC_SO_PROG 0x00009217 +#define A6XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 +#define A6XX_VPC_SO_PROG_A_BUF__SHIFT 0 +static inline uint32_t A6XX_VPC_SO_PROG_A_BUF(uint32_t val) +{ + return ((val) << A6XX_VPC_SO_PROG_A_BUF__SHIFT) & A6XX_VPC_SO_PROG_A_BUF__MASK; +} +#define A6XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc +#define A6XX_VPC_SO_PROG_A_OFF__SHIFT 2 +static inline uint32_t A6XX_VPC_SO_PROG_A_OFF(uint32_t val) +{ + return ((val >> 2) << A6XX_VPC_SO_PROG_A_OFF__SHIFT) & A6XX_VPC_SO_PROG_A_OFF__MASK; +} +#define A6XX_VPC_SO_PROG_A_EN 0x00000800 +#define A6XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 +#define A6XX_VPC_SO_PROG_B_BUF__SHIFT 12 +static inline uint32_t A6XX_VPC_SO_PROG_B_BUF(uint32_t val) +{ + return ((val) << A6XX_VPC_SO_PROG_B_BUF__SHIFT) & A6XX_VPC_SO_PROG_B_BUF__MASK; +} +#define A6XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 +#define A6XX_VPC_SO_PROG_B_OFF__SHIFT 14 +static inline uint32_t A6XX_VPC_SO_PROG_B_OFF(uint32_t val) +{ + return ((val >> 2) << A6XX_VPC_SO_PROG_B_OFF__SHIFT) & A6XX_VPC_SO_PROG_B_OFF__MASK; +} +#define A6XX_VPC_SO_PROG_B_EN 0x00800000 + +static inline uint32_t REG_A6XX_VPC_SO(uint32_t i0) { return 0x0000921a + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000921a + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000921b + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000921c + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000921d + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000921e + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000921f + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x00009220 + 0x7*i0; } + +#define REG_A6XX_VPC_UNKNOWN_9236 0x00009236 + +#define REG_A6XX_VPC_UNKNOWN_9300 0x00009300 + +#define REG_A6XX_VPC_PACK 0x00009301 +#define A6XX_VPC_PACK_STRIDE_IN_VPC__MASK 0x000000ff +#define A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A6XX_VPC_PACK_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT) & A6XX_VPC_PACK_STRIDE_IN_VPC__MASK; +} +#define A6XX_VPC_PACK_NUMNONPOSVAR__MASK 0x0000ff00 +#define A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT 8 +static inline uint32_t A6XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A6XX_VPC_PACK_NUMNONPOSVAR__MASK; +} +#define A6XX_VPC_PACK_PSIZELOC__MASK 0x00ff0000 +#define A6XX_VPC_PACK_PSIZELOC__SHIFT 16 +static inline uint32_t A6XX_VPC_PACK_PSIZELOC(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_PSIZELOC__SHIFT) & A6XX_VPC_PACK_PSIZELOC__MASK; +} + +#define REG_A6XX_VPC_CNTL_0 0x00009304 +#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK 0x000000ff +#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT 0 +static inline uint32_t A6XX_VPC_CNTL_0_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT) & A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK; +} +#define A6XX_VPC_CNTL_0_VARYING 0x00010000 + +#define REG_A6XX_VPC_SO_BUF_CNTL 0x00009305 +#define A6XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 +#define A6XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 +#define A6XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 +#define A6XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 +#define A6XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 + +#define REG_A6XX_VPC_UNKNOWN_9600 0x00009600 + +#define REG_A6XX_VPC_UNKNOWN_9602 0x00009602 + +#define REG_A6XX_PC_UNKNOWN_9801 0x00009801 + +#define REG_A6XX_PC_RESTART_INDEX 0x00009803 + +#define REG_A6XX_PC_MODE_CNTL 0x00009804 + +#define REG_A6XX_PC_UNKNOWN_9805 0x00009805 + +#define REG_A6XX_PC_UNKNOWN_9981 0x00009981 + +#define REG_A6XX_PC_PRIMITIVE_CNTL_0 0x00009b00 +#define A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART 0x00000001 +#define A6XX_PC_PRIMITIVE_CNTL_0_PROVOKING_VTX_LAST 0x00000002 + +#define REG_A6XX_PC_PRIMITIVE_CNTL_1 0x00009b01 +#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK 0x0000007f +#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT) & A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK; +} + +#define REG_A6XX_PC_UNKNOWN_9B06 0x00009b06 + +#define REG_A6XX_PC_UNKNOWN_9B07 0x00009b07 + +#define REG_A6XX_PC_TESSFACTOR_ADDR_LO 0x00009e08 + +#define REG_A6XX_PC_TESSFACTOR_ADDR_HI 0x00009e09 + +#define REG_A6XX_PC_UNKNOWN_9E72 0x00009e72 + +#define REG_A6XX_VFD_CONTROL_0 0x0000a000 +#define A6XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f +#define A6XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 +static inline uint32_t A6XX_VFD_CONTROL_0_VTXCNT(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A6XX_VFD_CONTROL_0_VTXCNT__MASK; +} + +#define REG_A6XX_VFD_CONTROL_1 0x0000a001 +#define A6XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff +#define A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A6XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A6XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 +#define A6XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A6XX_VFD_CONTROL_1_REGID4INST__MASK; +} +#define A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 +#define A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK; +} + +#define REG_A6XX_VFD_CONTROL_2 0x0000a002 +#define A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff +#define A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 +static inline uint32_t A6XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK; +} + +#define REG_A6XX_VFD_CONTROL_3 0x0000a003 +#define A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 +#define A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK; +} +#define A6XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 +#define A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSX__MASK; +} +#define A6XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 +#define A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSY__MASK; +} + +#define REG_A6XX_VFD_CONTROL_4 0x0000a004 + +#define REG_A6XX_VFD_CONTROL_5 0x0000a005 + +#define REG_A6XX_VFD_CONTROL_6 0x0000a006 + +#define REG_A6XX_VFD_MODE_CNTL 0x0000a007 +#define A6XX_VFD_MODE_CNTL_BINNING_PASS 0x00000001 + +#define REG_A6XX_VFD_UNKNOWN_A008 0x0000a008 + +#define REG_A6XX_VFD_INDEX_OFFSET 0x0000a00e + +#define REG_A6XX_VFD_INSTANCE_START_OFFSET 0x0000a00f + +static inline uint32_t REG_A6XX_VFD_FETCH(uint32_t i0) { return 0x0000a010 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000a010 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000a011 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000a012 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000a013 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_DECODE(uint32_t i0) { return 0x0000a090 + 0x2*i0; } + +static inline uint32_t REG_A6XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000a090 + 0x2*i0; } +#define A6XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f +#define A6XX_VFD_DECODE_INSTR_IDX__SHIFT 0 +static inline uint32_t A6XX_VFD_DECODE_INSTR_IDX(uint32_t val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_IDX__SHIFT) & A6XX_VFD_DECODE_INSTR_IDX__MASK; +} +#define A6XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 +#define A6XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 +#define A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 +static inline uint32_t A6XX_VFD_DECODE_INSTR_FORMAT(enum a6xx_vtx_fmt val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A6XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A6XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 +#define A6XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 +static inline uint32_t A6XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A6XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A6XX_VFD_DECODE_INSTR_UNK30 0x40000000 +#define A6XX_VFD_DECODE_INSTR_FLOAT 0x80000000 + +static inline uint32_t REG_A6XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000a091 + 0x2*i0; } + +static inline uint32_t REG_A6XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } +#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f +#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; +} +#define A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 +#define A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 +static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) +{ + return ((val) << A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK; +} + +#define REG_A6XX_SP_UNKNOWN_A0F8 0x0000a0f8 + +#define REG_A6XX_SP_PRIMITIVE_CNTL 0x0000a802 +#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f +#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 +static inline uint32_t A6XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) +{ + return ((val) << A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; +} + +static inline uint32_t REG_A6XX_SP_VS_OUT(uint32_t i0) { return 0x0000a803 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000a803 + 0x1*i0; } +#define A6XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff +#define A6XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A6XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 +#define A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 +static inline uint32_t A6XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A6XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 +#define A6XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A6XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 +#define A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 +static inline uint32_t A6XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A6XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000a813 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000a813 + 0x1*i0; } +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A6XX_SP_VS_CTRL_REG0 0x0000a800 +#define A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_VS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_VS_OBJ_START_LO 0x0000a81c + +#define REG_A6XX_SP_VS_OBJ_START_HI 0x0000a81d + +#define REG_A6XX_SP_VS_TEX_COUNT 0x0000a822 + +#define REG_A6XX_SP_VS_CONFIG 0x0000a823 +#define A6XX_SP_VS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_VS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_VS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_VS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CONFIG_NTEX__SHIFT) & A6XX_SP_VS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_VS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_VS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_VS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CONFIG_NSAMP__SHIFT) & A6XX_SP_VS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_VS_INSTRLEN 0x0000a824 + +#define REG_A6XX_SP_HS_CTRL_REG0 0x0000a830 +#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_HS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_HS_UNKNOWN_A831 0x0000a831 + +#define REG_A6XX_SP_HS_OBJ_START_LO 0x0000a834 + +#define REG_A6XX_SP_HS_OBJ_START_HI 0x0000a835 + +#define REG_A6XX_SP_HS_TEX_COUNT 0x0000a83a + +#define REG_A6XX_SP_HS_CONFIG 0x0000a83b +#define A6XX_SP_HS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_HS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_HS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_HS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CONFIG_NTEX__SHIFT) & A6XX_SP_HS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_HS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_HS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_HS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CONFIG_NSAMP__SHIFT) & A6XX_SP_HS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_HS_INSTRLEN 0x0000a83c + +#define REG_A6XX_SP_DS_CTRL_REG0 0x0000a840 +#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_DS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_DS_OBJ_START_LO 0x0000a85c + +#define REG_A6XX_SP_DS_OBJ_START_HI 0x0000a85d + +#define REG_A6XX_SP_DS_TEX_COUNT 0x0000a862 + +#define REG_A6XX_SP_DS_CONFIG 0x0000a863 +#define A6XX_SP_DS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_DS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_DS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_DS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CONFIG_NTEX__SHIFT) & A6XX_SP_DS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_DS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_DS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_DS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CONFIG_NSAMP__SHIFT) & A6XX_SP_DS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_DS_INSTRLEN 0x0000a864 + +#define REG_A6XX_SP_GS_CTRL_REG0 0x0000a870 +#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_GS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_GS_UNKNOWN_A871 0x0000a871 + +#define REG_A6XX_SP_GS_OBJ_START_LO 0x0000a88d + +#define REG_A6XX_SP_GS_OBJ_START_HI 0x0000a88e + +#define REG_A6XX_SP_GS_TEX_COUNT 0x0000a893 + +#define REG_A6XX_SP_GS_CONFIG 0x0000a894 +#define A6XX_SP_GS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_GS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_GS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_GS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CONFIG_NTEX__SHIFT) & A6XX_SP_GS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_GS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_GS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_GS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CONFIG_NSAMP__SHIFT) & A6XX_SP_GS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_GS_INSTRLEN 0x0000a895 + +#define REG_A6XX_SP_VS_TEX_SAMP_LO 0x0000a8a0 + +#define REG_A6XX_SP_VS_TEX_SAMP_HI 0x0000a8a1 + +#define REG_A6XX_SP_HS_TEX_SAMP_LO 0x0000a8a2 + +#define REG_A6XX_SP_HS_TEX_SAMP_HI 0x0000a8a3 + +#define REG_A6XX_SP_DS_TEX_SAMP_LO 0x0000a8a4 + +#define REG_A6XX_SP_DS_TEX_SAMP_HI 0x0000a8a5 + +#define REG_A6XX_SP_GS_TEX_SAMP_LO 0x0000a8a6 + +#define REG_A6XX_SP_GS_TEX_SAMP_HI 0x0000a8a7 + +#define REG_A6XX_SP_VS_TEX_CONST_LO 0x0000a8a8 + +#define REG_A6XX_SP_VS_TEX_CONST_HI 0x0000a8a9 + +#define REG_A6XX_SP_HS_TEX_CONST_LO 0x0000a8aa + +#define REG_A6XX_SP_HS_TEX_CONST_HI 0x0000a8ab + +#define REG_A6XX_SP_DS_TEX_CONST_LO 0x0000a8ac + +#define REG_A6XX_SP_DS_TEX_CONST_HI 0x0000a8ad + +#define REG_A6XX_SP_GS_TEX_CONST_LO 0x0000a8ae + +#define REG_A6XX_SP_GS_TEX_CONST_HI 0x0000a8af + +#define REG_A6XX_SP_FS_CTRL_REG0 0x0000a980 +#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_FS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_FS_OBJ_START_LO 0x0000a983 + +#define REG_A6XX_SP_FS_OBJ_START_HI 0x0000a984 + +#define REG_A6XX_SP_BLEND_CNTL 0x0000a989 +#define A6XX_SP_BLEND_CNTL_ENABLED 0x00000001 +#define A6XX_SP_BLEND_CNTL_UNK8 0x00000100 + +#define REG_A6XX_SP_SRGB_CNTL 0x0000a98a +#define A6XX_SP_SRGB_CNTL_SRGB_MRT0 0x00000001 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT1 0x00000002 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT2 0x00000004 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT3 0x00000008 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT4 0x00000010 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT5 0x00000020 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT6 0x00000040 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT7 0x00000080 + +#define REG_A6XX_SP_FS_RENDER_COMPONENTS 0x0000a98b +#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK; +} + +#define REG_A6XX_SP_FS_OUTPUT_CNTL0 0x0000a98c +#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK 0x0000ff00 +#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT 8 +static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK; +} + +#define REG_A6XX_SP_FS_OUTPUT_CNTL1 0x0000a98d +#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f +#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT 0 +static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL1_MRT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK; +} + +static inline uint32_t REG_A6XX_SP_FS_MRT(uint32_t i0) { return 0x0000a996 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000a996 + 0x1*i0; } +#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; +} +#define A6XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 +#define A6XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 +#define A6XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 + +#define REG_A6XX_SP_FS_TEX_COUNT 0x0000a9a7 + +#define REG_A6XX_SP_UNKNOWN_A9A8 0x0000a9a8 + +#define REG_A6XX_SP_FS_TEX_SAMP_LO 0x0000a9e0 + +#define REG_A6XX_SP_FS_TEX_SAMP_HI 0x0000a9e1 + +#define REG_A6XX_SP_CS_TEX_SAMP_LO 0x0000a9e2 + +#define REG_A6XX_SP_CS_TEX_SAMP_HI 0x0000a9e3 + +#define REG_A6XX_SP_FS_TEX_CONST_LO 0x0000a9e4 + +#define REG_A6XX_SP_FS_TEX_CONST_HI 0x0000a9e5 + +#define REG_A6XX_SP_CS_TEX_CONST_LO 0x0000a9e6 + +#define REG_A6XX_SP_CS_TEX_CONST_HI 0x0000a9e7 + +static inline uint32_t REG_A6XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000a98e + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000a98e + 0x1*i0; } +#define A6XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff +#define A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 +static inline uint32_t A6XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_REG_REGID__MASK; +} +#define A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 + +#define REG_A6XX_SP_CS_CTRL_REG0 0x0000a9b0 +#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_CS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_CS_OBJ_START_LO 0x0000a9b4 + +#define REG_A6XX_SP_CS_OBJ_START_HI 0x0000a9b5 + +#define REG_A6XX_SP_CS_INSTRLEN 0x0000a9bc + +#define REG_A6XX_SP_UNKNOWN_AB00 0x0000ab00 + +#define REG_A6XX_SP_FS_CONFIG 0x0000ab04 +#define A6XX_SP_FS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_FS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_FS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_FS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CONFIG_NTEX__SHIFT) & A6XX_SP_FS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_FS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_FS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_FS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CONFIG_NSAMP__SHIFT) & A6XX_SP_FS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_FS_INSTRLEN 0x0000ab05 + +#define REG_A6XX_SP_UNKNOWN_AE00 0x0000ae00 + +#define REG_A6XX_SP_UNKNOWN_AE04 0x0000ae04 + +#define REG_A6XX_SP_UNKNOWN_AE0F 0x0000ae0f + +#define REG_A6XX_SP_UNKNOWN_B182 0x0000b182 + +#define REG_A6XX_SP_TP_RAS_MSAA_CNTL 0x0000b300 +#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_SP_TP_DEST_MSAA_CNTL 0x0000b301 +#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000b302 + +#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000b303 + +#define REG_A6XX_SP_TP_UNKNOWN_B304 0x0000b304 + +#define REG_A6XX_SP_PS_2D_SRC_INFO 0x0000b4c0 +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_FLAGS 0x00001000 + +#define REG_A6XX_SP_PS_2D_SRC_LO 0x0000b4c2 + +#define REG_A6XX_SP_PS_2D_SRC_HI 0x0000b4c3 + +#define REG_A6XX_SP_PS_2D_SRC_FLAGS_LO 0x0000b4ca + +#define REG_A6XX_SP_PS_2D_SRC_FLAGS_HI 0x0000b4cb + +#define REG_A6XX_SP_UNKNOWN_B600 0x0000b600 + +#define REG_A6XX_SP_UNKNOWN_B605 0x0000b605 + +#define REG_A6XX_HLSQ_VS_CNTL 0x0000b800 +#define A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_VS_CNTL_CONSTLEN(uint32_t val) +{ + return ((val >> 2) << A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_HS_CNTL 0x0000b801 +#define A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_HS_CNTL_CONSTLEN(uint32_t val) +{ + return ((val >> 2) << A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_DS_CNTL 0x0000b802 +#define A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_DS_CNTL_CONSTLEN(uint32_t val) +{ + return ((val >> 2) << A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_GS_CNTL 0x0000b803 +#define A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_GS_CNTL_CONSTLEN(uint32_t val) +{ + return ((val >> 2) << A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_1_REG 0x0000b982 + +#define REG_A6XX_HLSQ_CONTROL_2_REG 0x0000b983 +#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff +#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; +} +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; +} +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_3_REG 0x0000b984 +#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff +#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_4_REG 0x0000b985 +#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 +#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; +} +#define A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 +#define A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_5_REG 0x0000b986 + +#define REG_A6XX_HLSQ_CS_NDRANGE_0 0x0000b990 +#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_1 0x0000b991 +#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_2 0x0000b992 +#define A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_3 0x0000b993 +#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_4 0x0000b994 +#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_5 0x0000b995 +#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_6 0x0000b996 +#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; +} + +#define REG_A6XX_HLSQ_CS_CNTL_0 0x0000b997 +#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff +#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 +#define A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A6XX_HLSQ_CS_CNTL_0_UNK0__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 +#define A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A6XX_HLSQ_CS_CNTL_0_UNK1__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 +#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; +} + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_X 0x0000b999 + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000b99a + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000b99b + +#define REG_A6XX_HLSQ_UPDATE_CNTL 0x0000bb08 + +#define REG_A6XX_HLSQ_FS_CNTL 0x0000bb10 +#define A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_FS_CNTL_CONSTLEN(uint32_t val) +{ + return ((val >> 2) << A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_UNKNOWN_BB11 0x0000bb11 + +#define REG_A6XX_HLSQ_UNKNOWN_BE00 0x0000be00 + +#define REG_A6XX_HLSQ_UNKNOWN_BE01 0x0000be01 + +#define REG_A6XX_HLSQ_UNKNOWN_BE04 0x0000be04 + +#define REG_A6XX_TEX_SAMP_0 0x00000000 +#define A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 +#define A6XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 +#define A6XX_TEX_SAMP_0_XY_MAG__SHIFT 1 +static inline uint32_t A6XX_TEX_SAMP_0_XY_MAG(enum a6xx_tex_filter val) +{ + return ((val) << A6XX_TEX_SAMP_0_XY_MAG__SHIFT) & A6XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A6XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 +#define A6XX_TEX_SAMP_0_XY_MIN__SHIFT 3 +static inline uint32_t A6XX_TEX_SAMP_0_XY_MIN(enum a6xx_tex_filter val) +{ + return ((val) << A6XX_TEX_SAMP_0_XY_MIN__SHIFT) & A6XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 +#define A6XX_TEX_SAMP_0_WRAP_S__SHIFT 5 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_S(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_S__SHIFT) & A6XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 +#define A6XX_TEX_SAMP_0_WRAP_T__SHIFT 8 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_T(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_T__SHIFT) & A6XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 +#define A6XX_TEX_SAMP_0_WRAP_R__SHIFT 11 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_R(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_R__SHIFT) & A6XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A6XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 +#define A6XX_TEX_SAMP_0_ANISO__SHIFT 14 +static inline uint32_t A6XX_TEX_SAMP_0_ANISO(enum a6xx_tex_aniso val) +{ + return ((val) << A6XX_TEX_SAMP_0_ANISO__SHIFT) & A6XX_TEX_SAMP_0_ANISO__MASK; +} +#define A6XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A6XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A6XX_TEX_SAMP_0_LOD_BIAS__MASK; +} + +#define REG_A6XX_TEX_SAMP_1 0x00000001 +#define A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e +#define A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 +static inline uint32_t A6XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK; +} +#define A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 +#define A6XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 +#define A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 +#define A6XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 +#define A6XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 +static inline uint32_t A6XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A6XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A6XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 +#define A6XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 +static inline uint32_t A6XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A6XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A6XX_TEX_SAMP_2 0x00000002 +#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 +#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 +static inline uint32_t A6XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) +{ + return ((val) << A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; +} + +#define REG_A6XX_TEX_SAMP_3 0x00000003 + +#define REG_A6XX_TEX_CONST_0 0x00000000 +#define A6XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 +#define A6XX_TEX_CONST_0_TILE_MODE__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_0_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_TEX_CONST_0_TILE_MODE__SHIFT) & A6XX_TEX_CONST_0_TILE_MODE__MASK; +} +#define A6XX_TEX_CONST_0_SRGB 0x00000004 +#define A6XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A6XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_X(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_X__SHIFT) & A6XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A6XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A6XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Y(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A6XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A6XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Z(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A6XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A6XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_W(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_W__SHIFT) & A6XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A6XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A6XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A6XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_0_MIPLVLS__SHIFT) & A6XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A6XX_TEX_CONST_0_FMT__MASK 0x3fc00000 +#define A6XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A6XX_TEX_CONST_0_FMT(enum a6xx_tex_fmt val) +{ + return ((val) << A6XX_TEX_CONST_0_FMT__SHIFT) & A6XX_TEX_CONST_0_FMT__MASK; +} +#define A6XX_TEX_CONST_0_SWAP__MASK 0xc0000000 +#define A6XX_TEX_CONST_0_SWAP__SHIFT 30 +static inline uint32_t A6XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_TEX_CONST_0_SWAP__SHIFT) & A6XX_TEX_CONST_0_SWAP__MASK; +} + +#define REG_A6XX_TEX_CONST_1 0x00000001 +#define A6XX_TEX_CONST_1_WIDTH__MASK 0x00007fff +#define A6XX_TEX_CONST_1_WIDTH__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_1_WIDTH__SHIFT) & A6XX_TEX_CONST_1_WIDTH__MASK; +} +#define A6XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 +#define A6XX_TEX_CONST_1_HEIGHT__SHIFT 15 +static inline uint32_t A6XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_1_HEIGHT__SHIFT) & A6XX_TEX_CONST_1_HEIGHT__MASK; +} + +#define REG_A6XX_TEX_CONST_2 0x00000002 +#define A6XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f +#define A6XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_2_FETCHSIZE(enum a6xx_tex_fetchsize val) +{ + return ((val) << A6XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A6XX_TEX_CONST_2_FETCHSIZE__MASK; +} +#define A6XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 +#define A6XX_TEX_CONST_2_PITCH__SHIFT 7 +static inline uint32_t A6XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_2_PITCH__SHIFT) & A6XX_TEX_CONST_2_PITCH__MASK; +} +#define A6XX_TEX_CONST_2_TYPE__MASK 0x60000000 +#define A6XX_TEX_CONST_2_TYPE__SHIFT 29 +static inline uint32_t A6XX_TEX_CONST_2_TYPE(enum a6xx_tex_type val) +{ + return ((val) << A6XX_TEX_CONST_2_TYPE__SHIFT) & A6XX_TEX_CONST_2_TYPE__MASK; +} + +#define REG_A6XX_TEX_CONST_3 0x00000003 +#define A6XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff +#define A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) +{ + return ((val >> 12) << A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A6XX_TEX_CONST_3_ARRAY_PITCH__MASK; +} +#define A6XX_TEX_CONST_3_FLAG 0x10000000 + +#define REG_A6XX_TEX_CONST_4 0x00000004 +#define A6XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 +#define A6XX_TEX_CONST_4_BASE_LO__SHIFT 5 +static inline uint32_t A6XX_TEX_CONST_4_BASE_LO(uint32_t val) +{ + return ((val >> 5) << A6XX_TEX_CONST_4_BASE_LO__SHIFT) & A6XX_TEX_CONST_4_BASE_LO__MASK; +} + +#define REG_A6XX_TEX_CONST_5 0x00000005 +#define A6XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff +#define A6XX_TEX_CONST_5_BASE_HI__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_5_BASE_HI(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_5_BASE_HI__SHIFT) & A6XX_TEX_CONST_5_BASE_HI__MASK; +} +#define A6XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 +#define A6XX_TEX_CONST_5_DEPTH__SHIFT 17 +static inline uint32_t A6XX_TEX_CONST_5_DEPTH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_5_DEPTH__SHIFT) & A6XX_TEX_CONST_5_DEPTH__MASK; +} + +#define REG_A6XX_TEX_CONST_6 0x00000006 + +#define REG_A6XX_TEX_CONST_7 0x00000007 +#define A6XX_TEX_CONST_7_FLAG_LO__MASK 0xffffffe0 +#define A6XX_TEX_CONST_7_FLAG_LO__SHIFT 5 +static inline uint32_t A6XX_TEX_CONST_7_FLAG_LO(uint32_t val) +{ + return ((val >> 5) << A6XX_TEX_CONST_7_FLAG_LO__SHIFT) & A6XX_TEX_CONST_7_FLAG_LO__MASK; +} + +#define REG_A6XX_TEX_CONST_8 0x00000008 +#define A6XX_TEX_CONST_8_BASE_HI__MASK 0x0001ffff +#define A6XX_TEX_CONST_8_BASE_HI__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_8_BASE_HI(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_8_BASE_HI__SHIFT) & A6XX_TEX_CONST_8_BASE_HI__MASK; +} + +#define REG_A6XX_TEX_CONST_9 0x00000009 + +#define REG_A6XX_TEX_CONST_10 0x0000000a + +#define REG_A6XX_TEX_CONST_11 0x0000000b + +#define REG_A6XX_TEX_CONST_12 0x0000000c + +#define REG_A6XX_TEX_CONST_13 0x0000000d + +#define REG_A6XX_TEX_CONST_14 0x0000000e + +#define REG_A6XX_TEX_CONST_15 0x0000000f + + +#endif /* A6XX_XML */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c new file mode 100644 index 000000000000..bbb8126ec5c5 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -0,0 +1,1207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */ + +#include <linux/clk.h> +#include <linux/iopoll.h> +#include <linux/pm_opp.h> +#include <soc/qcom/cmd-db.h> + +#include "a6xx_gpu.h" +#include "a6xx_gmu.xml.h" + +static irqreturn_t a6xx_gmu_irq(int irq, void *data) +{ + struct a6xx_gmu *gmu = data; + u32 status; + + status = gmu_read(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_STATUS); + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, status); + + if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_WDOG_BITE) { + dev_err_ratelimited(gmu->dev, "GMU watchdog expired\n"); + + /* Temporary until we can recover safely */ + BUG(); + } + + if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR) + dev_err_ratelimited(gmu->dev, "GMU AHB bus error\n"); + + if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR) + dev_err_ratelimited(gmu->dev, "GMU fence error: 0x%x\n", + gmu_read(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS)); + + return IRQ_HANDLED; +} + +static irqreturn_t a6xx_hfi_irq(int irq, void *data) +{ + struct a6xx_gmu *gmu = data; + u32 status; + + status = gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, status); + + if (status & A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ) + tasklet_schedule(&gmu->hfi_tasklet); + + if (status & A6XX_GMU_GMU2HOST_INTR_INFO_CM3_FAULT) { + dev_err_ratelimited(gmu->dev, "GMU firmware fault\n"); + + /* Temporary until we can recover safely */ + BUG(); + } + + return IRQ_HANDLED; +} + +/* Check to see if the GX rail is still powered */ +static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) +{ + u32 val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); + + return !(val & + (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | + A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); +} + +static int a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) +{ + gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0); + + gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING, + ((index << 24) & 0xff) | (3 & 0xf)); + + /* + * Send an invalid index as a vote for the bus bandwidth and let the + * firmware decide on the right vote + */ + gmu_write(gmu, REG_A6XX_GMU_DCVS_BW_SETTING, 0xff); + + /* Set and clear the OOB for DCVS to trigger the GMU */ + a6xx_gmu_set_oob(gmu, GMU_OOB_DCVS_SET); + a6xx_gmu_clear_oob(gmu, GMU_OOB_DCVS_SET); + + return gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN); +} + +static bool a6xx_gmu_check_idle_level(struct a6xx_gmu *gmu) +{ + u32 val; + int local = gmu->idle_level; + + /* SPTP and IFPC both report as IFPC */ + if (gmu->idle_level == GMU_IDLE_STATE_SPTP) + local = GMU_IDLE_STATE_IFPC; + + val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE); + + if (val == local) { + if (gmu->idle_level != GMU_IDLE_STATE_IFPC || + !a6xx_gmu_gx_is_on(gmu)) + return true; + } + + return false; +} + +/* Wait for the GMU to get to its most idle state */ +int a6xx_gmu_wait_for_idle(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + + return spin_until(a6xx_gmu_check_idle_level(gmu)); +} + +static int a6xx_gmu_start(struct a6xx_gmu *gmu) +{ + int ret; + u32 val; + + gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1); + gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 0); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, val, + val == 0xbabeface, 100, 10000); + + if (ret) + dev_err(gmu->dev, "GMU firmware initialization timed out\n"); + + return ret; +} + +static int a6xx_gmu_hfi_start(struct a6xx_gmu *gmu) +{ + u32 val; + int ret; + + gmu_rmw(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, + A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ, 0); + + gmu_write(gmu, REG_A6XX_GMU_HFI_CTRL_INIT, 1); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_HFI_CTRL_STATUS, val, + val & 1, 100, 10000); + if (ret) + dev_err(gmu->dev, "Unable to start the HFI queues\n"); + + return ret; +} + +/* Trigger a OOB (out of band) request to the GMU */ +int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) +{ + int ret; + u32 val; + int request, ack; + const char *name; + + switch (state) { + case GMU_OOB_GPU_SET: + request = GMU_OOB_GPU_SET_REQUEST; + ack = GMU_OOB_GPU_SET_ACK; + name = "GPU_SET"; + break; + case GMU_OOB_BOOT_SLUMBER: + request = GMU_OOB_BOOT_SLUMBER_REQUEST; + ack = GMU_OOB_BOOT_SLUMBER_ACK; + name = "BOOT_SLUMBER"; + break; + case GMU_OOB_DCVS_SET: + request = GMU_OOB_DCVS_REQUEST; + ack = GMU_OOB_DCVS_ACK; + name = "GPU_DCVS"; + break; + default: + return -EINVAL; + } + + /* Trigger the equested OOB operation */ + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << request); + + /* Wait for the acknowledge interrupt */ + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO, val, + val & (1 << ack), 100, 10000); + + if (ret) + dev_err(gmu->dev, + "Timeout waiting for GMU OOB set %s: 0x%x\n", + name, + gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO)); + + /* Clear the acknowledge interrupt */ + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, 1 << ack); + + return ret; +} + +/* Clear a pending OOB state in the GMU */ +void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) +{ + switch (state) { + case GMU_OOB_GPU_SET: + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_GPU_SET_CLEAR); + break; + case GMU_OOB_BOOT_SLUMBER: + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_BOOT_SLUMBER_CLEAR); + break; + case GMU_OOB_DCVS_SET: + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_DCVS_CLEAR); + break; + } +} + +/* Enable CPU control of SPTP power power collapse */ +static int a6xx_sptprac_enable(struct a6xx_gmu *gmu) +{ + int ret; + u32 val; + + gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778000); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, val, + (val & 0x38) == 0x28, 1, 100); + + if (ret) { + dev_err(gmu->dev, "Unable to power on SPTPRAC: 0x%x\n", + gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS)); + } + + return 0; +} + +/* Disable CPU control of SPTP power power collapse */ +static void a6xx_sptprac_disable(struct a6xx_gmu *gmu) +{ + u32 val; + int ret; + + /* Make sure retention is on */ + gmu_rmw(gmu, REG_A6XX_GPU_CC_GX_GDSCR, 0, (1 << 11)); + + gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778001); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, val, + (val & 0x04), 100, 10000); + + if (ret) + dev_err(gmu->dev, "failed to power off SPTPRAC: 0x%x\n", + gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS)); +} + +/* Let the GMU know we are starting a boot sequence */ +static int a6xx_gmu_gfx_rail_on(struct a6xx_gmu *gmu) +{ + u32 vote; + + /* Let the GMU know we are getting ready for boot */ + gmu_write(gmu, REG_A6XX_GMU_BOOT_SLUMBER_OPTION, 0); + + /* Choose the "default" power level as the highest available */ + vote = gmu->gx_arc_votes[gmu->nr_gpu_freqs - 1]; + + gmu_write(gmu, REG_A6XX_GMU_GX_VOTE_IDX, vote & 0xff); + gmu_write(gmu, REG_A6XX_GMU_MX_VOTE_IDX, (vote >> 8) & 0xff); + + /* Let the GMU know the boot sequence has started */ + return a6xx_gmu_set_oob(gmu, GMU_OOB_BOOT_SLUMBER); +} + +/* Let the GMU know that we are about to go into slumber */ +static int a6xx_gmu_notify_slumber(struct a6xx_gmu *gmu) +{ + int ret; + + /* Disable the power counter so the GMU isn't busy */ + gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0); + + /* Disable SPTP_PC if the CPU is responsible for it */ + if (gmu->idle_level < GMU_IDLE_STATE_SPTP) + a6xx_sptprac_disable(gmu); + + /* Tell the GMU to get ready to slumber */ + gmu_write(gmu, REG_A6XX_GMU_BOOT_SLUMBER_OPTION, 1); + + ret = a6xx_gmu_set_oob(gmu, GMU_OOB_BOOT_SLUMBER); + a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER); + + if (!ret) { + /* Check to see if the GMU really did slumber */ + if (gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE) + != 0x0f) { + dev_err(gmu->dev, "The GMU did not go into slumber\n"); + ret = -ETIMEDOUT; + } + } + + /* Put fence into allow mode */ + gmu_write(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); + return ret; +} + +static int a6xx_rpmh_start(struct a6xx_gmu *gmu) +{ + int ret; + u32 val; + + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1 << 1); + /* Wait for the register to finish posting */ + wmb(); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val, + val & (1 << 1), 100, 10000); + if (ret) { + dev_err(gmu->dev, "Unable to power on the GPU RSC\n"); + return ret; + } + + ret = gmu_poll_timeout(gmu, REG_A6XX_RSCC_SEQ_BUSY_DRV0, val, + !val, 100, 10000); + + if (!ret) { + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0); + + /* Re-enable the power counter */ + gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1); + return 0; + } + + dev_err(gmu->dev, "GPU RSC sequence stuck while waking up the GPU\n"); + return ret; +} + +static void a6xx_rpmh_stop(struct a6xx_gmu *gmu) +{ + int ret; + u32 val; + + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1); + + ret = gmu_poll_timeout(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0, + val, val & (1 << 16), 100, 10000); + if (ret) + dev_err(gmu->dev, "Unable to power off the GPU RSC\n"); + + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0); +} + +static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) +{ + /* Disable SDE clock gating */ + gmu_write(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24)); + + /* Setup RSC PDC handshake for sleep and wakeup */ + gmu_write(gmu, REG_A6XX_RSCC_PDC_SLAVE_ID_DRV0, 1); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA, 0); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR, 0); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA + 2, 0); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR + 2, 0); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA + 4, 0x80000000); + gmu_write(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR + 4, 0); + gmu_write(gmu, REG_A6XX_RSCC_OVERRIDE_START_ADDR, 0); + gmu_write(gmu, REG_A6XX_RSCC_PDC_SEQ_START_ADDR, 0x4520); + gmu_write(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_LO, 0x4510); + gmu_write(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_HI, 0x4514); + + /* Load RSC sequencer uCode for sleep and wakeup */ + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0, 0xa7a506a0); + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 1, 0xa1e6a6e7); + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 2, 0xa2e081e1); + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 3, 0xe9a982e2); + gmu_write(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 4, 0x0020e8a8); + + /* Load PDC sequencer uCode for power up and power down sequence */ + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0, 0xfebea1e1); + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 1, 0xa5a4a3a2); + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 2, 0x8382a6e0); + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 3, 0xbce3e284); + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 4, 0x002081fc); + + /* Set TCS commands used by PDC sequence for low power modes */ + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD_ENABLE_BANK, 7); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CONTROL, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR, 0x30010); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA, 1); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 4, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 4, 0x30000); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 4, 0x0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 8, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, 0x30080); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 8, 0x0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK, 7); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CONTROL, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR, 0x30010); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA, 2); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 4, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 4, 0x30000); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x3); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 8, 0x10108); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, 0x30080); + pdc_write(gmu, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 8, 0x3); + + /* Setup GPU PDC */ + pdc_write(gmu, REG_A6XX_PDC_GPU_SEQ_START_ADDR, 0); + pdc_write(gmu, REG_A6XX_PDC_GPU_ENABLE_PDC, 0x80000001); + + /* ensure no writes happen before the uCode is fully written */ + wmb(); +} + +/* + * The lowest 16 bits of this value are the number of XO clock cycles for main + * hysteresis which is set at 0x1680 cycles (300 us). The higher 16 bits are + * for the shorter hysteresis that happens after main - this is 0xa (.5 us) + */ + +#define GMU_PWR_COL_HYST 0x000a1680 + +/* Set up the idle state for the GMU */ +static void a6xx_gmu_power_config(struct a6xx_gmu *gmu) +{ + /* Disable GMU WB/RB buffer */ + gmu_write(gmu, REG_A6XX_GMU_SYS_BUS_CONFIG, 0x1); + + gmu_write(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0x9c40400); + + switch (gmu->idle_level) { + case GMU_IDLE_STATE_IFPC: + gmu_write(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_HYST, + GMU_PWR_COL_HYST); + gmu_rmw(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0, + A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_IFPC_ENABLE | + A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_HM_POWER_COLLAPSE_ENABLE); + /* Fall through */ + case GMU_IDLE_STATE_SPTP: + gmu_write(gmu, REG_A6XX_GMU_PWR_COL_SPTPRAC_HYST, + GMU_PWR_COL_HYST); + gmu_rmw(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0, + A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_IFPC_ENABLE | + A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_SPTPRAC_POWER_CONTROL_ENABLE); + } + + /* Enable RPMh GPU client */ + gmu_rmw(gmu, REG_A6XX_GMU_RPMH_CTRL, 0, + A6XX_GMU_RPMH_CTRL_RPMH_INTERFACE_ENABLE | + A6XX_GMU_RPMH_CTRL_LLC_VOTE_ENABLE | + A6XX_GMU_RPMH_CTRL_DDR_VOTE_ENABLE | + A6XX_GMU_RPMH_CTRL_MX_VOTE_ENABLE | + A6XX_GMU_RPMH_CTRL_CX_VOTE_ENABLE | + A6XX_GMU_RPMH_CTRL_GFX_VOTE_ENABLE); +} + +static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) +{ + static bool rpmh_init; + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + int i, ret; + u32 chipid; + u32 *image; + + if (state == GMU_WARM_BOOT) { + ret = a6xx_rpmh_start(gmu); + if (ret) + return ret; + } else { + if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU], + "GMU firmware is not loaded\n")) + return -ENOENT; + + /* Sanity check the size of the firmware that was loaded */ + if (adreno_gpu->fw[ADRENO_FW_GMU]->size > 0x8000) { + dev_err(gmu->dev, + "GMU firmware is bigger than the available region\n"); + return -EINVAL; + } + + /* Turn on register retention */ + gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1); + + /* We only need to load the RPMh microcode once */ + if (!rpmh_init) { + a6xx_gmu_rpmh_init(gmu); + rpmh_init = true; + } else if (state != GMU_RESET) { + ret = a6xx_rpmh_start(gmu); + if (ret) + return ret; + } + + image = (u32 *) adreno_gpu->fw[ADRENO_FW_GMU]->data; + + for (i = 0; i < adreno_gpu->fw[ADRENO_FW_GMU]->size >> 2; i++) + gmu_write(gmu, REG_A6XX_GMU_CM3_ITCM_START + i, + image[i]); + } + + gmu_write(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, 0); + gmu_write(gmu, REG_A6XX_GMU_CM3_BOOT_CONFIG, 0x02); + + /* Write the iova of the HFI table */ + gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_ADDR, gmu->hfi->iova); + gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_INFO, 1); + + gmu_write(gmu, REG_A6XX_GMU_AHB_FENCE_RANGE_0, + (1 << 31) | (0xa << 18) | (0xa0)); + + chipid = adreno_gpu->rev.core << 24; + chipid |= adreno_gpu->rev.major << 16; + chipid |= adreno_gpu->rev.minor << 12; + chipid |= adreno_gpu->rev.patchid << 8; + + gmu_write(gmu, REG_A6XX_GMU_HFI_SFR_ADDR, chipid); + + /* Set up the lowest idle level on the GMU */ + a6xx_gmu_power_config(gmu); + + ret = a6xx_gmu_start(gmu); + if (ret) + return ret; + + ret = a6xx_gmu_gfx_rail_on(gmu); + if (ret) + return ret; + + /* Enable SPTP_PC if the CPU is responsible for it */ + if (gmu->idle_level < GMU_IDLE_STATE_SPTP) { + ret = a6xx_sptprac_enable(gmu); + if (ret) + return ret; + } + + ret = a6xx_gmu_hfi_start(gmu); + if (ret) + return ret; + + /* FIXME: Do we need this wmb() here? */ + wmb(); + + return 0; +} + +#define A6XX_HFI_IRQ_MASK \ + (A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ | \ + A6XX_GMU_GMU2HOST_INTR_INFO_CM3_FAULT) + +#define A6XX_GMU_IRQ_MASK \ + (A6XX_GMU_AO_HOST_INTERRUPT_STATUS_WDOG_BITE | \ + A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR | \ + A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR) + +static void a6xx_gmu_irq_enable(struct a6xx_gmu *gmu) +{ + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, ~0); + + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, + ~A6XX_GMU_IRQ_MASK); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, + ~A6XX_HFI_IRQ_MASK); + + enable_irq(gmu->gmu_irq); + enable_irq(gmu->hfi_irq); +} + +static void a6xx_gmu_irq_disable(struct a6xx_gmu *gmu) +{ + disable_irq(gmu->gmu_irq); + disable_irq(gmu->hfi_irq); + + gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, ~0); + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~0); +} + +int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + int ret; + u32 val; + + /* Flush all the queues */ + a6xx_hfi_stop(gmu); + + /* Stop the interrupts */ + a6xx_gmu_irq_disable(gmu); + + /* Force off SPTP in case the GMU is managing it */ + a6xx_sptprac_disable(gmu); + + /* Make sure there are no outstanding RPMh votes */ + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS0_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS1_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS2_DRV0_STATUS, val, + (val & 1), 100, 10000); + gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS, val, + (val & 1), 100, 1000); + + /* Force off the GX GSDC */ + regulator_force_disable(gmu->gx); + + /* Disable the resources */ + clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); + pm_runtime_put_sync(gmu->dev); + + /* Re-enable the resources */ + pm_runtime_get_sync(gmu->dev); + + /* Use a known rate to bring up the GMU */ + clk_set_rate(gmu->core_clk, 200000000); + ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); + if (ret) + goto out; + + a6xx_gmu_irq_enable(gmu); + + ret = a6xx_gmu_fw_start(gmu, GMU_RESET); + if (!ret) + ret = a6xx_hfi_start(gmu, GMU_COLD_BOOT); + + /* Set the GPU back to the highest power frequency */ + a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1); + +out: + if (ret) + a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER); + + return ret; +} + +int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + int status, ret; + + if (WARN(!gmu->mmio, "The GMU is not set up yet\n")) + return 0; + + /* Turn on the resources */ + pm_runtime_get_sync(gmu->dev); + + /* Use a known rate to bring up the GMU */ + clk_set_rate(gmu->core_clk, 200000000); + ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); + if (ret) + goto out; + + a6xx_gmu_irq_enable(gmu); + + /* Check to see if we are doing a cold or warm boot */ + status = gmu_read(gmu, REG_A6XX_GMU_GENERAL_7) == 1 ? + GMU_WARM_BOOT : GMU_COLD_BOOT; + + ret = a6xx_gmu_fw_start(gmu, status); + if (ret) + goto out; + + ret = a6xx_hfi_start(gmu, status); + + /* Set the GPU to the highest power frequency */ + a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1); + +out: + /* Make sure to turn off the boot OOB request on error */ + if (ret) + a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER); + + return ret; +} + +bool a6xx_gmu_isidle(struct a6xx_gmu *gmu) +{ + u32 reg; + + if (!gmu->mmio) + return true; + + reg = gmu_read(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS); + + if (reg & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB) + return false; + + return true; +} + +int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 val; + + /* + * The GMU may still be in slumber unless the GPU started so check and + * skip putting it back into slumber if so + */ + val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE); + + if (val != 0xf) { + int ret = a6xx_gmu_wait_for_idle(a6xx_gpu); + + /* Temporary until we can recover safely */ + BUG_ON(ret); + + /* tell the GMU we want to slumber */ + a6xx_gmu_notify_slumber(gmu); + + ret = gmu_poll_timeout(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val, + !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB), + 100, 10000); + + /* + * Let the user know we failed to slumber but don't worry too + * much because we are powering down anyway + */ + + if (ret) + dev_err(gmu->dev, + "Unable to slumber GMU: status = 0%x/0%x\n", + gmu_read(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS), + gmu_read(gmu, + REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2)); + } + + /* Turn off HFI */ + a6xx_hfi_stop(gmu); + + /* Stop the interrupts and mask the hardware */ + a6xx_gmu_irq_disable(gmu); + + /* Tell RPMh to power off the GPU */ + a6xx_rpmh_stop(gmu); + + clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); + + pm_runtime_put_sync(gmu->dev); + + return 0; +} + +static void a6xx_gmu_memory_free(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo) +{ + int count, i; + u64 iova; + + if (IS_ERR_OR_NULL(bo)) + return; + + count = bo->size >> PAGE_SHIFT; + iova = bo->iova; + + for (i = 0; i < count; i++, iova += PAGE_SIZE) { + iommu_unmap(gmu->domain, iova, PAGE_SIZE); + __free_pages(bo->pages[i], 0); + } + + kfree(bo->pages); + kfree(bo); +} + +static struct a6xx_gmu_bo *a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, + size_t size) +{ + struct a6xx_gmu_bo *bo; + int ret, count, i; + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + bo->size = PAGE_ALIGN(size); + + count = bo->size >> PAGE_SHIFT; + + bo->pages = kcalloc(count, sizeof(struct page *), GFP_KERNEL); + if (!bo->pages) { + kfree(bo); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < count; i++) { + bo->pages[i] = alloc_page(GFP_KERNEL); + if (!bo->pages[i]) + goto err; + } + + bo->iova = gmu->uncached_iova_base; + + for (i = 0; i < count; i++) { + ret = iommu_map(gmu->domain, + bo->iova + (PAGE_SIZE * i), + page_to_phys(bo->pages[i]), PAGE_SIZE, + IOMMU_READ | IOMMU_WRITE); + + if (ret) { + dev_err(gmu->dev, "Unable to map GMU buffer object\n"); + + for (i = i - 1 ; i >= 0; i--) + iommu_unmap(gmu->domain, + bo->iova + (PAGE_SIZE * i), + PAGE_SIZE); + + goto err; + } + } + + bo->virt = vmap(bo->pages, count, VM_IOREMAP, + pgprot_writecombine(PAGE_KERNEL)); + if (!bo->virt) + goto err; + + /* Align future IOVA addresses on 1MB boundaries */ + gmu->uncached_iova_base += ALIGN(size, SZ_1M); + + return bo; + +err: + for (i = 0; i < count; i++) { + if (bo->pages[i]) + __free_pages(bo->pages[i], 0); + } + + kfree(bo->pages); + kfree(bo); + + return ERR_PTR(-ENOMEM); +} + +static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu) +{ + int ret; + + /* + * The GMU address space is hardcoded to treat the range + * 0x60000000 - 0x80000000 as un-cached memory. All buffers shared + * between the GMU and the CPU will live in this space + */ + gmu->uncached_iova_base = 0x60000000; + + + gmu->domain = iommu_domain_alloc(&platform_bus_type); + if (!gmu->domain) + return -ENODEV; + + ret = iommu_attach_device(gmu->domain, gmu->dev); + + if (ret) { + iommu_domain_free(gmu->domain); + gmu->domain = NULL; + } + + return ret; +} + +/* Get the list of RPMh voltage levels from cmd-db */ +static int a6xx_gmu_rpmh_arc_cmds(const char *id, void *vals, int size) +{ + u32 len = cmd_db_read_aux_data_len(id); + + if (!len) + return 0; + + if (WARN_ON(len > size)) + return -EINVAL; + + cmd_db_read_aux_data(id, vals, len); + + /* + * The data comes back as an array of unsigned shorts so adjust the + * count accordingly + */ + return len >> 1; +} + +/* Return the 'arc-level' for the given frequency */ +static u32 a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq) +{ + struct dev_pm_opp *opp; + struct device_node *np; + u32 val = 0; + + if (!freq) + return 0; + + opp = dev_pm_opp_find_freq_exact(dev, freq, true); + if (IS_ERR(opp)) + return 0; + + np = dev_pm_opp_get_of_node(opp); + + if (np) { + of_property_read_u32(np, "qcom,level", &val); + of_node_put(np); + } + + dev_pm_opp_put(opp); + + return val; +} + +static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes, + unsigned long *freqs, int freqs_count, + u16 *pri, int pri_count, + u16 *sec, int sec_count) +{ + int i, j; + + /* Construct a vote for each frequency */ + for (i = 0; i < freqs_count; i++) { + u8 pindex = 0, sindex = 0; + u32 level = a6xx_gmu_get_arc_level(dev, freqs[i]); + + /* Get the primary index that matches the arc level */ + for (j = 0; j < pri_count; j++) { + if (pri[j] >= level) { + pindex = j; + break; + } + } + + if (j == pri_count) { + dev_err(dev, + "Level %u not found in in the RPMh list\n", + level); + dev_err(dev, "Available levels:\n"); + for (j = 0; j < pri_count; j++) + dev_err(dev, " %u\n", pri[j]); + + return -EINVAL; + } + + /* + * Look for a level in in the secondary list that matches. If + * nothing fits, use the maximum non zero vote + */ + + for (j = 0; j < sec_count; j++) { + if (sec[j] >= level) { + sindex = j; + break; + } else if (sec[j]) { + sindex = j; + } + } + + /* Construct the vote */ + votes[i] = ((pri[pindex] & 0xffff) << 16) | + (sindex << 8) | pindex; + } + + return 0; +} + +/* + * The GMU votes with the RPMh for itself and on behalf of the GPU but we need + * to construct the list of votes on the CPU and send it over. Query the RPMh + * voltage levels and build the votes + */ + +static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) +{ + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + + u16 gx[16], cx[16], mx[16]; + u32 gxcount, cxcount, mxcount; + int ret; + + /* Get the list of available voltage levels for each component */ + gxcount = a6xx_gmu_rpmh_arc_cmds("gfx.lvl", gx, sizeof(gx)); + cxcount = a6xx_gmu_rpmh_arc_cmds("cx.lvl", cx, sizeof(cx)); + mxcount = a6xx_gmu_rpmh_arc_cmds("mx.lvl", mx, sizeof(mx)); + + /* Build the GX votes */ + ret = a6xx_gmu_rpmh_arc_votes_init(&gpu->pdev->dev, gmu->gx_arc_votes, + gmu->gpu_freqs, gmu->nr_gpu_freqs, + gx, gxcount, mx, mxcount); + + /* Build the CX votes */ + ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes, + gmu->gmu_freqs, gmu->nr_gmu_freqs, + cx, cxcount, mx, mxcount); + + return ret; +} + +static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs, + u32 size) +{ + int count = dev_pm_opp_get_opp_count(dev); + struct dev_pm_opp *opp; + int i, index = 0; + unsigned long freq = 1; + + /* + * The OPP table doesn't contain the "off" frequency level so we need to + * add 1 to the table size to account for it + */ + + if (WARN(count + 1 > size, + "The GMU frequency table is being truncated\n")) + count = size - 1; + + /* Set the "off" frequency */ + freqs[index++] = 0; + + for (i = 0; i < count; i++) { + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + if (IS_ERR(opp)) + break; + + dev_pm_opp_put(opp); + freqs[index++] = freq++; + } + + return index; +} + +static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) +{ + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + + int ret = 0; + + /* + * The GMU handles its own frequency switching so build a list of + * available frequencies to send during initialization + */ + ret = dev_pm_opp_of_add_table(gmu->dev); + if (ret) { + dev_err(gmu->dev, "Unable to set the OPP table for the GMU\n"); + return ret; + } + + gmu->nr_gmu_freqs = a6xx_gmu_build_freq_table(gmu->dev, + gmu->gmu_freqs, ARRAY_SIZE(gmu->gmu_freqs)); + + /* + * The GMU also handles GPU frequency switching so build a list + * from the GPU OPP table + */ + gmu->nr_gpu_freqs = a6xx_gmu_build_freq_table(&gpu->pdev->dev, + gmu->gpu_freqs, ARRAY_SIZE(gmu->gpu_freqs)); + + /* Build the list of RPMh votes that we'll send to the GMU */ + return a6xx_gmu_rpmh_votes_init(gmu); +} + +static int a6xx_gmu_clocks_probe(struct a6xx_gmu *gmu) +{ + int ret = msm_clk_bulk_get(gmu->dev, &gmu->clocks); + + if (ret < 1) + return ret; + + gmu->nr_clocks = ret; + + gmu->core_clk = msm_clk_bulk_get_clock(gmu->clocks, + gmu->nr_clocks, "gmu"); + + return 0; +} + +static void __iomem *a6xx_gmu_get_mmio(struct platform_device *pdev, + const char *name) +{ + void __iomem *ret; + struct resource *res = platform_get_resource_byname(pdev, + IORESOURCE_MEM, name); + + if (!res) { + dev_err(&pdev->dev, "Unable to find the %s registers\n", name); + return ERR_PTR(-EINVAL); + } + + ret = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!ret) { + dev_err(&pdev->dev, "Unable to map the %s registers\n", name); + return ERR_PTR(-EINVAL); + } + + return ret; +} + +static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev, + const char *name, irq_handler_t handler) +{ + int irq, ret; + + irq = platform_get_irq_byname(pdev, name); + + ret = devm_request_irq(&pdev->dev, irq, handler, IRQF_TRIGGER_HIGH, + name, gmu); + if (ret) { + dev_err(&pdev->dev, "Unable to get interrupt %s\n", name); + return ret; + } + + disable_irq(irq); + + return irq; +} + +void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + + if (IS_ERR_OR_NULL(gmu->mmio)) + return; + + pm_runtime_disable(gmu->dev); + a6xx_gmu_stop(a6xx_gpu); + + a6xx_gmu_irq_disable(gmu); + a6xx_gmu_memory_free(gmu, gmu->hfi); + + iommu_detach_device(gmu->domain, gmu->dev); + + iommu_domain_free(gmu->domain); +} + +int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node) +{ + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + struct platform_device *pdev = of_find_device_by_node(node); + int ret; + + if (!pdev) + return -ENODEV; + + gmu->dev = &pdev->dev; + + of_dma_configure(gmu->dev, node, false); + + /* Fow now, don't do anything fancy until we get our feet under us */ + gmu->idle_level = GMU_IDLE_STATE_ACTIVE; + + pm_runtime_enable(gmu->dev); + gmu->gx = devm_regulator_get(gmu->dev, "vdd"); + + /* Get the list of clocks */ + ret = a6xx_gmu_clocks_probe(gmu); + if (ret) + return ret; + + /* Set up the IOMMU context bank */ + ret = a6xx_gmu_memory_probe(gmu); + if (ret) + return ret; + + /* Allocate memory for for the HFI queues */ + gmu->hfi = a6xx_gmu_memory_alloc(gmu, SZ_16K); + if (IS_ERR(gmu->hfi)) + goto err; + + /* Allocate memory for the GMU debug region */ + gmu->debug = a6xx_gmu_memory_alloc(gmu, SZ_16K); + if (IS_ERR(gmu->debug)) + goto err; + + /* Map the GMU registers */ + gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu"); + + /* Map the GPU power domain controller registers */ + gmu->pdc_mmio = a6xx_gmu_get_mmio(pdev, "gmu_pdc"); + + if (IS_ERR(gmu->mmio) || IS_ERR(gmu->pdc_mmio)) + goto err; + + /* Get the HFI and GMU interrupts */ + gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq); + gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq); + + if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0) + goto err; + + /* Set up a tasklet to handle GMU HFI responses */ + tasklet_init(&gmu->hfi_tasklet, a6xx_hfi_task, (unsigned long) gmu); + + /* Get the power levels for the GMU and GPU */ + a6xx_gmu_pwrlevels_probe(gmu); + + /* Set up the HFI queues */ + a6xx_hfi_init(gmu); + + return 0; +err: + a6xx_gmu_memory_free(gmu, gmu->hfi); + + if (gmu->domain) { + iommu_detach_device(gmu->domain, gmu->dev); + + iommu_domain_free(gmu->domain); + } + + return -ENODEV; +} diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h new file mode 100644 index 000000000000..d9a386c18799 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. */ + +#ifndef _A6XX_GMU_H_ +#define _A6XX_GMU_H_ + +#include <linux/interrupt.h> +#include "msm_drv.h" +#include "a6xx_hfi.h" + +struct a6xx_gmu_bo { + void *virt; + size_t size; + u64 iova; + struct page **pages; +}; + +/* + * These define the different GMU wake up options - these define how both the + * CPU and the GMU bring up the hardware + */ + +/* THe GMU has already been booted and the rentention registers are active */ +#define GMU_WARM_BOOT 0 + +/* the GMU is coming up for the first time or back from a power collapse */ +#define GMU_COLD_BOOT 1 + +/* The GMU is being soft reset after a fault */ +#define GMU_RESET 2 + +/* + * These define the level of control that the GMU has - the higher the number + * the more things that the GMU hardware controls on its own. + */ + +/* The GMU does not do any idle state management */ +#define GMU_IDLE_STATE_ACTIVE 0 + +/* The GMU manages SPTP power collapse */ +#define GMU_IDLE_STATE_SPTP 2 + +/* The GMU does automatic IFPC (intra-frame power collapse) */ +#define GMU_IDLE_STATE_IFPC 3 + +struct a6xx_gmu { + struct device *dev; + + void * __iomem mmio; + void * __iomem pdc_mmio; + + int hfi_irq; + int gmu_irq; + + struct regulator *gx; + + struct iommu_domain *domain; + u64 uncached_iova_base; + + int idle_level; + + struct a6xx_gmu_bo *hfi; + struct a6xx_gmu_bo *debug; + + int nr_clocks; + struct clk_bulk_data *clocks; + struct clk *core_clk; + + int nr_gpu_freqs; + unsigned long gpu_freqs[16]; + u32 gx_arc_votes[16]; + + int nr_gmu_freqs; + unsigned long gmu_freqs[4]; + u32 cx_arc_votes[4]; + + struct a6xx_hfi_queue queues[2]; + + struct tasklet_struct hfi_tasklet; +}; + +static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset) +{ + return msm_readl(gmu->mmio + (offset << 2)); +} + +static inline void gmu_write(struct a6xx_gmu *gmu, u32 offset, u32 value) +{ + return msm_writel(value, gmu->mmio + (offset << 2)); +} + +static inline void pdc_write(struct a6xx_gmu *gmu, u32 offset, u32 value) +{ + return msm_writel(value, gmu->pdc_mmio + (offset << 2)); +} + +static inline void gmu_rmw(struct a6xx_gmu *gmu, u32 reg, u32 mask, u32 or) +{ + u32 val = gmu_read(gmu, reg); + + val &= ~mask; + + gmu_write(gmu, reg, val | or); +} + +#define gmu_poll_timeout(gmu, addr, val, cond, interval, timeout) \ + readl_poll_timeout((gmu)->mmio + ((addr) << 2), val, cond, \ + interval, timeout) + +/* + * These are the available OOB (out of band requests) to the GMU where "out of + * band" means that the CPU talks to the GMU directly and not through HFI. + * Normally this works by writing a ITCM/DTCM register and then triggering a + * interrupt (the "request" bit) and waiting for an acknowledgment (the "ack" + * bit). The state is cleared by writing the "clear' bit to the GMU interrupt. + * + * These are used to force the GMU/GPU to stay on during a critical sequence or + * for hardware workarounds. + */ + +enum a6xx_gmu_oob_state { + GMU_OOB_BOOT_SLUMBER = 0, + GMU_OOB_GPU_SET, + GMU_OOB_DCVS_SET, +}; + +/* These are the interrupt / ack bits for each OOB request that are set + * in a6xx_gmu_set_oob and a6xx_clear_oob + */ + +/* + * Let the GMU know that a boot or slumber operation has started. The value in + * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are + * doing + */ +#define GMU_OOB_BOOT_SLUMBER_REQUEST 22 +#define GMU_OOB_BOOT_SLUMBER_ACK 30 +#define GMU_OOB_BOOT_SLUMBER_CLEAR 30 + +/* + * Set a new power level for the GPU when the CPU is doing frequency scaling + */ +#define GMU_OOB_DCVS_REQUEST 23 +#define GMU_OOB_DCVS_ACK 31 +#define GMU_OOB_DCVS_CLEAR 31 + +/* + * Let the GMU know to not turn off any GPU registers while the CPU is in a + * critical section + */ +#define GMU_OOB_GPU_SET_REQUEST 16 +#define GMU_OOB_GPU_SET_ACK 24 +#define GMU_OOB_GPU_SET_CLEAR 24 + + +void a6xx_hfi_init(struct a6xx_gmu *gmu); +int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); +void a6xx_hfi_stop(struct a6xx_gmu *gmu); + +void a6xx_hfi_task(unsigned long data); + +#endif diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.xml.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.xml.h new file mode 100644 index 000000000000..ef68098d2adc --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.xml.h @@ -0,0 +1,382 @@ +#ifndef A6XX_GMU_XML +#define A6XX_GMU_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42393 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 101627 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark <robdclark@gmail.com> (robclark) +- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define A6XX_GMU_GPU_IDLE_STATUS_BUSY_IGN_AHB 0x00800000 +#define A6XX_GMU_GPU_IDLE_STATUS_CX_GX_CPU_BUSY_IGN_AHB 0x40000000 +#define A6XX_GMU_OOB_BOOT_SLUMBER_SET_MASK 0x00400000 +#define A6XX_GMU_OOB_BOOT_SLUMBER_CHECK_MASK 0x40000000 +#define A6XX_GMU_OOB_BOOT_SLUMBER_CLEAR_MASK 0x40000000 +#define A6XX_GMU_OOB_DCVS_SET_MASK 0x00800000 +#define A6XX_GMU_OOB_DCVS_CHECK_MASK 0x80000000 +#define A6XX_GMU_OOB_DCVS_CLEAR_MASK 0x80000000 +#define A6XX_GMU_OOB_GPU_SET_MASK 0x00040000 +#define A6XX_GMU_OOB_GPU_CHECK_MASK 0x04000000 +#define A6XX_GMU_OOB_GPU_CLEAR_MASK 0x04000000 +#define A6XX_GMU_OOB_PERFCNTR_SET_MASK 0x00020000 +#define A6XX_GMU_OOB_PERFCNTR_CHECK_MASK 0x02000000 +#define A6XX_GMU_OOB_PERFCNTR_CLEAR_MASK 0x02000000 +#define A6XX_HFI_IRQ_MSGQ_MASK 0x00000001 +#define A6XX_HFI_IRQ_DSGQ_MASK 0x00000002 +#define A6XX_HFI_IRQ_BLOCKED_MSG_MASK 0x00000004 +#define A6XX_HFI_IRQ_CM3_FAULT_MASK 0x00800000 +#define A6XX_HFI_IRQ_GMU_ERR_MASK__MASK 0x007f0000 +#define A6XX_HFI_IRQ_GMU_ERR_MASK__SHIFT 16 +static inline uint32_t A6XX_HFI_IRQ_GMU_ERR_MASK(uint32_t val) +{ + return ((val) << A6XX_HFI_IRQ_GMU_ERR_MASK__SHIFT) & A6XX_HFI_IRQ_GMU_ERR_MASK__MASK; +} +#define A6XX_HFI_IRQ_OOB_MASK__MASK 0xff000000 +#define A6XX_HFI_IRQ_OOB_MASK__SHIFT 24 +static inline uint32_t A6XX_HFI_IRQ_OOB_MASK(uint32_t val) +{ + return ((val) << A6XX_HFI_IRQ_OOB_MASK__SHIFT) & A6XX_HFI_IRQ_OOB_MASK__MASK; +} +#define A6XX_HFI_H2F_IRQ_MASK_BIT 0x00000001 +#define REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL 0x00000080 + +#define REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL 0x00000081 + +#define REG_A6XX_GMU_CM3_ITCM_START 0x00000c00 + +#define REG_A6XX_GMU_CM3_DTCM_START 0x00001c00 + +#define REG_A6XX_GMU_NMI_CONTROL_STATUS 0x000023f0 + +#define REG_A6XX_GMU_BOOT_SLUMBER_OPTION 0x000023f8 + +#define REG_A6XX_GMU_GX_VOTE_IDX 0x000023f9 + +#define REG_A6XX_GMU_MX_VOTE_IDX 0x000023fa + +#define REG_A6XX_GMU_DCVS_ACK_OPTION 0x000023fc + +#define REG_A6XX_GMU_DCVS_PERF_SETTING 0x000023fd + +#define REG_A6XX_GMU_DCVS_BW_SETTING 0x000023fe + +#define REG_A6XX_GMU_DCVS_RETURN 0x000023ff + +#define REG_A6XX_GMU_SYS_BUS_CONFIG 0x00004c0f + +#define REG_A6XX_GMU_CM3_SYSRESET 0x00005000 + +#define REG_A6XX_GMU_CM3_BOOT_CONFIG 0x00005001 + +#define REG_A6XX_GMU_CM3_FW_BUSY 0x0000501a + +#define REG_A6XX_GMU_CM3_FW_INIT_RESULT 0x0000501c + +#define REG_A6XX_GMU_CM3_CFG 0x0000502d + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE 0x00005040 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0 0x00005041 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1 0x00005042 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L 0x00005044 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H 0x00005045 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_L 0x00005046 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_1_H 0x00005047 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_L 0x00005048 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_2_H 0x00005049 + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_L 0x0000504a + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_3_H 0x0000504b + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_L 0x0000504c + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_4_H 0x0000504d + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_L 0x0000504e + +#define REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_5_H 0x0000504f + +#define REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL 0x000050c0 +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_IFPC_ENABLE 0x00000001 +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_HM_POWER_COLLAPSE_ENABLE 0x00000002 +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_SPTPRAC_POWER_CONTROL_ENABLE 0x00000004 +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_NUM_PASS_SKIPS__MASK 0x00003c00 +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_NUM_PASS_SKIPS__SHIFT 10 +static inline uint32_t A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_NUM_PASS_SKIPS(uint32_t val) +{ + return ((val) << A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_NUM_PASS_SKIPS__SHIFT) & A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_NUM_PASS_SKIPS__MASK; +} +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_MIN_PASS_LENGTH__MASK 0xffffc000 +#define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_MIN_PASS_LENGTH__SHIFT 14 +static inline uint32_t A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_MIN_PASS_LENGTH(uint32_t val) +{ + return ((val) << A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_MIN_PASS_LENGTH__SHIFT) & A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_MIN_PASS_LENGTH__MASK; +} + +#define REG_A6XX_GMU_PWR_COL_INTER_FRAME_HYST 0x000050c1 + +#define REG_A6XX_GMU_PWR_COL_SPTPRAC_HYST 0x000050c2 + +#define REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS 0x000050d0 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWERING_OFF 0x00000001 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWERING_ON 0x00000002 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWER_ON 0x00000004 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWER_OFF 0x00000008 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SP_CLOCK_OFF 0x00000010 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GMU_UP_POWER_STATE 0x00000020 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF 0x00000040 +#define A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF 0x00000080 + +#define REG_A6XX_GMU_GPU_NAP_CTRL 0x000050e4 +#define A6XX_GMU_GPU_NAP_CTRL_HW_NAP_ENABLE 0x00000001 +#define A6XX_GMU_GPU_NAP_CTRL_SID__MASK 0x000001f0 +#define A6XX_GMU_GPU_NAP_CTRL_SID__SHIFT 4 +static inline uint32_t A6XX_GMU_GPU_NAP_CTRL_SID(uint32_t val) +{ + return ((val) << A6XX_GMU_GPU_NAP_CTRL_SID__SHIFT) & A6XX_GMU_GPU_NAP_CTRL_SID__MASK; +} + +#define REG_A6XX_GMU_RPMH_CTRL 0x000050e8 +#define A6XX_GMU_RPMH_CTRL_RPMH_INTERFACE_ENABLE 0x00000001 +#define A6XX_GMU_RPMH_CTRL_LLC_VOTE_ENABLE 0x00000010 +#define A6XX_GMU_RPMH_CTRL_DDR_VOTE_ENABLE 0x00000100 +#define A6XX_GMU_RPMH_CTRL_MX_VOTE_ENABLE 0x00000200 +#define A6XX_GMU_RPMH_CTRL_CX_VOTE_ENABLE 0x00000400 +#define A6XX_GMU_RPMH_CTRL_GFX_VOTE_ENABLE 0x00000800 +#define A6XX_GMU_RPMH_CTRL_DDR_MIN_VOTE_ENABLE 0x00001000 +#define A6XX_GMU_RPMH_CTRL_MX_MIN_VOTE_ENABLE 0x00002000 +#define A6XX_GMU_RPMH_CTRL_CX_MIN_VOTE_ENABLE 0x00004000 +#define A6XX_GMU_RPMH_CTRL_GFX_MIN_VOTE_ENABLE 0x00008000 + +#define REG_A6XX_GMU_RPMH_HYST_CTRL 0x000050e9 + +#define REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE 0x000050ec + +#define REG_A6XX_GMU_BOOT_KMD_LM_HANDSHAKE 0x000051f0 + +#define REG_A6XX_GMU_LLM_GLM_SLEEP_CTRL 0x00005157 + +#define REG_A6XX_GMU_LLM_GLM_SLEEP_STATUS 0x00005158 + +#define REG_A6XX_GMU_ALWAYS_ON_COUNTER_L 0x00005088 + +#define REG_A6XX_GMU_ALWAYS_ON_COUNTER_H 0x00005089 + +#define REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE 0x000050c3 + +#define REG_A6XX_GMU_HFI_CTRL_STATUS 0x00005180 + +#define REG_A6XX_GMU_HFI_VERSION_INFO 0x00005181 + +#define REG_A6XX_GMU_HFI_SFR_ADDR 0x00005182 + +#define REG_A6XX_GMU_HFI_MMAP_ADDR 0x00005183 + +#define REG_A6XX_GMU_HFI_QTBL_INFO 0x00005184 + +#define REG_A6XX_GMU_HFI_QTBL_ADDR 0x00005185 + +#define REG_A6XX_GMU_HFI_CTRL_INIT 0x00005186 + +#define REG_A6XX_GMU_GMU2HOST_INTR_SET 0x00005190 + +#define REG_A6XX_GMU_GMU2HOST_INTR_CLR 0x00005191 + +#define REG_A6XX_GMU_GMU2HOST_INTR_INFO 0x00005192 +#define A6XX_GMU_GMU2HOST_INTR_INFO_MSGQ 0x00000001 +#define A6XX_GMU_GMU2HOST_INTR_INFO_CM3_FAULT 0x00800000 + +#define REG_A6XX_GMU_GMU2HOST_INTR_MASK 0x00005193 + +#define REG_A6XX_GMU_HOST2GMU_INTR_SET 0x00005194 + +#define REG_A6XX_GMU_HOST2GMU_INTR_CLR 0x00005195 + +#define REG_A6XX_GMU_HOST2GMU_INTR_RAW_INFO 0x00005196 + +#define REG_A6XX_GMU_HOST2GMU_INTR_EN_0 0x00005197 + +#define REG_A6XX_GMU_HOST2GMU_INTR_EN_1 0x00005198 + +#define REG_A6XX_GMU_HOST2GMU_INTR_EN_2 0x00005199 + +#define REG_A6XX_GMU_HOST2GMU_INTR_EN_3 0x0000519a + +#define REG_A6XX_GMU_HOST2GMU_INTR_INFO_0 0x0000519b + +#define REG_A6XX_GMU_HOST2GMU_INTR_INFO_1 0x0000519c + +#define REG_A6XX_GMU_HOST2GMU_INTR_INFO_2 0x0000519d + +#define REG_A6XX_GMU_HOST2GMU_INTR_INFO_3 0x0000519e + +#define REG_A6XX_GMU_GENERAL_1 0x000051c6 + +#define REG_A6XX_GMU_GENERAL_7 0x000051cc + +#define REG_A6XX_GMU_ISENSE_CTRL 0x0000515d + +#define REG_A6XX_GPU_CS_ENABLE_REG 0x00008920 + +#define REG_A6XX_GPU_GMU_CX_GMU_ISENSE_CTRL 0x0000515d + +#define REG_A6XX_GPU_CS_AMP_CALIBRATION_CONTROL3 0x00008578 + +#define REG_A6XX_GPU_CS_AMP_CALIBRATION_CONTROL2 0x00008558 + +#define REG_A6XX_GPU_CS_A_SENSOR_CTRL_0 0x00008580 + +#define REG_A6XX_GPU_CS_A_SENSOR_CTRL_2 0x00027ada + +#define REG_A6XX_GPU_CS_SENSOR_GENERAL_STATUS 0x0000881a + +#define REG_A6XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0x00008957 + +#define REG_A6XX_GPU_CS_SENSOR_GENERAL_STATUS 0x0000881a + +#define REG_A6XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0x0000881d + +#define REG_A6XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0x0000881f + +#define REG_A6XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0x00008821 + +#define REG_A6XX_GPU_CS_AMP_CALIBRATION_DONE 0x00008965 + +#define REG_A6XX_GPU_CS_AMP_PERIOD_CTRL 0x0000896d + +#define REG_A6XX_GPU_CS_AMP_CALIBRATION_DONE 0x00008965 + +#define REG_A6XX_GPU_GMU_CX_GMU_PWR_THRESHOLD 0x0000514d + +#define REG_A6XX_GMU_AO_INTERRUPT_EN 0x00009303 + +#define REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR 0x00009304 + +#define REG_A6XX_GMU_AO_HOST_INTERRUPT_STATUS 0x00009305 +#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS_WDOG_BITE 0x00000001 +#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS_RSCC_COMP 0x00000002 +#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS_VDROOP 0x00000004 +#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR 0x00000008 +#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS_DBD_WAKEUP 0x00000010 +#define A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR 0x00000020 + +#define REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK 0x00009306 + +#define REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL 0x00009309 + +#define REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL 0x0000930a + +#define REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL 0x0000930b + +#define REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS 0x0000930c +#define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB 0x00800000 + +#define REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2 0x0000930d + +#define REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK 0x0000930e + +#define REG_A6XX_GMU_AO_AHB_FENCE_CTRL 0x00009310 + +#define REG_A6XX_GMU_AHB_FENCE_STATUS 0x00009313 + +#define REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS 0x00009315 + +#define REG_A6XX_GMU_AO_SPARE_CNTL 0x00009316 + +#define REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0 0x00008c04 + +#define REG_A6XX_GMU_RSCC_CONTROL_REQ 0x00009307 + +#define REG_A6XX_GMU_RSCC_CONTROL_ACK 0x00009308 + +#define REG_A6XX_GMU_AHB_FENCE_RANGE_0 0x00009311 + +#define REG_A6XX_GMU_AHB_FENCE_RANGE_1 0x00009312 + +#define REG_A6XX_GPU_CC_GX_GDSCR 0x00009c03 + +#define REG_A6XX_GPU_CC_GX_DOMAIN_MISC 0x00009d42 + +#define REG_A6XX_RSCC_PDC_SEQ_START_ADDR 0x00008c08 + +#define REG_A6XX_RSCC_PDC_MATCH_VALUE_LO 0x00008c09 + +#define REG_A6XX_RSCC_PDC_MATCH_VALUE_HI 0x00008c0a + +#define REG_A6XX_RSCC_PDC_SLAVE_ID_DRV0 0x00008c0b + +#define REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR 0x00008c0d + +#define REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA 0x00008c0e + +#define REG_A6XX_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_L_DRV0 0x00008c82 + +#define REG_A6XX_RSCC_TIMESTAMP_UNIT0_TIMESTAMP_H_DRV0 0x00008c83 + +#define REG_A6XX_RSCC_TIMESTAMP_UNIT1_EN_DRV0 0x00008c89 + +#define REG_A6XX_RSCC_TIMESTAMP_UNIT1_OUTPUT_DRV0 0x00008c8c + +#define REG_A6XX_RSCC_OVERRIDE_START_ADDR 0x00008d00 + +#define REG_A6XX_RSCC_SEQ_BUSY_DRV0 0x00008d01 + +#define REG_A6XX_RSCC_SEQ_MEM_0_DRV0 0x00008d80 + +#define REG_A6XX_RSCC_TCS0_DRV0_STATUS 0x00008f46 + +#define REG_A6XX_RSCC_TCS1_DRV0_STATUS 0x000090ae + +#define REG_A6XX_RSCC_TCS2_DRV0_STATUS 0x00009216 + +#define REG_A6XX_RSCC_TCS3_DRV0_STATUS 0x0000937e + + +#endif /* A6XX_GMU_XML */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c new file mode 100644 index 000000000000..c629f742a1d1 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -0,0 +1,818 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */ + + +#include "msm_gem.h" +#include "msm_mmu.h" +#include "a6xx_gpu.h" +#include "a6xx_gmu.xml.h" + +static inline bool _a6xx_check_idle(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + /* Check that the GMU is idle */ + if (!a6xx_gmu_isidle(&a6xx_gpu->gmu)) + return false; + + /* Check tha the CX master is idle */ + if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) & + ~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER) + return false; + + return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) & + A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT); +} + +bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + /* wait for CP to drain ringbuffer: */ + if (!adreno_idle(gpu, ring)) + return false; + + if (spin_until(_a6xx_check_idle(gpu))) { + DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", + gpu->name, __builtin_return_address(0), + gpu_read(gpu, REG_A6XX_RBBM_STATUS), + gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS), + gpu_read(gpu, REG_A6XX_CP_RB_RPTR), + gpu_read(gpu, REG_A6XX_CP_RB_WPTR)); + return false; + } + + return true; +} + +static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + uint32_t wptr; + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + + /* Make sure to wrap wptr if we need to */ + wptr = get_wptr(ring); + + spin_unlock_irqrestore(&ring->lock, flags); + + /* Make sure everything is posted before making a decision */ + mb(); + + gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); +} + +static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx) +{ + struct msm_drm_private *priv = gpu->dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; + unsigned int i; + + /* Invalidate CCU depth and color */ + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, PC_CCU_INVALIDATE_DEPTH); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, PC_CCU_INVALIDATE_COLOR); + + /* Submit the commands */ + for (i = 0; i < submit->nr_cmds; i++) { + switch (submit->cmd[i].type) { + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + break; + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + if (priv->lastctx == ctx) + break; + case MSM_SUBMIT_CMD_BUF: + OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); + OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); + OUT_RING(ring, submit->cmd[i].size); + break; + } + } + + /* Write the fence to the scratch register */ + OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1); + OUT_RING(ring, submit->seqno); + + /* + * Execute a CACHE_FLUSH_TS event. This will ensure that the + * timestamp is written to the memory and then triggers the interrupt + */ + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, submit->seqno); + + a6xx_flush(gpu, ring); +} + +static const struct { + u32 offset; + u32 value; +} a6xx_hwcg[] = { + {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220}, + {REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, + {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf}, + {REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf}, + {REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf}, + {REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222}, + {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111}, + {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, + {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00}, + {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00}, + {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022}, + {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, + {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, + {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, + {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, + {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, + {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, + {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, + {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, + {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555} +}; + +static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + unsigned int i; + u32 val; + + val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL); + + /* Don't re-program the registers if they are already correct */ + if ((!state && !val) || (state && (val == 0x8aa8aa02))) + return; + + /* Disable SP clock before programming HWCG registers */ + gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); + + for (i = 0; i < ARRAY_SIZE(a6xx_hwcg); i++) + gpu_write(gpu, a6xx_hwcg[i].offset, + state ? a6xx_hwcg[i].value : 0); + + /* Enable SP clock */ + gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1); + + gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? 0x8aa8aa02 : 0); +} + +static int a6xx_cp_init(struct msm_gpu *gpu) +{ + struct msm_ringbuffer *ring = gpu->rb[0]; + + OUT_PKT7(ring, CP_ME_INIT, 8); + + OUT_RING(ring, 0x0000002f); + + /* Enable multiple hardware contexts */ + OUT_RING(ring, 0x00000003); + + /* Enable error detection */ + OUT_RING(ring, 0x20000000); + + /* Don't enable header dump */ + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + /* No workarounds enabled */ + OUT_RING(ring, 0x00000000); + + /* Pad rest of the cmds with 0's */ + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + a6xx_flush(gpu, ring); + return a6xx_idle(gpu, ring) ? 0 : -EINVAL; +} + +static int a6xx_ucode_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (!a6xx_gpu->sqe_bo) { + a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu, + adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova); + + if (IS_ERR(a6xx_gpu->sqe_bo)) { + int ret = PTR_ERR(a6xx_gpu->sqe_bo); + + a6xx_gpu->sqe_bo = NULL; + DRM_DEV_ERROR(&gpu->pdev->dev, + "Could not allocate SQE ucode: %d\n", ret); + + return ret; + } + } + + gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE_LO, + REG_A6XX_CP_SQE_INSTR_BASE_HI, a6xx_gpu->sqe_iova); + + return 0; +} + +#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \ + A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \ + A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ + A6XX_RBBM_INT_0_MASK_CP_IB2 | \ + A6XX_RBBM_INT_0_MASK_CP_IB1 | \ + A6XX_RBBM_INT_0_MASK_CP_RB | \ + A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ + A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \ + A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ + A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ + A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR) + +static int a6xx_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int ret; + + /* Make sure the GMU keeps the GPU on while we set it up */ + a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); + + /* + * Disable the trusted memory range - we don't actually supported secure + * memory rendering at this point in time and we don't want to block off + * part of the virtual memory space. + */ + gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, + REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); + + /* enable hardware clockgating */ + a6xx_set_hwcg(gpu, true); + + /* VBIF start */ + gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009); + gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff); + + /* Disable L2 bypass in the UCHE */ + gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0); + gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff); + gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_LO, 0xfffff000); + gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff); + gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000); + gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff); + + /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ + gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, + REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000); + + gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO, + REG_A6XX_UCHE_GMEM_RANGE_MAX_HI, + 0x00100000 + adreno_gpu->gmem - 1); + + gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); + gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); + + gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0); + gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); + + /* Setting the mem pool size */ + gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128); + + /* Setting the primFifo thresholds default values */ + gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, (0x300 << 11)); + + /* Set the AHB default slave response to "ERROR" */ + gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1); + + /* Turn on performance counters */ + gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1); + + /* Select CP0 to always count cycles */ + gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT); + + /* FIXME: not sure if this should live here or in a6xx_gmu.c */ + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, + 0xff000000); + gmu_rmw(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, + 0xff, 0x20); + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, + 0x01); + + gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 2 << 21); + + /* Enable fault detection */ + gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, + (1 << 30) | 0x1fffff); + + gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1); + + /* Protect registers from the CP */ + gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, 0x00000003); + + gpu_write(gpu, REG_A6XX_CP_PROTECT(0), + A6XX_PROTECT_RDONLY(0x600, 0x51)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(1), A6XX_PROTECT_RW(0xae50, 0x2)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(2), A6XX_PROTECT_RW(0x9624, 0x13)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(3), A6XX_PROTECT_RW(0x8630, 0x8)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(4), A6XX_PROTECT_RW(0x9e70, 0x1)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(5), A6XX_PROTECT_RW(0x9e78, 0x187)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(6), A6XX_PROTECT_RW(0xf000, 0x810)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(7), + A6XX_PROTECT_RDONLY(0xfc00, 0x3)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(8), A6XX_PROTECT_RW(0x50e, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(9), A6XX_PROTECT_RDONLY(0x50f, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(10), A6XX_PROTECT_RW(0x510, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(11), + A6XX_PROTECT_RDONLY(0x0, 0x4f9)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(12), + A6XX_PROTECT_RDONLY(0x501, 0xa)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(13), + A6XX_PROTECT_RDONLY(0x511, 0x44)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(14), A6XX_PROTECT_RW(0xe00, 0xe)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(15), A6XX_PROTECT_RW(0x8e00, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(16), A6XX_PROTECT_RW(0x8e50, 0xf)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(17), A6XX_PROTECT_RW(0xbe02, 0x0)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(18), + A6XX_PROTECT_RW(0xbe20, 0x11f3)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(19), A6XX_PROTECT_RW(0x800, 0x82)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(20), A6XX_PROTECT_RW(0x8a0, 0x8)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(21), A6XX_PROTECT_RW(0x8ab, 0x19)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(22), A6XX_PROTECT_RW(0x900, 0x4d)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(23), A6XX_PROTECT_RW(0x98d, 0x76)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(24), + A6XX_PROTECT_RDONLY(0x8d0, 0x23)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(25), + A6XX_PROTECT_RDONLY(0x980, 0x4)); + gpu_write(gpu, REG_A6XX_CP_PROTECT(26), A6XX_PROTECT_RW(0xa630, 0x0)); + + /* Enable interrupts */ + gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK); + + ret = adreno_hw_init(gpu); + if (ret) + goto out; + + ret = a6xx_ucode_init(gpu); + if (ret) + goto out; + + /* Always come up on rb 0 */ + a6xx_gpu->cur_ring = gpu->rb[0]; + + /* Enable the SQE_to start the CP engine */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); + + ret = a6xx_cp_init(gpu); + if (ret) + goto out; + + gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); + +out: + /* + * Tell the GMU that we are done touching the GPU and it can start power + * management + */ + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + + /* Take the GMU out of its special boot mode */ + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER); + + return ret; +} + +static void a6xx_dump(struct msm_gpu *gpu) +{ + dev_info(&gpu->pdev->dev, "status: %08x\n", + gpu_read(gpu, REG_A6XX_RBBM_STATUS)); + adreno_dump(gpu); +} + +#define VBIF_RESET_ACK_TIMEOUT 100 +#define VBIF_RESET_ACK_MASK 0x00f0 + +static void a6xx_recover(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int i; + + adreno_dump_info(gpu); + + for (i = 0; i < 8; i++) + dev_info(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); + + if (hang_debug) + a6xx_dump(gpu); + + /* + * Turn off keep alive that might have been enabled by the hang + * interrupt + */ + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0); + + gpu->funcs->pm_suspend(gpu); + gpu->funcs->pm_resume(gpu); + + msm_gpu_hw_init(gpu); +} + +static int a6xx_fault_handler(void *arg, unsigned long iova, int flags) +{ + struct msm_gpu *gpu = arg; + + pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n", + iova, flags, + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7))); + + return -EFAULT; +} + +static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) +{ + u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS); + + if (status & A6XX_CP_INT_CP_OPCODE_ERROR) { + u32 val; + + gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1); + val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA); + dev_err_ratelimited(&gpu->pdev->dev, + "CP | opcode error | possible opcode=0x%8.8X\n", + val); + } + + if (status & A6XX_CP_INT_CP_UCODE_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, + "CP ucode error interrupt\n"); + + if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n", + gpu_read(gpu, REG_A6XX_CP_HW_FAULT)); + + if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) { + u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS); + + dev_err_ratelimited(&gpu->pdev->dev, + "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n", + val & (1 << 20) ? "READ" : "WRITE", + (val & 0x3ffff), val); + } + + if (status & A6XX_CP_INT_CP_AHB_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n"); + + if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n"); + + if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n"); + +} + +static void a6xx_fault_detect_irq(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + + /* + * Force the GPU to stay on until after we finish + * collecting information + */ + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); + + DRM_DEV_ERROR(&gpu->pdev->dev, + "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + ring ? ring->id : -1, ring ? ring->seqno : 0, + gpu_read(gpu, REG_A6XX_RBBM_STATUS), + gpu_read(gpu, REG_A6XX_CP_RB_RPTR), + gpu_read(gpu, REG_A6XX_CP_RB_WPTR), + gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI), + gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), + gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI), + gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); + + /* Turn off the hangcheck timer to keep it from bothering us */ + del_timer(&gpu->hangcheck_timer); + + queue_work(priv->wq, &gpu->recover_work); +} + +static irqreturn_t a6xx_irq(struct msm_gpu *gpu) +{ + u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); + + gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); + + if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT) + a6xx_fault_detect_irq(gpu); + + if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) + dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n"); + + if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR) + a6xx_cp_hw_err_irq(gpu); + + if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW) + dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n"); + + if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW) + dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n"); + + if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) + dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); + + if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + msm_gpu_retire(gpu); + + return IRQ_HANDLED; +} + +static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A6XX_CP_RB_BASE), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A6XX_CP_RB_BASE_HI), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, + REG_A6XX_CP_RB_RPTR_ADDR_LO), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI, + REG_A6XX_CP_RB_RPTR_ADDR_HI), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A6XX_CP_RB_RPTR), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A6XX_CP_RB_WPTR), + REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), +}; + +static const u32 a6xx_registers[] = { + 0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b, + 0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044, + 0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb, + 0x0100, 0x011d, 0x0200, 0x020d, 0x0210, 0x0213, 0x0218, 0x023d, + 0x0400, 0x04f9, 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511, + 0x0533, 0x0533, 0x0540, 0x0555, 0x0800, 0x0808, 0x0810, 0x0813, + 0x0820, 0x0821, 0x0823, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843, + 0x084f, 0x086f, 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4, + 0x08d0, 0x08dd, 0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911, + 0x0928, 0x093e, 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996, + 0x0998, 0x099e, 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1, + 0x09c2, 0x09c8, 0x0a00, 0x0a03, 0x0c00, 0x0c04, 0x0c06, 0x0c06, + 0x0c10, 0x0cd9, 0x0e00, 0x0e0e, 0x0e10, 0x0e13, 0x0e17, 0x0e19, + 0x0e1c, 0x0e2b, 0x0e30, 0x0e32, 0x0e38, 0x0e39, 0x8600, 0x8601, + 0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, 0x8630, 0x8637, + 0x8e01, 0x8e01, 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e0c, 0x8e0c, + 0x8e10, 0x8e1c, 0x8e20, 0x8e25, 0x8e28, 0x8e28, 0x8e2c, 0x8e2f, + 0x8e3b, 0x8e3e, 0x8e40, 0x8e43, 0x8e50, 0x8e5e, 0x8e70, 0x8e77, + 0x9600, 0x9604, 0x9624, 0x9637, 0x9e00, 0x9e01, 0x9e03, 0x9e0e, + 0x9e11, 0x9e16, 0x9e19, 0x9e19, 0x9e1c, 0x9e1c, 0x9e20, 0x9e23, + 0x9e30, 0x9e31, 0x9e34, 0x9e34, 0x9e70, 0x9e72, 0x9e78, 0x9e79, + 0x9e80, 0x9fff, 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a, + 0xa610, 0xa617, 0xa630, 0xa630, + ~0 +}; + +static int a6xx_pm_resume(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + int ret; + + ret = a6xx_gmu_resume(a6xx_gpu); + + gpu->needs_hw_init = true; + + return ret; +} + +static int a6xx_pm_suspend(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + /* + * Make sure the GMU is idle before continuing (because some transitions + * may use VBIF + */ + a6xx_gmu_wait_for_idle(a6xx_gpu); + + /* Clear the VBIF pipe before shutting down */ + /* FIXME: This accesses the GPU - do we need to make sure it is on? */ + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf); + spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) == 0xf); + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); + + return a6xx_gmu_stop(a6xx_gpu); +} + +static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + /* Force the GPU power on so we can read this register */ + a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + + *value = gpu_read64(gpu, REG_A6XX_RBBM_PERFCTR_CP_0_LO, + REG_A6XX_RBBM_PERFCTR_CP_0_HI); + + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + return 0; +} + +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) +static void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, + struct drm_printer *p) +{ + adreno_show(gpu, state, p); +} +#endif + +static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + return a6xx_gpu->cur_ring; +} + +static void a6xx_destroy(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (a6xx_gpu->sqe_bo) { + if (a6xx_gpu->sqe_iova) + msm_gem_put_iova(a6xx_gpu->sqe_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(a6xx_gpu->sqe_bo); + } + + a6xx_gmu_remove(a6xx_gpu); + + adreno_gpu_cleanup(adreno_gpu); + kfree(a6xx_gpu); +} + +static const struct adreno_gpu_funcs funcs = { + .base = { + .get_param = adreno_get_param, + .hw_init = a6xx_hw_init, + .pm_suspend = a6xx_pm_suspend, + .pm_resume = a6xx_pm_resume, + .recover = a6xx_recover, + .submit = a6xx_submit, + .flush = a6xx_flush, + .active_ring = a6xx_active_ring, + .irq = a6xx_irq, + .destroy = a6xx_destroy, +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) + .show = a6xx_show, +#endif + }, + .get_timestamp = a6xx_get_timestamp, +}; + +struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct platform_device *pdev = priv->gpu_pdev; + struct device_node *node; + struct a6xx_gpu *a6xx_gpu; + struct adreno_gpu *adreno_gpu; + struct msm_gpu *gpu; + int ret; + + a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL); + if (!a6xx_gpu) + return ERR_PTR(-ENOMEM); + + adreno_gpu = &a6xx_gpu->base; + gpu = &adreno_gpu->base; + + adreno_gpu->registers = a6xx_registers; + adreno_gpu->reg_offsets = a6xx_register_offsets; + + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + + /* Check if there is a GMU phandle and set it up */ + node = of_parse_phandle(pdev->dev.of_node, "gmu", 0); + + /* FIXME: How do we gracefully handle this? */ + BUG_ON(!node); + + ret = a6xx_gmu_probe(a6xx_gpu, node); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + + if (gpu->aspace) + msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, + a6xx_fault_handler); + + return gpu; +} diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h new file mode 100644 index 000000000000..dd69e5b0e692 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. */ + +#ifndef __A6XX_GPU_H__ +#define __A6XX_GPU_H__ + + +#include "adreno_gpu.h" +#include "a6xx.xml.h" + +#include "a6xx_gmu.h" + +extern bool hang_debug; + +struct a6xx_gpu { + struct adreno_gpu base; + + struct drm_gem_object *sqe_bo; + uint64_t sqe_iova; + + struct msm_ringbuffer *cur_ring; + + struct a6xx_gmu gmu; +}; + +#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) + +/* + * Given a register and a count, return a value to program into + * REG_CP_PROTECT_REG(n) - this will block both reads and writes for _len + * registers starting at _reg. + */ +#define A6XX_PROTECT_RW(_reg, _len) \ + ((1 << 31) | \ + (((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF)) + +/* + * Same as above, but allow reads over the range. For areas of mixed use (such + * as performance counters) this allows us to protect a much larger range with a + * single register + */ +#define A6XX_PROTECT_RDONLY(_reg, _len) \ + ((((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF)) + + +int a6xx_gmu_resume(struct a6xx_gpu *gpu); +int a6xx_gmu_stop(struct a6xx_gpu *gpu); + +int a6xx_gmu_wait_for_idle(struct a6xx_gpu *gpu); + +int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu); +bool a6xx_gmu_isidle(struct a6xx_gmu *gmu); + +int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); +void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); + +int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node); +void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); + +#endif /* __A6XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c new file mode 100644 index 000000000000..f19ef4cb6ea4 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */ + +#include <linux/completion.h> +#include <linux/circ_buf.h> +#include <linux/list.h> + +#include "a6xx_gmu.h" +#include "a6xx_gmu.xml.h" + +#define HFI_MSG_ID(val) [val] = #val + +static const char * const a6xx_hfi_msg_id[] = { + HFI_MSG_ID(HFI_H2F_MSG_INIT), + HFI_MSG_ID(HFI_H2F_MSG_FW_VERSION), + HFI_MSG_ID(HFI_H2F_MSG_BW_TABLE), + HFI_MSG_ID(HFI_H2F_MSG_PERF_TABLE), + HFI_MSG_ID(HFI_H2F_MSG_TEST), +}; + +static int a6xx_hfi_queue_read(struct a6xx_hfi_queue *queue, u32 *data, + u32 dwords) +{ + struct a6xx_hfi_queue_header *header = queue->header; + u32 i, hdr, index = header->read_index; + + if (header->read_index == header->write_index) { + header->rx_request = 1; + return 0; + } + + hdr = queue->data[index]; + + /* + * If we are to assume that the GMU firmware is in fact a rational actor + * and is programmed to not send us a larger response than we expect + * then we can also assume that if the header size is unexpectedly large + * that it is due to memory corruption and/or hardware failure. In this + * case the only reasonable course of action is to BUG() to help harden + * the failure. + */ + + BUG_ON(HFI_HEADER_SIZE(hdr) > dwords); + + for (i = 0; i < HFI_HEADER_SIZE(hdr); i++) { + data[i] = queue->data[index]; + index = (index + 1) % header->size; + } + + header->read_index = index; + return HFI_HEADER_SIZE(hdr); +} + +static int a6xx_hfi_queue_write(struct a6xx_gmu *gmu, + struct a6xx_hfi_queue *queue, u32 *data, u32 dwords) +{ + struct a6xx_hfi_queue_header *header = queue->header; + u32 i, space, index = header->write_index; + + spin_lock(&queue->lock); + + space = CIRC_SPACE(header->write_index, header->read_index, + header->size); + if (space < dwords) { + header->dropped++; + spin_unlock(&queue->lock); + return -ENOSPC; + } + + for (i = 0; i < dwords; i++) { + queue->data[index] = data[i]; + index = (index + 1) % header->size; + } + + header->write_index = index; + spin_unlock(&queue->lock); + + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 0x01); + return 0; +} + +struct a6xx_hfi_response { + u32 id; + u32 seqnum; + struct list_head node; + struct completion complete; + + u32 error; + u32 payload[16]; +}; + +/* + * Incoming HFI ack messages can come in out of order so we need to store all + * the pending messages on a list until they are handled. + */ +static spinlock_t hfi_ack_lock = __SPIN_LOCK_UNLOCKED(message_lock); +static LIST_HEAD(hfi_ack_list); + +static void a6xx_hfi_handle_ack(struct a6xx_gmu *gmu, + struct a6xx_hfi_msg_response *msg) +{ + struct a6xx_hfi_response *resp; + u32 id, seqnum; + + /* msg->ret_header contains the header of the message being acked */ + id = HFI_HEADER_ID(msg->ret_header); + seqnum = HFI_HEADER_SEQNUM(msg->ret_header); + + spin_lock(&hfi_ack_lock); + list_for_each_entry(resp, &hfi_ack_list, node) { + if (resp->id == id && resp->seqnum == seqnum) { + resp->error = msg->error; + memcpy(resp->payload, msg->payload, + sizeof(resp->payload)); + + complete(&resp->complete); + spin_unlock(&hfi_ack_lock); + return; + } + } + spin_unlock(&hfi_ack_lock); + + dev_err(gmu->dev, "Nobody was waiting for HFI message %d\n", seqnum); +} + +static void a6xx_hfi_handle_error(struct a6xx_gmu *gmu, + struct a6xx_hfi_msg_response *msg) +{ + struct a6xx_hfi_msg_error *error = (struct a6xx_hfi_msg_error *) msg; + + dev_err(gmu->dev, "GMU firmware error %d\n", error->code); +} + +void a6xx_hfi_task(unsigned long data) +{ + struct a6xx_gmu *gmu = (struct a6xx_gmu *) data; + struct a6xx_hfi_queue *queue = &gmu->queues[HFI_RESPONSE_QUEUE]; + struct a6xx_hfi_msg_response resp; + + for (;;) { + u32 id; + int ret = a6xx_hfi_queue_read(queue, (u32 *) &resp, + sizeof(resp) >> 2); + + /* Returns the number of bytes copied or negative on error */ + if (ret <= 0) { + if (ret < 0) + dev_err(gmu->dev, + "Unable to read the HFI message queue\n"); + break; + } + + id = HFI_HEADER_ID(resp.header); + + if (id == HFI_F2H_MSG_ACK) + a6xx_hfi_handle_ack(gmu, &resp); + else if (id == HFI_F2H_MSG_ERROR) + a6xx_hfi_handle_error(gmu, &resp); + } +} + +static int a6xx_hfi_send_msg(struct a6xx_gmu *gmu, int id, + void *data, u32 size, u32 *payload, u32 payload_size) +{ + struct a6xx_hfi_queue *queue = &gmu->queues[HFI_COMMAND_QUEUE]; + struct a6xx_hfi_response resp = { 0 }; + int ret, dwords = size >> 2; + u32 seqnum; + + seqnum = atomic_inc_return(&queue->seqnum) % 0xfff; + + /* First dword of the message is the message header - fill it in */ + *((u32 *) data) = (seqnum << 20) | (HFI_MSG_CMD << 16) | + (dwords << 8) | id; + + init_completion(&resp.complete); + resp.id = id; + resp.seqnum = seqnum; + + spin_lock_bh(&hfi_ack_lock); + list_add_tail(&resp.node, &hfi_ack_list); + spin_unlock_bh(&hfi_ack_lock); + + ret = a6xx_hfi_queue_write(gmu, queue, data, dwords); + if (ret) { + dev_err(gmu->dev, "Unable to send message %s id %d\n", + a6xx_hfi_msg_id[id], seqnum); + goto out; + } + + /* Wait up to 5 seconds for the response */ + ret = wait_for_completion_timeout(&resp.complete, + msecs_to_jiffies(5000)); + if (!ret) { + dev_err(gmu->dev, + "Message %s id %d timed out waiting for response\n", + a6xx_hfi_msg_id[id], seqnum); + ret = -ETIMEDOUT; + } else + ret = 0; + +out: + spin_lock_bh(&hfi_ack_lock); + list_del(&resp.node); + spin_unlock_bh(&hfi_ack_lock); + + if (ret) + return ret; + + if (resp.error) { + dev_err(gmu->dev, "Message %s id %d returned error %d\n", + a6xx_hfi_msg_id[id], seqnum, resp.error); + return -EINVAL; + } + + if (payload && payload_size) { + int copy = min_t(u32, payload_size, sizeof(resp.payload)); + + memcpy(payload, resp.payload, copy); + } + + return 0; +} + +static int a6xx_hfi_send_gmu_init(struct a6xx_gmu *gmu, int boot_state) +{ + struct a6xx_hfi_msg_gmu_init_cmd msg = { 0 }; + + msg.dbg_buffer_addr = (u32) gmu->debug->iova; + msg.dbg_buffer_size = (u32) gmu->debug->size; + msg.boot_state = boot_state; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_INIT, &msg, sizeof(msg), + NULL, 0); +} + +static int a6xx_hfi_get_fw_version(struct a6xx_gmu *gmu, u32 *version) +{ + struct a6xx_hfi_msg_fw_version msg = { 0 }; + + /* Currently supporting version 1.1 */ + msg.supported_version = (1 << 28) | (1 << 16); + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_FW_VERSION, &msg, sizeof(msg), + version, sizeof(*version)); +} + +static int a6xx_hfi_send_perf_table(struct a6xx_gmu *gmu) +{ + struct a6xx_hfi_msg_perf_table msg = { 0 }; + int i; + + msg.num_gpu_levels = gmu->nr_gpu_freqs; + msg.num_gmu_levels = gmu->nr_gmu_freqs; + + for (i = 0; i < gmu->nr_gpu_freqs; i++) { + msg.gx_votes[i].vote = gmu->gx_arc_votes[i]; + msg.gx_votes[i].freq = gmu->gpu_freqs[i] / 1000; + } + + for (i = 0; i < gmu->nr_gmu_freqs; i++) { + msg.cx_votes[i].vote = gmu->cx_arc_votes[i]; + msg.cx_votes[i].freq = gmu->gmu_freqs[i] / 1000; + } + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_PERF_TABLE, &msg, sizeof(msg), + NULL, 0); +} + +static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) +{ + struct a6xx_hfi_msg_bw_table msg = { 0 }; + + /* + * The sdm845 GMU doesn't do bus frequency scaling on its own but it + * does need at least one entry in the list because it might be accessed + * when the GMU is shutting down. Send a single "off" entry. + */ + + msg.bw_level_num = 1; + + msg.ddr_cmds_num = 3; + msg.ddr_wait_bitmask = 0x07; + + msg.ddr_cmds_addrs[0] = 0x50000; + msg.ddr_cmds_addrs[1] = 0x5005c; + msg.ddr_cmds_addrs[2] = 0x5000c; + + msg.ddr_cmds_data[0][0] = 0x40000000; + msg.ddr_cmds_data[0][1] = 0x40000000; + msg.ddr_cmds_data[0][2] = 0x40000000; + + /* + * These are the CX (CNOC) votes. This is used but the values for the + * sdm845 GMU are known and fixed so we can hard code them. + */ + + msg.cnoc_cmds_num = 3; + msg.cnoc_wait_bitmask = 0x05; + + msg.cnoc_cmds_addrs[0] = 0x50034; + msg.cnoc_cmds_addrs[1] = 0x5007c; + msg.cnoc_cmds_addrs[2] = 0x5004c; + + msg.cnoc_cmds_data[0][0] = 0x40000000; + msg.cnoc_cmds_data[0][1] = 0x00000000; + msg.cnoc_cmds_data[0][2] = 0x40000000; + + msg.cnoc_cmds_data[1][0] = 0x60000001; + msg.cnoc_cmds_data[1][1] = 0x20000001; + msg.cnoc_cmds_data[1][2] = 0x60000001; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, &msg, sizeof(msg), + NULL, 0); +} + +static int a6xx_hfi_send_test(struct a6xx_gmu *gmu) +{ + struct a6xx_hfi_msg_test msg = { 0 }; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_TEST, &msg, sizeof(msg), + NULL, 0); +} + +int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state) +{ + int ret; + + ret = a6xx_hfi_send_gmu_init(gmu, boot_state); + if (ret) + return ret; + + ret = a6xx_hfi_get_fw_version(gmu, NULL); + if (ret) + return ret; + + /* + * We have to get exchange version numbers per the sequence but at this + * point th kernel driver doesn't need to know the exact version of + * the GMU firmware + */ + + ret = a6xx_hfi_send_perf_table(gmu); + if (ret) + return ret; + + ret = a6xx_hfi_send_bw_table(gmu); + if (ret) + return ret; + + /* + * Let the GMU know that there won't be any more HFI messages until next + * boot + */ + a6xx_hfi_send_test(gmu); + + return 0; +} + +void a6xx_hfi_stop(struct a6xx_gmu *gmu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) { + struct a6xx_hfi_queue *queue = &gmu->queues[i]; + + if (!queue->header) + continue; + + if (queue->header->read_index != queue->header->write_index) + dev_err(gmu->dev, "HFI queue %d is not empty\n", i); + + queue->header->read_index = 0; + queue->header->write_index = 0; + } +} + +static void a6xx_hfi_queue_init(struct a6xx_hfi_queue *queue, + struct a6xx_hfi_queue_header *header, void *virt, u64 iova, + u32 id) +{ + spin_lock_init(&queue->lock); + queue->header = header; + queue->data = virt; + atomic_set(&queue->seqnum, 0); + + /* Set up the shared memory header */ + header->iova = iova; + header->type = 10 << 8 | id; + header->status = 1; + header->size = SZ_4K >> 2; + header->msg_size = 0; + header->dropped = 0; + header->rx_watermark = 1; + header->tx_watermark = 1; + header->rx_request = 1; + header->tx_request = 0; + header->read_index = 0; + header->write_index = 0; +} + +void a6xx_hfi_init(struct a6xx_gmu *gmu) +{ + struct a6xx_gmu_bo *hfi = gmu->hfi; + struct a6xx_hfi_queue_table_header *table = hfi->virt; + struct a6xx_hfi_queue_header *headers = hfi->virt + sizeof(*table); + u64 offset; + int table_size; + + /* + * The table size is the size of the table header plus all of the queue + * headers + */ + table_size = sizeof(*table); + table_size += (ARRAY_SIZE(gmu->queues) * + sizeof(struct a6xx_hfi_queue_header)); + + table->version = 0; + table->size = table_size; + /* First queue header is located immediately after the table header */ + table->qhdr0_offset = sizeof(*table) >> 2; + table->qhdr_size = sizeof(struct a6xx_hfi_queue_header) >> 2; + table->num_queues = ARRAY_SIZE(gmu->queues); + table->active_queues = ARRAY_SIZE(gmu->queues); + + /* Command queue */ + offset = SZ_4K; + a6xx_hfi_queue_init(&gmu->queues[0], &headers[0], hfi->virt + offset, + hfi->iova + offset, 0); + + /* GMU response queue */ + offset += SZ_4K; + a6xx_hfi_queue_init(&gmu->queues[1], &headers[1], hfi->virt + offset, + hfi->iova + offset, 4); +} diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h new file mode 100644 index 000000000000..60d1319fa44f --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. */ + +#ifndef _A6XX_HFI_H_ +#define _A6XX_HFI_H_ + +struct a6xx_hfi_queue_table_header { + u32 version; + u32 size; /* Size of the queue table in dwords */ + u32 qhdr0_offset; /* Offset of the first queue header */ + u32 qhdr_size; /* Size of the queue headers */ + u32 num_queues; /* Number of total queues */ + u32 active_queues; /* Number of active queues */ +}; + +struct a6xx_hfi_queue_header { + u32 status; + u32 iova; + u32 type; + u32 size; + u32 msg_size; + u32 dropped; + u32 rx_watermark; + u32 tx_watermark; + u32 rx_request; + u32 tx_request; + u32 read_index; + u32 write_index; +}; + +struct a6xx_hfi_queue { + struct a6xx_hfi_queue_header *header; + spinlock_t lock; + u32 *data; + atomic_t seqnum; +}; + +/* This is the outgoing queue to the GMU */ +#define HFI_COMMAND_QUEUE 0 + +/* THis is the incoming response queue from the GMU */ +#define HFI_RESPONSE_QUEUE 1 + +#define HFI_HEADER_ID(msg) ((msg) & 0xff) +#define HFI_HEADER_SIZE(msg) (((msg) >> 8) & 0xff) +#define HFI_HEADER_SEQNUM(msg) (((msg) >> 20) & 0xfff) + +/* FIXME: Do we need this or can we use ARRAY_SIZE? */ +#define HFI_RESPONSE_PAYLOAD_SIZE 16 + +/* HFI message types */ + +#define HFI_MSG_CMD 0 +#define HFI_MSG_ACK 2 + +#define HFI_F2H_MSG_ACK 126 + +struct a6xx_hfi_msg_response { + u32 header; + u32 ret_header; + u32 error; + u32 payload[HFI_RESPONSE_PAYLOAD_SIZE]; +}; + +#define HFI_F2H_MSG_ERROR 100 + +struct a6xx_hfi_msg_error { + u32 header; + u32 code; + u32 payload[2]; +}; + +#define HFI_H2F_MSG_INIT 0 + +struct a6xx_hfi_msg_gmu_init_cmd { + u32 header; + u32 seg_id; + u32 dbg_buffer_addr; + u32 dbg_buffer_size; + u32 boot_state; +}; + +#define HFI_H2F_MSG_FW_VERSION 1 + +struct a6xx_hfi_msg_fw_version { + u32 header; + u32 supported_version; +}; + +#define HFI_H2F_MSG_PERF_TABLE 4 + +struct perf_level { + u32 vote; + u32 freq; +}; + +struct a6xx_hfi_msg_perf_table { + u32 header; + u32 num_gpu_levels; + u32 num_gmu_levels; + + struct perf_level gx_votes[16]; + struct perf_level cx_votes[4]; +}; + +#define HFI_H2F_MSG_BW_TABLE 3 + +struct a6xx_hfi_msg_bw_table { + u32 header; + u32 bw_level_num; + u32 cnoc_cmds_num; + u32 ddr_cmds_num; + u32 cnoc_wait_bitmask; + u32 ddr_wait_bitmask; + u32 cnoc_cmds_addrs[6]; + u32 cnoc_cmds_data[2][6]; + u32 ddr_cmds_addrs[8]; + u32 ddr_cmds_data[16][8]; +}; + +#define HFI_H2F_MSG_TEST 5 + +struct a6xx_hfi_msg_test { + u32 header; +}; + +#endif diff --git a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h index b634cf71352b..5dace1350810 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h +++ b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h @@ -8,17 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42393 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 101627 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -44,6 +46,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +enum chip { + A2XX = 0, + A3XX = 0, + A4XX = 0, + A5XX = 0, + A6XX = 0, +}; + enum adreno_pa_su_sc_draw { PC_DRAW_POINTS = 0, PC_DRAW_LINES = 1, @@ -181,6 +191,12 @@ enum a3xx_rb_blend_opcode { BLEND_MAX_DST_SRC = 4, }; +enum a4xx_tess_spacing { + EQUAL_SPACING = 0, + ODD_SPACING = 2, + EVEN_SPACING = 3, +}; + #define REG_AXXX_CP_RB_BASE 0x000001c0 #define REG_AXXX_CP_RB_CNTL 0x000001c1 diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 44813624a286..7d3e9a129ac7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -111,6 +111,16 @@ static const struct adreno_info gpulist[] = { ADRENO_QUIRK_FAULT_DETECT_MASK, .init = a5xx_gpu_init, .zapfw = "a530_zap.mdt", + }, { + .rev = ADRENO_REV(6, 3, 0, ANY_ID), + .revn = 630, + .name = "A630", + .fw = { + [ADRENO_FW_SQE] = "a630_sqe.fw", + [ADRENO_FW_GMU] = "a630_gmu.bin", + }, + .gmem = SZ_1M, + .init = a6xx_gpu_init, }, }; @@ -127,6 +137,8 @@ MODULE_FIRMWARE("qcom/a530_zap.mdt"); MODULE_FIRMWARE("qcom/a530_zap.b00"); MODULE_FIRMWARE("qcom/a530_zap.b01"); MODULE_FIRMWARE("qcom/a530_zap.b02"); +MODULE_FIRMWARE("qcom/a630_sqe.fw"); +MODULE_FIRMWARE("qcom/a630_gmu.bin"); static inline bool _rev_match(uint8_t entry, uint8_t id) { @@ -155,6 +167,7 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; struct msm_gpu *gpu = NULL; + struct adreno_gpu *adreno_gpu; int ret; if (pdev) @@ -165,7 +178,27 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) return NULL; } - pm_runtime_get_sync(&pdev->dev); + adreno_gpu = to_adreno_gpu(gpu); + + /* + * The number one reason for HW init to fail is if the firmware isn't + * loaded yet. Try that first and don't bother continuing on + * otherwise + */ + + ret = adreno_load_fw(adreno_gpu); + if (ret) + return NULL; + + /* Make sure pm runtime is active and reset any previous errors */ + pm_runtime_set_active(&pdev->dev); + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + dev_err(dev->dev, "Couldn't power up the GPU: %d\n", ret); + return NULL; + } + mutex_lock(&dev->struct_mutex); ret = msm_gpu_hw_init(gpu); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 38ac50b73829..da1363a0c54d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -18,7 +18,9 @@ */ #include <linux/ascii85.h> +#include <linux/kernel.h> #include <linux/pm_opp.h> +#include <linux/slab.h> #include "adreno_gpu.h" #include "msm_gem.h" #include "msm_mmu.h" @@ -71,10 +73,12 @@ adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) { struct drm_device *drm = adreno_gpu->base.dev; const struct firmware *fw = NULL; - char newname[strlen("qcom/") + strlen(fwname) + 1]; + char *newname; int ret; - sprintf(newname, "qcom/%s", fwname); + newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname); + if (!newname) + return ERR_PTR(-ENOMEM); /* * Try first to load from qcom/$fwfile using a direct load (to avoid @@ -88,11 +92,12 @@ adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) dev_info(drm->dev, "loaded %s from new location\n", newname); adreno_gpu->fwloc = FW_LOCATION_NEW; - return fw; + goto out; } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { dev_err(drm->dev, "failed to load %s: %d\n", newname, ret); - return ERR_PTR(ret); + fw = ERR_PTR(ret); + goto out; } } @@ -107,11 +112,12 @@ adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) dev_info(drm->dev, "loaded %s from legacy location\n", newname); adreno_gpu->fwloc = FW_LOCATION_LEGACY; - return fw; + goto out; } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { dev_err(drm->dev, "failed to load %s: %d\n", fwname, ret); - return ERR_PTR(ret); + fw = ERR_PTR(ret); + goto out; } } @@ -127,19 +133,23 @@ adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) dev_info(drm->dev, "loaded %s with helper\n", newname); adreno_gpu->fwloc = FW_LOCATION_HELPER; - return fw; + goto out; } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { dev_err(drm->dev, "failed to load %s: %d\n", newname, ret); - return ERR_PTR(ret); + fw = ERR_PTR(ret); + goto out; } } dev_err(drm->dev, "failed to load %s\n", fwname); - return ERR_PTR(-ENOENT); + fw = ERR_PTR(-ENOENT); +out: + kfree(newname); + return fw; } -static int adreno_load_fw(struct adreno_gpu *adreno_gpu) +int adreno_load_fw(struct adreno_gpu *adreno_gpu) { int i; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 4406776597fd..de6e6ee42fba 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -50,7 +50,9 @@ enum adreno_regs { enum { ADRENO_FW_PM4 = 0, + ADRENO_FW_SQE = 0, /* a6xx */ ADRENO_FW_PFP = 1, + ADRENO_FW_GMU = 1, /* a6xx */ ADRENO_FW_GPMU = 2, ADRENO_FW_MAX, }; @@ -228,7 +230,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); - +int adreno_load_fw(struct adreno_gpu *adreno_gpu); void adreno_gpu_state_destroy(struct msm_gpu_state *state); @@ -335,6 +337,7 @@ static inline void adreno_gpu_write(struct adreno_gpu *gpu, struct msm_gpu *a3xx_gpu_init(struct drm_device *dev); struct msm_gpu *a4xx_gpu_init(struct drm_device *dev); struct msm_gpu *a5xx_gpu_init(struct drm_device *dev); +struct msm_gpu *a6xx_gpu_init(struct drm_device *dev); static inline void adreno_gpu_write64(struct adreno_gpu *gpu, enum adreno_regs lo, enum adreno_regs hi, u64 data) diff --git a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h index fb605a3534cf..03a91e10b310 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h +++ b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h @@ -8,17 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 431 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 37162 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13324 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 31866 bytes, from 2017-06-06 18:26:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 111898 bytes, from 2017-06-06 18:23:59) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 139480 bytes, from 2017-06-16 12:44:39) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42393 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 101627 bytes, from 2018-08-06 18:45:45) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) @@ -71,7 +73,8 @@ enum vgt_event_type { FLUSH_SO_1 = 18, FLUSH_SO_2 = 19, FLUSH_SO_3 = 20, - UNK_19 = 25, + PC_CCU_INVALIDATE_DEPTH = 24, + PC_CCU_INVALIDATE_COLOR = 25, UNK_1C = 28, UNK_1D = 29, BLIT = 30, @@ -199,9 +202,12 @@ enum adreno_pm4_type3_packets { CP_WAIT_MEM_WRITES = 18, CP_COND_REG_EXEC = 71, CP_MEM_TO_REG = 66, + CP_EXEC_CS_INDIRECT = 65, CP_EXEC_CS = 51, CP_PERFCOUNTER_ACTION = 80, CP_SMMU_TABLE_UPDATE = 83, + CP_SET_MARKER = 101, + CP_SET_PSEUDO_REG = 86, CP_CONTEXT_REG_BUNCH = 92, CP_YIELD_ENABLE = 28, CP_SKIP_IB2_ENABLE_GLOBAL = 29, @@ -215,7 +221,10 @@ enum adreno_pm4_type3_packets { CP_COMPUTE_CHECKPOINT = 110, CP_MEM_TO_MEM = 115, CP_BLIT = 44, - CP_UNK_39 = 57, + CP_REG_TEST = 57, + CP_SET_MODE = 99, + CP_LOAD_STATE6_GEOM = 50, + CP_LOAD_STATE6_FRAG = 52, IN_IB_PREFETCH_END = 23, IN_SUBBLK_PREFETCH = 31, IN_INSTR_PREFETCH = 32, @@ -224,6 +233,11 @@ enum adreno_pm4_type3_packets { IN_INCR_UPDT_STATE = 85, IN_INCR_UPDT_CONST = 86, IN_INCR_UPDT_INSTR = 87, + PKT4 = 4, + CP_UNK_A6XX_14 = 20, + CP_UNK_A6XX_36 = 54, + CP_UNK_A6XX_55 = 85, + UNK_A6XX_6D = 109, }; enum adreno_state_block { @@ -278,6 +292,33 @@ enum a4xx_state_src { SS4_INDIRECT = 2, }; +enum a6xx_state_block { + SB6_VS_TEX = 0, + SB6_HS_TEX = 1, + SB6_DS_TEX = 2, + SB6_GS_TEX = 3, + SB6_FS_TEX = 4, + SB6_CS_TEX = 5, + SB6_VS_SHADER = 8, + SB6_HS_SHADER = 9, + SB6_DS_SHADER = 10, + SB6_GS_SHADER = 11, + SB6_FS_SHADER = 12, + SB6_CS_SHADER = 13, + SB6_SSBO = 14, + SB6_CS_SSBO = 15, +}; + +enum a6xx_state_type { + ST6_SHADER = 0, + ST6_CONSTANTS = 1, +}; + +enum a6xx_state_src { + SS6_DIRECT = 0, + SS6_INDIRECT = 2, +}; + enum a4xx_index_size { INDEX4_SIZE_8_BIT = 0, INDEX4_SIZE_16_BIT = 1, @@ -300,6 +341,7 @@ enum render_mode_cmd { GMEM = 3, BLIT2D = 5, BLIT2DSCALE = 7, + END2D = 8, }; enum cp_blit_cmd { @@ -308,6 +350,22 @@ enum cp_blit_cmd { BLIT_OP_SCALE = 3, }; +enum a6xx_render_mode { + RM6_BYPASS = 1, + RM6_BINNING = 2, + RM6_GMEM = 4, + RM6_BLIT2D = 5, + RM6_RESOLVE = 6, +}; + +enum pseudo_reg { + SMMU_INFO = 0, + NON_SECURE_SAVE_ADDR = 1, + SECURE_SAVE_ADDR = 2, + NON_PRIV_SAVE_ADDR = 3, + COUNTER = 4, +}; + #define REG_CP_LOAD_STATE_0 0x00000000 #define CP_LOAD_STATE_0_DST_OFF__MASK 0x0000ffff #define CP_LOAD_STATE_0_DST_OFF__SHIFT 0 @@ -349,7 +407,7 @@ static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val) } #define REG_CP_LOAD_STATE4_0 0x00000000 -#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x0000ffff +#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x00003fff #define CP_LOAD_STATE4_0_DST_OFF__SHIFT 0 static inline uint32_t CP_LOAD_STATE4_0_DST_OFF(uint32_t val) { @@ -396,6 +454,54 @@ static inline uint32_t CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(uint32_t val) return ((val) << CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK; } +#define REG_CP_LOAD_STATE6_0 0x00000000 +#define CP_LOAD_STATE6_0_DST_OFF__MASK 0x00003fff +#define CP_LOAD_STATE6_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE6_0_DST_OFF(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_0_DST_OFF__SHIFT) & CP_LOAD_STATE6_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE6_0_STATE_TYPE__MASK 0x00004000 +#define CP_LOAD_STATE6_0_STATE_TYPE__SHIFT 14 +static inline uint32_t CP_LOAD_STATE6_0_STATE_TYPE(enum a6xx_state_type val) +{ + return ((val) << CP_LOAD_STATE6_0_STATE_TYPE__SHIFT) & CP_LOAD_STATE6_0_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE6_0_STATE_SRC__MASK 0x00030000 +#define CP_LOAD_STATE6_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE6_0_STATE_SRC(enum a6xx_state_src val) +{ + return ((val) << CP_LOAD_STATE6_0_STATE_SRC__SHIFT) & CP_LOAD_STATE6_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE6_0_STATE_BLOCK__MASK 0x003c0000 +#define CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT 18 +static inline uint32_t CP_LOAD_STATE6_0_STATE_BLOCK(enum a6xx_state_block val) +{ + return ((val) << CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE6_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE6_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE6_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE6_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE6_0_NUM_UNIT__MASK; +} + +#define REG_CP_LOAD_STATE6_1 0x00000001 +#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE6_1_EXT_SRC_ADDR(uint32_t val) +{ + return ((val >> 2) << CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE6_2 0x00000002 +#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK 0xffffffff +#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT 0 +static inline uint32_t CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK; +} + #define REG_CP_DRAW_INDX_0 0x00000000 #define CP_DRAW_INDX_0_VIZ_QUERY__MASK 0xffffffff #define CP_DRAW_INDX_0_VIZ_QUERY__SHIFT 0 @@ -580,6 +686,153 @@ static inline uint32_t CP_DRAW_INDX_OFFSET_5_INDX_SIZE(uint32_t val) return ((val) << CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK; } +#define REG_A4XX_CP_DRAW_INDIRECT_0 0x00000000 +#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f +#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 +#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK 0x00000300 +#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT 8 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 +#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT 10 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK 0x01f00000 +#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT 20 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_TESS_MODE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDIRECT_1 0x00000001 +#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK 0xffffffff +#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_1_INDIRECT(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK; +} + + +#define REG_A5XX_CP_DRAW_INDIRECT_2 0x00000002 +#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_0 0x00000000 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f +#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK 0x00000300 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT 8 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT 10 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK 0x01f00000 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT 20 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK; +} + + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 +#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 +#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 +#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK; +} + + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 +#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 +#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 +#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_4 0x00000004 +#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_5 0x00000005 +#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK; +} + static inline uint32_t REG_CP_SET_DRAW_STATE_(uint32_t i0) { return 0x00000000 + 0x3*i0; } static inline uint32_t REG_CP_SET_DRAW_STATE__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } @@ -593,6 +846,12 @@ static inline uint32_t CP_SET_DRAW_STATE__0_COUNT(uint32_t val) #define CP_SET_DRAW_STATE__0_DISABLE 0x00020000 #define CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS 0x00040000 #define CP_SET_DRAW_STATE__0_LOAD_IMMED 0x00080000 +#define CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK 0x00f00000 +#define CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT 20 +static inline uint32_t CP_SET_DRAW_STATE__0_ENABLE_MASK(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT) & CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK; +} #define CP_SET_DRAW_STATE__0_GROUP_ID__MASK 0x1f000000 #define CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT 24 static inline uint32_t CP_SET_DRAW_STATE__0_GROUP_ID(uint32_t val) @@ -708,6 +967,22 @@ static inline uint32_t CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI(uint32_t val) return ((val) << CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK; } +#define REG_CP_SET_BIN_DATA5_5 0x00000005 +#define CP_SET_BIN_DATA5_5_XXX_ADDRESS_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_5_XXX_ADDRESS_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_5_XXX_ADDRESS_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_5_XXX_ADDRESS_LO__SHIFT) & CP_SET_BIN_DATA5_5_XXX_ADDRESS_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_6 0x00000006 +#define CP_SET_BIN_DATA5_6_XXX_ADDRESS_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_6_XXX_ADDRESS_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_6_XXX_ADDRESS_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_6_XXX_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_6_XXX_ADDRESS_HI__MASK; +} + #define REG_CP_REG_TO_MEM_0 0x00000000 #define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff #define CP_REG_TO_MEM_0_REG__SHIFT 0 @@ -732,6 +1007,46 @@ static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; } +#define REG_CP_REG_TO_MEM_2 0x00000002 +#define CP_REG_TO_MEM_2_DEST_HI__MASK 0xffffffff +#define CP_REG_TO_MEM_2_DEST_HI__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_2_DEST_HI(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_2_DEST_HI__SHIFT) & CP_REG_TO_MEM_2_DEST_HI__MASK; +} + +#define REG_CP_MEM_TO_REG_0 0x00000000 +#define CP_MEM_TO_REG_0_REG__MASK 0x0000ffff +#define CP_MEM_TO_REG_0_REG__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_0_REG(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_0_REG__SHIFT) & CP_MEM_TO_REG_0_REG__MASK; +} +#define CP_MEM_TO_REG_0_CNT__MASK 0x3ff80000 +#define CP_MEM_TO_REG_0_CNT__SHIFT 19 +static inline uint32_t CP_MEM_TO_REG_0_CNT(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_0_CNT__SHIFT) & CP_MEM_TO_REG_0_CNT__MASK; +} +#define CP_MEM_TO_REG_0_64B 0x40000000 +#define CP_MEM_TO_REG_0_ACCUMULATE 0x80000000 + +#define REG_CP_MEM_TO_REG_1 0x00000001 +#define CP_MEM_TO_REG_1_SRC__MASK 0xffffffff +#define CP_MEM_TO_REG_1_SRC__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_1_SRC(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_1_SRC__SHIFT) & CP_MEM_TO_REG_1_SRC__MASK; +} + +#define REG_CP_MEM_TO_REG_2 0x00000002 +#define CP_MEM_TO_REG_2_SRC_HI__MASK 0xffffffff +#define CP_MEM_TO_REG_2_SRC_HI__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_2_SRC_HI(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_2_SRC_HI__SHIFT) & CP_MEM_TO_REG_2_SRC_HI__MASK; +} + #define REG_CP_MEM_TO_MEM_0 0x00000000 #define CP_MEM_TO_MEM_0_NEG_A 0x00000001 #define CP_MEM_TO_MEM_0_NEG_B 0x00000002 @@ -953,15 +1268,15 @@ static inline uint32_t CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI(uint32_t val) #define REG_CP_COMPUTE_CHECKPOINT_2 0x00000002 #define REG_CP_COMPUTE_CHECKPOINT_3 0x00000003 - -#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 -#define CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN(uint32_t val) +#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN(uint32_t val) { - return ((val) << CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_4_ADDR_1_LEN__MASK; + return ((val) << CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK; } +#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 + #define REG_CP_COMPUTE_CHECKPOINT_5 0x00000005 #define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK 0xffffffff #define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT 0 @@ -978,6 +1293,8 @@ static inline uint32_t CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI(uint32_t val) return ((val) << CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK; } +#define REG_CP_COMPUTE_CHECKPOINT_7 0x00000007 + #define REG_CP_PERFCOUNTER_ACTION_0 0x00000000 #define REG_CP_PERFCOUNTER_ACTION_1 0x00000001 @@ -1032,13 +1349,13 @@ static inline uint32_t CP_BLIT_0_OP(enum cp_blit_cmd val) } #define REG_CP_BLIT_1 0x00000001 -#define CP_BLIT_1_SRC_X1__MASK 0x0000ffff +#define CP_BLIT_1_SRC_X1__MASK 0x00003fff #define CP_BLIT_1_SRC_X1__SHIFT 0 static inline uint32_t CP_BLIT_1_SRC_X1(uint32_t val) { return ((val) << CP_BLIT_1_SRC_X1__SHIFT) & CP_BLIT_1_SRC_X1__MASK; } -#define CP_BLIT_1_SRC_Y1__MASK 0xffff0000 +#define CP_BLIT_1_SRC_Y1__MASK 0x3fff0000 #define CP_BLIT_1_SRC_Y1__SHIFT 16 static inline uint32_t CP_BLIT_1_SRC_Y1(uint32_t val) { @@ -1046,13 +1363,13 @@ static inline uint32_t CP_BLIT_1_SRC_Y1(uint32_t val) } #define REG_CP_BLIT_2 0x00000002 -#define CP_BLIT_2_SRC_X2__MASK 0x0000ffff +#define CP_BLIT_2_SRC_X2__MASK 0x00003fff #define CP_BLIT_2_SRC_X2__SHIFT 0 static inline uint32_t CP_BLIT_2_SRC_X2(uint32_t val) { return ((val) << CP_BLIT_2_SRC_X2__SHIFT) & CP_BLIT_2_SRC_X2__MASK; } -#define CP_BLIT_2_SRC_Y2__MASK 0xffff0000 +#define CP_BLIT_2_SRC_Y2__MASK 0x3fff0000 #define CP_BLIT_2_SRC_Y2__SHIFT 16 static inline uint32_t CP_BLIT_2_SRC_Y2(uint32_t val) { @@ -1060,13 +1377,13 @@ static inline uint32_t CP_BLIT_2_SRC_Y2(uint32_t val) } #define REG_CP_BLIT_3 0x00000003 -#define CP_BLIT_3_DST_X1__MASK 0x0000ffff +#define CP_BLIT_3_DST_X1__MASK 0x00003fff #define CP_BLIT_3_DST_X1__SHIFT 0 static inline uint32_t CP_BLIT_3_DST_X1(uint32_t val) { return ((val) << CP_BLIT_3_DST_X1__SHIFT) & CP_BLIT_3_DST_X1__MASK; } -#define CP_BLIT_3_DST_Y1__MASK 0xffff0000 +#define CP_BLIT_3_DST_Y1__MASK 0x3fff0000 #define CP_BLIT_3_DST_Y1__SHIFT 16 static inline uint32_t CP_BLIT_3_DST_Y1(uint32_t val) { @@ -1074,13 +1391,13 @@ static inline uint32_t CP_BLIT_3_DST_Y1(uint32_t val) } #define REG_CP_BLIT_4 0x00000004 -#define CP_BLIT_4_DST_X2__MASK 0x0000ffff +#define CP_BLIT_4_DST_X2__MASK 0x00003fff #define CP_BLIT_4_DST_X2__SHIFT 0 static inline uint32_t CP_BLIT_4_DST_X2(uint32_t val) { return ((val) << CP_BLIT_4_DST_X2__SHIFT) & CP_BLIT_4_DST_X2__MASK; } -#define CP_BLIT_4_DST_Y2__MASK 0xffff0000 +#define CP_BLIT_4_DST_Y2__MASK 0x3fff0000 #define CP_BLIT_4_DST_Y2__SHIFT 16 static inline uint32_t CP_BLIT_4_DST_Y2(uint32_t val) { @@ -1113,5 +1430,129 @@ static inline uint32_t CP_EXEC_CS_3_NGROUPS_Z(uint32_t val) return ((val) << CP_EXEC_CS_3_NGROUPS_Z__SHIFT) & CP_EXEC_CS_3_NGROUPS_Z__MASK; } +#define REG_A4XX_CP_EXEC_CS_INDIRECT_0 0x00000000 + + +#define REG_A4XX_CP_EXEC_CS_INDIRECT_1 0x00000001 +#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK 0xffffffff +#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT 0 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_1_ADDR(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK; +} + +#define REG_A4XX_CP_EXEC_CS_INDIRECT_2 0x00000002 +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK 0x00000ffc +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT 2 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK; +} +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK 0x003ff000 +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT 12 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK; +} +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK 0xffc00000 +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK; +} + + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_1 0x00000001 +#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK 0xffffffff +#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT 0 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK; +} + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_2 0x00000002 +#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK 0xffffffff +#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT 0 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK; +} + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_3 0x00000003 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK 0x00000ffc +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT 2 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK; +} +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK 0x003ff000 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT 12 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK; +} +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK 0xffc00000 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK; +} + +#define REG_A2XX_CP_SET_MARKER_0 0x00000000 +#define A2XX_CP_SET_MARKER_0_MARKER__MASK 0x0000000f +#define A2XX_CP_SET_MARKER_0_MARKER__SHIFT 0 +static inline uint32_t A2XX_CP_SET_MARKER_0_MARKER(uint32_t val) +{ + return ((val) << A2XX_CP_SET_MARKER_0_MARKER__SHIFT) & A2XX_CP_SET_MARKER_0_MARKER__MASK; +} +#define A2XX_CP_SET_MARKER_0_MODE__MASK 0x0000000f +#define A2XX_CP_SET_MARKER_0_MODE__SHIFT 0 +static inline uint32_t A2XX_CP_SET_MARKER_0_MODE(enum a6xx_render_mode val) +{ + return ((val) << A2XX_CP_SET_MARKER_0_MODE__SHIFT) & A2XX_CP_SET_MARKER_0_MODE__MASK; +} +#define A2XX_CP_SET_MARKER_0_IFPC 0x00000100 + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG_(uint32_t i0) { return 0x00000000 + 0x3*i0; } + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK 0x00000007 +#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG(enum pseudo_reg val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT) & A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK; +} + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__1_LO__MASK 0xffffffff +#define A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__1_LO(uint32_t val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT) & A2XX_CP_SET_PSEUDO_REG__1_LO__MASK; +} + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__2_HI__MASK 0xffffffff +#define A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__2_HI(uint32_t val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT) & A2XX_CP_SET_PSEUDO_REG__2_HI__MASK; +} + +#define REG_A2XX_CP_REG_TEST_0 0x00000000 +#define A2XX_CP_REG_TEST_0_REG__MASK 0x00000fff +#define A2XX_CP_REG_TEST_0_REG__SHIFT 0 +static inline uint32_t A2XX_CP_REG_TEST_0_REG(uint32_t val) +{ + return ((val) << A2XX_CP_REG_TEST_0_REG__SHIFT) & A2XX_CP_REG_TEST_0_REG__MASK; +} +#define A2XX_CP_REG_TEST_0_BIT__MASK 0x01f00000 +#define A2XX_CP_REG_TEST_0_BIT__SHIFT 20 +static inline uint32_t A2XX_CP_REG_TEST_0_BIT(uint32_t val) +{ + return ((val) << A2XX_CP_REG_TEST_0_BIT__SHIFT) & A2XX_CP_REG_TEST_0_BIT__MASK; +} +#define A2XX_CP_REG_TEST_0_UNK25 0x02000000 + #endif /* ADRENO_PM4_XML */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 0bd3eda93e22..1b4de3486ef9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -421,7 +421,7 @@ int dpu_encoder_helper_unregister_irq(struct dpu_encoder_phys *phys_enc, ret = dpu_core_irq_disable(phys_enc->dpu_kms, &irq->irq_idx, 1); if (ret) { - DRM_ERROR("diable failed id=%u, intr=%d, hw=%d, irq=%d ret=%d", + DRM_ERROR("disable failed id=%u, intr=%d, hw=%d, irq=%d ret=%d", DRMID(phys_enc->parent), intr_idx, irq->hw_idx, irq->irq_idx, ret); } @@ -2444,6 +2444,8 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; + if (!phys) + continue; switch (event) { case MSM_ENC_COMMIT_DONE: @@ -2461,7 +2463,7 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, return -EINVAL; }; - if (phys && fn_wait) { + if (fn_wait) { DPU_ATRACE_BEGIN("wait_for_completion_event"); ret = fn_wait(phys); DPU_ATRACE_END("wait_for_completion_event"); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_power_handle.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_power_handle.c index a68f1249388c..a75eebca2f37 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_power_handle.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_power_handle.c @@ -121,7 +121,7 @@ void dpu_power_resource_deinit(struct platform_device *pdev, mutex_lock(&phandle->phandle_lock); list_for_each_entry_safe(curr_client, next_client, &phandle->power_client_clist, list) { - pr_err("cliend:%s-%d still registered with refcount:%d\n", + pr_err("client:%s-%d still registered with refcount:%d\n", curr_client->name, curr_client->id, curr_client->refcount); curr_client->active = false; diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4.xml.h b/drivers/gpu/drm/msm/disp/mdp4/mdp4.xml.h index 576cea30d391..4b36b8954bae 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4.xml.h +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5.xml.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5.xml.h index d9c10e02ee41..784d98989e3a 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5.xml.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/disp/mdp_common.xml.h b/drivers/gpu/drm/msm/disp/mdp_common.xml.h index 1494c407be44..d420c8044e23 100644 --- a/drivers/gpu/drm/msm/disp/mdp_common.xml.h +++ b/drivers/gpu/drm/msm/disp/mdp_common.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h index f6a9471b70c8..21f489a737d7 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.xml.h +++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h @@ -8,8 +8,17 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /local/mnt/workspace/source_trees/envytools/rnndb/../rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-01-12 09:09:22) -- /local/mnt/workspace/source_trees/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-05-09 06:32:54) +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) diff --git a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h index 57cf7fa7f1c4..874265314413 100644 --- a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h +++ b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/dsi/sfpb.xml.h b/drivers/gpu/drm/msm/dsi/sfpb.xml.h index 9d4d1feaefd7..07c48ddb5301 100644 --- a/drivers/gpu/drm/msm/dsi/sfpb.xml.h +++ b/drivers/gpu/drm/msm/dsi/sfpb.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/edp/edp.xml.h b/drivers/gpu/drm/msm/edp/edp.xml.h index f150d4a47707..9cb6e6fe9810 100644 --- a/drivers/gpu/drm/msm/edp/edp.xml.h +++ b/drivers/gpu/drm/msm/edp/edp.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h index ecebf8b623ab..3eff3ea3b271 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h index da646deedf4b..7717d4269662 100644 --- a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h +++ b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h @@ -8,19 +8,19 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 33004 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2017-05-17 13:21:27) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2017-06-16 12:32:42) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2017-05-17 13:21:27) - -Copyright (C) 2013-2017 by the following authors: +- /home/robclark/src/envytools/rnndb/msm.xml ( 676 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp4.xml ( 20915 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp_common.xml ( 2849 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/mdp/mdp5.xml ( 37411 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/dsi.xml ( 37239 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/sfpb.xml ( 602 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/hdmi/hdmi.xml ( 41799 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/edp/edp.xml ( 10416 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 46876bc8b707..c1abad8a8612 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -81,6 +81,63 @@ module_param(modeset, bool, 0600); * Util/helpers: */ +int msm_clk_bulk_get(struct device *dev, struct clk_bulk_data **bulk) +{ + struct property *prop; + const char *name; + struct clk_bulk_data *local; + int i = 0, ret, count; + + count = of_property_count_strings(dev->of_node, "clock-names"); + if (count < 1) + return 0; + + local = devm_kcalloc(dev, sizeof(struct clk_bulk_data *), + count, GFP_KERNEL); + if (!local) + return -ENOMEM; + + of_property_for_each_string(dev->of_node, "clock-names", prop, name) { + local[i].id = devm_kstrdup(dev, name, GFP_KERNEL); + if (!local[i].id) { + devm_kfree(dev, local); + return -ENOMEM; + } + + i++; + } + + ret = devm_clk_bulk_get(dev, count, local); + + if (ret) { + for (i = 0; i < count; i++) + devm_kfree(dev, (void *) local[i].id); + devm_kfree(dev, local); + + return ret; + } + + *bulk = local; + return count; +} + +struct clk *msm_clk_bulk_get_clock(struct clk_bulk_data *bulk, int count, + const char *name) +{ + int i; + char n[32]; + + snprintf(n, sizeof(n), "%s_clk", name); + + for (i = 0; bulk && i < count; i++) { + if (!strcmp(bulk[i].id, name) || !strcmp(bulk[i].id, n)) + return bulk[i].clk; + } + + + return NULL; +} + struct clk *msm_clk_get(struct platform_device *pdev, const char *name) { struct clk *clk; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 0cba86ed3f54..8e510d5c758a 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -263,7 +263,7 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev); int msm_gem_mmap_obj(struct drm_gem_object *obj, struct vm_area_struct *vma); int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); -int msm_gem_fault(struct vm_fault *vmf); +vm_fault_t msm_gem_fault(struct vm_fault *vmf); uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); int msm_gem_get_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova); @@ -387,6 +387,10 @@ static inline void msm_perf_debugfs_cleanup(struct msm_drm_private *priv) {} #endif struct clk *msm_clk_get(struct platform_device *pdev, const char *name); +int msm_clk_bulk_get(struct device *dev, struct clk_bulk_data **bulk); + +struct clk *msm_clk_bulk_get_clock(struct clk_bulk_data *bulk, int count, + const char *name); void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, const char *dbgname); void msm_writel(u32 data, void __iomem *addr); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index f583bb4222f9..f59ca27a4a35 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -219,7 +219,7 @@ int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma) return msm_gem_mmap_obj(vma->vm_private_data, vma); } -int msm_gem_fault(struct vm_fault *vmf) +vm_fault_t msm_gem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; @@ -227,15 +227,18 @@ int msm_gem_fault(struct vm_fault *vmf) struct page **pages; unsigned long pfn; pgoff_t pgoff; - int ret; + int err; + vm_fault_t ret; /* * vm_ops.open/drm_gem_mmap_obj and close get and put * a reference on obj. So, we dont need to hold one here. */ - ret = mutex_lock_interruptible(&msm_obj->lock); - if (ret) + err = mutex_lock_interruptible(&msm_obj->lock); + if (err) { + ret = VM_FAULT_NOPAGE; goto out; + } if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { mutex_unlock(&msm_obj->lock); @@ -245,7 +248,7 @@ int msm_gem_fault(struct vm_fault *vmf) /* make sure we have pages attached now */ pages = get_pages(obj); if (IS_ERR(pages)) { - ret = PTR_ERR(pages); + ret = vmf_error(PTR_ERR(pages)); goto out_unlock; } @@ -257,27 +260,11 @@ int msm_gem_fault(struct vm_fault *vmf) VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address, pfn, pfn << PAGE_SHIFT); - ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); - + ret = vmf_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); out_unlock: mutex_unlock(&msm_obj->lock); out: - switch (ret) { - case -EAGAIN: - case 0: - case -ERESTARTSYS: - case -EINTR: - case -EBUSY: - /* - * EBUSY is ok: this just means that another thread - * already did the job. - */ - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - default: - return VM_FAULT_SIGBUS; - } + return ret; } /** get mmap offset */ diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index f388944c93e2..5e808cfec345 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -88,7 +88,7 @@ static struct devfreq_dev_profile msm_devfreq_profile = { static void msm_devfreq_init(struct msm_gpu *gpu) { /* We need target support to do devfreq */ - if (!gpu->funcs->gpu_busy) + if (!gpu->funcs->gpu_busy || !gpu->core_clk) return; msm_devfreq_profile.initial_freq = gpu->fast_rate; @@ -142,8 +142,6 @@ static int disable_pwrrail(struct msm_gpu *gpu) static int enable_clk(struct msm_gpu *gpu) { - int i; - if (gpu->core_clk && gpu->fast_rate) clk_set_rate(gpu->core_clk, gpu->fast_rate); @@ -151,28 +149,12 @@ static int enable_clk(struct msm_gpu *gpu) if (gpu->rbbmtimer_clk) clk_set_rate(gpu->rbbmtimer_clk, 19200000); - for (i = gpu->nr_clocks - 1; i >= 0; i--) - if (gpu->grp_clks[i]) - clk_prepare(gpu->grp_clks[i]); - - for (i = gpu->nr_clocks - 1; i >= 0; i--) - if (gpu->grp_clks[i]) - clk_enable(gpu->grp_clks[i]); - - return 0; + return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks); } static int disable_clk(struct msm_gpu *gpu) { - int i; - - for (i = gpu->nr_clocks - 1; i >= 0; i--) - if (gpu->grp_clks[i]) - clk_disable(gpu->grp_clks[i]); - - for (i = gpu->nr_clocks - 1; i >= 0; i--) - if (gpu->grp_clks[i]) - clk_unprepare(gpu->grp_clks[i]); + clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); /* * Set the clock to a deliberately low rate. On older targets the clock @@ -785,44 +767,22 @@ static irqreturn_t irq_handler(int irq, void *data) return gpu->funcs->irq(gpu); } -static struct clk *get_clock(struct device *dev, const char *name) -{ - struct clk *clk = devm_clk_get(dev, name); - - return IS_ERR(clk) ? NULL : clk; -} - static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) { - struct device *dev = &pdev->dev; - struct property *prop; - const char *name; - int i = 0; + int ret = msm_clk_bulk_get(&pdev->dev, &gpu->grp_clks); - gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names"); - if (gpu->nr_clocks < 1) { + if (ret < 1) { gpu->nr_clocks = 0; - return 0; - } - - gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks, - GFP_KERNEL); - if (!gpu->grp_clks) { - gpu->nr_clocks = 0; - return -ENOMEM; + return ret; } - of_property_for_each_string(dev->of_node, "clock-names", prop, name) { - gpu->grp_clks[i] = get_clock(dev, name); + gpu->nr_clocks = ret; - /* Remember the key clocks that we need to control later */ - if (!strcmp(name, "core") || !strcmp(name, "core_clk")) - gpu->core_clk = gpu->grp_clks[i]; - else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk")) - gpu->rbbmtimer_clk = gpu->grp_clks[i]; + gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks, + gpu->nr_clocks, "core"); - ++i; - } + gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks, + gpu->nr_clocks, "rbbmtimer"); return 0; } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 1c6105bc55c7..9122ee6e55e4 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -112,7 +112,7 @@ struct msm_gpu { /* Power Control: */ struct regulator *gpu_reg, *gpu_cx; - struct clk **grp_clks; + struct clk_bulk_data *grp_clks; int nr_clocks; struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk; uint32_t fast_rate; diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c index f829dadfd5a0..83472808c816 100644 --- a/drivers/hwmon/ibmpowernv.c +++ b/drivers/hwmon/ibmpowernv.c @@ -90,11 +90,20 @@ struct sensor_data { char label[MAX_LABEL_LEN]; char name[MAX_ATTR_LEN]; struct device_attribute dev_attr; + struct sensor_group_data *sgrp_data; +}; + +struct sensor_group_data { + struct mutex mutex; + u32 gid; + bool enable; }; struct platform_data { const struct attribute_group *attr_groups[MAX_SENSOR_TYPE + 1]; + struct sensor_group_data *sgrp_data; u32 sensors_count; /* Total count of sensors from each group */ + u32 nr_sensor_groups; /* Total number of sensor groups */ }; static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr, @@ -105,6 +114,9 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr, ssize_t ret; u64 x; + if (sdata->sgrp_data && !sdata->sgrp_data->enable) + return -ENODATA; + ret = opal_get_sensor_data_u64(sdata->id, &x); if (ret) @@ -120,6 +132,46 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr, return sprintf(buf, "%llu\n", x); } +static ssize_t show_enable(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + struct sensor_data *sdata = container_of(devattr, struct sensor_data, + dev_attr); + + return sprintf(buf, "%u\n", sdata->sgrp_data->enable); +} + +static ssize_t store_enable(struct device *dev, + struct device_attribute *devattr, + const char *buf, size_t count) +{ + struct sensor_data *sdata = container_of(devattr, struct sensor_data, + dev_attr); + struct sensor_group_data *sgrp_data = sdata->sgrp_data; + int ret; + bool data; + + ret = kstrtobool(buf, &data); + if (ret) + return ret; + + ret = mutex_lock_interruptible(&sgrp_data->mutex); + if (ret) + return ret; + + if (data != sgrp_data->enable) { + ret = sensor_group_enable(sgrp_data->gid, data); + if (!ret) + sgrp_data->enable = data; + } + + if (!ret) + ret = count; + + mutex_unlock(&sgrp_data->mutex); + return ret; +} + static ssize_t show_label(struct device *dev, struct device_attribute *devattr, char *buf) { @@ -292,12 +344,115 @@ static u32 get_sensor_hwmon_index(struct sensor_data *sdata, return ++sensor_groups[sdata->type].hwmon_index; } +static int init_sensor_group_data(struct platform_device *pdev, + struct platform_data *pdata) +{ + struct sensor_group_data *sgrp_data; + struct device_node *groups, *sgrp; + int count = 0, ret = 0; + enum sensors type; + + groups = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group"); + if (!groups) + return ret; + + for_each_child_of_node(groups, sgrp) { + type = get_sensor_type(sgrp); + if (type != MAX_SENSOR_TYPE) + pdata->nr_sensor_groups++; + } + + if (!pdata->nr_sensor_groups) + goto out; + + sgrp_data = devm_kcalloc(&pdev->dev, pdata->nr_sensor_groups, + sizeof(*sgrp_data), GFP_KERNEL); + if (!sgrp_data) { + ret = -ENOMEM; + goto out; + } + + for_each_child_of_node(groups, sgrp) { + u32 gid; + + type = get_sensor_type(sgrp); + if (type == MAX_SENSOR_TYPE) + continue; + + if (of_property_read_u32(sgrp, "sensor-group-id", &gid)) + continue; + + if (of_count_phandle_with_args(sgrp, "sensors", NULL) <= 0) + continue; + + sensor_groups[type].attr_count++; + sgrp_data[count].gid = gid; + mutex_init(&sgrp_data[count].mutex); + sgrp_data[count++].enable = false; + } + + pdata->sgrp_data = sgrp_data; +out: + of_node_put(groups); + return ret; +} + +static struct sensor_group_data *get_sensor_group(struct platform_data *pdata, + struct device_node *node, + enum sensors gtype) +{ + struct sensor_group_data *sgrp_data = pdata->sgrp_data; + struct device_node *groups, *sgrp; + + groups = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group"); + if (!groups) + return NULL; + + for_each_child_of_node(groups, sgrp) { + struct of_phandle_iterator it; + u32 gid; + int rc, i; + enum sensors type; + + type = get_sensor_type(sgrp); + if (type != gtype) + continue; + + if (of_property_read_u32(sgrp, "sensor-group-id", &gid)) + continue; + + of_for_each_phandle(&it, rc, sgrp, "sensors", NULL, 0) + if (it.phandle == node->phandle) { + of_node_put(it.node); + break; + } + + if (rc) + continue; + + for (i = 0; i < pdata->nr_sensor_groups; i++) + if (gid == sgrp_data[i].gid) { + of_node_put(sgrp); + of_node_put(groups); + return &sgrp_data[i]; + } + } + + of_node_put(groups); + return NULL; +} + static int populate_attr_groups(struct platform_device *pdev) { struct platform_data *pdata = platform_get_drvdata(pdev); const struct attribute_group **pgroups = pdata->attr_groups; struct device_node *opal, *np; enum sensors type; + int ret; + + ret = init_sensor_group_data(pdev, pdata); + if (ret) + return ret; opal = of_find_node_by_path("/ibm,opal/sensors"); for_each_child_of_node(opal, np) { @@ -344,7 +499,10 @@ static int populate_attr_groups(struct platform_device *pdev) static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name, ssize_t (*show)(struct device *dev, struct device_attribute *attr, - char *buf)) + char *buf), + ssize_t (*store)(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count)) { snprintf(sdata->name, MAX_ATTR_LEN, "%s%d_%s", sensor_groups[sdata->type].name, sdata->hwmon_index, @@ -352,23 +510,33 @@ static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name, sysfs_attr_init(&sdata->dev_attr.attr); sdata->dev_attr.attr.name = sdata->name; - sdata->dev_attr.attr.mode = S_IRUGO; sdata->dev_attr.show = show; + if (store) { + sdata->dev_attr.store = store; + sdata->dev_attr.attr.mode = 0664; + } else { + sdata->dev_attr.attr.mode = 0444; + } } static void populate_sensor(struct sensor_data *sdata, int od, int hd, int sid, const char *attr_name, enum sensors type, const struct attribute_group *pgroup, + struct sensor_group_data *sgrp_data, ssize_t (*show)(struct device *dev, struct device_attribute *attr, - char *buf)) + char *buf), + ssize_t (*store)(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count)) { sdata->id = sid; sdata->type = type; sdata->opal_index = od; sdata->hwmon_index = hd; - create_hwmon_attr(sdata, attr_name, show); + create_hwmon_attr(sdata, attr_name, show, store); pgroup->attrs[sensor_groups[type].attr_count++] = &sdata->dev_attr.attr; + sdata->sgrp_data = sgrp_data; } static char *get_max_attr(enum sensors type) @@ -403,24 +571,23 @@ static int create_device_attrs(struct platform_device *pdev) const struct attribute_group **pgroups = pdata->attr_groups; struct device_node *opal, *np; struct sensor_data *sdata; - u32 sensor_id; - enum sensors type; u32 count = 0; - int err = 0; + u32 group_attr_id[MAX_SENSOR_TYPE] = {0}; - opal = of_find_node_by_path("/ibm,opal/sensors"); sdata = devm_kcalloc(&pdev->dev, pdata->sensors_count, sizeof(*sdata), GFP_KERNEL); - if (!sdata) { - err = -ENOMEM; - goto exit_put_node; - } + if (!sdata) + return -ENOMEM; + opal = of_find_node_by_path("/ibm,opal/sensors"); for_each_child_of_node(opal, np) { + struct sensor_group_data *sgrp_data; const char *attr_name; - u32 opal_index; + u32 opal_index, hw_id; + u32 sensor_id; const char *label; + enum sensors type; if (np->name == NULL) continue; @@ -456,14 +623,12 @@ static int create_device_attrs(struct platform_device *pdev) opal_index = INVALID_INDEX; } - sdata[count].opal_index = opal_index; - sdata[count].hwmon_index = - get_sensor_hwmon_index(&sdata[count], sdata, count); - - create_hwmon_attr(&sdata[count], attr_name, show_sensor); - - pgroups[type]->attrs[sensor_groups[type].attr_count++] = - &sdata[count++].dev_attr.attr; + hw_id = get_sensor_hwmon_index(&sdata[count], sdata, count); + sgrp_data = get_sensor_group(pdata, np, type); + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id, + attr_name, type, pgroups[type], sgrp_data, + show_sensor, NULL); + count++; if (!of_property_read_string(np, "label", &label)) { /* @@ -474,35 +639,43 @@ static int create_device_attrs(struct platform_device *pdev) */ make_sensor_label(np, &sdata[count], label); - populate_sensor(&sdata[count], opal_index, - sdata[count - 1].hwmon_index, + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id, "label", type, pgroups[type], - show_label); + NULL, show_label, NULL); count++; } if (!of_property_read_u32(np, "sensor-data-max", &sensor_id)) { attr_name = get_max_attr(type); - populate_sensor(&sdata[count], opal_index, - sdata[count - 1].hwmon_index, + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id, attr_name, type, - pgroups[type], show_sensor); + pgroups[type], sgrp_data, show_sensor, + NULL); count++; } if (!of_property_read_u32(np, "sensor-data-min", &sensor_id)) { attr_name = get_min_attr(type); - populate_sensor(&sdata[count], opal_index, - sdata[count - 1].hwmon_index, + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id, attr_name, type, - pgroups[type], show_sensor); + pgroups[type], sgrp_data, show_sensor, + NULL); + count++; + } + + if (sgrp_data && !sgrp_data->enable) { + sgrp_data->enable = true; + hw_id = ++group_attr_id[type]; + populate_sensor(&sdata[count], opal_index, hw_id, + sgrp_data->gid, "enable", type, + pgroups[type], sgrp_data, show_enable, + store_enable); count++; } } -exit_put_node: of_node_put(opal); - return err; + return 0; } static int ibmpowernv_probe(struct platform_device *pdev) @@ -517,6 +690,7 @@ static int ibmpowernv_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pdata); pdata->sensors_count = 0; + pdata->nr_sensor_groups = 0; err = populate_attr_groups(pdev); if (err) return err; diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index b03af54367c0..d160d2d1f3a3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -37,7 +37,7 @@ config INFINIBAND_USER_ACCESS config INFINIBAND_USER_ACCESS_UCM bool "Userspace CM (UCM, DEPRECATED)" - depends on BROKEN + depends on BROKEN || COMPILE_TEST depends on INFINIBAND_USER_ACCESS help The UCM module has known security flaws, which no one is diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 61667705d746..867cee5e27b2 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -35,6 +35,7 @@ ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ - uverbs_ioctl_merge.o uverbs_std_types_cq.o \ + uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ - uverbs_std_types_mr.o uverbs_std_types_counters.o + uverbs_std_types_mr.o uverbs_std_types_counters.o \ + uverbs_uapi.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 4f32c4062fb6..46b855a42884 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -188,7 +188,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, return -ENODATA; } -int rdma_addr_size(struct sockaddr *addr) +int rdma_addr_size(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: @@ -315,19 +315,17 @@ static int dst_fetch_ha(const struct dst_entry *dst, int ret = 0; n = dst_neigh_lookup(dst, daddr); + if (!n) + return -ENODATA; - rcu_read_lock(); - if (!n || !(n->nud_state & NUD_VALID)) { - if (n) - neigh_event_send(n, NULL); + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); ret = -ENODATA; } else { rdma_copy_addr(dev_addr, dst->dev, n->ha); } - rcu_read_unlock(); - if (n) - neigh_release(n); + neigh_release(n); return ret; } @@ -587,7 +585,7 @@ static void process_one_req(struct work_struct *_work) spin_unlock_bh(&lock); } -int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, +int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 81d66f56e38f..0bee1f4b914e 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -66,20 +66,28 @@ enum gid_attr_find_mask { GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, }; -enum gid_table_entry_props { - GID_TABLE_ENTRY_INVALID = 1UL << 0, - GID_TABLE_ENTRY_DEFAULT = 1UL << 1, +enum gid_table_entry_state { + GID_TABLE_ENTRY_INVALID = 1, + GID_TABLE_ENTRY_VALID = 2, + /* + * Indicates that entry is pending to be removed, there may + * be active users of this GID entry. + * When last user of the GID entry releases reference to it, + * GID entry is detached from the table. + */ + GID_TABLE_ENTRY_PENDING_DEL = 3, }; struct ib_gid_table_entry { - unsigned long props; - union ib_gid gid; - struct ib_gid_attr attr; - void *context; + struct kref kref; + struct work_struct del_work; + struct ib_gid_attr attr; + void *context; + enum gid_table_entry_state state; }; struct ib_gid_table { - int sz; + int sz; /* In RoCE, adding a GID to the table requires: * (a) Find if this GID is already exists. * (b) Find a free space. @@ -91,13 +99,16 @@ struct ib_gid_table { * **/ /* Any writer to data_vec must hold this lock and the write side of - * rwlock. readers must hold only rwlock. All writers must be in a + * rwlock. Readers must hold only rwlock. All writers must be in a * sleepable context. */ - struct mutex lock; - /* rwlock protects data_vec[ix]->props. */ - rwlock_t rwlock; - struct ib_gid_table_entry *data_vec; + struct mutex lock; + /* rwlock protects data_vec[ix]->state and entry pointer. + */ + rwlock_t rwlock; + struct ib_gid_table_entry **data_vec; + /* bit field, each bit indicates the index of default GID */ + u32 default_gid_indices; }; static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) @@ -135,6 +146,19 @@ bool rdma_is_zero_gid(const union ib_gid *gid) } EXPORT_SYMBOL(rdma_is_zero_gid); +/** is_gid_index_default - Check if a given index belongs to + * reserved default GIDs or not. + * @table: GID table pointer + * @index: Index to check in GID table + * Returns true if index is one of the reserved default GID index otherwise + * returns false. + */ +static bool is_gid_index_default(const struct ib_gid_table *table, + unsigned int index) +{ + return index < 32 && (BIT(index) & table->default_gid_indices); +} + int ib_cache_gid_parse_type_str(const char *buf) { unsigned int i; @@ -164,26 +188,136 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port) return device->cache.ports[port - rdma_start_port(device)].gid; } -static void del_roce_gid(struct ib_device *device, u8 port_num, - struct ib_gid_table *table, int ix) +static bool is_gid_entry_free(const struct ib_gid_table_entry *entry) +{ + return !entry; +} + +static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry) +{ + return entry && entry->state == GID_TABLE_ENTRY_VALID; +} + +static void schedule_free_gid(struct kref *kref) { + struct ib_gid_table_entry *entry = + container_of(kref, struct ib_gid_table_entry, kref); + + queue_work(ib_wq, &entry->del_work); +} + +static void free_gid_entry_locked(struct ib_gid_table_entry *entry) +{ + struct ib_device *device = entry->attr.device; + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table = rdma_gid_table(device, port_num); + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - device->name, port_num, ix, - table->data_vec[ix].gid.raw); + device->name, port_num, entry->attr.index, + entry->attr.gid.raw); + + if (rdma_cap_roce_gid_table(device, port_num) && + entry->state != GID_TABLE_ENTRY_INVALID) + device->del_gid(&entry->attr, &entry->context); + + write_lock_irq(&table->rwlock); - if (rdma_cap_roce_gid_table(device, port_num)) - device->del_gid(&table->data_vec[ix].attr, - &table->data_vec[ix].context); - dev_put(table->data_vec[ix].attr.ndev); + /* + * The only way to avoid overwriting NULL in table is + * by comparing if it is same entry in table or not! + * If new entry in table is added by the time we free here, + * don't overwrite the table entry. + */ + if (entry == table->data_vec[entry->attr.index]) + table->data_vec[entry->attr.index] = NULL; + /* Now this index is ready to be allocated */ + write_unlock_irq(&table->rwlock); + + if (entry->attr.ndev) + dev_put(entry->attr.ndev); + kfree(entry); } -static int add_roce_gid(struct ib_gid_table *table, - const union ib_gid *gid, - const struct ib_gid_attr *attr) +static void free_gid_entry(struct kref *kref) +{ + struct ib_gid_table_entry *entry = + container_of(kref, struct ib_gid_table_entry, kref); + + free_gid_entry_locked(entry); +} + +/** + * free_gid_work - Release reference to the GID entry + * @work: Work structure to refer to GID entry which needs to be + * deleted. + * + * free_gid_work() frees the entry from the HCA's hardware table + * if provider supports it. It releases reference to netdevice. + */ +static void free_gid_work(struct work_struct *work) +{ + struct ib_gid_table_entry *entry = + container_of(work, struct ib_gid_table_entry, del_work); + struct ib_device *device = entry->attr.device; + u8 port_num = entry->attr.port_num; + struct ib_gid_table *table = rdma_gid_table(device, port_num); + + mutex_lock(&table->lock); + free_gid_entry_locked(entry); + mutex_unlock(&table->lock); +} + +static struct ib_gid_table_entry * +alloc_gid_entry(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry; - int ix = attr->index; - int ret = 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + kref_init(&entry->kref); + memcpy(&entry->attr, attr, sizeof(*attr)); + if (entry->attr.ndev) + dev_hold(entry->attr.ndev); + INIT_WORK(&entry->del_work, free_gid_work); + entry->state = GID_TABLE_ENTRY_INVALID; + return entry; +} + +static void store_gid_entry(struct ib_gid_table *table, + struct ib_gid_table_entry *entry) +{ + entry->state = GID_TABLE_ENTRY_VALID; + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + entry->attr.device->name, entry->attr.port_num, + entry->attr.index, entry->attr.gid.raw); + + lockdep_assert_held(&table->lock); + write_lock_irq(&table->rwlock); + table->data_vec[entry->attr.index] = entry; + write_unlock_irq(&table->rwlock); +} + +static void get_gid_entry(struct ib_gid_table_entry *entry) +{ + kref_get(&entry->kref); +} + +static void put_gid_entry(struct ib_gid_table_entry *entry) +{ + kref_put(&entry->kref, schedule_free_gid); +} + +static void put_gid_entry_locked(struct ib_gid_table_entry *entry) +{ + kref_put(&entry->kref, free_gid_entry); +} + +static int add_roce_gid(struct ib_gid_table_entry *entry) +{ + const struct ib_gid_attr *attr = &entry->attr; + int ret; if (!attr->ndev) { pr_err("%s NULL netdev device=%s port=%d index=%d\n", @@ -191,38 +325,22 @@ static int add_roce_gid(struct ib_gid_table *table, attr->index); return -EINVAL; } - - entry = &table->data_vec[ix]; - if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) { - WARN(1, "GID table corruption device=%s port=%d index=%d\n", - attr->device->name, attr->port_num, - attr->index); - return -EINVAL; - } - if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { - ret = attr->device->add_gid(gid, attr, &entry->context); + ret = attr->device->add_gid(attr, &entry->context); if (ret) { pr_err("%s GID add failed device=%s port=%d index=%d\n", __func__, attr->device->name, attr->port_num, attr->index); - goto add_err; + return ret; } } - dev_hold(attr->ndev); - -add_err: - if (!ret) - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - attr->device->name, attr->port_num, ix, gid->raw); - return ret; + return 0; } /** * add_modify_gid - Add or modify GID table entry * * @table: GID table in which GID to be added or modified - * @gid: GID content * @attr: Attributes of the GID * * Returns 0 on success or appropriate error code. It accepts zero @@ -230,34 +348,42 @@ add_err: * GID. However such zero GIDs are not added to the cache. */ static int add_modify_gid(struct ib_gid_table *table, - const union ib_gid *gid, const struct ib_gid_attr *attr) { - int ret; + struct ib_gid_table_entry *entry; + int ret = 0; + + /* + * Invalidate any old entry in the table to make it safe to write to + * this index. + */ + if (is_gid_entry_valid(table->data_vec[attr->index])) + put_gid_entry(table->data_vec[attr->index]); + + /* + * Some HCA's report multiple GID entries with only one valid GID, and + * leave other unused entries as the zero GID. Convert zero GIDs to + * empty table entries instead of storing them. + */ + if (rdma_is_zero_gid(&attr->gid)) + return 0; + + entry = alloc_gid_entry(attr); + if (!entry) + return -ENOMEM; if (rdma_protocol_roce(attr->device, attr->port_num)) { - ret = add_roce_gid(table, gid, attr); + ret = add_roce_gid(entry); if (ret) - return ret; - } else { - /* - * Some HCA's report multiple GID entries with only one - * valid GID, but remaining as zero GID. - * So ignore such behavior for IB link layer and don't - * fail the call, but don't add such entry to GID cache. - */ - if (rdma_is_zero_gid(gid)) - return 0; + goto done; } - lockdep_assert_held(&table->lock); - memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid)); - memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr)); - - write_lock_irq(&table->rwlock); - table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID; - write_unlock_irq(&table->rwlock); + store_gid_entry(table, entry); return 0; + +done: + put_gid_entry(entry); + return ret; } /** @@ -272,16 +398,25 @@ static int add_modify_gid(struct ib_gid_table *table, static void del_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix) { + struct ib_gid_table_entry *entry; + lockdep_assert_held(&table->lock); + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + ib_dev->name, port, ix, + table->data_vec[ix]->attr.gid.raw); + write_lock_irq(&table->rwlock); - table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; + entry = table->data_vec[ix]; + entry->state = GID_TABLE_ENTRY_PENDING_DEL; + /* + * For non RoCE protocol, GID entry slot is ready to use. + */ + if (!rdma_protocol_roce(ib_dev, port)) + table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); - if (rdma_protocol_roce(ib_dev, port)) - del_roce_gid(ib_dev, port, table, ix); - memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid)); - memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr)); - table->data_vec[ix].context = NULL; + put_gid_entry_locked(entry); } /* rwlock should be read locked, or lock should be held */ @@ -294,8 +429,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, int empty = pempty ? -1 : 0; while (i < table->sz && (found < 0 || empty < 0)) { - struct ib_gid_table_entry *data = &table->data_vec[i]; - struct ib_gid_attr *attr = &data->attr; + struct ib_gid_table_entry *data = table->data_vec[i]; + struct ib_gid_attr *attr; int curr_index = i; i++; @@ -306,9 +441,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, * so lookup free slot only if requested. */ if (pempty && empty < 0) { - if (data->props & GID_TABLE_ENTRY_INVALID && - (default_gid == - !!(data->props & GID_TABLE_ENTRY_DEFAULT))) { + if (is_gid_entry_free(data) && + default_gid == + is_gid_index_default(table, curr_index)) { /* * Found an invalid (free) entry; allocate it. * If default GID is requested, then our @@ -323,22 +458,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, /* * Additionally find_gid() is used to find valid entry during - * lookup operation, where validity needs to be checked. So - * find the empty entry first to continue to search for a free - * slot and ignore its INVALID flag. + * lookup operation; so ignore the entries which are marked as + * pending for removal and the entries which are marked as + * invalid. */ - if (data->props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(data)) continue; if (found >= 0) continue; + attr = &data->attr; if (mask & GID_ATTR_FIND_MASK_GID_TYPE && attr->gid_type != val->gid_type) continue; if (mask & GID_ATTR_FIND_MASK_GID && - memcmp(gid, &data->gid, sizeof(*gid))) + memcmp(gid, &data->attr.gid, sizeof(*gid))) continue; if (mask & GID_ATTR_FIND_MASK_NETDEV && @@ -346,8 +482,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, continue; if (mask & GID_ATTR_FIND_MASK_DEFAULT && - !!(data->props & GID_TABLE_ENTRY_DEFAULT) != - default_gid) + is_gid_index_default(table, curr_index) != default_gid) continue; found = curr_index; @@ -396,7 +531,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, attr->device = ib_dev; attr->index = empty; attr->port_num = port; - ret = add_modify_gid(table, gid, attr); + attr->gid = *gid; + ret = add_modify_gid(table, attr); if (!ret) dispatch_gid_change_event(ib_dev, port); @@ -492,7 +628,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (ix = 0; ix < table->sz; ix++) { - if (table->data_vec[ix].attr.ndev == ndev) { + if (is_gid_entry_valid(table->data_vec[ix]) && + table->data_vec[ix]->attr.ndev == ndev) { del_gid(ib_dev, port, table, ix); deleted = true; } @@ -506,103 +643,37 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, return 0; } -static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, - union ib_gid *gid, struct ib_gid_attr *attr) -{ - struct ib_gid_table *table; - - table = rdma_gid_table(ib_dev, port); - - if (index < 0 || index >= table->sz) - return -EINVAL; - - if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) - return -EINVAL; - - memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); - if (attr) { - memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); - if (attr->ndev) - dev_hold(attr->ndev); - } - - return 0; -} - -static int _ib_cache_gid_table_find(struct ib_device *ib_dev, - const union ib_gid *gid, - const struct ib_gid_attr *val, - unsigned long mask, - u8 *port, u16 *index) -{ - struct ib_gid_table *table; - u8 p; - int local_index; - unsigned long flags; - - for (p = 0; p < ib_dev->phys_port_cnt; p++) { - table = ib_dev->cache.ports[p].gid; - read_lock_irqsave(&table->rwlock, flags); - local_index = find_gid(table, gid, val, false, mask, NULL); - if (local_index >= 0) { - if (index) - *index = local_index; - if (port) - *port = p + rdma_start_port(ib_dev); - read_unlock_irqrestore(&table->rwlock, flags); - return 0; - } - read_unlock_irqrestore(&table->rwlock, flags); - } - - return -ENOENT; -} - -static int ib_cache_gid_find(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, u8 *port, - u16 *index) -{ - unsigned long mask = GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE; - struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; - - if (ndev) - mask |= GID_ATTR_FIND_MASK_NETDEV; - - return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, - mask, port, index); -} - /** - * ib_find_cached_gid_by_port - Returns the GID table index where a specified - * GID value occurs. It searches for the specified GID value in the local - * software cache. + * rdma_find_gid_by_port - Returns the GID entry attributes when it finds + * a valid GID entry for given search parameters. It searches for the specified + * GID value in the local software cache. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @port_num: The port number of the device where the GID value should be * searched. - * @ndev: In RoCE, the net device of the device. Null means ignore. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. + * @ndev: In RoCE, the net device of the device. NULL means ignore. + * + * Returns sgid attributes if the GID is found with valid reference or + * returns ERR_PTR for the error. + * The caller must invoke rdma_put_gid_attr() to release the reference. */ -int ib_find_cached_gid_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - u8 port, struct net_device *ndev, - u16 *index) +const struct ib_gid_attr * +rdma_find_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + enum ib_gid_type gid_type, + u8 port, struct net_device *ndev) { int local_index; struct ib_gid_table *table; unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; + const struct ib_gid_attr *attr; unsigned long flags; if (!rdma_is_port_valid(ib_dev, port)) - return -ENOENT; + return ERR_PTR(-ENOENT); table = rdma_gid_table(ib_dev, port); @@ -612,89 +683,73 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, read_lock_irqsave(&table->rwlock, flags); local_index = find_gid(table, gid, &val, false, mask, NULL); if (local_index >= 0) { - if (index) - *index = local_index; + get_gid_entry(table->data_vec[local_index]); + attr = &table->data_vec[local_index]->attr; read_unlock_irqrestore(&table->rwlock, flags); - return 0; + return attr; } read_unlock_irqrestore(&table->rwlock, flags); - return -ENOENT; + return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL(ib_find_cached_gid_by_port); +EXPORT_SYMBOL(rdma_find_gid_by_port); /** - * ib_cache_gid_find_by_filter - Returns the GID table index where a specified - * GID value occurs + * rdma_find_gid_by_filter - Returns the GID table attribute where a + * specified GID value occurs * @device: The device to query. * @gid: The GID value to search for. - * @port_num: The port number of the device where the GID value could be + * @port: The port number of the device where the GID value could be * searched. * @filter: The filter function is executed on any matching GID in the table. * If the filter function returns true, the corresponding index is returned, * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. * - * ib_cache_gid_find_by_filter() searches for the specified GID value + * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. - * This function is only supported on RoCE ports. * */ -static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, - const union ib_gid *gid, - u8 port, - bool (*filter)(const union ib_gid *, - const struct ib_gid_attr *, - void *), - void *context, - u16 *index) +const struct ib_gid_attr *rdma_find_gid_by_filter( + struct ib_device *ib_dev, const union ib_gid *gid, u8 port, + bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, + void *), + void *context) { + const struct ib_gid_attr *res = ERR_PTR(-ENOENT); struct ib_gid_table *table; - unsigned int i; unsigned long flags; - bool found = false; - + unsigned int i; - if (!rdma_is_port_valid(ib_dev, port) || - !rdma_protocol_roce(ib_dev, port)) - return -EPROTONOSUPPORT; + if (!rdma_is_port_valid(ib_dev, port)) + return ERR_PTR(-EINVAL); table = rdma_gid_table(ib_dev, port); read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { - struct ib_gid_attr attr; + struct ib_gid_table_entry *entry = table->data_vec[i]; - if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) + if (!is_gid_entry_valid(entry)) continue; - if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) + if (memcmp(gid, &entry->attr.gid, sizeof(*gid))) continue; - memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); - - if (filter(gid, &attr, context)) { - found = true; - if (index) - *index = i; + if (filter(gid, &entry->attr, context)) { + get_gid_entry(entry); + res = &entry->attr; break; } } read_unlock_irqrestore(&table->rwlock, flags); - - if (!found) - return -ENOENT; - return 0; + return res; } static struct ib_gid_table *alloc_gid_table(int sz) { - struct ib_gid_table *table = - kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); - int i; + struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return NULL; @@ -707,12 +762,6 @@ static struct ib_gid_table *alloc_gid_table(int sz) table->sz = sz; rwlock_init(&table->rwlock); - - /* Mark all entries as invalid so that allocator can allocate - * one of the invalid (free) entry. - */ - for (i = 0; i < sz; i++) - table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID; return table; err_free_table: @@ -720,12 +769,30 @@ err_free_table: return NULL; } -static void release_gid_table(struct ib_gid_table *table) +static void release_gid_table(struct ib_device *device, u8 port, + struct ib_gid_table *table) { - if (table) { - kfree(table->data_vec); - kfree(table); + bool leak = false; + int i; + + if (!table) + return; + + for (i = 0; i < table->sz; i++) { + if (is_gid_entry_free(table->data_vec[i])) + continue; + if (kref_read(&table->data_vec[i]->kref) > 1) { + pr_err("GID entry ref leak for %s (index %d) ref=%d\n", + device->name, i, + kref_read(&table->data_vec[i]->kref)); + leak = true; + } } + if (leak) + return; + + kfree(table->data_vec); + kfree(table); } static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, @@ -739,7 +806,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (!rdma_is_zero_gid(&table->data_vec[i].gid)) { + if (is_gid_entry_valid(table->data_vec[i])) { del_gid(ib_dev, port, table, i); deleted = true; } @@ -757,12 +824,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, { union ib_gid gid = { }; struct ib_gid_attr gid_attr; - struct ib_gid_table *table; unsigned int gid_type; unsigned long mask; - table = rdma_gid_table(ib_dev, port); - mask = GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT | GID_ATTR_FIND_MASK_NETDEV; @@ -792,19 +856,12 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port, unsigned int i; unsigned long roce_gid_type_mask; unsigned int num_default_gids; - unsigned int current_gid = 0; roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); num_default_gids = hweight_long(roce_gid_type_mask); - for (i = 0; i < num_default_gids && i < table->sz; i++) { - struct ib_gid_table_entry *entry = &table->data_vec[i]; - - entry->props |= GID_TABLE_ENTRY_DEFAULT; - current_gid = find_next_bit(&roce_gid_type_mask, - BITS_PER_LONG, - current_gid); - entry->attr.gid_type = current_gid++; - } + /* Reserve starting indices for default GIDs */ + for (i = 0; i < num_default_gids && i < table->sz; i++) + table->default_gid_indices |= BIT(i); } @@ -815,7 +872,7 @@ static void gid_table_release_one(struct ib_device *ib_dev) for (port = 0; port < ib_dev->phys_port_cnt; port++) { table = ib_dev->cache.ports[port].gid; - release_gid_table(table); + release_gid_table(ib_dev, port, table); ib_dev->cache.ports[port].gid = NULL; } } @@ -869,69 +926,94 @@ static int gid_table_setup_one(struct ib_device *ib_dev) return err; } -int ib_get_cached_gid(struct ib_device *device, - u8 port_num, - int index, - union ib_gid *gid, - struct ib_gid_attr *gid_attr) +/** + * rdma_query_gid - Read the GID content from the GID software cache + * @device: Device to query the GID + * @port_num: Port number of the device + * @index: Index of the GID table entry to read + * @gid: Pointer to GID where to store the entry's GID + * + * rdma_query_gid() only reads the GID entry content for requested device, + * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't + * hold any reference to the GID table entry in the HCA or software cache. + * + * Returns 0 on success or appropriate error code. + * + */ +int rdma_query_gid(struct ib_device *device, u8 port_num, + int index, union ib_gid *gid) { - int res; - unsigned long flags; struct ib_gid_table *table; + unsigned long flags; + int res = -EINVAL; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); - res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); - read_unlock_irqrestore(&table->rwlock, flags); + if (index < 0 || index >= table->sz || + !is_gid_entry_valid(table->data_vec[index])) + goto done; + + memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); + res = 0; + +done: + read_unlock_irqrestore(&table->rwlock, flags); return res; } -EXPORT_SYMBOL(ib_get_cached_gid); +EXPORT_SYMBOL(rdma_query_gid); /** - * ib_find_cached_gid - Returns the port number and GID table index where - * a specified GID value occurs. + * rdma_find_gid - Returns SGID attributes if the matching GID is found. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @ndev: In RoCE, the net device of the device. NULL means ignore. - * @port_num: The port number of the device where the GID value was found. - * @index: The index into the cached GID table where the GID was found. This - * parameter may be NULL. * - * ib_find_cached_gid() searches for the specified GID value in - * the local software cache. + * rdma_find_gid() searches for the specified GID value in the software cache. + * + * Returns GID attributes if a valid GID is found or returns ERR_PTR for the + * error. The caller must invoke rdma_put_gid_attr() to release the reference. + * */ -int ib_find_cached_gid(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, - u8 *port_num, - u16 *index) -{ - return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); -} -EXPORT_SYMBOL(ib_find_cached_gid); - -int ib_find_gid_by_filter(struct ib_device *device, - const union ib_gid *gid, - u8 port_num, - bool (*filter)(const union ib_gid *gid, - const struct ib_gid_attr *, - void *), - void *context, u16 *index) +const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, + const union ib_gid *gid, + enum ib_gid_type gid_type, + struct net_device *ndev) { - /* Only RoCE GID table supports filter function */ - if (!rdma_protocol_roce(device, port_num) && filter) - return -EPROTONOSUPPORT; + unsigned long mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE; + struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; + u8 p; + + if (ndev) + mask |= GID_ATTR_FIND_MASK_NETDEV; + + for (p = 0; p < device->phys_port_cnt; p++) { + struct ib_gid_table *table; + unsigned long flags; + int index; + + table = device->cache.ports[p].gid; + read_lock_irqsave(&table->rwlock, flags); + index = find_gid(table, gid, &gid_attr_val, false, mask, NULL); + if (index >= 0) { + const struct ib_gid_attr *attr; + + get_gid_entry(table->data_vec[index]); + attr = &table->data_vec[index]->attr; + read_unlock_irqrestore(&table->rwlock, flags); + return attr; + } + read_unlock_irqrestore(&table->rwlock, flags); + } - return ib_cache_gid_find_by_filter(device, gid, - port_num, filter, - context, index); + return ERR_PTR(-ENOENT); } +EXPORT_SYMBOL(rdma_find_gid); int ib_get_cached_pkey(struct ib_device *device, u8 port_num, @@ -1089,12 +1171,92 @@ int ib_get_cached_port_state(struct ib_device *device, } EXPORT_SYMBOL(ib_get_cached_port_state); +/** + * rdma_get_gid_attr - Returns GID attributes for a port of a device + * at a requested gid_index, if a valid GID entry exists. + * @device: The device to query. + * @port_num: The port number on the device where the GID value + * is to be queried. + * @index: Index of the GID table entry whose attributes are to + * be queried. + * + * rdma_get_gid_attr() acquires reference count of gid attributes from the + * cached GID table. Caller must invoke rdma_put_gid_attr() to release + * reference to gid attribute regardless of link layer. + * + * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error + * code. + */ +const struct ib_gid_attr * +rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index) +{ + const struct ib_gid_attr *attr = ERR_PTR(-EINVAL); + struct ib_gid_table *table; + unsigned long flags; + + if (!rdma_is_port_valid(device, port_num)) + return ERR_PTR(-EINVAL); + + table = rdma_gid_table(device, port_num); + if (index < 0 || index >= table->sz) + return ERR_PTR(-EINVAL); + + read_lock_irqsave(&table->rwlock, flags); + if (!is_gid_entry_valid(table->data_vec[index])) + goto done; + + get_gid_entry(table->data_vec[index]); + attr = &table->data_vec[index]->attr; +done: + read_unlock_irqrestore(&table->rwlock, flags); + return attr; +} +EXPORT_SYMBOL(rdma_get_gid_attr); + +/** + * rdma_put_gid_attr - Release reference to the GID attribute + * @attr: Pointer to the GID attribute whose reference + * needs to be released. + * + * rdma_put_gid_attr() must be used to release reference whose + * reference is acquired using rdma_get_gid_attr() or any APIs + * which returns a pointer to the ib_gid_attr regardless of link layer + * of IB or RoCE. + * + */ +void rdma_put_gid_attr(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + + put_gid_entry(entry); +} +EXPORT_SYMBOL(rdma_put_gid_attr); + +/** + * rdma_hold_gid_attr - Get reference to existing GID attribute + * + * @attr: Pointer to the GID attribute whose reference + * needs to be taken. + * + * Increase the reference count to a GID attribute to keep it from being + * freed. Callers are required to already be holding a reference to attribute. + * + */ +void rdma_hold_gid_attr(const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry = + container_of(attr, struct ib_gid_table_entry, attr); + + get_gid_entry(entry); +} +EXPORT_SYMBOL(rdma_hold_gid_attr); + static int config_non_roce_gid_cache(struct ib_device *device, u8 port, int gid_tbl_len) { struct ib_gid_attr gid_attr = {}; struct ib_gid_table *table; - union ib_gid gid; int ret = 0; int i; @@ -1106,14 +1268,14 @@ static int config_non_roce_gid_cache(struct ib_device *device, for (i = 0; i < gid_tbl_len; ++i) { if (!device->query_gid) continue; - ret = device->query_gid(device, port, i, &gid); + ret = device->query_gid(device, port, i, &gid_attr.gid); if (ret) { pr_warn("query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } gid_attr.index = i; - add_modify_gid(table, &gid, &gid_attr); + add_modify_gid(table, &gid_attr); } err: mutex_unlock(&table->lock); @@ -1128,13 +1290,10 @@ static void ib_cache_update(struct ib_device *device, struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; int i; int ret; - struct ib_gid_table *table; if (!rdma_is_port_valid(device, port)) return; - table = rdma_gid_table(device, port); - tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return; @@ -1296,4 +1455,9 @@ void ib_cache_cleanup_one(struct ib_device *device) ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); + + /* + * Flush the wq second time for any pending GID delete work. + */ + flush_workqueue(ib_wq); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 27a7b0a2e27a..6e39c27dca8e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, if (ret) return ret; - memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } @@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, return ret; } -static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path) +static struct cm_port * +get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) { struct cm_device *cm_dev; struct cm_port *port = NULL; unsigned long flags; - u8 p; - struct net_device *ndev = ib_get_ndev_from_path(path); - - read_lock_irqsave(&cm.device_lock, flags); - list_for_each_entry(cm_dev, &cm.device_list, list) { - if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - sa_conv_pathrec_to_gid_type(path), - ndev, &p, NULL)) { - port = cm_dev->port[p - 1]; - break; + + if (attr) { + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + if (cm_dev->ib_device == attr->device) { + port = cm_dev->port[attr->port_num - 1]; + break; + } + } + read_unlock_irqrestore(&cm.device_lock, flags); + } else { + /* SGID attribute can be NULL in following + * conditions. + * (a) Alternative path + * (b) IB link layer without GRH + * (c) LAP send messages + */ + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + attr = rdma_find_gid(cm_dev->ib_device, + &path->sgid, + sa_conv_pathrec_to_gid_type(path), + NULL); + if (!IS_ERR(attr)) { + port = cm_dev->port[attr->port_num - 1]; + break; + } } + read_unlock_irqrestore(&cm.device_lock, flags); + if (port) + rdma_put_gid_attr(attr); } - read_unlock_irqrestore(&cm.device_lock, flags); - - if (ndev) - dev_put(ndev); return port; } -static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, +static int cm_init_av_by_path(struct sa_path_rec *path, + const struct ib_gid_attr *sgid_attr, + struct cm_av *av, struct cm_id_private *cm_id_priv) { struct rdma_ah_attr new_ah_attr; @@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, struct cm_port *port; int ret; - port = get_cm_port_from_path(path); + port = get_cm_port_from_path(path, sgid_attr); if (!port) return -EINVAL; cm_dev = port->cm_dev; @@ -554,22 +573,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, /* * av->ah_attr might be initialized based on wc or during - * request processing time. So initialize a new ah_attr on stack. + * request processing time which might have reference to sgid_attr. + * So initialize a new ah_attr on stack. * If initialization fails, old ah_attr is used for sending any * responses. If initialization is successful, than new ah_attr - * is used by overwriting the old one. + * is used by overwriting the old one. So that right ah_attr + * can be used to return an error response. */ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path, - &new_ah_attr); + &new_ah_attr, sgid_attr); if (ret) return ret; av->timeout = path->packet_life_time + 1; ret = add_cm_id_to_port_list(cm_id_priv, av, port); - if (ret) + if (ret) { + rdma_destroy_ah_attr(&new_ah_attr); return ret; - memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + } + rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } @@ -1091,6 +1114,9 @@ retest: wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); + + rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); + rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); kfree(cm_id_priv->private_data); kfree(cm_id_priv); } @@ -1230,14 +1256,12 @@ new_id: } EXPORT_SYMBOL(ib_cm_insert_listen); -static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, - enum cm_msg_sequence msg_seq) +static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) { u64 hi_tid, low_tid; hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | - (msg_seq << 30)); + low_tid = (u64)cm_id_priv->id.local_id; return cpu_to_be64(hi_tid | low_tid); } @@ -1265,7 +1289,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, pri_path->opa.slid); cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); + cm_form_tid(cm_id_priv)); req_msg->local_comm_id = cm_id_priv->id.local_id; req_msg->service_id = param->service_id; @@ -1413,12 +1437,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, + ret = cm_init_av_by_path(param->primary_path, + param->ppath_sgid_attr, &cm_id_priv->av, cm_id_priv); if (ret) goto error1; if (param->alternate_path) { - ret = cm_init_av_by_path(param->alternate_path, + ret = cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; @@ -1646,7 +1671,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work, (ib_is_opa_gid(&path->sgid))) { union ib_gid sgid; - if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) { + if (rdma_query_gid(dev, port_num, 0, &sgid)) { dev_warn(&dev->dev, "Error updating sgid in CM request\n"); return; @@ -1691,6 +1716,7 @@ static void cm_format_req_event(struct cm_work *work, param->retry_count = cm_req_get_retry_count(req_msg); param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); param->srq = cm_req_get_srq(req_msg); + param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &req_msg->private_data; } @@ -1914,9 +1940,8 @@ static int cm_req_handler(struct cm_work *work) struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; - union ib_gid gid; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh; + const struct ib_gid_attr *gid_attr; int ret; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1961,24 +1986,13 @@ static int cm_req_handler(struct cm_work *work) if (cm_req_has_alt_path(req_msg)) memset(&work->path[1], 0, sizeof(work->path[1])); grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); - ret = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, - grh->sgid_index, - &gid, &gid_attr); - if (ret) { - ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0); - goto rejected; - } + gid_attr = grh->sgid_attr; - if (gid_attr.ndev) { + if (gid_attr && gid_attr->ndev) { work->path[0].rec_type = - sa_conv_gid_to_pathrec_type(gid_attr.gid_type); - sa_path_set_ifindex(&work->path[0], - gid_attr.ndev->ifindex); - sa_path_set_ndev(&work->path[0], - dev_net(gid_attr.ndev)); - dev_put(gid_attr.ndev); + sa_conv_gid_to_pathrec_type(gid_attr->gid_type); } else { + /* If no GID attribute or ndev is null, it is not RoCE. */ cm_path_set_rec_type(work->port->cm_dev->ib_device, work->port->port_num, &work->path[0], @@ -1992,15 +2006,14 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; - ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, + ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av, cm_id_priv); if (ret) { int err; - err = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, 0, - &work->path[0].sgid, - NULL); + err = rdma_query_gid(work->port->cm_dev->ib_device, + work->port->port_num, 0, + &work->path[0].sgid); if (err) ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, NULL, 0, NULL, 0); @@ -2012,8 +2025,8 @@ static int cm_req_handler(struct cm_work *work) goto rejected; } if (cm_req_has_alt_path(req_msg)) { - ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, - cm_id_priv); + ret = cm_init_av_by_path(&work->path[1], NULL, + &cm_id_priv->alt_av, cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, @@ -2451,7 +2464,7 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, u8 private_data_len) { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); + cm_form_tid(cm_id_priv)); dreq_msg->local_comm_id = cm_id_priv->id.local_id; dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); @@ -3082,7 +3095,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, alternate_path->opa.slid); cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); + cm_form_tid(cm_id_priv)); lap_msg->local_comm_id = cm_id_priv->id.local_id; lap_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); @@ -3136,7 +3149,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, + ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto out; @@ -3279,7 +3292,7 @@ static int cm_lap_handler(struct cm_work *work) if (ret) goto unlock; - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, + cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; @@ -3458,7 +3471,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, struct ib_cm_sidr_req_param *param) { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); + cm_form_tid(cm_id_priv)); sidr_req_msg->request_id = cm_id_priv->id.local_id; sidr_req_msg->pkey = param->path->pkey; sidr_req_msg->service_id = param->service_id; @@ -3481,7 +3494,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); + ret = cm_init_av_by_path(param->path, param->sgid_attr, + &cm_id_priv->av, + cm_id_priv); if (ret) goto out; @@ -3518,6 +3533,7 @@ out: EXPORT_SYMBOL(ib_send_cm_sidr_req); static void cm_format_sidr_req_event(struct cm_work *work, + const struct cm_id_private *rx_cm_id, struct ib_cm_id *listen_id) { struct cm_sidr_req_msg *sidr_req_msg; @@ -3531,6 +3547,7 @@ static void cm_format_sidr_req_event(struct cm_work *work, param->service_id = sidr_req_msg->service_id; param->bth_pkey = cm_get_bth_pkey(work); param->port = work->port->port_num; + param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -3588,7 +3605,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->id.service_id = sidr_req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); - cm_format_sidr_req_event(work, &cur_cm_id_priv->id); + cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(cur_cm_id_priv); return 0; @@ -3665,7 +3682,8 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_sidr_rep); -static void cm_format_sidr_rep_event(struct cm_work *work) +static void cm_format_sidr_rep_event(struct cm_work *work, + const struct cm_id_private *cm_id_priv) { struct cm_sidr_rep_msg *sidr_rep_msg; struct ib_cm_sidr_rep_event_param *param; @@ -3678,6 +3696,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work) param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); param->info = &sidr_rep_msg->info; param->info_len = sidr_rep_msg->info_length; + param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; work->cm_event.private_data = &sidr_rep_msg->private_data; } @@ -3701,7 +3720,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); - cm_format_sidr_rep_event(work); + cm_format_sidr_rep_event(work, cm_id_priv); cm_process_work(cm_id_priv, work); return 0; out: diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 8b76f0ef965e..476d4309576d 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,13 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -enum cm_msg_sequence { - CM_MSG_SEQUENCE_REQ, - CM_MSG_SEQUENCE_LAP, - CM_MSG_SEQUENCE_DREQ, - CM_MSG_SEQUENCE_SIDR -}; - struct cm_req_msg { struct ib_mad_hdr hdr; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index bff10ab141b0..f72677291b69 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -366,7 +366,6 @@ struct cma_multicast { void *context; struct sockaddr_storage addr; struct kref mcref; - bool igmp_joined; u8 join_state; }; @@ -412,11 +411,11 @@ struct cma_req_info { struct sockaddr_storage listen_addr_storage; struct sockaddr_storage src_addr_storage; struct ib_device *device; - int port; union ib_gid local_gid; __be64 service_id; + int port; + bool has_gid; u16 pkey; - bool has_gid:1; }; static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) @@ -491,12 +490,10 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, { cma_ref_dev(cma_dev); id_priv->cma_dev = cma_dev; - id_priv->gid_type = 0; id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - id_priv->res.type = RDMA_RESTRACK_CM_ID; rdma_restrack_add(&id_priv->res); } @@ -603,46 +600,53 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a return ret; } -static inline int cma_validate_port(struct ib_device *device, u8 port, - enum ib_gid_type gid_type, - union ib_gid *gid, - struct rdma_id_private *id_priv) +static const struct ib_gid_attr * +cma_validate_port(struct ib_device *device, u8 port, + enum ib_gid_type gid_type, + union ib_gid *gid, + struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int bound_if_index = dev_addr->bound_dev_if; + const struct ib_gid_attr *sgid_attr; int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; - int ret = -ENODEV; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) - return ret; + return ERR_PTR(-ENODEV); if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) - return ret; + return ERR_PTR(-ENODEV); if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(dev_addr->net, bound_if_index); if (!ndev) - return ret; + return ERR_PTR(-ENODEV); } else { gid_type = IB_GID_TYPE_IB; } - ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, - ndev, NULL); - + sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev); if (ndev) dev_put(ndev); + return sgid_attr; +} - return ret; +static void cma_bind_sgid_attr(struct rdma_id_private *id_priv, + const struct ib_gid_attr *sgid_attr) +{ + WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr); + id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr; } static int cma_acquire_dev(struct rdma_id_private *id_priv, - struct rdma_id_private *listen_id_priv) + const struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + const struct ib_gid_attr *sgid_attr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid, *gidp; + enum ib_gid_type gid_type; int ret = -ENODEV; u8 port; @@ -662,14 +666,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, port = listen_id_priv->id.port_num; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - listen_id_priv->gid_type, gidp, - id_priv); - if (!ret) { + gid_type = listen_id_priv->gid_type; + sgid_attr = cma_validate_port(cma_dev->device, port, + gid_type, gidp, id_priv); + if (!IS_ERR(sgid_attr)) { id_priv->id.port_num = port; + cma_bind_sgid_attr(id_priv, sgid_attr); + ret = 0; goto out; } } @@ -683,14 +686,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - cma_dev->default_gid_type[port - 1], - gidp, id_priv); - if (!ret) { + gid_type = cma_dev->default_gid_type[port - 1]; + sgid_attr = cma_validate_port(cma_dev->device, port, + gid_type, gidp, id_priv); + if (!IS_ERR(sgid_attr)) { id_priv->id.port_num = port; + cma_bind_sgid_attr(id_priv, sgid_attr); + ret = 0; goto out; } } @@ -732,8 +734,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) continue; - for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, - &gid, NULL); + for (i = 0; !rdma_query_gid(cur_dev->device, + p, i, &gid); i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; @@ -785,12 +787,14 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->res.kern_name = caller; else rdma_restrack_set_task(&id_priv->res, current); + id_priv->res.type = RDMA_RESTRACK_CM_ID; id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; id_priv->tos_set = false; + id_priv->gid_type = IB_GID_TYPE_IB; spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); @@ -1036,35 +1040,38 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, } EXPORT_SYMBOL(rdma_init_qp_attr); -static inline int cma_zero_addr(struct sockaddr *addr) +static inline bool cma_zero_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); case AF_INET6: - return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); + return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr); case AF_IB: - return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); + return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr); default: - return 0; + return false; } } -static inline int cma_loopback_addr(struct sockaddr *addr) +static inline bool cma_loopback_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: - return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); + return ipv4_is_loopback( + ((struct sockaddr_in *)addr)->sin_addr.s_addr); case AF_INET6: - return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); + return ipv6_addr_loopback( + &((struct sockaddr_in6 *)addr)->sin6_addr); case AF_IB: - return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); + return ib_addr_loopback( + &((struct sockaddr_ib *)addr)->sib_addr); default: - return 0; + return false; } } -static inline int cma_any_addr(struct sockaddr *addr) +static inline bool cma_any_addr(const struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } @@ -1087,7 +1094,7 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) } } -static __be16 cma_port(struct sockaddr *addr) +static __be16 cma_port(const struct sockaddr *addr) { struct sockaddr_ib *sib; @@ -1105,15 +1112,15 @@ static __be16 cma_port(struct sockaddr *addr) } } -static inline int cma_any_port(struct sockaddr *addr) +static inline int cma_any_port(const struct sockaddr *addr) { return !cma_port(addr); } static void cma_save_ib_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct rdma_cm_id *listen_id, - struct sa_path_rec *path) + const struct rdma_cm_id *listen_id, + const struct sa_path_rec *path) { struct sockaddr_ib *listen_ib, *ib; @@ -1198,7 +1205,7 @@ static u16 cma_port_from_service_id(__be64 service_id) static int cma_save_ip_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct ib_cm_event *ib_event, + const struct ib_cm_event *ib_event, __be64 service_id) { struct cma_hdr *hdr; @@ -1228,8 +1235,8 @@ static int cma_save_ip_info(struct sockaddr *src_addr, static int cma_save_net_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, - struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, + const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, sa_family_t sa_family, __be64 service_id) { if (sa_family == AF_IB) { @@ -1361,7 +1368,23 @@ static bool validate_net_dev(struct net_device *net_dev, } } -static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, +static struct net_device * +roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) +{ + const struct ib_gid_attr *sgid_attr = NULL; + + if (ib_event->event == IB_CM_REQ_RECEIVED) + sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr; + else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) + sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr; + + if (!sgid_attr) + return NULL; + dev_hold(sgid_attr->ndev); + return sgid_attr->ndev; +} + +static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event, struct cma_req_info *req) { struct sockaddr *listen_addr = @@ -1376,8 +1399,12 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, if (err) return ERR_PTR(err); - net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, - gid, listen_addr); + if (rdma_protocol_roce(req->device, req->port)) + net_dev = roce_get_net_dev_by_cm_event(ib_event); + else + net_dev = ib_get_net_dev_by_params(req->device, req->port, + req->pkey, + gid, listen_addr); if (!net_dev) return ERR_PTR(-ENODEV); @@ -1440,14 +1467,20 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id, const struct rdma_addr *addr = &id->route.addr; if (!net_dev) - /* This request is an AF_IB request or a RoCE request */ + /* This request is an AF_IB request */ return (!id->port_num || id->port_num == port_num) && - (addr->src_addr.ss_family == AF_IB || - rdma_protocol_roce(id->device, port_num)); + (addr->src_addr.ss_family == AF_IB); - return !addr->dev_addr.bound_dev_if || - (net_eq(dev_net(net_dev), addr->dev_addr.net) && - addr->dev_addr.bound_dev_if == net_dev->ifindex); + /* + * Net namespaces must match, and if the listner is listening + * on a specific netdevice than netdevice must match as well. + */ + if (net_eq(dev_net(net_dev), addr->dev_addr.net) && + (!!addr->dev_addr.bound_dev_if == + (addr->dev_addr.bound_dev_if == net_dev->ifindex))) + return true; + else + return false; } static struct rdma_id_private *cma_find_listener( @@ -1480,9 +1513,10 @@ static struct rdma_id_private *cma_find_listener( return ERR_PTR(-EINVAL); } -static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, - struct ib_cm_event *ib_event, - struct net_device **net_dev) +static struct rdma_id_private * +cma_ib_id_from_event(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event, + struct net_device **net_dev) { struct cma_req_info req; struct rdma_bind_list *bind_list; @@ -1498,10 +1532,6 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { /* Assuming the protocol is AF_IB */ *net_dev = NULL; - } else if (rdma_protocol_roce(req.device, req.port)) { - /* TODO find the net dev matching the request parameters - * through the RoCE GID table */ - *net_dev = NULL; } else { return ERR_CAST(*net_dev); } @@ -1629,6 +1659,21 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_unlock(&lock); } +static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + if (ndev) { + cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); + dev_put(ndev); + } + kref_put(&mc->mcref, release_mc); +} + static void cma_leave_mc_groups(struct rdma_id_private *id_priv) { struct cma_multicast *mc; @@ -1642,22 +1687,7 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(&init_net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - } - kref_put(&mc->mcref, release_mc); + cma_leave_roce_mc_group(id_priv, mc); } } } @@ -1699,6 +1729,10 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); + + if (id_priv->id.route.addr.dev_addr.sgid_attr) + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); + put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } @@ -1730,7 +1764,7 @@ reject: } static void cma_set_rep_event_data(struct rdma_cm_event *event, - struct ib_cm_rep_event_param *rep_data, + const struct ib_cm_rep_event_param *rep_data, void *private_data) { event->param.conn.private_data = private_data; @@ -1743,10 +1777,11 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = rep_data->remote_qpn; } -static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +static int cma_ib_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; mutex_lock(&id_priv->handler_mutex); @@ -1756,7 +1791,6 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) id_priv->state != RDMA_CM_DISCONNECT)) goto out; - memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: case IB_CM_REP_ERROR: @@ -1825,9 +1859,10 @@ out: return ret; } -static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - struct net_device *net_dev) +static struct rdma_id_private * +cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, + struct net_device *net_dev) { struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; @@ -1888,11 +1923,12 @@ err: return NULL; } -static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - struct net_device *net_dev) +static struct rdma_id_private * +cma_ib_new_udp_id(const struct rdma_cm_id *listen_id, + const struct ib_cm_event *ib_event, + struct net_device *net_dev) { - struct rdma_id_private *listen_id_priv; + const struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; @@ -1932,7 +1968,7 @@ err: } static void cma_set_req_event_data(struct rdma_cm_event *event, - struct ib_cm_req_event_param *req_data, + const struct ib_cm_req_event_param *req_data, void *private_data, int offset) { event->param.conn.private_data = private_data + offset; @@ -1946,7 +1982,8 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = req_data->remote_qpn; } -static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) +static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id, + const struct ib_cm_event *ib_event) { return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || @@ -1955,19 +1992,20 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e (!id->qp_type)); } -static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +static int cma_ib_req_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id = NULL; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; struct net_device *net_dev; u8 offset; int ret; - listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); + listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev); if (IS_ERR(listen_id)) return PTR_ERR(listen_id); - if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { + if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) { ret = -EINVAL; goto net_dev_put; } @@ -1978,16 +2016,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto err1; } - memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { - conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); + conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { - conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); + conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); } @@ -2087,7 +2124,7 @@ EXPORT_SYMBOL(rdma_read_gids); static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; @@ -2096,7 +2133,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) if (id_priv->state != RDMA_CM_CONNECT) goto out; - memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; @@ -2156,11 +2192,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = -ECONNABORTED; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; + listen_id = cm_id->context; mutex_lock(&listen_id->handler_mutex); @@ -2202,13 +2244,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); - memset(&event, 0, sizeof event); - event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - event.param.conn.private_data = iw_event->private_data; - event.param.conn.private_data_len = iw_event->private_data_len; - event.param.conn.initiator_depth = iw_event->ird; - event.param.conn.responder_resources = iw_event->ord; - /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. @@ -2241,7 +2276,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) addr = cma_src_addr(id_priv); svc_id = rdma_get_service_id(&id_priv->id, addr); - id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); + id = ib_cm_insert_listen(id_priv->id.device, + cma_ib_req_handler, svc_id); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.ib = id; @@ -2561,8 +2597,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv) route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); route->path_rec->roce.route_resolved = true; - sa_path_set_ndev(route->path_rec, addr->dev_addr.net); - sa_path_set_ifindex(route->path_rec, ndev->ifindex); sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); return ndev; } @@ -2791,7 +2825,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) p = 1; port_found: - ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); + ret = rdma_query_gid(cma_dev->device, p, 0, &gid); if (ret) goto out; @@ -2817,9 +2851,8 @@ static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_dev_addr *dev_addr, void *context) { struct rdma_id_private *id_priv = context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; - memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED)) @@ -2910,7 +2943,7 @@ err: } static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr) + const struct sockaddr *dst_addr) { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; @@ -2931,31 +2964,25 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr, int timeout_ms) + const struct sockaddr *dst_addr, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (ret) return ret; - } } - if (cma_family(id_priv) != dst_addr->sa_family) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (cma_family(id_priv) != dst_addr->sa_family) return -EINVAL; - } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; - } + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); atomic_inc(&id_priv->refcount); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); @@ -3451,18 +3478,18 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) } static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *ib_event) + const struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; - struct rdma_cm_event event; - struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; + struct rdma_cm_event event = {}; + const struct ib_cm_sidr_rep_event_param *rep = + &ib_event->param.sidr_rep_rcvd; int ret = 0; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_CONNECT) goto out; - memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; @@ -3488,7 +3515,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, ib_init_ah_attr_from_path(id_priv->id.device, id_priv->id.port_num, id_priv->id.route.path_rec, - &event.param.ud.ah_attr); + &event.param.ud.ah_attr, + rep->sgid_attr); event.param.ud.qp_num = rep->qpn; event.param.ud.qkey = rep->qkey; event.event = RDMA_CM_EVENT_ESTABLISHED; @@ -3501,6 +3529,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, } ret = id_priv->id.event_handler(&id_priv->id, &event); + + rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; @@ -3557,6 +3587,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, id_priv->cm_id.ib = id; req.path = id_priv->id.route.path_rec; + req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -3618,6 +3649,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; + req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; + /* Alternate path SGID attribute currently unsupported */ req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; @@ -3928,7 +3961,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) { struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; - struct rdma_cm_event event; + struct rdma_cm_event event = {}; int ret = 0; id_priv = mc->id_priv; @@ -3952,7 +3985,6 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) } mutex_unlock(&id_priv->qp_mutex); - memset(&event, 0, sizeof event); event.status = status; event.param.ud.private_data = mc->context; if (!status) { @@ -3981,6 +4013,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) event.event = RDMA_CM_EVENT_MULTICAST_ERROR; ret = id_priv->id.event_handler(&id_priv->id, &event); + + rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); @@ -4010,7 +4044,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else if (addr->sa_family == AF_IB) { memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); - } else if ((addr->sa_family == AF_INET6)) { + } else if (addr->sa_family == AF_INET6) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ @@ -4168,8 +4202,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (!send_only) { err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); - if (!err) - mc->igmp_joined = true; } } } else { @@ -4221,26 +4253,29 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, memcpy(&mc->addr, addr, rdma_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; - mc->igmp_joined = false; mc->join_state = join_state; - spin_lock(&id_priv->lock); - list_add(&mc->list, &id_priv->mc_list); - spin_unlock(&id_priv->lock); if (rdma_protocol_roce(id->device, id->port_num)) { kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); - } else if (rdma_cap_ib_mcast(id->device, id->port_num)) + if (ret) + goto out_err; + } else if (rdma_cap_ib_mcast(id->device, id->port_num)) { ret = cma_join_ib_multicast(id_priv, mc); - else + if (ret) + goto out_err; + } else { ret = -ENOSYS; - - if (ret) { - spin_lock_irq(&id_priv->lock); - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); - kfree(mc); + goto out_err; } + + spin_lock(&id_priv->lock); + list_add(&mc->list, &id_priv->mc_list); + spin_unlock(&id_priv->lock); + + return 0; +out_err: + kfree(mc); return ret; } EXPORT_SYMBOL(rdma_join_multicast); @@ -4268,23 +4303,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else if (rdma_protocol_roce(id->device, id->port_num)) { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id->route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, - dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, - &mc->multicast.ib->rec.mgid, - false); - dev_put(ndev); - } - mc->igmp_joined = false; - } - kref_put(&mc->mcref, release_mc); + cma_leave_roce_mc_group(id_priv, mc); } return; } @@ -4410,7 +4429,7 @@ free_cma_dev: static int cma_remove_id_dev(struct rdma_id_private *id_priv) { - struct rdma_cm_event event; + struct rdma_cm_event event = {}; enum rdma_cm_state state; int ret = 0; @@ -4426,7 +4445,6 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; - memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; ret = id_priv->id.event_handler(&id_priv->id, &event); out: diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index fae417a391fb..77c7005c396c 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -91,8 +91,8 @@ void ib_device_unregister_sysfs(struct ib_device *device); typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); -typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port, - struct net_device *idev, void *cookie); +typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port, + struct net_device *idev, void *cookie); void ib_enum_roce_netdev(struct ib_device *ib_dev, roce_netdev_filter filter, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 6fa4c59dc7a7..db3b6271f09d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -105,8 +105,6 @@ static int ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(query_pkey), IB_MANDATORY_FUNC(alloc_pd), IB_MANDATORY_FUNC(dealloc_pd), - IB_MANDATORY_FUNC(create_ah), - IB_MANDATORY_FUNC(destroy_ah), IB_MANDATORY_FUNC(create_qp), IB_MANDATORY_FUNC(modify_qp), IB_MANDATORY_FUNC(destroy_qp), @@ -862,25 +860,6 @@ int ib_query_port(struct ib_device *device, EXPORT_SYMBOL(ib_query_port); /** - * ib_query_gid - Get GID table entry - * @device:Device to query - * @port_num:Port number to query - * @index:GID table index to query - * @gid:Returned GID - * @attr: Returned GID attributes related to this GID index (only in RoCE). - * NULL means ignore. - * - * ib_query_gid() fetches the specified GID table entry from the cache. - */ -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid, - struct ib_gid_attr *attr) -{ - return ib_get_cached_gid(device, port_num, index, gid, attr); -} -EXPORT_SYMBOL(ib_query_gid); - -/** * ib_enum_roce_netdev - enumerate all RoCE ports * @ib_dev : IB device we want to query * @filter: Should we call the callback? @@ -1057,7 +1036,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, continue; for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { - ret = ib_query_gid(device, port, i, &tmp_gid, NULL); + ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index f742ae7a768b..ef459f2f2eeb 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -38,6 +38,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/dma-mapping.h> +#include <linux/idr.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/security.h> @@ -58,8 +59,13 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); +/* + * The mlx4 driver uses the top byte to distinguish which virtual function + * generated the MAD, so we must avoid using it. + */ +#define AGENT_ID_LIMIT (1 << 24) +static DEFINE_IDR(ib_mad_clients); static struct list_head ib_mad_port_list; -static atomic_t ib_mad_client_id = ATOMIC_INIT(0); /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); @@ -190,6 +196,8 @@ EXPORT_SYMBOL(ib_response_mad); /* * ib_register_mad_agent - Register to send/receive MADs + * + * Context: Process context. */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, u8 port_num, @@ -210,7 +218,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, struct ib_mad_mgmt_vendor_class *vendor_class; struct ib_mad_mgmt_method_table *method; int ret2, qpn; - unsigned long flags; u8 mgmt_class, vclass; /* Validate parameters */ @@ -376,13 +383,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error4; } - spin_lock_irqsave(&port_priv->reg_lock, flags); - mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id); + idr_preload(GFP_KERNEL); + idr_lock(&ib_mad_clients); + ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0, + AGENT_ID_LIMIT, GFP_ATOMIC); + idr_unlock(&ib_mad_clients); + idr_preload_end(); + + if (ret2 < 0) { + ret = ERR_PTR(ret2); + goto error5; + } + mad_agent_priv->agent.hi_tid = ret2; /* * Make sure MAD registration (if supplied) * is non overlapping with any existing ones */ + spin_lock_irq(&port_priv->reg_lock); if (mad_reg_req) { mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); if (!is_vendor_class(mgmt_class)) { @@ -393,7 +411,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (method) { if (method_in_use(&method, mad_reg_req)) - goto error5; + goto error6; } } ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, @@ -409,24 +427,25 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (is_vendor_method_in_use( vendor_class, mad_reg_req)) - goto error5; + goto error6; } } ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); } if (ret2) { ret = ERR_PTR(ret2); - goto error5; + goto error6; } } - - /* Add mad agent into port's agent list */ - list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); return &mad_agent_priv->agent; +error6: + spin_unlock_irq(&port_priv->reg_lock); + idr_lock(&ib_mad_clients); + idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + idr_unlock(&ib_mad_clients); error5: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: kfree(reg_req); @@ -575,7 +594,6 @@ static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; - unsigned long flags; /* Note that we could still be handling received MADs */ @@ -587,10 +605,12 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) port_priv = mad_agent_priv->qp_info->port_priv; cancel_delayed_work(&mad_agent_priv->timed_work); - spin_lock_irqsave(&port_priv->reg_lock, flags); + spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); - list_del(&mad_agent_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); + spin_unlock_irq(&port_priv->reg_lock); + idr_lock(&ib_mad_clients); + idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); + idr_unlock(&ib_mad_clients); flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); @@ -601,7 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) ib_mad_agent_security_cleanup(&mad_agent_priv->agent); kfree(mad_agent_priv->reg_req); - kfree(mad_agent_priv); + kfree_rcu(mad_agent_priv, rcu); } static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) @@ -625,6 +645,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) /* * ib_unregister_mad_agent - Unregisters a client from using MAD services + * + * Context: Process context. */ void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { @@ -1159,7 +1181,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct list_head *list; - struct ib_send_wr *bad_send_wr; struct ib_mad_agent *mad_agent; struct ib_sge *sge; unsigned long flags; @@ -1197,7 +1218,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, - &bad_send_wr); + NULL); list = &qp_info->send_queue.list; } else { ret = 0; @@ -1720,22 +1741,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv, struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; - spin_lock_irqsave(&port_priv->reg_lock, flags); if (ib_response_mad(mad_hdr)) { u32 hi_tid; - struct ib_mad_agent_private *entry; /* * Routing is based on high 32 bits of transaction ID * of MAD. */ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, agent_list) { - if (entry->agent.hi_tid == hi_tid) { - mad_agent = entry; - break; - } - } + rcu_read_lock(); + mad_agent = idr_find(&ib_mad_clients, hi_tid); + if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) + mad_agent = NULL; + rcu_read_unlock(); } else { struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; @@ -1744,6 +1762,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, const struct ib_vendor_mad *vendor_mad; int index; + spin_lock_irqsave(&port_priv->reg_lock, flags); /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI @@ -1783,20 +1802,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv, ~IB_MGMT_METHOD_RESP]; } } + if (mad_agent) + atomic_inc(&mad_agent->refcount); +out: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); } - if (mad_agent) { - if (mad_agent->agent.recv_handler) - atomic_inc(&mad_agent->refcount); - else { - dev_notice(&port_priv->device->dev, - "No receive handler for client %p on port %d\n", - &mad_agent->agent, port_priv->port_num); - mad_agent = NULL; - } + if (mad_agent && !mad_agent->agent.recv_handler) { + dev_notice(&port_priv->device->dev, + "No receive handler for client %p on port %d\n", + &mad_agent->agent, port_priv->port_num); + deref_mad_agent(mad_agent); + mad_agent = NULL; } -out: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); return mad_agent; } @@ -1896,8 +1914,8 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_ const struct ib_global_route *grh = rdma_ah_read_grh(&attr); - if (ib_get_cached_gid(device, port_num, - grh->sgid_index, &sgid, NULL)) + if (rdma_query_gid(device, port_num, + grh->sgid_index, &sgid)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); @@ -2457,7 +2475,6 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; - struct ib_send_wr *bad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; @@ -2507,7 +2524,7 @@ retry: if (queued_send_wr) { ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, - &bad_send_wr); + NULL); if (ret) { dev_err(&port_priv->device->dev, "ib_post_send failed: %d\n", ret); @@ -2552,11 +2569,9 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, if (wc->status == IB_WC_WR_FLUSH_ERR) { if (mad_send_wr->retry) { /* Repost send */ - struct ib_send_wr *bad_send_wr; - mad_send_wr->retry = 0; ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, - &bad_send_wr); + NULL); if (!ret) return false; } @@ -2872,7 +2887,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, int post, ret; struct ib_mad_private *mad_priv; struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_recv_wr recv_wr; struct ib_mad_queue *recv_queue = &qp_info->recv_queue; /* Initialize common scatter list fields */ @@ -2916,7 +2931,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, post = (++recv_queue->count < recv_queue->max_active); list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); spin_unlock_irqrestore(&recv_queue->lock, flags); - ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); + ret = ib_post_recv(qp_info->qp, &recv_wr, NULL); if (ret) { spin_lock_irqsave(&recv_queue->lock, flags); list_del(&mad_priv->header.mad_list.list); @@ -3159,7 +3174,6 @@ static int ib_mad_port_open(struct ib_device *device, port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); - INIT_LIST_HEAD(&port_priv->agent_list); init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); @@ -3338,6 +3352,9 @@ int ib_mad_init(void) INIT_LIST_HEAD(&ib_mad_port_list); + /* Client ID 0 is used for snoop-only clients */ + idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL); + if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 28669f6419e1..d84ae1671898 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -89,7 +89,6 @@ struct ib_rmpp_segment { }; struct ib_mad_agent_private { - struct list_head agent_list; struct ib_mad_agent agent; struct ib_mad_reg_req *reg_req; struct ib_mad_qp_info *qp_info; @@ -105,7 +104,10 @@ struct ib_mad_agent_private { struct list_head rmpp_list; atomic_t refcount; - struct completion comp; + union { + struct completion comp; + struct rcu_head rcu; + }; }; struct ib_mad_snoop_private { @@ -203,7 +205,6 @@ struct ib_mad_port_private { spinlock_t reg_lock; struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; - struct list_head agent_list; struct workqueue_struct *wq; struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; }; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 6c48f4193dda..d50ff70bb24b 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -716,14 +716,28 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_sa_get_mcmember_rec); +/** + * ib_init_ah_from_mcmember - Initialize AH attribute from multicast + * member record and gid of the device. + * @device: RDMA device + * @port_num: Port of the rdma device to consider + * @ndev: Optional netdevice, applicable only for RoCE + * @gid_type: GID type to consider + * @ah_attr: AH attribute to fillup on successful completion + * + * ib_init_ah_from_mcmember() initializes AH attribute based on multicast + * member record and other device properties. On success the caller is + * responsible to call rdma_destroy_ah_attr on the ah_attr. Returns 0 on + * success or appropriate error code. + * + */ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr) { - int ret; - u16 gid_index; + const struct ib_gid_attr *sgid_attr; /* GID table is not based on the netdevice for IB link layer, * so ignore ndev during search. @@ -733,26 +747,22 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, else if (!rdma_protocol_roce(device, port_num)) return -EINVAL; - ret = ib_find_cached_gid_by_port(device, &rec->port_gid, - gid_type, port_num, - ndev, - &gid_index); - if (ret) - return ret; + sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid, + gid_type, port_num, ndev); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); - memset(ah_attr, 0, sizeof *ah_attr); + memset(ah_attr, 0, sizeof(*ah_attr)); ah_attr->type = rdma_ah_find_type(device, port_num); rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid)); rdma_ah_set_sl(ah_attr, rec->sl); rdma_ah_set_port_num(ah_attr, port_num); rdma_ah_set_static_rate(ah_attr, rec->rate); - - rdma_ah_set_grh(ah_attr, &rec->mgid, - be32_to_cpu(rec->flow_label), - (u8)gid_index, - rec->hop_limit, - rec->traffic_class); + rdma_move_grh_sgid_attr(ah_attr, &rec->mgid, + be32_to_cpu(rec->flow_label), + rec->hop_limit, rec->traffic_class, + sgid_attr); return 0; } EXPORT_SYMBOL(ib_init_ah_from_mcmember); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 340c7bea45ab..0385ab438320 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -237,15 +237,15 @@ static int fill_port_info(struct sk_buff *msg, if (ret) return ret; - BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); - if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, - (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD)) - return -EMSGSIZE; - if (rdma_protocol_ib(device, port) && - nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, - attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) - return -EMSGSIZE; if (rdma_protocol_ib(device, port)) { + BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, + (u64)attr.port_cap_flags, + RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, + attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 475910ffbcb6..6eb64c6f0802 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -32,6 +32,7 @@ #include <linux/file.h> #include <linux/anon_inodes.h> +#include <linux/sched/mm.h> #include <rdma/ib_verbs.h> #include <rdma/uverbs_types.h> #include <linux/rcupdate.h> @@ -41,51 +42,6 @@ #include "core_priv.h" #include "rdma_core.h" -int uverbs_ns_idx(u16 *id, unsigned int ns_count) -{ - int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT; - - if (ret >= ns_count) - return -EINVAL; - - *id &= ~UVERBS_ID_NS_MASK; - return ret; -} - -const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, - uint16_t object) -{ - const struct uverbs_root_spec *object_hash = ibdev->specs_root; - const struct uverbs_object_spec_hash *objects; - int ret = uverbs_ns_idx(&object, object_hash->num_buckets); - - if (ret < 0) - return NULL; - - objects = object_hash->object_buckets[ret]; - - if (object >= objects->num_objects) - return NULL; - - return objects->objects[object]; -} - -const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, - uint16_t method) -{ - const struct uverbs_method_spec_hash *methods; - int ret = uverbs_ns_idx(&method, object->num_buckets); - - if (ret < 0) - return NULL; - - methods = object->method_buckets[ret]; - if (method >= methods->num_methods) - return NULL; - - return methods->methods[method]; -} - void uverbs_uobject_get(struct ib_uobject *uobject) { kref_get(&uobject->ref); @@ -96,7 +52,7 @@ static void uverbs_uobject_free(struct kref *ref) struct ib_uobject *uobj = container_of(ref, struct ib_uobject, ref); - if (uobj->type->type_class->needs_kfree_rcu) + if (uobj->uapi_object->type_class->needs_kfree_rcu) kfree_rcu(uobj, rcu); else kfree(uobj); @@ -107,7 +63,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject) kref_put(&uobject->ref, uverbs_uobject_free); } -static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) +static int uverbs_try_lock_object(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { /* * When a shared access is required, we use a positive counter. Each @@ -120,27 +77,211 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) * concurrently, setting the counter to zero is enough for releasing * this lock. */ - if (!exclusive) + switch (mode) { + case UVERBS_LOOKUP_READ: return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ? -EBUSY : 0; + case UVERBS_LOOKUP_WRITE: + /* lock is exclusive */ + return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; + case UVERBS_LOOKUP_DESTROY: + return 0; + } + return 0; +} + +static void assert_uverbs_usecnt(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) +{ +#ifdef CONFIG_LOCKDEP + switch (mode) { + case UVERBS_LOOKUP_READ: + WARN_ON(atomic_read(&uobj->usecnt) <= 0); + break; + case UVERBS_LOOKUP_WRITE: + WARN_ON(atomic_read(&uobj->usecnt) != -1); + break; + case UVERBS_LOOKUP_DESTROY: + break; + } +#endif +} + +/* + * This must be called with the hw_destroy_rwsem locked for read or write, + * also the uobject itself must be locked for write. + * + * Upon return the HW object is guaranteed to be destroyed. + * + * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held, + * however the type's allocat_commit function cannot have been called and the + * uobject cannot be on the uobjects_lists + * + * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via + * rdma_lookup_get_uobject) and the object is left in a state where the caller + * needs to call rdma_lookup_put_uobject. + * + * For all other destroy modes this function internally unlocks the uobject + * and consumes the kref on the uobj. + */ +static int uverbs_destroy_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason reason) +{ + struct ib_uverbs_file *ufile = uobj->ufile; + unsigned long flags; + int ret; + + lockdep_assert_held(&ufile->hw_destroy_rwsem); + assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); + + if (uobj->object) { + ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason); + if (ret) { + if (ib_is_destroy_retryable(ret, reason, uobj)) + return ret; + + /* Nothing to be done, dangle the memory and move on */ + WARN(true, + "ib_uverbs: failed to remove uobject id %d, driver err=%d", + uobj->id, ret); + } + + uobj->object = NULL; + } - /* lock is either WRITE or DESTROY - should be exclusive */ - return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; + if (reason == RDMA_REMOVE_ABORT) { + WARN_ON(!list_empty(&uobj->list)); + WARN_ON(!uobj->context); + uobj->uapi_object->type_class->alloc_abort(uobj); + } + + uobj->context = NULL; + + /* + * For DESTROY the usecnt is held write locked, the caller is expected + * to put it unlock and put the object when done with it. Only DESTROY + * can remove the IDR handle. + */ + if (reason != RDMA_REMOVE_DESTROY) + atomic_set(&uobj->usecnt, 0); + else + uobj->uapi_object->type_class->remove_handle(uobj); + + if (!list_empty(&uobj->list)) { + spin_lock_irqsave(&ufile->uobjects_lock, flags); + list_del_init(&uobj->list); + spin_unlock_irqrestore(&ufile->uobjects_lock, flags); + + /* + * Pairs with the get in rdma_alloc_commit_uobject(), could + * destroy uobj. + */ + uverbs_uobject_put(uobj); + } + + /* + * When aborting the stack kref remains owned by the core code, and is + * not transferred into the type. Pairs with the get in alloc_uobj + */ + if (reason == RDMA_REMOVE_ABORT) + uverbs_uobject_put(uobj); + + return 0; } -static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, - const struct uverbs_obj_type *type) +/* + * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY + * sequence. It should only be used from command callbacks. On success the + * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This + * version requires the caller to have already obtained an + * LOOKUP_DESTROY uobject kref. + */ +int uobj_destroy(struct ib_uobject *uobj) { - struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL); + struct ib_uverbs_file *ufile = uobj->ufile; + int ret; + + down_read(&ufile->hw_destroy_rwsem); + + ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE); + if (ret) + goto out_unlock; + + ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY); + if (ret) { + atomic_set(&uobj->usecnt, 0); + goto out_unlock; + } +out_unlock: + up_read(&ufile->hw_destroy_rwsem); + return ret; +} + +/* + * uobj_get_destroy destroys the HW object and returns a handle to the uobj + * with a NULL object pointer. The caller must pair this with + * uverbs_put_destroy. + */ +struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, + u32 id, struct ib_uverbs_file *ufile) +{ + struct ib_uobject *uobj; + int ret; + + uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY); + if (IS_ERR(uobj)) + return uobj; + + ret = uobj_destroy(uobj); + if (ret) { + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); + return ERR_PTR(ret); + } + + return uobj; +} + +/* + * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res + * on success (negative errno on failure). For use by callers that do not need + * the uobj. + */ +int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, + struct ib_uverbs_file *ufile, int success_res) +{ + struct ib_uobject *uobj; + + uobj = __uobj_get_destroy(obj, id, ufile); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); + return success_res; +} + +/* alloc_uobj must be undone by uverbs_destroy_uobject() */ +static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, + const struct uverbs_api_object *obj) +{ + struct ib_uobject *uobj; + struct ib_ucontext *ucontext; + + ucontext = ib_uverbs_get_ucontext(ufile); + if (IS_ERR(ucontext)) + return ERR_CAST(ucontext); + + uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL); if (!uobj) return ERR_PTR(-ENOMEM); /* * user_handle should be filled by the handler, * The object is added to the list in the commit stage. */ - uobj->context = context; - uobj->type = type; + uobj->ufile = ufile; + uobj->context = ucontext; + INIT_LIST_HEAD(&uobj->list); + uobj->uapi_object = obj; /* * Allocated objects start out as write locked to deny any other * syscalls from accessing them until they are committed. See @@ -157,45 +298,39 @@ static int idr_add_uobj(struct ib_uobject *uobj) int ret; idr_preload(GFP_KERNEL); - spin_lock(&uobj->context->ufile->idr_lock); + spin_lock(&uobj->ufile->idr_lock); /* * We start with allocating an idr pointing to NULL. This represents an * object which isn't initialized yet. We'll replace it later on with * the real object once we commit. */ - ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0, + ret = idr_alloc(&uobj->ufile->idr, NULL, 0, min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); if (ret >= 0) uobj->id = ret; - spin_unlock(&uobj->context->ufile->idr_lock); + spin_unlock(&uobj->ufile->idr_lock); idr_preload_end(); return ret < 0 ? ret : 0; } -/* - * It only removes it from the uobjects list, uverbs_uobject_put() is still - * required. - */ -static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) -{ - spin_lock(&uobj->context->ufile->idr_lock); - idr_remove(&uobj->context->ufile->idr, uobj->id); - spin_unlock(&uobj->context->ufile->idr_lock); -} - /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ -static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) +static struct ib_uobject * +lookup_get_idr_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode) { struct ib_uobject *uobj; + unsigned long idrno = id; + + if (id < 0 || id > ULONG_MAX) + return ERR_PTR(-EINVAL); rcu_read_lock(); /* object won't be released as we're protected in rcu */ - uobj = idr_find(&ucontext->ufile->idr, id); + uobj = idr_find(&ufile->idr, idrno); if (!uobj) { uobj = ERR_PTR(-ENOENT); goto free; @@ -215,19 +350,28 @@ free: return uobj; } -static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) +static struct ib_uobject * +lookup_get_fd_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode) { + const struct uverbs_obj_fd_type *fd_type; struct file *f; struct ib_uobject *uobject; - const struct uverbs_obj_fd_type *fd_type = - container_of(type, struct uverbs_obj_fd_type, type); + int fdno = id; - if (exclusive) + if (fdno != id) + return ERR_PTR(-EINVAL); + + if (mode != UVERBS_LOOKUP_READ) return ERR_PTR(-EOPNOTSUPP); - f = fget(id); + if (!obj->type_attrs) + return ERR_PTR(-EIO); + fd_type = + container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); + + f = fget(fdno); if (!f) return ERR_PTR(-EBADF); @@ -246,43 +390,55 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty return uobject; } -struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext, - int id, bool exclusive) +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode) { struct ib_uobject *uobj; int ret; - uobj = type->type_class->lookup_get(type, ucontext, id, exclusive); + if (!obj) + return ERR_PTR(-EINVAL); + + uobj = obj->type_class->lookup_get(obj, ufile, id, mode); if (IS_ERR(uobj)) return uobj; - if (uobj->type != type) { + if (uobj->uapi_object != obj) { ret = -EINVAL; goto free; } - ret = uverbs_try_lock_object(uobj, exclusive); - if (ret) { - WARN(ucontext->cleanup_reason, - "ib_uverbs: Trying to lookup_get while cleanup context\n"); + /* + * If we have been disassociated block every command except for + * DESTROY based commands. + */ + if (mode != UVERBS_LOOKUP_DESTROY && + !srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu)) { + ret = -EIO; goto free; } + ret = uverbs_try_lock_object(uobj, mode); + if (ret) + goto free; + return uobj; free: - uobj->type->type_class->lookup_put(uobj, exclusive); + obj->type_class->lookup_put(uobj, mode); uverbs_uobject_put(uobj); return ERR_PTR(ret); } -static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) +static struct ib_uobject * +alloc_begin_idr_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile) { int ret; struct ib_uobject *uobj; - uobj = alloc_uobj(ucontext, type); + uobj = alloc_uobj(ufile, obj); if (IS_ERR(uobj)) return uobj; @@ -290,7 +446,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type * if (ret) goto uobj_put; - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device, + ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); if (ret) goto idr_remove; @@ -298,304 +454,305 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type * return uobj; idr_remove: - uverbs_idr_remove_uobj(uobj); + spin_lock(&ufile->idr_lock); + idr_remove(&ufile->idr, uobj->id); + spin_unlock(&ufile->idr_lock); uobj_put: uverbs_uobject_put(uobj); return ERR_PTR(ret); } -static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) +static struct ib_uobject * +alloc_begin_fd_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile) { - const struct uverbs_obj_fd_type *fd_type = - container_of(type, struct uverbs_obj_fd_type, type); int new_fd; struct ib_uobject *uobj; - struct ib_uobject_file *uobj_file; - struct file *filp; new_fd = get_unused_fd_flags(O_CLOEXEC); if (new_fd < 0) return ERR_PTR(new_fd); - uobj = alloc_uobj(ucontext, type); + uobj = alloc_uobj(ufile, obj); if (IS_ERR(uobj)) { put_unused_fd(new_fd); return uobj; } - uobj_file = container_of(uobj, struct ib_uobject_file, uobj); - filp = anon_inode_getfile(fd_type->name, - fd_type->fops, - uobj_file, - fd_type->flags); - if (IS_ERR(filp)) { - put_unused_fd(new_fd); - uverbs_uobject_put(uobj); - return (void *)filp; - } - - uobj_file->uobj.id = new_fd; - uobj_file->uobj.object = filp; - uobj_file->ufile = ucontext->ufile; - INIT_LIST_HEAD(&uobj->list); - kref_get(&uobj_file->ufile->ref); + uobj->id = new_fd; + uobj->ufile = ufile; return uobj; } -struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, - struct ib_ucontext *ucontext) +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile) { - return type->type_class->alloc_begin(type, ucontext); -} + struct ib_uobject *ret; -static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) -{ - const struct uverbs_obj_idr_type *idr_type = - container_of(uobj->type, struct uverbs_obj_idr_type, - type); - int ret = idr_type->destroy_object(uobj, why); + if (!obj) + return ERR_PTR(-EINVAL); /* - * We can only fail gracefully if the user requested to destroy the - * object. In the rest of the cases, just remove whatever you can. + * The hw_destroy_rwsem is held across the entire object creation and + * released during rdma_alloc_commit_uobject or + * rdma_alloc_abort_uobject */ - if (why == RDMA_REMOVE_DESTROY && ret) - return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, - RDMACG_RESOURCE_HCA_OBJECT); - uverbs_idr_remove_uobj(uobj); + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) + return ERR_PTR(-EIO); + ret = obj->type_class->alloc_begin(obj, ufile); + if (IS_ERR(ret)) { + up_read(&ufile->hw_destroy_rwsem); + return ret; + } return ret; } -static void alloc_abort_fd_uobject(struct ib_uobject *uobj) +static void alloc_abort_idr_uobject(struct ib_uobject *uobj) { - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); - struct file *filp = uobj->object; - int id = uobj_file->uobj.id; + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); - /* Unsuccessful NEW */ - fput(filp); - put_unused_fd(id); + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); } -static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) { - const struct uverbs_obj_fd_type *fd_type = - container_of(uobj->type, struct uverbs_obj_fd_type, type); - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); - int ret = fd_type->context_closed(uobj_file, why); + const struct uverbs_obj_idr_type *idr_type = + container_of(uobj->uapi_object->type_attrs, + struct uverbs_obj_idr_type, type); + int ret = idr_type->destroy_object(uobj, why); - if (why == RDMA_REMOVE_DESTROY && ret) + /* + * We can only fail gracefully if the user requested to destroy the + * object or when a retry may be called upon an error. + * In the rest of the cases, just remove whatever you can. + */ + if (ib_is_destroy_retryable(ret, why, uobj)) return ret; - if (why == RDMA_REMOVE_DURING_CLEANUP) { - alloc_abort_fd_uobject(uobj); - return ret; - } + if (why == RDMA_REMOVE_ABORT) + return 0; - uobj_file->uobj.context = NULL; - return ret; + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); + + return 0; } -static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) +static void remove_handle_idr_uobject(struct ib_uobject *uobj) { -#ifdef CONFIG_LOCKDEP - if (exclusive) - WARN_ON(atomic_read(&uobj->usecnt) != -1); - else - WARN_ON(atomic_read(&uobj->usecnt) <= 0); -#endif + spin_lock(&uobj->ufile->idr_lock); + idr_remove(&uobj->ufile->idr, uobj->id); + spin_unlock(&uobj->ufile->idr_lock); + /* Matches the kref in alloc_commit_idr_uobject */ + uverbs_uobject_put(uobj); } -static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { - int ret; - struct ib_ucontext *ucontext = uobj->context; - - ret = uobj->type->type_class->remove_commit(uobj, why); - if (ret && why == RDMA_REMOVE_DESTROY) { - /* We couldn't remove the object, so just unlock the uobject */ - atomic_set(&uobj->usecnt, 0); - uobj->type->type_class->lookup_put(uobj, true); - } else { - mutex_lock(&ucontext->uobjects_lock); - list_del(&uobj->list); - mutex_unlock(&ucontext->uobjects_lock); - /* put the ref we took when we created the object */ - uverbs_uobject_put(uobj); - } - - return ret; + put_unused_fd(uobj->id); } -/* This is called only for user requested DESTROY reasons */ -int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) +static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) { - int ret; - struct ib_ucontext *ucontext = uobj->context; - - /* put the ref count we took at lookup_get */ - uverbs_uobject_put(uobj); - /* Cleanup is running. Calling this should have been impossible */ - if (!down_read_trylock(&ucontext->cleanup_rwsem)) { - WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); - return 0; - } - assert_uverbs_usecnt(uobj, true); - ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); + const struct uverbs_obj_fd_type *fd_type = container_of( + uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); + int ret = fd_type->context_closed(uobj, why); - up_read(&ucontext->cleanup_rwsem); - return ret; -} + if (ib_is_destroy_retryable(ret, why, uobj)) + return ret; -static int null_obj_type_class_remove_commit(struct ib_uobject *uobj, - enum rdma_remove_reason why) -{ return 0; } -static const struct uverbs_obj_type null_obj_type = { - .type_class = &((const struct uverbs_obj_type_class){ - .remove_commit = null_obj_type_class_remove_commit, - /* be cautious */ - .needs_kfree_rcu = true}), -}; - -int rdma_explicit_destroy(struct ib_uobject *uobject) +static void remove_handle_fd_uobject(struct ib_uobject *uobj) { - int ret; - struct ib_ucontext *ucontext = uobject->context; - - /* Cleanup is running. Calling this should have been impossible */ - if (!down_read_trylock(&ucontext->cleanup_rwsem)) { - WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); - return 0; - } - assert_uverbs_usecnt(uobject, true); - ret = uobject->type->type_class->remove_commit(uobject, - RDMA_REMOVE_DESTROY); - if (ret) - goto out; - - uobject->type = &null_obj_type; - -out: - up_read(&ucontext->cleanup_rwsem); - return ret; } -static void alloc_commit_idr_uobject(struct ib_uobject *uobj) +static int alloc_commit_idr_uobject(struct ib_uobject *uobj) { - spin_lock(&uobj->context->ufile->idr_lock); + struct ib_uverbs_file *ufile = uobj->ufile; + + spin_lock(&ufile->idr_lock); /* * We already allocated this IDR with a NULL object, so * this shouldn't fail. + * + * NOTE: Once we set the IDR we loose ownership of our kref on uobj. + * It will be put by remove_commit_idr_uobject() */ - WARN_ON(idr_replace(&uobj->context->ufile->idr, - uobj, uobj->id)); - spin_unlock(&uobj->context->ufile->idr_lock); + WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id)); + spin_unlock(&ufile->idr_lock); + + return 0; } -static void alloc_commit_fd_uobject(struct ib_uobject *uobj) +static int alloc_commit_fd_uobject(struct ib_uobject *uobj) { - struct ib_uobject_file *uobj_file = - container_of(uobj, struct ib_uobject_file, uobj); + const struct uverbs_obj_fd_type *fd_type = container_of( + uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); + int fd = uobj->id; + struct file *filp; + + /* + * The kref for uobj is moved into filp->private data and put in + * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd() + * must be guaranteed to be called from the provided fops release + * callback. + */ + filp = anon_inode_getfile(fd_type->name, + fd_type->fops, + uobj, + fd_type->flags); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + uobj->object = filp; + + /* Matching put will be done in uverbs_close_fd() */ + kref_get(&uobj->ufile->ref); - fd_install(uobj_file->uobj.id, uobj->object); /* This shouldn't be used anymore. Use the file object instead */ - uobj_file->uobj.id = 0; - /* Get another reference as we export this to the fops */ - uverbs_uobject_get(&uobj_file->uobj); + uobj->id = 0; + + /* + * NOTE: Once we install the file we loose ownership of our kref on + * uobj. It will be put by uverbs_close_fd() + */ + fd_install(fd, filp); + + return 0; } -int rdma_alloc_commit_uobject(struct ib_uobject *uobj) +/* + * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the + * caller can no longer assume uobj is valid. If this function fails it + * destroys the uboject, including the attached HW object. + */ +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) { - /* Cleanup is running. Calling this should have been impossible */ - if (!down_read_trylock(&uobj->context->cleanup_rwsem)) { - int ret; + struct ib_uverbs_file *ufile = uobj->ufile; + int ret; - WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); - ret = uobj->type->type_class->remove_commit(uobj, - RDMA_REMOVE_DURING_CLEANUP); - if (ret) - pr_warn("ib_uverbs: cleanup of idr object %d failed\n", - uobj->id); + /* alloc_commit consumes the uobj kref */ + ret = uobj->uapi_object->type_class->alloc_commit(uobj); + if (ret) { + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + up_read(&ufile->hw_destroy_rwsem); return ret; } + /* kref is held so long as the uobj is on the uobj list. */ + uverbs_uobject_get(uobj); + spin_lock_irq(&ufile->uobjects_lock); + list_add(&uobj->list, &ufile->uobjects); + spin_unlock_irq(&ufile->uobjects_lock); + /* matches atomic_set(-1) in alloc_uobj */ - assert_uverbs_usecnt(uobj, true); atomic_set(&uobj->usecnt, 0); - mutex_lock(&uobj->context->uobjects_lock); - list_add(&uobj->list, &uobj->context->uobjects); - mutex_unlock(&uobj->context->uobjects_lock); - - uobj->type->type_class->alloc_commit(uobj); - up_read(&uobj->context->cleanup_rwsem); + /* Matches the down_read in rdma_alloc_begin_uobject */ + up_read(&ufile->hw_destroy_rwsem); return 0; } -static void alloc_abort_idr_uobject(struct ib_uobject *uobj) -{ - uverbs_idr_remove_uobj(uobj); - ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, - RDMACG_RESOURCE_HCA_OBJECT); - uverbs_uobject_put(uobj); -} - +/* + * This consumes the kref for uobj. It is up to the caller to unwind the HW + * object and anything else connected to uobj before calling this. + */ void rdma_alloc_abort_uobject(struct ib_uobject *uobj) { - uobj->type->type_class->alloc_abort(uobj); + struct ib_uverbs_file *ufile = uobj->ufile; + + uobj->object = NULL; + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + + /* Matches the down_read in rdma_alloc_begin_uobject */ + up_read(&ufile->hw_destroy_rwsem); } -static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive) +static void lookup_put_idr_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { } -static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive) +static void lookup_put_fd_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { struct file *filp = uobj->object; - WARN_ON(exclusive); + WARN_ON(mode != UVERBS_LOOKUP_READ); /* This indirectly calls uverbs_close_fd and free the object */ fput(filp); } -void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) +void rdma_lookup_put_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode) { - assert_uverbs_usecnt(uobj, exclusive); - uobj->type->type_class->lookup_put(uobj, exclusive); + assert_uverbs_usecnt(uobj, mode); + uobj->uapi_object->type_class->lookup_put(uobj, mode); /* * In order to unlock an object, either decrease its usecnt for * read access or zero it in case of exclusive access. See * uverbs_try_lock_object for locking schema information. */ - if (!exclusive) + switch (mode) { + case UVERBS_LOOKUP_READ: atomic_dec(&uobj->usecnt); - else + break; + case UVERBS_LOOKUP_WRITE: atomic_set(&uobj->usecnt, 0); + break; + case UVERBS_LOOKUP_DESTROY: + break; + } + /* Pairs with the kref obtained by type->lookup_get */ uverbs_uobject_put(uobj); } +void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile) +{ + spin_lock_init(&ufile->idr_lock); + idr_init(&ufile->idr); +} + +void release_ufile_idr_uobject(struct ib_uverbs_file *ufile) +{ + struct ib_uobject *entry; + int id; + + /* + * At this point uverbs_cleanup_ufile() is guaranteed to have run, and + * there are no HW objects left, however the IDR is still populated + * with anything that has not been cleaned up by userspace. Since the + * kref on ufile is 0, nothing is allowed to call lookup_get. + * + * This is an optimized equivalent to remove_handle_idr_uobject + */ + idr_for_each_entry(&ufile->idr, entry, id) { + WARN_ON(entry->object); + uverbs_uobject_put(entry); + } + + idr_destroy(&ufile->idr); +} + const struct uverbs_obj_type_class uverbs_idr_class = { .alloc_begin = alloc_begin_idr_uobject, .lookup_get = lookup_get_idr_uobject, .alloc_commit = alloc_commit_idr_uobject, .alloc_abort = alloc_abort_idr_uobject, .lookup_put = lookup_put_idr_uobject, - .remove_commit = remove_commit_idr_uobject, + .destroy_hw = destroy_hw_idr_uobject, + .remove_handle = remove_handle_idr_uobject, /* * When we destroy an object, we first just lock it for WRITE and * actually DESTROY it in the finalize stage. So, the problematic @@ -611,103 +768,180 @@ const struct uverbs_obj_type_class uverbs_idr_class = { */ .needs_kfree_rcu = true, }; +EXPORT_SYMBOL(uverbs_idr_class); -static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) +void uverbs_close_fd(struct file *f) { - struct ib_ucontext *ucontext; - struct ib_uverbs_file *ufile = uobj_file->ufile; - int ret; + struct ib_uobject *uobj = f->private_data; + struct ib_uverbs_file *ufile = uobj->ufile; - mutex_lock(&uobj_file->ufile->cleanup_mutex); + if (down_read_trylock(&ufile->hw_destroy_rwsem)) { + /* + * lookup_get_fd_uobject holds the kref on the struct file any + * time a FD uobj is locked, which prevents this release + * method from being invoked. Meaning we can always get the + * write lock here, or we have a kernel bug. + */ + WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE)); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE); + up_read(&ufile->hw_destroy_rwsem); + } - /* uobject was either already cleaned up or is cleaned up right now anyway */ - if (!uobj_file->uobj.context || - !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem)) - goto unlock; + /* Matches the get in alloc_begin_fd_uobject */ + kref_put(&ufile->ref, ib_uverbs_release_file); - ucontext = uobj_file->uobj.context; - ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE); - up_read(&ucontext->cleanup_rwsem); - if (ret) - pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); -unlock: - mutex_unlock(&ufile->cleanup_mutex); + /* Pairs with filp->private_data in alloc_begin_fd_uobject */ + uverbs_uobject_put(uobj); } -void uverbs_close_fd(struct file *f) -{ - struct ib_uobject_file *uobj_file = f->private_data; - struct kref *uverbs_file_ref = &uobj_file->ufile->ref; +static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + struct ib_device *ib_dev = ibcontext->device; + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } - _uverbs_close_fd(uobj_file); - uverbs_uobject_put(&uobj_file->uobj); - kref_put(uverbs_file_ref, ib_uverbs_release_file); + down_write(&owning_mm->mmap_sem); + ib_dev->disassociate_ucontext(ibcontext); + up_write(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); } -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) +/* + * Drop the ucontext off the ufile and completely disconnect it from the + * ib_device + */ +static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - enum rdma_remove_reason reason = device_removed ? - RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; - unsigned int cur_order = 0; + struct ib_ucontext *ucontext = ufile->ucontext; + int ret; + + if (reason == RDMA_REMOVE_DRIVER_REMOVE) + ufile_disassociate_ucontext(ucontext); + + put_pid(ucontext->tgid); + ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); - ucontext->cleanup_reason = reason; /* - * Waits for all remove_commit and alloc_commit to finish. Logically, We - * want to hold this forever as the context is going to be destroyed, - * but we'll release it since it causes a "held lock freed" BUG message. + * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove + * the error return. */ - down_write(&ucontext->cleanup_rwsem); + ret = ucontext->device->dealloc_ucontext(ucontext); + WARN_ON(ret); - while (!list_empty(&ucontext->uobjects)) { - struct ib_uobject *obj, *next_obj; - unsigned int next_order = UINT_MAX; + ufile->ucontext = NULL; +} + +static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) +{ + struct ib_uobject *obj, *next_obj; + int ret = -EINVAL; + /* + * This shouldn't run while executing other commands on this + * context. Thus, the only thing we should take care of is + * releasing a FD while traversing this list. The FD could be + * closed and released from the _release fop of this FD. + * In order to mitigate this, we add a lock. + * We take and release the lock per traversal in order to let + * other threads (which might still use the FDs) chance to run. + */ + list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { /* - * This shouldn't run while executing other commands on this - * context. Thus, the only thing we should take care of is - * releasing a FD while traversing this list. The FD could be - * closed and released from the _release fop of this FD. - * In order to mitigate this, we add a lock. - * We take and release the lock per order traversal in order - * to let other threads (which might still use the FDs) chance - * to run. + * if we hit this WARN_ON, that means we are + * racing with a lookup_get. */ - mutex_lock(&ucontext->uobjects_lock); - list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, - list) { - if (obj->type->destroy_order == cur_order) { - int ret; - - /* - * if we hit this WARN_ON, that means we are - * racing with a lookup_get. - */ - WARN_ON(uverbs_try_lock_object(obj, true)); - ret = obj->type->type_class->remove_commit(obj, - reason); - list_del(&obj->list); - if (ret) - pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n", - obj->id, cur_order); - /* put the ref we took when we created the object */ - uverbs_uobject_put(obj); - } else { - next_order = min(next_order, - obj->type->destroy_order); - } - } - mutex_unlock(&ucontext->uobjects_lock); - cur_order = next_order; + WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); + if (!uverbs_destroy_uobject(obj, reason)) + ret = 0; } - up_write(&ucontext->cleanup_rwsem); + return ret; } -void uverbs_initialize_ucontext(struct ib_ucontext *ucontext) +/* + * Destroy the uncontext and every uobject associated with it. If called with + * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has + * been completed and ufile->ucontext is NULL. + * + * This is internally locked and can be called in parallel from multiple + * contexts. + */ +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - ucontext->cleanup_reason = 0; - mutex_init(&ucontext->uobjects_lock); - INIT_LIST_HEAD(&ucontext->uobjects); - init_rwsem(&ucontext->cleanup_rwsem); + if (reason == RDMA_REMOVE_CLOSE) { + /* + * During destruction we might trigger something that + * synchronously calls release on any file descriptor. For + * this reason all paths that come from file_operations + * release must use try_lock. They can progress knowing that + * there is an ongoing uverbs_destroy_ufile_hw that will clean + * up the driver resources. + */ + if (!mutex_trylock(&ufile->ucontext_lock)) + return; + + } else { + mutex_lock(&ufile->ucontext_lock); + } + + down_write(&ufile->hw_destroy_rwsem); + + /* + * If a ucontext was never created then we can't have any uobjects to + * cleanup, nothing to do. + */ + if (!ufile->ucontext) + goto done; + + ufile->ucontext->closing = true; + ufile->ucontext->cleanup_retryable = true; + while (!list_empty(&ufile->uobjects)) + if (__uverbs_cleanup_ufile(ufile, reason)) { + /* + * No entry was cleaned-up successfully during this + * iteration + */ + break; + } + + ufile->ucontext->cleanup_retryable = false; + if (!list_empty(&ufile->uobjects)) + __uverbs_cleanup_ufile(ufile, reason); + + ufile_destroy_ucontext(ufile, reason); + +done: + up_write(&ufile->hw_destroy_rwsem); + mutex_unlock(&ufile->ucontext_lock); } const struct uverbs_obj_type_class uverbs_fd_class = { @@ -716,23 +950,33 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .alloc_commit = alloc_commit_fd_uobject, .alloc_abort = alloc_abort_fd_uobject, .lookup_put = lookup_put_fd_uobject, - .remove_commit = remove_commit_fd_uobject, + .destroy_hw = destroy_hw_fd_uobject, + .remove_handle = remove_handle_fd_uobject, .needs_kfree_rcu = false, }; +EXPORT_SYMBOL(uverbs_fd_class); -struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, - struct ib_ucontext *ucontext, - enum uverbs_obj_access access, - int id) +struct ib_uobject * +uverbs_get_uobject_from_file(u16 object_id, + struct ib_uverbs_file *ufile, + enum uverbs_obj_access access, s64 id) { + const struct uverbs_api_object *obj = + uapi_get_object(ufile->device->uapi, object_id); + switch (access) { case UVERBS_ACCESS_READ: - return rdma_lookup_get_uobject(type_attrs, ucontext, id, false); + return rdma_lookup_get_uobject(obj, ufile, id, + UVERBS_LOOKUP_READ); case UVERBS_ACCESS_DESTROY: + /* Actual destruction is done inside uverbs_handle_method */ + return rdma_lookup_get_uobject(obj, ufile, id, + UVERBS_LOOKUP_DESTROY); case UVERBS_ACCESS_WRITE: - return rdma_lookup_get_uobject(type_attrs, ucontext, id, true); + return rdma_lookup_get_uobject(obj, ufile, id, + UVERBS_LOOKUP_WRITE); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(type_attrs, ucontext); + return rdma_alloc_begin_uobject(obj, ufile); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); @@ -753,16 +997,14 @@ int uverbs_finalize_object(struct ib_uobject *uobj, switch (access) { case UVERBS_ACCESS_READ: - rdma_lookup_put_uobject(uobj, false); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ); break; case UVERBS_ACCESS_WRITE: - rdma_lookup_put_uobject(uobj, true); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); break; case UVERBS_ACCESS_DESTROY: - if (commit) - ret = rdma_remove_commit_uobject(uobj); - else - rdma_lookup_put_uobject(uobj, true); + if (uobj) + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); break; case UVERBS_ACCESS_NEW: if (commit) @@ -777,43 +1019,3 @@ int uverbs_finalize_object(struct ib_uobject *uobj, return ret; } - -int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, - struct uverbs_attr_spec_hash * const *spec_hash, - size_t num, - bool commit) -{ - unsigned int i; - int ret = 0; - - for (i = 0; i < num; i++) { - struct uverbs_attr_bundle_hash *curr_bundle = - &attrs_bundle->hash[i]; - const struct uverbs_attr_spec_hash *curr_spec_bucket = - spec_hash[i]; - unsigned int j; - - for (j = 0; j < curr_bundle->num_attrs; j++) { - struct uverbs_attr *attr; - const struct uverbs_attr_spec *spec; - - if (!uverbs_attr_is_valid_in_hash(curr_bundle, j)) - continue; - - attr = &curr_bundle->attrs[j]; - spec = &curr_spec_bucket->attrs[j]; - - if (spec->type == UVERBS_ATTR_TYPE_IDR || - spec->type == UVERBS_ATTR_TYPE_FD) { - int current_ret; - - current_ret = uverbs_finalize_object(attr->obj_attr.uobject, - spec->obj.access, - commit); - if (!ret) - ret = current_ret; - } - } - } - return ret; -} diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 1efcf93238dd..f962f2a593ba 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -43,20 +43,12 @@ #include <rdma/ib_verbs.h> #include <linux/mutex.h> -int uverbs_ns_idx(u16 *id, unsigned int ns_count); -const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, - uint16_t object); -const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, - uint16_t method); -/* - * These functions initialize the context and cleanups its uobjects. - * The context has a list of objects which is protected by a mutex - * on the context. initialize_ucontext should be called when we create - * a context. - * cleanup_ucontext removes all uobjects from the context and puts them. - */ -void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed); -void uverbs_initialize_ucontext(struct ib_ucontext *ucontext); +struct ib_uverbs_device; + +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason); + +int uobj_destroy(struct ib_uobject *uobj); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -82,7 +74,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject); void uverbs_close_fd(struct file *f); /* - * Get an ib_uobject that corresponds to the given id from ucontext, assuming + * Get an ib_uobject that corresponds to the given id from ufile, assuming * the object is from the given type. Lock it to the required access when * applicable. * This function could create (access == NEW), destroy (access == DESTROY) @@ -90,13 +82,11 @@ void uverbs_close_fd(struct file *f); * The action will be finalized only when uverbs_finalize_object or * uverbs_finalize_objects are called. */ -struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, - struct ib_ucontext *ucontext, - enum uverbs_obj_access access, - int id); -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, - bool commit); +struct ib_uobject * +uverbs_get_uobject_from_file(u16 object_id, + struct ib_uverbs_file *ufile, + enum uverbs_obj_access access, s64 id); + /* * Note that certain finalize stages could return a status: * (a) alloc_commit could return a failure if the object is committed at the @@ -112,9 +102,63 @@ int uverbs_finalize_object(struct ib_uobject *uobj, * function. For example, this could happen when we couldn't destroy an * object. */ -int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, - struct uverbs_attr_spec_hash * const *spec_hash, - size_t num, - bool commit); +int uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, + bool commit); + +void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); +void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); + +/* + * This is the runtime description of the uverbs API, used by the syscall + * machinery to validate and dispatch calls. + */ + +/* + * Depending on ID the slot pointer in the radix tree points at one of these + * structs. + */ +struct uverbs_api_object { + const struct uverbs_obj_type *type_attrs; + const struct uverbs_obj_type_class *type_class; +}; + +struct uverbs_api_ioctl_method { + int (__rcu *handler)(struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN); + u16 bundle_size; + u8 use_stack:1; + u8 driver_method:1; + u8 key_bitmap_len; + u8 destroy_bkey; +}; + +struct uverbs_api_attr { + struct uverbs_attr_spec spec; +}; + +struct uverbs_api_object; +struct uverbs_api { + /* radix tree contains struct uverbs_api_* pointers */ + struct radix_tree_root radix; + enum rdma_driver_id driver_id; +}; + +static inline const struct uverbs_api_object * +uapi_get_object(struct uverbs_api *uapi, u16 object_id) +{ + return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); +} + +char *uapi_key_format(char *S, unsigned int key); +struct uverbs_api *uverbs_alloc_api( + const struct uverbs_object_tree_def *const *driver_specs, + enum rdma_driver_id driver_id); +void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev); +void uverbs_disassociate_api(struct uverbs_api *uapi); +void uverbs_destroy_api(struct uverbs_api *uapi); +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs); #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index a4fbdc5d28fa..ee366199b169 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -143,14 +143,15 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de #define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ BONDING_SLAVE_STATE_NA) -static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool +is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { struct net_device *real_dev; - int res; + bool res; if (!rdma_ndev) - return 0; + return false; rcu_read_lock(); real_dev = rdma_vlan_dev_real_dev(cookie); @@ -166,14 +167,15 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, return res; } -static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool +is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { struct net_device *master_dev; - int res; + bool res; if (!rdma_ndev) - return 0; + return false; rcu_read_lock(); master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev); @@ -184,22 +186,59 @@ static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, return res; } -static int pass_all_filter(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +/** is_ndev_for_default_gid_filter - Check if a given netdevice + * can be considered for default GIDs or not. + * @ib_dev: IB device to check + * @port: Port to consider for adding default GID + * @rdma_ndev: rdma netdevice pointer + * @cookie_ndev: Netdevice to consider to form a default GID + * + * is_ndev_for_default_gid_filter() returns true if a given netdevice can be + * considered for deriving default RoCE GID, returns false otherwise. + */ +static bool +is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *cookie_ndev = cookie; + bool res; + + if (!rdma_ndev) + return false; + + rcu_read_lock(); + + /* + * When rdma netdevice is used in bonding, bonding master netdevice + * should be considered for default GIDs. Therefore, ignore slave rdma + * netdevices when bonding is considered. + * Additionally when event(cookie) netdevice is bond master device, + * make sure that it the upper netdevice of rdma netdevice. + */ + res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) || + (netif_is_bond_master(cookie_ndev) && + rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))); + + rcu_read_unlock(); + return res; +} + +static bool pass_all_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { - return 1; + return true; } -static int upper_device_filter(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) +static bool upper_device_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) { - int res; + bool res; if (!rdma_ndev) - return 0; + return false; if (rdma_ndev == cookie) - return 1; + return true; rcu_read_lock(); res = rdma_is_upper_dev_rcu(rdma_ndev, cookie); @@ -208,6 +247,34 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port, return res; } +/** + * is_upper_ndev_bond_master_filter - Check if a given netdevice + * is bond master device of netdevice of the the RDMA device of port. + * @ib_dev: IB device to check + * @port: Port to consider for adding default GID + * @rdma_ndev: Pointer to rdma netdevice + * @cookie: Netdevice to consider to form a default GID + * + * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev + * is bond master device and rdma_ndev is its lower netdevice. It might + * not have been established as slave device yet. + */ +static bool +is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, + void *cookie) +{ + struct net_device *cookie_ndev = cookie; + bool match = false; + + rcu_read_lock(); + if (netif_is_bond_master(cookie_ndev) && + rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)) + match = true; + rcu_read_unlock(); + return match; +} + static void update_gid_ip(enum gid_op_type gid_op, struct ib_device *ib_dev, u8 port, struct net_device *ndev, @@ -223,34 +290,10 @@ static void update_gid_ip(enum gid_op_type gid_op, update_gid(gid_op, ib_dev, port, &gid, &gid_attr); } -static void enum_netdev_default_gids(struct ib_device *ib_dev, - u8 port, struct net_device *event_ndev, - struct net_device *rdma_ndev) -{ - unsigned long gid_type_mask; - - rcu_read_lock(); - if (!rdma_ndev || - ((rdma_ndev != event_ndev && - !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) || - is_eth_active_slave_of_bonding_rcu(rdma_ndev, - netdev_master_upper_dev_get_rcu(rdma_ndev)) == - BONDING_SLAVE_STATE_INACTIVE)) { - rcu_read_unlock(); - return; - } - rcu_read_unlock(); - - gid_type_mask = roce_gid_type_mask_support(ib_dev, port); - - ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask, - IB_CACHE_GID_DEFAULT_MODE_SET); -} - static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, u8 port, - struct net_device *event_ndev, - struct net_device *rdma_ndev) + struct net_device *rdma_ndev, + struct net_device *event_ndev) { struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); unsigned long gid_type_mask; @@ -381,7 +424,6 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port, static void add_netdev_ips(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, void *cookie) { - enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev); _add_netdev_ips(ib_dev, port, cookie); } @@ -391,6 +433,38 @@ static void del_netdev_ips(struct ib_device *ib_dev, u8 port, ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie); } +/** + * del_default_gids - Delete default GIDs of the event/cookie netdevice + * @ib_dev: RDMA device pointer + * @port: Port of the RDMA device whose GID table to consider + * @rdma_ndev: Unused rdma netdevice + * @cookie: Pointer to event netdevice + * + * del_default_gids() deletes the default GIDs of the event/cookie netdevice. + */ +static void del_default_gids(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *cookie_ndev = cookie; + unsigned long gid_type_mask; + + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + + ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask, + IB_CACHE_GID_DEFAULT_MODE_DELETE); +} + +static void add_default_gids(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *event_ndev = cookie; + unsigned long gid_type_mask; + + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask, + IB_CACHE_GID_DEFAULT_MODE_SET); +} + static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, u8 port, struct net_device *rdma_ndev, @@ -405,9 +479,20 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, rtnl_lock(); down_read(&net_rwsem); for_each_net(net) - for_each_netdev(net, ndev) - if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) - add_netdev_ips(ib_dev, port, rdma_ndev, ndev); + for_each_netdev(net, ndev) { + /* + * Filter and add default GIDs of the primary netdevice + * when not in bonding mode, or add default GIDs + * of bond master device, when in bonding mode. + */ + if (is_ndev_for_default_gid_filter(ib_dev, port, + rdma_ndev, ndev)) + add_default_gids(ib_dev, port, rdma_ndev, ndev); + + if (is_eth_port_of_netdev_filter(ib_dev, port, + rdma_ndev, ndev)) + _add_netdev_ips(ib_dev, port, ndev); + } up_read(&net_rwsem); rtnl_unlock(); } @@ -513,18 +598,12 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port, rcu_read_unlock(); if (master_ndev) { - bond_delete_netdev_default_gids(ib_dev, port, master_ndev, - rdma_ndev); + bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev, + master_ndev); dev_put(master_ndev); } } -static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port, - struct net_device *rdma_ndev, void *cookie) -{ - bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev); -} - /* The following functions operate on all IB devices. netdevice_event and * addr_event execute ib_enum_all_roce_netdevs through a work. * ib_enum_all_roce_netdevs iterates through all IB devices. @@ -575,40 +654,94 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, } static const struct netdev_event_work_cmd add_cmd = { - .cb = add_netdev_ips, .filter = is_eth_port_of_netdev}; + .cb = add_netdev_ips, + .filter = is_eth_port_of_netdev_filter +}; + static const struct netdev_event_work_cmd add_cmd_upper_ips = { - .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev}; + .cb = add_netdev_upper_ips, + .filter = is_eth_port_of_netdev_filter +}; -static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info, - struct netdev_event_work_cmd *cmds) +static void +ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) { - static const struct netdev_event_work_cmd upper_ips_del_cmd = { - .cb = del_netdev_upper_ips, .filter = upper_device_filter}; - static const struct netdev_event_work_cmd bonding_default_del_cmd = { - .cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave}; - - if (changeupper_info->linking == false) { - cmds[0] = upper_ips_del_cmd; - cmds[0].ndev = changeupper_info->upper_dev; - cmds[1] = add_cmd; - } else { - cmds[0] = bonding_default_del_cmd; - cmds[0].ndev = changeupper_info->upper_dev; - cmds[1] = add_cmd_upper_ips; - cmds[1].ndev = changeupper_info->upper_dev; - cmds[1].filter_ndev = changeupper_info->upper_dev; - } + static const struct netdev_event_work_cmd + upper_ips_del_cmd = { + .cb = del_netdev_upper_ips, + .filter = upper_device_filter + }; + + cmds[0] = upper_ips_del_cmd; + cmds[0].ndev = changeupper_info->upper_dev; + cmds[1] = add_cmd; } +static const struct netdev_event_work_cmd bonding_default_add_cmd = { + .cb = add_default_gids, + .filter = is_upper_ndev_bond_master_filter +}; + +static void +ndev_event_link(struct net_device *event_ndev, + struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) +{ + static const struct netdev_event_work_cmd + bonding_default_del_cmd = { + .cb = del_default_gids, + .filter = is_upper_ndev_bond_master_filter + }; + /* + * When a lower netdev is linked to its upper bonding + * netdev, delete lower slave netdev's default GIDs. + */ + cmds[0] = bonding_default_del_cmd; + cmds[0].ndev = event_ndev; + cmds[0].filter_ndev = changeupper_info->upper_dev; + + /* Now add bonding upper device default GIDs */ + cmds[1] = bonding_default_add_cmd; + cmds[1].ndev = changeupper_info->upper_dev; + cmds[1].filter_ndev = changeupper_info->upper_dev; + + /* Now add bonding upper device IP based GIDs */ + cmds[2] = add_cmd_upper_ips; + cmds[2].ndev = changeupper_info->upper_dev; + cmds[2].filter_ndev = changeupper_info->upper_dev; +} + +static void netdevice_event_changeupper(struct net_device *event_ndev, + struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) +{ + if (changeupper_info->linking) + ndev_event_link(event_ndev, changeupper_info, cmds); + else + ndev_event_unlink(changeupper_info, cmds); +} + +static const struct netdev_event_work_cmd add_default_gid_cmd = { + .cb = add_default_gids, + .filter = is_ndev_for_default_gid_filter, +}; + static int netdevice_event(struct notifier_block *this, unsigned long event, void *ptr) { static const struct netdev_event_work_cmd del_cmd = { .cb = del_netdev_ips, .filter = pass_all_filter}; - static const struct netdev_event_work_cmd bonding_default_del_cmd_join = { - .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave}; - static const struct netdev_event_work_cmd default_del_cmd = { - .cb = del_netdev_default_ips, .filter = pass_all_filter}; + static const struct netdev_event_work_cmd + bonding_default_del_cmd_join = { + .cb = del_netdev_default_ips_join, + .filter = is_eth_port_inactive_slave_filter + }; + static const struct netdev_event_work_cmd + netdev_del_cmd = { + .cb = del_netdev_ips, + .filter = is_eth_port_of_netdev_filter + }; static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { .cb = del_netdev_upper_ips, .filter = upper_device_filter}; struct net_device *ndev = netdev_notifier_info_to_dev(ptr); @@ -621,7 +754,8 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, case NETDEV_REGISTER: case NETDEV_UP: cmds[0] = bonding_default_del_cmd_join; - cmds[1] = add_cmd; + cmds[1] = add_default_gid_cmd; + cmds[2] = add_cmd; break; case NETDEV_UNREGISTER: @@ -632,19 +766,22 @@ static int netdevice_event(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGEADDR: - cmds[0] = default_del_cmd; - cmds[1] = add_cmd; + cmds[0] = netdev_del_cmd; + cmds[1] = add_default_gid_cmd; + cmds[2] = add_cmd; break; case NETDEV_CHANGEUPPER: - netdevice_event_changeupper( + netdevice_event_changeupper(ndev, container_of(ptr, struct netdev_notifier_changeupper_info, info), cmds); break; case NETDEV_BONDING_FAILOVER: cmds[0] = bonding_event_ips_del_cmd; - cmds[1] = bonding_default_del_cmd_join; + /* Add default GIDs of the bond device */ + cmds[1] = bonding_default_add_cmd; + /* Add IP based GIDs of the bond device */ cmds[2] = add_cmd_upper_ips; break; @@ -660,7 +797,8 @@ static void update_gid_event_work_handler(struct work_struct *_work) struct update_gid_event_work *work = container_of(_work, struct update_gid_event_work, work); - ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev, + ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter, + work->gid_attr.ndev, callback_for_addr_gid_device_scan, work); dev_put(work->gid_attr.ndev); diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index c8963e91f92a..683e6d11a564 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, } ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); - if (ret < nents) { + if (ret < 0 || ret < nents) { ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); return -EINVAL; } @@ -325,7 +325,7 @@ out_unmap_sg: EXPORT_SYMBOL(rdma_rw_ctx_init); /** - * rdma_rw_ctx_signature init - initialize a RW context with signature offload + * rdma_rw_ctx_signature_init - initialize a RW context with signature offload * @ctx: context to initialize * @qp: queue pair to operate on * @port_num: port num to which the connection is bound @@ -564,10 +564,10 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs); int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr) { - struct ib_send_wr *first_wr, *bad_wr; + struct ib_send_wr *first_wr; first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr); - return ib_post_send(qp, first_wr, &bad_wr); + return ib_post_send(qp, first_wr, NULL); } EXPORT_SYMBOL(rdma_rw_ctx_post); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a61ec7e33613..7b794a14d6e8 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1227,20 +1227,10 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num) return src_path_mask; } -static int -roce_resolve_route_from_path(struct ib_device *device, u8 port_num, - struct sa_path_rec *rec) +static int roce_resolve_route_from_path(struct sa_path_rec *rec, + const struct ib_gid_attr *attr) { - struct net_device *resolved_dev; - struct net_device *ndev; - struct net_device *idev; - struct rdma_dev_addr dev_addr = { - .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ? - sa_path_get_ifindex(rec) : 0), - .net = sa_path_get_ndev(rec) ? - sa_path_get_ndev(rec) : - &init_net - }; + struct rdma_dev_addr dev_addr = {}; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; @@ -1250,9 +1240,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, if (rec->roce.route_resolved) return 0; + if (!attr || !attr->ndev) + return -EINVAL; - if (!device->get_netdev) - return -EOPNOTSUPP; + dev_addr.bound_dev_if = attr->ndev->ifindex; + /* TODO: Use net from the ib_gid_attr once it is added to it, + * until than, limit itself to init_net. + */ + dev_addr.net = &init_net; rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); @@ -1268,60 +1263,52 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num, rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) return -EINVAL; - idev = device->get_netdev(device, port_num); - if (!idev) - return -ENODEV; - - resolved_dev = dev_get_by_index(dev_addr.net, - dev_addr.bound_dev_if); - if (!resolved_dev) { - ret = -ENODEV; - goto done; - } - ndev = ib_get_ndev_from_path(rec); - rcu_read_lock(); - if ((ndev && ndev != resolved_dev) || - (resolved_dev != idev && - !rdma_is_upper_dev_rcu(idev, resolved_dev))) - ret = -EHOSTUNREACH; - rcu_read_unlock(); - dev_put(resolved_dev); - if (ndev) - dev_put(ndev); -done: - dev_put(idev); - if (!ret) - rec->roce.route_resolved = true; - return ret; + rec->roce.route_resolved = true; + return 0; } static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *gid_attr) { enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); - struct net_device *ndev; - u16 gid_index; - int ret; - ndev = ib_get_ndev_from_path(rec); - ret = ib_find_cached_gid_by_port(device, &rec->sgid, type, - port_num, ndev, &gid_index); - if (ndev) - dev_put(ndev); - if (ret) - return ret; + if (!gid_attr) { + gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type, + port_num, NULL); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + } else + rdma_hold_gid_attr(gid_attr); - rdma_ah_set_grh(ah_attr, &rec->dgid, - be32_to_cpu(rec->flow_label), - gid_index, rec->hop_limit, - rec->traffic_class); + rdma_move_grh_sgid_attr(ah_attr, &rec->dgid, + be32_to_cpu(rec->flow_label), + rec->hop_limit, rec->traffic_class, + gid_attr); return 0; } +/** + * ib_init_ah_attr_from_path - Initialize address handle attributes based on + * an SA path record. + * @device: Device associated ah attributes initialization. + * @port_num: Port on the specified device. + * @rec: path record entry to use for ah attributes initialization. + * @ah_attr: address handle attributes to initialization from path record. + * @sgid_attr: SGID attribute to consider during initialization. + * + * When ib_init_ah_attr_from_path() returns success, + * (a) for IB link layer it optionally contains a reference to SGID attribute + * when GRH is present for IB link layer. + * (b) for RoCE link layer it contains a reference to SGID attribute. + * User must invoke rdma_destroy_ah_attr() to release reference to SGID + * attributes which are initialized using ib_init_ah_attr_from_path(). + */ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *gid_attr) { int ret = 0; @@ -1332,7 +1319,7 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, rdma_ah_set_static_rate(ah_attr, rec->rate); if (sa_path_is_roce(rec)) { - ret = roce_resolve_route_from_path(device, port_num, rec); + ret = roce_resolve_route_from_path(rec, gid_attr); if (ret) return ret; @@ -1349,7 +1336,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, } if (rec->hop_limit > 0 || sa_path_is_roce(rec)) - ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr); + ret = init_ah_attr_grh_fields(device, port_num, + rec, ah_attr, gid_attr); return ret; } EXPORT_SYMBOL(ib_init_ah_attr_from_path); @@ -1557,8 +1545,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ARRAY_SIZE(path_rec_table), mad->data, &rec); rec.rec_type = SA_PATH_REC_TYPE_IB; - sa_path_set_ndev(&rec, NULL); - sa_path_set_ifindex(&rec, 0); sa_path_set_dmac_zero(&rec); if (query->conv_pr) { @@ -2290,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work) struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct rdma_ah_attr ah_attr; + bool grh_required; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { pr_warn("Couldn't query port\n"); @@ -2314,16 +2301,27 @@ static void update_sm_ah(struct work_struct *work) rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid); rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); - if (port_attr.grh_required) { - if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) { - rdma_ah_set_make_grd(&ah_attr, true); - } else { - rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); - rdma_ah_set_subnet_prefix(&ah_attr, - cpu_to_be64(port_attr.subnet_prefix)); - rdma_ah_set_interface_id(&ah_attr, - cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); - } + + grh_required = rdma_is_grh_required(port->agent->device, + port->port_num); + + /* + * The OPA sm_lid of 0xFFFF needs special handling so that it can be + * differentiated from a permissive LID of 0xFFFF. We set the + * grh_required flag here so the SA can program the DGID in the + * address handle appropriately + */ + if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA && + (grh_required || + port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))) + rdma_ah_set_make_grd(&ah_attr, true); + + if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) { + rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); + rdma_ah_set_subnet_prefix(&ah_attr, + cpu_to_be64(port_attr.subnet_prefix)); + rdma_ah_set_interface_id(&ah_attr, + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); } new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 31c7efaf8e7a..7fd14ead7b37 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -42,6 +42,7 @@ #include <rdma/ib_mad.h> #include <rdma/ib_pma.h> +#include <rdma/ib_cache.h> struct ib_port; @@ -346,7 +347,7 @@ static struct attribute *port_default_attrs[] = { NULL }; -static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) +static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { if (!gid_attr->ndev) return -EINVAL; @@ -354,33 +355,26 @@ static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) return sprintf(buf, "%s\n", gid_attr->ndev->name); } -static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf) +static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); } -static ssize_t _show_port_gid_attr(struct ib_port *p, - struct port_attribute *attr, - char *buf, - size_t (*print)(struct ib_gid_attr *gid_attr, - char *buf)) +static ssize_t _show_port_gid_attr( + struct ib_port *p, struct port_attribute *attr, char *buf, + size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - union ib_gid gid; - struct ib_gid_attr gid_attr = {}; + const struct ib_gid_attr *gid_attr; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, - &gid_attr); - if (ret) - goto err; + gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); - ret = print(&gid_attr, buf); - -err: - if (gid_attr.ndev) - dev_put(gid_attr.ndev); + ret = print(gid_attr, buf); + rdma_put_gid_attr(gid_attr); return ret; } @@ -389,26 +383,28 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - union ib_gid *pgid; - union ib_gid gid; + const struct ib_gid_attr *gid_attr; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); + gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); + if (IS_ERR(gid_attr)) { + const union ib_gid zgid = {}; + + /* If reading GID fails, it is likely due to GID entry being + * empty (invalid) or reserved GID in the table. User space + * expects to read GID table entries as long as it given index + * is within GID table size. Administrative/debugging tool + * fails to query rest of the GID entries if it hits error + * while querying a GID of the given index. To avoid user + * space throwing such error on fail to read gid, return zero + * GID as before. This maintains backward compatibility. + */ + return sprintf(buf, "%pI6\n", zgid.raw); + } - /* If reading GID fails, it is likely due to GID entry being empty - * (invalid) or reserved GID in the table. - * User space expects to read GID table entries as long as it given - * index is within GID table size. - * Administrative/debugging tool fails to query rest of the GID entries - * if it hits error while querying a GID of the given index. - * To avoid user space throwing such error on fail to read gid, return - * zero GID as before. This maintains backward compatibility. - */ - if (ret) - pgid = &zgid; - else - pgid = &gid; - return sprintf(buf, "%pI6\n", pgid->raw); + ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); + rdma_put_gid_attr(gid_attr); + return ret; } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 9eef96dacbd7..faa9e6116b2f 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -207,7 +207,7 @@ error: } static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, - struct ib_cm_req_event_param *kreq) + const struct ib_cm_req_event_param *kreq) { ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; @@ -231,7 +231,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, } static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, - struct ib_cm_rep_event_param *krep) + const struct ib_cm_rep_event_param *krep) { urep->remote_ca_guid = krep->remote_ca_guid; urep->remote_qkey = krep->remote_qkey; @@ -247,14 +247,14 @@ static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, } static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, - struct ib_cm_sidr_rep_event_param *krep) + const struct ib_cm_sidr_rep_event_param *krep) { urep->status = krep->status; urep->qkey = krep->qkey; urep->qpn = krep->qpn; }; -static int ib_ucm_event_process(struct ib_cm_event *evt, +static int ib_ucm_event_process(const struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; @@ -351,7 +351,7 @@ err1: } static int ib_ucm_event_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ib_ucm_event *uevent; struct ib_ucm_context *ctx; @@ -1000,14 +1000,11 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { - struct ib_cm_sidr_req_param param; + struct ib_cm_sidr_req_param param = {}; struct ib_ucm_context *ctx; struct ib_ucm_sidr_req cmd; int result; - param.private_data = NULL; - param.path = NULL; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 54ab6335c48d..a41792dbae1f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -84,7 +84,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, struct ib_umem *umem; struct page **page_list; struct vm_area_struct **vma_list; - unsigned long locked; unsigned long lock_limit; unsigned long cur_base; unsigned long npages; @@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, int i; unsigned long dma_attrs = 0; struct scatterlist *sg, *sg_list_start; - int need_release = 0; unsigned int gup_flags = FOLL_WRITE; if (dmasync) @@ -121,10 +119,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (access & IB_ACCESS_ON_DEMAND) { ret = ib_umem_odp_get(context, umem, access); - if (ret) { - kfree(umem); - return ERR_PTR(ret); - } + if (ret) + goto umem_kfree; return umem; } @@ -135,8 +131,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { - kfree(umem); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto umem_kfree; } /* @@ -149,41 +145,43 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, npages = ib_umem_num_pages(umem); - down_write(¤t->mm->mmap_sem); - - locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + down_write(¤t->mm->mmap_sem); + current->mm->pinned_vm += npages; + if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { + up_write(¤t->mm->mmap_sem); ret = -ENOMEM; - goto out; + goto vma; } + up_write(¤t->mm->mmap_sem); cur_base = addr & PAGE_MASK; if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; - goto out; + goto vma; } ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); if (ret) - goto out; + goto vma; if (!umem->writable) gup_flags |= FOLL_FORCE; - need_release = 1; sg_list_start = umem->sg_head.sgl; + down_read(¤t->mm->mmap_sem); while (npages) { ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); - - if (ret < 0) - goto out; + if (ret < 0) { + up_read(¤t->mm->mmap_sem); + goto umem_release; + } umem->npages += ret; cur_base += ret * PAGE_SIZE; @@ -199,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, /* preparing for next loop */ sg_list_start = sg; } + up_read(¤t->mm->mmap_sem); umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, @@ -206,27 +205,28 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, DMA_BIDIRECTIONAL, dma_attrs); - if (umem->nmap <= 0) { + if (!umem->nmap) { ret = -ENOMEM; - goto out; + goto umem_release; } ret = 0; + goto out; -out: - if (ret < 0) { - if (need_release) - __ib_umem_release(context->device, umem, 0); - kfree(umem); - } else - current->mm->pinned_vm = locked; - +umem_release: + __ib_umem_release(context->device, umem, 0); +vma: + down_write(¤t->mm->mmap_sem); + current->mm->pinned_vm -= ib_umem_num_pages(umem); up_write(¤t->mm->mmap_sem); +out: if (vma_list) free_page((unsigned long) vma_list); free_page((unsigned long) page_list); - - return ret < 0 ? ERR_PTR(ret) : umem; +umem_kfree: + if (ret) + kfree(umem); + return ret ? ERR_PTR(ret) : umem; } EXPORT_SYMBOL(ib_umem_get); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index bb98c9e4a7fd..c34a6852d691 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.traffic_class = grh->traffic_class; memcpy(packet->mad.hdr.gid, &grh->dgid, 16); packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label); + rdma_destroy_ah_attr(&ah_attr); } if (queue_packet(file, agent, packet)) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index c0d40fc3a53a..5df8e548cc14 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -111,7 +111,7 @@ struct ib_uverbs_device { struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; struct list_head uverbs_events_file_list; - struct uverbs_root_spec *specs_root; + struct uverbs_api *uapi; }; struct ib_uverbs_event_queue { @@ -130,21 +130,37 @@ struct ib_uverbs_async_event_file { }; struct ib_uverbs_completion_event_file { - struct ib_uobject_file uobj_file; + struct ib_uobject uobj; struct ib_uverbs_event_queue ev_queue; }; struct ib_uverbs_file { struct kref ref; - struct mutex mutex; - struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; + struct mutex ucontext_lock; + /* + * ucontext must be accessed via ib_uverbs_get_ucontext() or with + * ucontext_lock held + */ struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_async_event_file *async_file; struct list_head list; int is_closed; + /* + * To access the uobjects list hw_destroy_rwsem must be held for write + * OR hw_destroy_rwsem held for read AND uobjects_lock held. + * hw_destroy_rwsem should be called across any destruction of the HW + * object of an associated uobject. + */ + struct rw_semaphore hw_destroy_rwsem; + spinlock_t uobjects_lock; + struct list_head uobjects; + + u64 uverbs_cmd_mask; + u64 uverbs_ex_cmd_mask; + struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; @@ -196,7 +212,6 @@ struct ib_uwq_object { struct ib_ucq_object { struct ib_uobject uobject; - struct ib_uverbs_file *uverbs_file; struct list_head comp_list; struct list_head async_list; u32 comp_events_reported; @@ -230,7 +245,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd, +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why); int uverbs_dealloc_mw(struct ib_mw *mw); @@ -238,12 +253,7 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); -extern const struct uverbs_attr_def uverbs_uhw_compat_in; -extern const struct uverbs_attr_def uverbs_uhw_compat_out; long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -int uverbs_destroy_def_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs); struct ib_uverbs_flow_spec { union { @@ -292,7 +302,6 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ - struct ib_device *ib_dev, \ const char __user *buf, int in_len, \ int out_len) @@ -334,7 +343,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd); #define IB_UVERBS_DECLARE_EX_CMD(name) \ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ - struct ib_device *ib_dev, \ struct ib_udata *ucore, \ struct ib_udata *uhw) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 583d3a10b940..a21d5214afc3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -48,11 +48,10 @@ #include "core_priv.h" static struct ib_uverbs_completion_event_file * -ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) +_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile) { - struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL, - fd, context); - struct ib_uobject_file *uobj_file; + struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL, + fd, ufile); if (IS_ERR(uobj)) return (void *)uobj; @@ -60,13 +59,13 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) uverbs_uobject_get(uobj); uobj_put_read(uobj); - uobj_file = container_of(uobj, struct ib_uobject_file, uobj); - return container_of(uobj_file, struct ib_uverbs_completion_event_file, - uobj_file); + return container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); } +#define ib_uverbs_lookup_comp_file(_fd, _ufile) \ + _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile) ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -76,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_ucontext *ucontext; struct file *filp; struct ib_rdmacg_object cg_obj; + struct ib_device *ib_dev; int ret; if (out_len < sizeof resp) @@ -84,7 +84,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&file->mutex); + mutex_lock(&file->ucontext_lock); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; + goto err; + } if (file->ucontext) { ret = -EINVAL; @@ -110,12 +116,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->cg_obj = cg_obj; /* ufile is required when some objects are released */ ucontext->ufile = file; - uverbs_initialize_ucontext(ucontext); rcu_read_lock(); ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); rcu_read_unlock(); ucontext->closing = 0; + ucontext->cleanup_retryable = false; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING ucontext->umem_tree = RB_ROOT_CACHED; @@ -146,11 +152,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_file; } - file->ucontext = ucontext; - fd_install(resp.async_fd, filp); - mutex_unlock(&file->mutex); + /* + * Make sure that ib_uverbs_get_ucontext() sees the pointer update + * only after all writes to setup the ucontext have completed + */ + smp_store_release(&file->ucontext, ucontext); + + mutex_unlock(&file->ucontext_lock); return in_len; @@ -169,15 +179,16 @@ err_alloc: ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); err: - mutex_unlock(&file->mutex); + mutex_unlock(&file->ucontext_lock); return ret; } -static void copy_query_dev_fields(struct ib_uverbs_file *file, - struct ib_device *ib_dev, +static void copy_query_dev_fields(struct ib_ucontext *ucontext, struct ib_uverbs_query_device_resp *resp, struct ib_device_attr *attr) { + struct ib_device *ib_dev = ucontext->device; + resp->fw_ver = attr->fw_ver; resp->node_guid = ib_dev->node_guid; resp->sys_image_guid = attr->sys_image_guid; @@ -189,7 +200,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); - resp->max_sge = attr->max_sge; + resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; resp->max_cqe = attr->max_cqe; @@ -221,12 +232,16 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; + struct ib_ucontext *ucontext; + + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); if (out_len < sizeof resp) return -ENOSPC; @@ -235,7 +250,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return -EFAULT; memset(&resp, 0, sizeof resp); - copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); + copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -243,8 +258,28 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return in_len; } +/* + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the + * PortInfo CapabilityMask, but was extended with unique bits. + */ +static u32 make_port_cap_flags(const struct ib_port_attr *attr) +{ + u32 res; + + /* All IBA CapabilityMask bits are passed through here, except bit 26, + * which is overridden with IP_BASED_GIDS. This is due to a historical + * mistake in the implementation of IP_BASED_GIDS. Otherwise all other + * bits match the IBA definition across all kernel versions. + */ + res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; + + if (attr->ip_gids) + res |= IB_UVERBS_PCF_IP_BASED_GIDS; + + return res; +} + ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -252,6 +287,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, struct ib_uverbs_query_port_resp resp; struct ib_port_attr attr; int ret; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; + + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; if (out_len < sizeof resp) return -ENOSPC; @@ -269,12 +311,15 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.max_mtu = attr.max_mtu; resp.active_mtu = attr.active_mtu; resp.gid_tbl_len = attr.gid_tbl_len; - resp.port_cap_flags = attr.port_cap_flags; + resp.port_cap_flags = make_port_cap_flags(&attr); resp.max_msg_sz = attr.max_msg_sz; resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; + if (rdma_is_grh_required(ib_dev, cmd.port_num)) + resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); @@ -300,7 +345,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, } ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -310,6 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -322,11 +367,11 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); + pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata); if (IS_ERR(pd)) { ret = PTR_ERR(pd); goto err; @@ -348,9 +393,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, goto err_copy; } - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: ib_dealloc_pd(pd); @@ -361,25 +404,16 @@ err: } ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; - struct ib_uobject *uobj; - int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file, + in_len); } struct xrcd_table_entry { @@ -468,7 +502,6 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev, } ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -481,6 +514,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, struct inode *inode = NULL; int ret = 0; int new_xrcd = 0; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -517,15 +551,15 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, - file->ucontext); + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file, + &ib_dev); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_tree_mutex_unlock; } if (!xrcd) { - xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); + xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; @@ -564,9 +598,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, mutex_unlock(&file->device->xrcd_tree_mutex); - uobj_alloc_commit(&obj->uobject); - - return in_len; + return uobj_alloc_commit(&obj->uobject, in_len); err_copy: if (inode) { @@ -591,32 +623,25 @@ err_tree_mutex_unlock: } ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_close_xrcd cmd; - struct ib_uobject *uobj; - int ret = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file, + in_len); } -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, enum rdma_remove_reason why) { struct inode *inode; int ret; + struct ib_uverbs_device *dev = uobject->context->ufile->device; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) @@ -624,16 +649,18 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, ret = ib_dealloc_xrcd(xrcd); - if (why == RDMA_REMOVE_DESTROY && ret) + if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); - else if (inode) + return ret; + } + + if (inode) xrcd_table_delete(dev, inode); return ret; } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -644,6 +671,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, struct ib_pd *pd; struct ib_mr *mr; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -663,11 +691,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err_free; @@ -711,9 +739,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: ib_dereg_mr(mr); @@ -727,7 +753,6 @@ err_free: } ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -759,8 +784,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, - file->ucontext); + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -778,7 +802,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + file); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -819,29 +844,19 @@ put_uobjs: } ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dereg_mr cmd; - struct ib_uobject *uobj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file, + in_len); } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -852,6 +867,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, struct ib_mw *mw; struct ib_udata udata; int ret; + struct ib_device *ib_dev; if (out_len < sizeof(resp)) return -ENOSPC; @@ -859,11 +875,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err_free; @@ -897,9 +913,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, } uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: uverbs_dealloc_mw(mw); @@ -911,28 +925,19 @@ err_free: } ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_mw cmd; - struct ib_uobject *uobj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file, + in_len); } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -940,6 +945,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, struct ib_uverbs_create_comp_channel_resp resp; struct ib_uobject *uobj; struct ib_uverbs_completion_event_file *ev_file; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -947,14 +953,14 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); resp.fd = uobj->id; ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, - uobj_file.uobj); + uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { @@ -962,12 +968,10 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return -EFAULT; } - uobj_alloc_commit(uobj); - return in_len; + return uobj_alloc_commit(uobj, in_len); } static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw, struct ib_uverbs_ex_create_cq *cmd, @@ -985,21 +989,23 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, int ret; struct ib_uverbs_ex_create_cq_resp resp; struct ib_cq_init_attr attr = {}; - - if (!ib_dev->create_cq) - return ERR_PTR(-EOPNOTSUPP); + struct ib_device *ib_dev; if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, - file->ucontext); + obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file, + &ib_dev); if (IS_ERR(obj)) return obj; + if (!ib_dev->create_cq) { + ret = -EOPNOTSUPP; + goto err; + } + if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, - file->ucontext); + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file); if (IS_ERR(ev_file)) { ret = PTR_ERR(ev_file); goto err; @@ -1007,7 +1013,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, } obj->uobject.user_handle = cmd->user_handle; - obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); @@ -1019,7 +1024,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) attr.flags = cmd->flags; - cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); + cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1047,7 +1052,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (ret) goto err_cb; - uobj_alloc_commit(&obj->uobject); + ret = uobj_alloc_commit(&obj->uobject, 0); + if (ret) + return ERR_PTR(ret); return obj; err_cb: @@ -1075,7 +1082,6 @@ static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1106,7 +1112,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; - obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, + obj = create_cq(file, &ucore, &uhw, &cmd_ex, offsetof(typeof(cmd_ex), comp_channel) + sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, NULL); @@ -1129,7 +1135,6 @@ static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, } int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -1155,7 +1160,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, sizeof(resp.response_length))) return -ENOSPC; - obj = create_cq(file, ib_dev, ucore, uhw, &cmd, + obj = create_cq(file, ucore, uhw, &cmd, min(ucore->inlen, sizeof(cmd)), ib_uverbs_ex_create_cq_cb, NULL); @@ -1163,7 +1168,6 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1181,7 +1185,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1231,7 +1235,6 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, } ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1246,7 +1249,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1262,7 +1265,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (!ret) break; - ret = copy_wc_to_user(ib_dev, data_ptr, &wc); + ret = copy_wc_to_user(cq->device, data_ptr, &wc); if (ret) goto out_put; @@ -1283,7 +1286,6 @@ out_put: } ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1293,7 +1295,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; @@ -1306,45 +1308,28 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj; - struct ib_cq *cq; struct ib_ucq_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, - file->ucontext); + uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. - */ - uverbs_uobject_get(uobj); - cq = uobj->object; - obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - + obj = container_of(uobj, struct ib_ucq_object, uobject); memset(&resp, 0, sizeof(resp)); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } - resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; - uverbs_uobject_put(uobj); + uobj_put_destroy(uobj); + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -1375,12 +1360,13 @@ static int create_qp(struct ib_uverbs_file *file, int ret; struct ib_rwq_ind_table *ind_tbl = NULL; bool has_sq = true; + struct ib_device *ib_dev; if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, - file->ucontext); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); obj->uxrcd = NULL; @@ -1390,9 +1376,9 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, - cmd->rwq_ind_tbl_handle, - file->ucontext); + ind_tbl = uobj_get_obj_read(rwq_ind_table, + UVERBS_OBJECT_RWQ_IND_TBL, + cmd->rwq_ind_tbl_handle, file); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1418,7 +1404,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, - file->ucontext); + file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; @@ -1437,8 +1423,8 @@ static int create_qp(struct ib_uverbs_file *file, cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle, - file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, + cmd->srq_handle, file); if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; goto err_put; @@ -1447,8 +1433,9 @@ static int create_qp(struct ib_uverbs_file *file, if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle, - file->ucontext); + rcq = uobj_get_obj_read( + cq, UVERBS_OBJECT_CQ, + cmd->recv_cq_handle, file); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1458,11 +1445,12 @@ static int create_qp(struct ib_uverbs_file *file, } if (has_sq) - scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, - file->ucontext); + scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->send_cq_handle, file); if (!ind_tbl) rcq = rcq ?: scq; - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, + file); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1602,9 +1590,7 @@ static int create_qp(struct ib_uverbs_file *file, if (ind_tbl) uobj_put_obj_read(ind_tbl); - uobj_alloc_commit(&obj->uevent.uobject); - - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_cb: ib_destroy_qp(qp); @@ -1637,7 +1623,6 @@ static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1698,7 +1683,6 @@ static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, } int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -1735,7 +1719,6 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, } ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_open_qp cmd; @@ -1747,6 +1730,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, struct ib_qp *qp; struct ib_qp_open_attr attr; int ret; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -1759,13 +1743,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, - file->ucontext); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); - xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, - file->ucontext); + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err_put; @@ -1809,10 +1792,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, qp->uobject = &obj->uevent.uobject; uobj_put_read(xrcd_uobj); - - uobj_alloc_commit(&obj->uevent.uobject); - - return in_len; + return uobj_alloc_commit(&obj->uevent.uobject, in_len); err_destroy: ib_destroy_qp(qp); @@ -1846,7 +1826,6 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr, } ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1867,7 +1846,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) { ret = -EINVAL; goto out; @@ -1968,11 +1947,11 @@ static int modify_qp(struct ib_uverbs_file *file, struct ib_qp *qp; int ret; - attr = kmalloc(sizeof *attr, GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file); if (!qp) { ret = -EINVAL; goto out; @@ -2098,7 +2077,6 @@ out: } ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2125,7 +2103,6 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, } int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -2161,7 +2138,6 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2169,33 +2145,19 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_uverbs_destroy_qp_resp resp; struct ib_uobject *uobj; struct ib_uqp_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - memset(&resp, 0, sizeof resp); - - uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, - file->ucontext); + uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. - */ - uverbs_uobject_get(uobj); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } - + memset(&resp, 0, sizeof(resp)); resp.events_reported = obj->uevent.events_reported; - uverbs_uobject_put(uobj); + + uobj_put_destroy(uobj); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) return -EFAULT; @@ -2214,14 +2176,14 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge) } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; struct ib_uverbs_send_wr *user_wr; - struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_send_wr *wr = NULL, *last, *next; + const struct ib_send_wr *bad_wr; struct ib_qp *qp; int i, sg_ind; int is_ud; @@ -2242,7 +2204,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) goto out; @@ -2278,8 +2240,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah, - file->ucontext); + ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, + user_wr->wr.ud.ah, file); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2494,13 +2456,13 @@ err: } ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_recv_wr *wr, *next; + const struct ib_recv_wr *bad_wr; struct ib_qp *qp; ssize_t ret = -EINVAL; @@ -2513,7 +2475,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) goto out; @@ -2543,13 +2505,13 @@ out: } ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_recv_wr *wr, *next; + const struct ib_recv_wr *bad_wr; struct ib_srq *srq; ssize_t ret = -EINVAL; @@ -2562,12 +2524,13 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) goto out; resp.bad_wr = 0; - ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + ret = srq->device->post_srq_recv ? + srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP; uobj_put_obj_read(srq); @@ -2592,7 +2555,6 @@ out: } ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2601,9 +2563,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; struct ib_ah *ah; - struct rdma_ah_attr attr; + struct rdma_ah_attr attr = {}; int ret; struct ib_udata udata; + struct ib_device *ib_dev; if (out_len < sizeof resp) return -ENOSPC; @@ -2611,19 +2574,21 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) - return -EINVAL; - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), u64_to_user_ptr(cmd.response) + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) { + ret = -EINVAL; + goto err; + } + + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { ret = -EINVAL; goto err; @@ -2665,9 +2630,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, } uobj_put_obj_read(pd); - uobj_alloc_commit(uobj); - - return in_len; + return uobj_alloc_commit(uobj, in_len); err_copy: rdma_destroy_ah(ah); @@ -2681,27 +2644,18 @@ err: } ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; - struct ib_uobject *uobj; - int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret ?: in_len; + return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file, + in_len); } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2714,7 +2668,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) return -EINVAL; @@ -2751,7 +2705,6 @@ out_put: } ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2765,7 +2718,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) return -EINVAL; @@ -2810,29 +2763,27 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) resources = kzalloc(sizeof(*resources), GFP_KERNEL); if (!resources) - goto err_res; + return NULL; + + if (!num_specs) + goto out; resources->counters = kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); - - if (!resources->counters) - goto err_cnt; - resources->collection = kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL); - if (!resources->collection) - goto err_collection; + if (!resources->counters || !resources->collection) + goto err; +out: resources->max = num_specs; - return resources; -err_collection: +err: kfree(resources->counters); -err_cnt: kfree(resources); -err_res: + return NULL; } @@ -2840,6 +2791,9 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) { unsigned int i; + if (!uflow_res) + return; + for (i = 0; i < uflow_res->collection_num; i++) atomic_dec(&uflow_res->collection[i]->usecnt); @@ -2875,7 +2829,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res, uflow_res->num++; } -static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, +static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -2904,7 +2858,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, ib_spec->action.act = uobj_get_obj_read(flow_action, UVERBS_OBJECT_FLOW_ACTION, kern_spec->action.handle, - ucontext); + ufile); if (!ib_spec->action.act) return -EINVAL; ib_spec->action.size = @@ -2922,7 +2876,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, uobj_get_obj_read(counters, UVERBS_OBJECT_COUNTERS, kern_spec->flow_count.handle, - ucontext); + ufile); if (!ib_spec->flow_count.counters) return -EINVAL; ib_spec->flow_count.size = @@ -3091,9 +3045,6 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, void *kern_spec_mask; void *kern_spec_val; - if (kern_spec->reserved) - return -EINVAL; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); kern_spec_val = (void *)kern_spec + @@ -3106,7 +3057,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, kern_filter_sz, ib_spec); } -static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, +static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -3115,14 +3066,13 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, return -EINVAL; if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) - return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec, + return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec, uflow_res); else return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); } int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3136,6 +3086,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, struct ib_wq_init_attr wq_init_attr = {}; size_t required_cmd_sz; size_t required_resp_len; + struct ib_device *ib_dev; required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); @@ -3158,18 +3109,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, - file->ucontext); + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3223,8 +3174,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, uobj_put_obj_read(pd); uobj_put_obj_read(cq); - uobj_alloc_commit(&obj->uevent.uobject); - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_copy: ib_destroy_wq(wq); @@ -3239,7 +3189,6 @@ err_uobj: } int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3273,29 +3222,19 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, - file->ucontext); + uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. - */ - uverbs_uobject_get(uobj); - - ret = uobj_remove_commit(uobj); resp.events_reported = obj->uevent.events_reported; - uverbs_uobject_put(uobj); - if (ret) - return ret; + + uobj_put_destroy(uobj); return ib_copy_to_udata(ucore, &resp, resp.response_length); } int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3324,7 +3263,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file); if (!wq) return -EINVAL; @@ -3345,7 +3284,6 @@ out: } int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3363,6 +3301,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, u32 expected_in_size; size_t required_cmd_sz_header; size_t required_resp_len; + struct ib_device *ib_dev; required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); @@ -3418,8 +3357,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs], - file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, + wqs_handles[num_read_wqs], file); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3428,7 +3367,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto put_wqs; @@ -3472,8 +3411,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); - uobj_alloc_commit(uobj); - return 0; + return uobj_alloc_commit(uobj, 0); err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); @@ -3489,12 +3427,10 @@ err_free: } int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - struct ib_uobject *uobj; int ret; size_t required_cmd_sz; @@ -3515,16 +3451,11 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - return uobj_remove_commit(uobj); + return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL, + cmd.ind_tbl_handle, file, 0); } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -3541,6 +3472,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, int err = 0; void *ib_spec; int i; + struct ib_device *ib_dev; if (ucore->inlen < sizeof(cmd)) return -EINVAL; @@ -3596,13 +3528,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext); + uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto err_free_attr; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); if (!qp) { err = -EINVAL; goto err_uobj; @@ -3613,6 +3545,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_put; } + if (!qp->device->create_flow) { + err = -EOPNOTSUPP; + goto err_put; + } + flow_attr = kzalloc(struct_size(flow_attr, flows, cmd.flow_attr.num_of_specs), GFP_KERNEL); if (!flow_attr) { @@ -3639,7 +3576,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, cmd.flow_attr.size >= kern_spec->size; i++) { err = kern_spec_to_ib_spec( - file->ucontext, (struct ib_uverbs_flow_spec *)kern_spec, + file, (struct ib_uverbs_flow_spec *)kern_spec, ib_spec, uflow_res); if (err) goto err_free; @@ -3666,6 +3603,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, } atomic_inc(&qp->usecnt); flow_id->qp = qp; + flow_id->device = qp->device; flow_id->uobject = uobj; uobj->object = flow_id; uflow = container_of(uobj, typeof(*uflow), uobject); @@ -3680,13 +3618,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_copy; uobj_put_obj_read(qp); - uobj_alloc_commit(uobj); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return 0; + return uobj_alloc_commit(uobj, 0); err_copy: - ib_destroy_flow(flow_id); + if (!qp->device->destroy_flow(flow_id)) + atomic_dec(&qp->usecnt); err_free: ib_uverbs_flow_resources_free(uflow_res); err_free_flow_attr: @@ -3702,12 +3640,10 @@ err_free_attr: } int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_destroy_flow cmd; - struct ib_uobject *uobj; int ret; if (ucore->inlen < sizeof(cmd)) @@ -3720,17 +3656,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle, - file->ucontext); - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - ret = uobj_remove_commit(uobj); - return ret; + return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file, + 0); } static int __uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { @@ -3741,9 +3671,10 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; + struct ib_device *ib_dev; - obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, - file->ucontext); + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file, + &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3752,7 +3683,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, - file->ucontext); + file); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; @@ -3769,15 +3700,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, } if (ib_srq_has_cq(cmd->srq_type)) { - attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle, - file->ucontext); + attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->cq_handle, file); if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file); if (!pd) { ret = -EINVAL; goto err_put_cq; @@ -3842,9 +3773,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, uobj_put_obj_read(attr.ext.cq); uobj_put_obj_read(pd); - uobj_alloc_commit(&obj->uevent.uobject); - - return 0; + return uobj_alloc_commit(&obj->uevent.uobject, 0); err_copy: ib_destroy_srq(srq); @@ -3868,7 +3797,6 @@ err: } ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3898,7 +3826,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); + ret = __uverbs_create_xsrq(file, &xcmd, &udata); if (ret) return ret; @@ -3906,7 +3834,6 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_xsrq cmd; @@ -3925,7 +3852,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); + ret = __uverbs_create_xsrq(file, &cmd, &udata); if (ret) return ret; @@ -3933,7 +3860,6 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3949,7 +3875,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) return -EINVAL; @@ -3964,7 +3890,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3980,7 +3905,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (!srq) return -EINVAL; @@ -4004,7 +3929,6 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -4012,32 +3936,20 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; struct ib_uevent_object *obj; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, - file->ucontext); + uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file); if (IS_ERR(uobj)) return PTR_ERR(uobj); obj = container_of(uobj, struct ib_uevent_object, uobject); - /* - * Make sure we don't free the memory in remove_commit as we still - * needs the uobject memory to create the response. - */ - uverbs_uobject_get(uobj); - memset(&resp, 0, sizeof(resp)); - - ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } resp.events_reported = obj->events_reported; - uverbs_uobject_put(uobj); + + uobj_put_destroy(uobj); + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) return -EFAULT; @@ -4045,15 +3957,21 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, } int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_query_device_resp resp = { {0} }; struct ib_uverbs_ex_query_device cmd; struct ib_device_attr attr = {0}; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; int err; + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + if (!ib_dev->query_device) return -EOPNOTSUPP; @@ -4079,7 +3997,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (err) return err; - copy_query_dev_fields(file, ib_dev, &resp.base, &attr); + copy_query_dev_fields(ucontext, &resp.base, &attr); if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) goto end; @@ -4166,7 +4084,6 @@ end: } int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -4196,7 +4113,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (cmd.attr_mask > IB_CQ_MODERATE) return -EOPNOTSUPP; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); if (!cq) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8d32c4ae368c..1a6b229e3db3 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -35,6 +35,103 @@ #include "rdma_core.h" #include "uverbs.h" +struct bundle_alloc_head { + struct bundle_alloc_head *next; + u8 data[]; +}; + +struct bundle_priv { + /* Must be first */ + struct bundle_alloc_head alloc_head; + struct bundle_alloc_head *allocated_mem; + size_t internal_avail; + size_t internal_used; + + struct radix_tree_root *radix; + const struct uverbs_api_ioctl_method *method_elm; + void __rcu **radix_slots; + unsigned long radix_slots_len; + u32 method_key; + + struct ib_uverbs_attr __user *user_attrs; + struct ib_uverbs_attr *uattrs; + + DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); + + /* + * Must be last. bundle ends in a flex array which overlaps + * internal_buffer. + */ + struct uverbs_attr_bundle bundle; + u64 internal_buffer[32]; +}; + +/* + * Each method has an absolute minimum amount of memory it needs to allocate, + * precompute that amount and determine if the onstack memory can be used or + * if allocation is need. + */ +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs) +{ + struct bundle_priv *pbundle; + size_t bundle_size = + offsetof(struct bundle_priv, internal_buffer) + + sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len + + sizeof(*pbundle->uattrs) * num_attrs; + + method_elm->use_stack = bundle_size <= sizeof(*pbundle); + method_elm->bundle_size = + ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer)); + + /* Do not want order-2 allocations for this. */ + WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE); +} + +/** + * uverbs_alloc() - Quickly allocate memory for use with a bundle + * @bundle: The bundle + * @size: Number of bytes to allocate + * @flags: Allocator flags + * + * The bundle allocator is intended for allocations that are connected with + * processing the system call related to the bundle. The allocated memory is + * always freed once the system call completes, and cannot be freed any other + * way. + * + * This tries to use a small pool of pre-allocated memory for performance. + */ +__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, + gfp_t flags) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + size_t new_used; + void *res; + + if (check_add_overflow(size, pbundle->internal_used, &new_used)) + return ERR_PTR(-EOVERFLOW); + + if (new_used > pbundle->internal_avail) { + struct bundle_alloc_head *buf; + + buf = kvmalloc(struct_size(buf, data, size), flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->next = pbundle->allocated_mem; + pbundle->allocated_mem = buf; + return buf->data; + } + + res = (void *)pbundle->internal_buffer + pbundle->internal_used; + pbundle->internal_used = + ALIGN(new_used, sizeof(*pbundle->internal_buffer)); + if (flags & __GFP_ZERO) + memset(res, 0, size); + return res; +} +EXPORT_SYMBOL(_uverbs_alloc); + static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, u16 len) { @@ -46,45 +143,24 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } -static int uverbs_process_attr(struct ib_device *ibdev, - struct ib_ucontext *ucontext, - const struct ib_uverbs_attr *uattr, - u16 attr_id, - const struct uverbs_attr_spec_hash *attr_spec_bucket, - struct uverbs_attr_bundle_hash *attr_bundle_h, - struct ib_uverbs_attr __user *uattr_ptr) +static int uverbs_process_attr(struct bundle_priv *pbundle, + const struct uverbs_api_attr *attr_uapi, + struct ib_uverbs_attr *uattr, u32 attr_bkey) { - const struct uverbs_attr_spec *spec; - const struct uverbs_attr_spec *val_spec; - struct uverbs_attr *e; - const struct uverbs_object_spec *object; + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey]; + const struct uverbs_attr_spec *val_spec = spec; struct uverbs_obj_attr *o_attr; - struct uverbs_attr *elements = attr_bundle_h->attrs; - - if (attr_id >= attr_spec_bucket->num_attrs) { - if (uattr->flags & UVERBS_ATTR_F_MANDATORY) - return -EINVAL; - else - return 0; - } - - if (test_bit(attr_id, attr_bundle_h->valid_bitmap)) - return -EINVAL; - - spec = &attr_spec_bucket->attrs[attr_id]; - val_spec = spec; - e = &elements[attr_id]; - e->uattr = uattr_ptr; switch (spec->type) { case UVERBS_ATTR_TYPE_ENUM_IN: - if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems) + if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems) return -EOPNOTSUPP; if (uattr->attr_data.enum_data.reserved) return -EINVAL; - val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id]; + val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id]; /* Currently we only support PTR_IN based enums */ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN) @@ -98,64 +174,75 @@ static int uverbs_process_attr(struct ib_device *ibdev, * longer struct will fail here if used with an old kernel and * non-zero content, making ABI compat/discovery simpler. */ - if (uattr->len > val_spec->ptr.len && - val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO && - !uverbs_is_attr_cleared(uattr, val_spec->ptr.len)) + if (uattr->len > val_spec->u.ptr.len && + val_spec->zero_trailing && + !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len)) return -EOPNOTSUPP; /* fall through */ case UVERBS_ATTR_TYPE_PTR_OUT: - if (uattr->len < val_spec->ptr.min_len || - (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) && - uattr->len > val_spec->ptr.len)) + if (uattr->len < val_spec->u.ptr.min_len || + (!val_spec->zero_trailing && + uattr->len > val_spec->u.ptr.len)) return -EINVAL; if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN && uattr->attr_data.reserved) return -EINVAL; - e->ptr_attr.data = uattr->data; + e->ptr_attr.uattr_idx = uattr - pbundle->uattrs; e->ptr_attr.len = uattr->len; - e->ptr_attr.flags = uattr->flags; + + if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { + void *p; + + p = uverbs_alloc(&pbundle->bundle, uattr->len); + if (IS_ERR(p)) + return PTR_ERR(p); + + e->ptr_attr.ptr = p; + + if (copy_from_user(p, u64_to_user_ptr(uattr->data), + uattr->len)) + return -EFAULT; + } else { + e->ptr_attr.data = uattr->data; + } break; case UVERBS_ATTR_TYPE_IDR: - if (uattr->data >> 32) - return -EINVAL; - /* fall through */ case UVERBS_ATTR_TYPE_FD: if (uattr->attr_data.reserved) return -EINVAL; - if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX) + if (uattr->len != 0) return -EINVAL; o_attr = &e->obj_attr; - object = uverbs_get_object(ibdev, spec->obj.obj_type); - if (!object) - return -EINVAL; - o_attr->type = object->type_attrs; - - o_attr->id = (int)uattr->data; - o_attr->uobject = uverbs_get_uobject_from_context( - o_attr->type, - ucontext, - spec->obj.access, - o_attr->id); + o_attr->attr_elm = attr_uapi; + /* + * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and + * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64 + * here without caring about truncation as we know that the + * IDR implementation today rejects negative IDs + */ + o_attr->uobject = uverbs_get_uobject_from_file( + spec->u.obj.obj_type, + pbundle->bundle.ufile, + spec->u.obj.access, + uattr->data_s64); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); + __set_bit(attr_bkey, pbundle->uobj_finalize); - if (spec->obj.access == UVERBS_ACCESS_NEW) { - u64 id = o_attr->uobject->id; + if (spec->u.obj.access == UVERBS_ACCESS_NEW) { + unsigned int uattr_idx = uattr - pbundle->uattrs; + s64 id = o_attr->uobject->id; /* Copy the allocated id to the user-space */ - if (put_user(id, &e->uattr->data)) { - uverbs_finalize_object(o_attr->uobject, - UVERBS_ACCESS_NEW, - false); + if (put_user(id, &pbundle->user_attrs[uattr_idx].data)) return -EFAULT; - } } break; @@ -163,220 +250,225 @@ static int uverbs_process_attr(struct ib_device *ibdev, return -EOPNOTSUPP; } - set_bit(attr_id, attr_bundle_h->valid_bitmap); return 0; } -static int uverbs_uattrs_process(struct ib_device *ibdev, - struct ib_ucontext *ucontext, - const struct ib_uverbs_attr *uattrs, - size_t num_uattrs, - const struct uverbs_method_spec *method, - struct uverbs_attr_bundle *attr_bundle, - struct ib_uverbs_attr __user *uattr_ptr) +/* + * We search the radix tree with the method prefix and now we want to fast + * search the suffix bits to get a particular attribute pointer. It is not + * totally clear to me if this breaks the radix tree encasulation or not, but + * it uses the iter data to determine if the method iter points at the same + * chunk that will store the attribute, if so it just derefs it directly. By + * construction in most kernel configs the method and attrs will all fit in a + * single radix chunk, so in most cases this will have no search. Other cases + * this falls back to a full search. + */ +static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle, + u32 attr_key) { - size_t i; - int ret = 0; - int num_given_buckets = 0; - - for (i = 0; i < num_uattrs; i++) { - const struct ib_uverbs_attr *uattr = &uattrs[i]; - u16 attr_id = uattr->attr_id; - struct uverbs_attr_spec_hash *attr_spec_bucket; - - ret = uverbs_ns_idx(&attr_id, method->num_buckets); - if (ret < 0) { - if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); - return ret; - } - continue; - } + void __rcu **slot; - /* - * ret is the found ns, so increase num_given_buckets if - * necessary. - */ - if (ret >= num_given_buckets) - num_given_buckets = ret + 1; - - attr_spec_bucket = method->attr_buckets[ret]; - ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id, - attr_spec_bucket, &attr_bundle->hash[ret], - uattr_ptr++); - if (ret) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); - return ret; - } + if (likely(attr_key < pbundle->radix_slots_len)) { + void *entry; + + slot = pbundle->radix_slots + attr_key; + entry = rcu_dereference_raw(*slot); + if (likely(!radix_tree_is_internal_node(entry) && entry)) + return slot; } - return num_given_buckets; + return radix_tree_lookup_slot(pbundle->radix, + pbundle->method_key | attr_key); } -static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec, - struct uverbs_attr_bundle *attr_bundle) +static int uverbs_set_attr(struct bundle_priv *pbundle, + struct ib_uverbs_attr *uattr) { - unsigned int i; - - for (i = 0; i < attr_bundle->num_buckets; i++) { - struct uverbs_attr_spec_hash *attr_spec_bucket = - method_spec->attr_buckets[i]; + u32 attr_key = uapi_key_attr(uattr->attr_id); + u32 attr_bkey = uapi_bkey_attr(attr_key); + const struct uverbs_api_attr *attr; + void __rcu **slot; + int ret; - if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, - attr_bundle->hash[i].valid_bitmap, - attr_spec_bucket->num_attrs)) - return -EINVAL; + slot = uapi_get_attr_for_method(pbundle, attr_key); + if (!slot) { + /* + * Kernel does not support the attribute but user-space says it + * is mandatory + */ + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return -EPROTONOSUPPORT; + return 0; } + attr = srcu_dereference( + *slot, &pbundle->bundle.ufile->device->disassociate_srcu); - for (; i < method_spec->num_buckets; i++) { - struct uverbs_attr_spec_hash *attr_spec_bucket = - method_spec->attr_buckets[i]; + /* Reject duplicate attributes from user-space */ + if (test_bit(attr_bkey, pbundle->bundle.attr_present)) + return -EINVAL; - if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask, - attr_spec_bucket->num_attrs)) - return -EINVAL; - } + ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey); + if (ret) + return ret; + + __set_bit(attr_bkey, pbundle->bundle.attr_present); return 0; } -static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, - const struct ib_uverbs_attr *uattrs, - size_t num_uattrs, - struct ib_device *ibdev, - struct ib_uverbs_file *ufile, - const struct uverbs_method_spec *method_spec, - struct uverbs_attr_bundle *attr_bundle) +static int ib_uverbs_run_method(struct bundle_priv *pbundle, + unsigned int num_attrs) { + int (*handler)(struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); + unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; + unsigned int i; int ret; - int finalize_ret; - int num_given_buckets; - num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, - num_uattrs, method_spec, - attr_bundle, uattr_ptr); - if (num_given_buckets <= 0) + /* See uverbs_disassociate_api() */ + handler = srcu_dereference( + pbundle->method_elm->handler, + &pbundle->bundle.ufile->device->disassociate_srcu); + if (!handler) + return -EIO; + + pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size); + if (IS_ERR(pbundle->uattrs)) + return PTR_ERR(pbundle->uattrs); + if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size)) + return -EFAULT; + + for (i = 0; i != num_attrs; i++) { + ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]); + if (unlikely(ret)) + return ret; + } + + /* User space did not provide all the mandatory attributes */ + if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory, + pbundle->bundle.attr_present, + pbundle->method_elm->key_bitmap_len))) return -EINVAL; - attr_bundle->num_buckets = num_given_buckets; - ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle); - if (ret) - goto cleanup; + if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { + struct uverbs_obj_attr *destroy_attr = + &pbundle->bundle.attrs[destroy_bkey].obj_attr; - ret = method_spec->handler(ibdev, ufile, attr_bundle); -cleanup: - finalize_ret = uverbs_finalize_objects(attr_bundle, - method_spec->attr_buckets, - attr_bundle->num_buckets, - !ret); + ret = uobj_destroy(destroy_attr->uobject); + if (ret) + return ret; + __clear_bit(destroy_bkey, pbundle->uobj_finalize); - return ret ? ret : finalize_ret; -} + ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + uobj_put_destroy(destroy_attr->uobject); + } else { + ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + } -#define UVERBS_OPTIMIZE_USING_STACK_SZ 256 -static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct ib_uverbs_ioctl_hdr *hdr, - void __user *buf) -{ - const struct uverbs_object_spec *object_spec; - const struct uverbs_method_spec *method_spec; - long err = 0; - unsigned int i; - struct { - struct ib_uverbs_attr *uattrs; - struct uverbs_attr_bundle *uverbs_attr_bundle; - } *ctx = NULL; - struct uverbs_attr *curr_attr; - unsigned long *curr_bitmap; - size_t ctx_size; - uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; - - if (hdr->driver_id != ib_dev->driver_id) + /* + * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can + * not invoke the method because the request is not supported. No + * other cases should return this code. + */ + if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT)) return -EINVAL; - object_spec = uverbs_get_object(ib_dev, hdr->object_id); - if (!object_spec) - return -EPROTONOSUPPORT; + return ret; +} - method_spec = uverbs_get_method(object_spec, hdr->method_id); - if (!method_spec) - return -EPROTONOSUPPORT; +static int bundle_destroy(struct bundle_priv *pbundle, bool commit) +{ + unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; + struct bundle_alloc_head *memblock; + unsigned int i; + int ret = 0; - if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) - return -EINVAL; + i = -1; + while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, + i + 1)) < key_bitmap_len) { + struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + int current_ret; + + current_ret = uverbs_finalize_object( + attr->obj_attr.uobject, + attr->obj_attr.attr_elm->spec.u.obj.access, commit); + if (!ret) + ret = current_ret; + } - ctx_size = sizeof(*ctx) + - sizeof(struct uverbs_attr_bundle) + - sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + - sizeof(*ctx->uattrs) * hdr->num_attrs + - sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) * - method_spec->num_child_attrs + - sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) * - (method_spec->num_child_attrs / BITS_PER_LONG + - method_spec->num_buckets); - - if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ) - ctx = (void *)data; - if (!ctx) - ctx = kmalloc(ctx_size, GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx); - ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) + - (sizeof(ctx->uverbs_attr_bundle->hash[0]) * - method_spec->num_buckets); - curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); - curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs); + for (memblock = pbundle->allocated_mem; memblock;) { + struct bundle_alloc_head *tmp = memblock; - /* - * We just fill the pointers and num_attrs here. The data itself will be - * filled at a later stage (uverbs_process_attr) - */ - for (i = 0; i < method_spec->num_buckets; i++) { - unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; - - ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr; - curr_attr += curr_num_attrs; - ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs; - ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap; - bitmap_zero(curr_bitmap, curr_num_attrs); - curr_bitmap += BITS_TO_LONGS(curr_num_attrs); + memblock = memblock->next; + kvfree(tmp); } - err = copy_from_user(ctx->uattrs, buf, - sizeof(*ctx->uattrs) * hdr->num_attrs); - if (err) { - err = -EFAULT; - goto out; - } + return ret; +} - err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, - file, method_spec, ctx->uverbs_attr_bundle); +static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, + struct ib_uverbs_ioctl_hdr *hdr, + struct ib_uverbs_attr __user *user_attrs) +{ + const struct uverbs_api_ioctl_method *method_elm; + struct uverbs_api *uapi = ufile->device->uapi; + struct radix_tree_iter attrs_iter; + struct bundle_priv *pbundle; + struct bundle_priv onstack; + void __rcu **slot; + int destroy_ret; + int ret; - /* - * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can - * not invoke the method because the request is not supported. No - * other cases should return this code. - */ - if (unlikely(err == -EPROTONOSUPPORT)) { - WARN_ON_ONCE(err == -EPROTONOSUPPORT); - err = -EINVAL; + if (unlikely(hdr->driver_id != uapi->driver_id)) + return -EINVAL; + + slot = radix_tree_iter_lookup( + &uapi->radix, &attrs_iter, + uapi_key_obj(hdr->object_id) | + uapi_key_ioctl_method(hdr->method_id)); + if (unlikely(!slot)) + return -EPROTONOSUPPORT; + method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu); + + if (!method_elm->use_stack) { + pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL); + if (!pbundle) + return -ENOMEM; + pbundle->internal_avail = + method_elm->bundle_size - + offsetof(struct bundle_priv, internal_buffer); + pbundle->alloc_head.next = NULL; + pbundle->allocated_mem = &pbundle->alloc_head; + } else { + pbundle = &onstack; + pbundle->internal_avail = sizeof(pbundle->internal_buffer); + pbundle->allocated_mem = NULL; } -out: - if (ctx != (void *)data) - kfree(ctx); - return err; -} -#define IB_UVERBS_MAX_CMD_SZ 4096 + /* Space for the pbundle->bundle.attrs flex array */ + pbundle->method_elm = method_elm; + pbundle->method_key = attrs_iter.index; + pbundle->bundle.ufile = ufile; + pbundle->radix = &uapi->radix; + pbundle->radix_slots = slot; + pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter); + pbundle->user_attrs = user_attrs; + + pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len * + sizeof(*pbundle->bundle.attrs), + sizeof(*pbundle->internal_buffer)); + memset(pbundle->bundle.attr_present, 0, + sizeof(pbundle->bundle.attr_present)); + memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); + + ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); + destroy_ret = bundle_destroy(pbundle, ret == 0); + if (unlikely(destroy_ret && !ret)) + return destroy_ret; + + return ret; +} long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -384,39 +476,138 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct ib_uverbs_ioctl_hdr __user *user_hdr = (struct ib_uverbs_ioctl_hdr __user *)arg; struct ib_uverbs_ioctl_hdr hdr; - struct ib_device *ib_dev; int srcu_key; - long err; + int err; + + if (unlikely(cmd != RDMA_VERBS_IOCTL)) + return -ENOIOCTLCMD; + + err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + if (err) + return -EFAULT; + + if (hdr.length > PAGE_SIZE || + hdr.length != struct_size(&hdr, attrs, hdr.num_attrs)) + return -EINVAL; + + if (hdr.reserved1 || hdr.reserved2) + return -EPROTONOSUPPORT; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - err = -EIO; - goto out; + err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs); + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + return err; +} + +int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + const struct uverbs_attr *attr; + u64 flags; + + attr = uverbs_attr_get(attrs_bundle, idx); + /* Missing attribute means 0 flags */ + if (IS_ERR(attr)) { + *to = 0; + return 0; } - if (cmd == RDMA_VERBS_IOCTL) { - err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + /* + * New userspace code should use 8 bytes to pass flags, but we + * transparently support old userspaces that were using 4 bytes as + * well. + */ + if (attr->ptr_attr.len == 8) + flags = attr->ptr_attr.data; + else if (attr->ptr_attr.len == 4) + flags = *(u32 *)&attr->ptr_attr.data; + else + return -EINVAL; - if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ || - hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) { - err = -EINVAL; - goto out; - } + if (flags & ~allowed_bits) + return -EINVAL; - if (hdr.reserved1 || hdr.reserved2) { - err = -EPROTONOSUPPORT; - goto out; - } + *to = flags; + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags64); - err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, - (__user void *)arg + sizeof(hdr)); +int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + u64 flags; + int ret; + + ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits); + if (ret) + return ret; + + if (flags > U32_MAX) + return -EINVAL; + *to = flags; + + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags32); + +/* + * This is for ease of conversion. The purpose is to convert all drivers to + * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and + * attr == 1 is output. + */ +void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *uhw_in = + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN); + const struct uverbs_attr *uhw_out = + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT); + + if (!IS_ERR(uhw_in)) { + udata->inlen = uhw_in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(uhw_in)) + udata->inbuf = + &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx] + .data; + else + udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); } else { - err = -ENOIOCTLCMD; + udata->inbuf = NULL; + udata->inlen = 0; } -out: - srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return err; + if (!IS_ERR(uhw_out)) { + udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); + udata->outlen = uhw_out->ptr_attr.len; + } else { + udata->outbuf = NULL; + udata->outlen = 0; + } +} + +int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, + const void *from, size_t size) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + u16 flags; + size_t min_size; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + min_size = min_t(size_t, attr->ptr_attr.len, size); + if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) + return -EFAULT; + + flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | + UVERBS_ATTR_F_VALID_OUTPUT; + if (put_user(flags, + &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) + return -EFAULT; + + return 0; } +EXPORT_SYMBOL(uverbs_copy_to); diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c deleted file mode 100644 index 6ceb672c4d46..000000000000 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ /dev/null @@ -1,664 +0,0 @@ -/* - * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <rdma/uverbs_ioctl.h> -#include <rdma/rdma_user_ioctl.h> -#include <linux/bitops.h> -#include "uverbs.h" - -#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT) -#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT) -#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK) - -#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset, \ - buckets_offset) \ - for (tmpj = 0, \ - elem = (*(const void ***)((hashes)[tmpi] + \ - (buckets_offset)))[0]; \ - tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset)); \ - tmpj++) \ - if ((elem = ((*(const void ***)(hashes[tmpi] + \ - (buckets_offset)))[tmpj]))) - -/* - * Iterate all elements of a few @hashes. The number of given hashes is - * indicated by @num_hashes. The offset of the number of buckets in the hash is - * represented by @num_buckets_offset, while the offset of the buckets array in - * the hash structure is represented by @buckets_offset. tmpi and tmpj are two - * short (or int) based indices that are given by the user. tmpi iterates over - * the different hashes. @elem points the current element in the hashes[tmpi] - * bucket we are looping on. To be honest, @hashes representation isn't exactly - * a hash, but more a collection of elements. These elements' ids are treated - * in a hash like manner, where the first upper bits are the bucket number. - * These elements are later mapped into a perfect-hash. - */ -#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes, \ - num_buckets_offset, buckets_offset) \ - for (tmpi = 0; tmpi < (num_hashes); tmpi++) \ - _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\ - buckets_offset) - -#define get_elements_iterators_entry_above(iters, num_elements, elements, \ - num_objects_fld, objects_fld, bucket,\ - min_id) \ - get_elements_above_id((const void **)iters, num_elements, \ - (const void **)(elements), \ - offsetof(typeof(**elements), \ - num_objects_fld), \ - offsetof(typeof(**elements), objects_fld),\ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket, min_id) - -#define get_objects_above_id(iters, num_trees, trees, bucket, min_id) \ - get_elements_iterators_entry_above(iters, num_trees, trees, \ - num_objects, objects, bucket, min_id) - -#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\ - get_elements_iterators_entry_above(method_iters, num_iters, iters, \ - num_methods, methods, bucket, min_id) - -#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\ - get_elements_iterators_entry_above(attrs_iters, num_iters, iters, \ - num_attrs, attrs, bucket, min_id) - -/* - * get_elements_above_id get a few hashes represented by @elements and - * @num_elements. The hashes fields are described by @num_offset, @data_offset - * and @id_offset in the same way as required by for_each_element. The function - * returns an array of @iters, represents an array of elements in the hashes - * buckets, which their ids are the smallest ids in all hashes but are all - * larger than the id given by min_id. Elements are only added to the iters - * array if their id belongs to the bucket @bucket. The number of elements in - * the returned array is returned by the function. @min_id is also updated to - * reflect the new min_id of all elements in iters. - */ -static size_t get_elements_above_id(const void **iters, - unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset, - u16 bucket, - short *min_id) -{ - size_t num_iters = 0; - short min = SHRT_MAX; - const void *elem; - int i, j, last_stored = -1; - unsigned int equal_min = 0; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) != bucket) - continue; - - if (GET_ID(id) < *min_id || - (min != SHRT_MAX && GET_ID(id) > min)) - continue; - - /* - * We first iterate all hashes represented by @elements. When - * we do, we try to find an element @elem in the bucket @bucket - * which its id is min. Since we can't ensure the user sorted - * the elements in increasing order, we override this hash's - * minimal id element we found, if a new element with a smaller - * id was just found. - */ - iters[last_stored == i ? num_iters - 1 : num_iters++] = elem; - last_stored = i; - if (min == GET_ID(id)) - equal_min++; - else - equal_min = 1; - min = GET_ID(id); - } - - /* - * We only insert to our iters array an element, if its id is smaller - * than all previous ids. Therefore, the final iters array is sorted so - * that smaller ids are in the end of the array. - * Therefore, we need to clean the beginning of the array to make sure - * all ids of final elements are equal to min. - */ - memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min); - - *min_id = min; - return equal_min; -} - -#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ - objects_fld, bucket) \ - find_max_element_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld), \ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket) - -static short find_max_element_ns_id(unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset) -{ - short max_ns = SHRT_MIN; - const void *elem; - int i, j; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) > max_ns) - max_ns = GET_NS_ID(id); - } - - return max_ns; -} - -static short find_max_element_id(unsigned int num_elements, - const void **elements, - size_t num_offset, - size_t data_offset, - size_t id_offset, - u16 bucket) -{ - short max_id = SHRT_MIN; - const void *elem; - int i, j; - - for_each_element(elem, i, j, elements, num_elements, num_offset, - data_offset) { - u16 id = *(u16 *)(elem + id_offset); - - if (GET_NS_ID(id) == bucket && - GET_ID(id) > max_id) - max_id = GET_ID(id); - } - return max_id; -} - -#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ - objects_fld, bucket) \ - find_max_element_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld), \ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id),\ - bucket) - -#define find_max_element_ns_entry_id(num_elements, elements, \ - num_objects_fld, objects_fld) \ - find_max_element_ns_id(num_elements, (const void **)(elements), \ - offsetof(typeof(**elements), num_objects_fld),\ - offsetof(typeof(**elements), objects_fld), \ - offsetof(typeof(***(*elements)->objects_fld), id)) - -/* - * find_max_xxxx_ns_id gets a few elements. Each element is described by an id - * which its upper bits represents a namespace. It finds the max namespace. This - * could be used in order to know how many buckets do we need to allocate. If no - * elements exist, SHRT_MIN is returned. Namespace represents here different - * buckets. The common example is "common bucket" and "driver bucket". - * - * find_max_xxxx_id gets a few elements and a bucket. Each element is described - * by an id which its upper bits represent a namespace. It returns the max id - * which is contained in the same namespace defined in @bucket. This could be - * used in order to know how many elements do we need to allocate in the bucket. - * If no elements exist, SHRT_MIN is returned. - */ - -#define find_max_object_id(num_trees, trees, bucket) \ - find_max_element_entry_id(num_trees, trees, num_objects,\ - objects, bucket) -#define find_max_object_ns_id(num_trees, trees) \ - find_max_element_ns_entry_id(num_trees, trees, \ - num_objects, objects) - -#define find_max_method_id(num_iters, iters, bucket) \ - find_max_element_entry_id(num_iters, iters, num_methods,\ - methods, bucket) -#define find_max_method_ns_id(num_iters, iters) \ - find_max_element_ns_entry_id(num_iters, iters, \ - num_methods, methods) - -#define find_max_attr_id(num_iters, iters, bucket) \ - find_max_element_entry_id(num_iters, iters, num_attrs, \ - attrs, bucket) -#define find_max_attr_ns_id(num_iters, iters) \ - find_max_element_ns_entry_id(num_iters, iters, \ - num_attrs, attrs) - -static void free_method(struct uverbs_method_spec *method) -{ - unsigned int i; - - if (!method) - return; - - for (i = 0; i < method->num_buckets; i++) - kfree(method->attr_buckets[i]); - - kfree(method); -} - -#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \ - (attr)->type == UVERBS_ATTR_TYPE_FD) - -/* - * This function gets array of size @num_method_defs which contains pointers to - * method definitions @method_defs. The function allocates an - * uverbs_method_spec structure and initializes its number of buckets and the - * elements in buckets to the correct attributes. While doing that, it - * validates that there aren't conflicts between attributes of different - * method_defs. - */ -static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs, - size_t num_method_defs) -{ - int bucket_idx; - int max_attr_buckets = 0; - size_t num_attr_buckets = 0; - int res = 0; - struct uverbs_method_spec *method = NULL; - const struct uverbs_attr_def **attr_defs; - unsigned int num_of_singularities = 0; - - max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs); - if (max_attr_buckets >= 0) - num_attr_buckets = max_attr_buckets + 1; - - method = kzalloc(struct_size(method, attr_buckets, num_attr_buckets), - GFP_KERNEL); - if (!method) - return ERR_PTR(-ENOMEM); - - method->num_buckets = num_attr_buckets; - attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL); - if (!attr_defs) { - res = -ENOMEM; - goto free_method; - } - for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - int attr_max_bucket = 0; - struct uverbs_attr_spec_hash *hash = NULL; - - attr_max_bucket = find_max_attr_id(num_method_defs, method_defs, - bucket_idx); - if (attr_max_bucket < 0) - continue; - - hash = kzalloc(sizeof(*hash) + - ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1), - sizeof(long)) + - BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - hash->num_attrs = attr_max_bucket + 1; - method->num_child_attrs += hash->num_attrs; - hash->mandatory_attrs_bitmask = (void *)(hash + 1) + - ALIGN(sizeof(*hash->attrs) * - (attr_max_bucket + 1), - sizeof(long)); - - method->attr_buckets[bucket_idx] = hash; - - do { - size_t num_attr_defs; - struct uverbs_attr_spec *attr; - bool attr_obj_with_special_access; - - num_attr_defs = - get_attrs_above_id(attr_defs, - num_method_defs, - method_defs, - bucket_idx, - &min_id); - /* Last attr in bucket */ - if (!num_attr_defs) - break; - - if (num_attr_defs > 1) { - /* - * We don't allow two attribute definitions for - * the same attribute. This is usually a - * programmer error. If required, it's better to - * just add a new attribute to capture the new - * semantics. - */ - res = -EEXIST; - goto free; - } - - attr = &hash->attrs[min_id]; - memcpy(attr, &attr_defs[0]->attr, sizeof(*attr)); - - attr_obj_with_special_access = IS_ATTR_OBJECT(attr) && - (attr->obj.access == UVERBS_ACCESS_NEW || - attr->obj.access == UVERBS_ACCESS_DESTROY); - num_of_singularities += !!attr_obj_with_special_access; - if (WARN(num_of_singularities > 1, - "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n", - min_id) || - WARN(attr_obj_with_special_access && - !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY), - "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n", - min_id) || - WARN(IS_ATTR_OBJECT(attr) && - attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n", - min_id)) { - res = -EINVAL; - goto free; - } - - if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY) - set_bit(min_id, hash->mandatory_attrs_bitmask); - min_id++; - - } while (1); - } - kfree(attr_defs); - return method; - -free: - kfree(attr_defs); -free_method: - free_method(method); - return ERR_PTR(res); -} - -static void free_object(struct uverbs_object_spec *object) -{ - unsigned int i, j; - - if (!object) - return; - - for (i = 0; i < object->num_buckets; i++) { - struct uverbs_method_spec_hash *method_buckets = - object->method_buckets[i]; - - if (!method_buckets) - continue; - - for (j = 0; j < method_buckets->num_methods; j++) - free_method(method_buckets->methods[j]); - - kfree(method_buckets); - } - - kfree(object); -} - -/* - * This function gets array of size @num_object_defs which contains pointers to - * object definitions @object_defs. The function allocated an - * uverbs_object_spec structure and initialize its number of buckets and the - * elements in buckets to the correct methods. While doing that, it - * sorts out the correct relationship between conflicts in the same method. - */ -static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs, - size_t num_object_defs) -{ - u16 bucket_idx; - int max_method_buckets = 0; - u16 num_method_buckets = 0; - int res = 0; - struct uverbs_object_spec *object = NULL; - const struct uverbs_method_def **method_defs; - - max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs); - if (max_method_buckets >= 0) - num_method_buckets = max_method_buckets + 1; - - object = kzalloc(struct_size(object, method_buckets, - num_method_buckets), - GFP_KERNEL); - if (!object) - return ERR_PTR(-ENOMEM); - - object->num_buckets = num_method_buckets; - method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL); - if (!method_defs) { - res = -ENOMEM; - goto free_object; - } - - for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - int methods_max_bucket = 0; - struct uverbs_method_spec_hash *hash = NULL; - - methods_max_bucket = find_max_method_id(num_object_defs, object_defs, - bucket_idx); - if (methods_max_bucket < 0) - continue; - - hash = kzalloc(struct_size(hash, methods, - methods_max_bucket + 1), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - - hash->num_methods = methods_max_bucket + 1; - object->method_buckets[bucket_idx] = hash; - - do { - size_t num_method_defs; - struct uverbs_method_spec *method; - int i; - - num_method_defs = - get_methods_above_id(method_defs, - num_object_defs, - object_defs, - bucket_idx, - &min_id); - /* Last method in bucket */ - if (!num_method_defs) - break; - - method = build_method_with_attrs(method_defs, - num_method_defs); - if (IS_ERR(method)) { - res = PTR_ERR(method); - goto free; - } - - /* - * The last tree which is given as an argument to the - * merge overrides previous method handler. - * Therefore, we iterate backwards and search for the - * first handler which != NULL. This also defines the - * set of flags used for this handler. - */ - for (i = num_method_defs - 1; - i >= 0 && !method_defs[i]->handler; i--) - ; - hash->methods[min_id++] = method; - /* NULL handler isn't allowed */ - if (WARN(i < 0, - "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n", - min_id)) { - res = -EINVAL; - goto free; - } - method->handler = method_defs[i]->handler; - method->flags = method_defs[i]->flags; - - } while (1); - } - kfree(method_defs); - return object; - -free: - kfree(method_defs); -free_object: - free_object(object); - return ERR_PTR(res); -} - -void uverbs_free_spec_tree(struct uverbs_root_spec *root) -{ - unsigned int i, j; - - if (!root) - return; - - for (i = 0; i < root->num_buckets; i++) { - struct uverbs_object_spec_hash *object_hash = - root->object_buckets[i]; - - if (!object_hash) - continue; - - for (j = 0; j < object_hash->num_objects; j++) - free_object(object_hash->objects[j]); - - kfree(object_hash); - } - - kfree(root); -} -EXPORT_SYMBOL(uverbs_free_spec_tree); - -struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, - const struct uverbs_object_tree_def **trees) -{ - u16 bucket_idx; - short max_object_buckets = 0; - size_t num_objects_buckets = 0; - struct uverbs_root_spec *root_spec = NULL; - const struct uverbs_object_def **object_defs; - int i; - int res = 0; - - max_object_buckets = find_max_object_ns_id(num_trees, trees); - /* - * Devices which don't want to support ib_uverbs, should just allocate - * an empty parsing tree. Every user-space command won't hit any valid - * entry in the parsing tree and thus will fail. - */ - if (max_object_buckets >= 0) - num_objects_buckets = max_object_buckets + 1; - - root_spec = kzalloc(struct_size(root_spec, object_buckets, - num_objects_buckets), - GFP_KERNEL); - if (!root_spec) - return ERR_PTR(-ENOMEM); - root_spec->num_buckets = num_objects_buckets; - - object_defs = kcalloc(num_trees, sizeof(*object_defs), - GFP_KERNEL); - if (!object_defs) { - res = -ENOMEM; - goto free_root; - } - - for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) { - short min_id = SHRT_MIN; - short objects_max_bucket; - struct uverbs_object_spec_hash *hash = NULL; - - objects_max_bucket = find_max_object_id(num_trees, trees, - bucket_idx); - if (objects_max_bucket < 0) - continue; - - hash = kzalloc(struct_size(hash, objects, - objects_max_bucket + 1), - GFP_KERNEL); - if (!hash) { - res = -ENOMEM; - goto free; - } - hash->num_objects = objects_max_bucket + 1; - root_spec->object_buckets[bucket_idx] = hash; - - do { - size_t num_object_defs; - struct uverbs_object_spec *object; - - num_object_defs = get_objects_above_id(object_defs, - num_trees, - trees, - bucket_idx, - &min_id); - /* Last object in bucket */ - if (!num_object_defs) - break; - - object = build_object_with_methods(object_defs, - num_object_defs); - if (IS_ERR(object)) { - res = PTR_ERR(object); - goto free; - } - - /* - * The last tree which is given as an argument to the - * merge overrides previous object's type_attrs. - * Therefore, we iterate backwards and search for the - * first type_attrs which != NULL. - */ - for (i = num_object_defs - 1; - i >= 0 && !object_defs[i]->type_attrs; i--) - ; - /* - * NULL is a valid type_attrs. It means an object we - * can't instantiate (like DEVICE). - */ - object->type_attrs = i < 0 ? NULL : - object_defs[i]->type_attrs; - - hash->objects[min_id++] = object; - } while (1); - } - - kfree(object_defs); - return root_spec; - -free: - kfree(object_defs); -free_root: - uverbs_free_spec_tree(root_spec); - return ERR_PTR(res); -} -EXPORT_SYMBOL(uverbs_alloc_spec_tree); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2094d136513d..823beca448e1 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -41,8 +41,6 @@ #include <linux/fs.h> #include <linux/poll.h> #include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/sched/task.h> #include <linux/file.h> #include <linux/cdev.h> #include <linux/anon_inodes.h> @@ -77,7 +75,6 @@ static struct class *uverbs_class; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, @@ -118,7 +115,6 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, }; static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) = { [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, @@ -138,6 +134,30 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +/* + * Must be called with the ufile->device->disassociate_srcu held, and the lock + * must be held until use of the ucontext is finished. + */ +struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) +{ + /* + * We do not hold the hw_destroy_rwsem lock for this flow, instead + * srcu is used. It does not matter if someone races this with + * get_context, we get NULL or valid ucontext. + */ + struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext); + + if (!srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu)) + return ERR_PTR(-EIO); + + if (!ucontext) + return ERR_PTR(-EINVAL); + + return ucontext; +} +EXPORT_SYMBOL(ib_uverbs_get_ucontext); + int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; @@ -154,6 +174,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj) struct ib_uverbs_device *dev = container_of(kobj, struct ib_uverbs_device, kobj); + uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); kfree(dev); } @@ -184,7 +205,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file, } spin_unlock_irq(&ev_file->ev_queue.lock); - uverbs_uobject_put(&ev_file->uobj_file.uobj); + uverbs_uobject_put(&ev_file->uobj); } spin_lock_irq(&file->async_file->ev_queue.lock); @@ -220,20 +241,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, } } -static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, - struct ib_ucontext *context, - bool device_removed) -{ - context->closing = 1; - uverbs_cleanup_ucontext(context, device_removed); - put_pid(context->tgid); - - ib_rdmacg_uncharge(&context->cg_obj, context->device, - RDMACG_RESOURCE_HCA_HANDLE); - - return context->device->dealloc_ucontext(context); -} - static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); @@ -246,6 +253,8 @@ void ib_uverbs_release_file(struct kref *ref) struct ib_device *ib_dev; int srcu_key; + release_ufile_idr_uobject(file); + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); @@ -338,7 +347,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, filp->private_data; return ib_uverbs_event_read(&comp_ev_file->ev_queue, - comp_ev_file->uobj_file.ufile, filp, + comp_ev_file->uobj.ufile, filp, buf, count, pos, sizeof(struct ib_uverbs_comp_event_desc)); } @@ -420,7 +429,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) { - struct ib_uverbs_completion_event_file *file = filp->private_data; + struct ib_uobject *uobj = filp->private_data; + struct ib_uverbs_completion_event_file *file = container_of( + uobj, struct ib_uverbs_completion_event_file, uobj); struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&file->ev_queue.lock); @@ -528,7 +539,7 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, + ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } @@ -637,13 +648,13 @@ err_put_refs: return filp; } -static bool verify_command_mask(struct ib_device *ib_dev, - u32 command, bool extended) +static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command, + bool extended) { if (!extended) - return ib_dev->uverbs_cmd_mask & BIT_ULL(command); + return ufile->uverbs_cmd_mask & BIT_ULL(command); - return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command); + return ufile->uverbs_ex_cmd_mask & BIT_ULL(command); } static bool verify_command_idx(u32 command, bool extended) @@ -713,7 +724,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_ex_cmd_hdr ex_hdr; - struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; bool extended; int srcu_key; @@ -748,24 +758,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, return ret; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto out; - } - - /* - * Must be after the ib_dev check, as once the RCU clears ib_dev == - * NULL means ucontext == NULL - */ - if (!file->ucontext && - (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) { - ret = -EINVAL; - goto out; - } - if (!verify_command_mask(ib_dev, command, extended)) { + if (!verify_command_mask(file, command, extended)) { ret = -EOPNOTSUPP; goto out; } @@ -773,7 +767,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, buf += sizeof(hdr); if (!extended) { - ret = uverbs_cmd_table[command](file, ib_dev, buf, + ret = uverbs_cmd_table[command](file, buf, hdr.in_words * 4, hdr.out_words * 4); } else { @@ -792,7 +786,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); - ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw); + ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw); ret = (ret) ? : count; } @@ -804,22 +798,18 @@ out: static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; - struct ib_device *ib_dev; + struct ib_ucontext *ucontext; int ret = 0; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; + ucontext = ib_uverbs_get_ucontext(file); + if (IS_ERR(ucontext)) { + ret = PTR_ERR(ucontext); goto out; } - if (!file->ucontext) - ret = -ENODEV; - else - ret = ib_dev->mmap(file->ucontext, vma); + ret = ucontext->device->mmap(ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; @@ -879,13 +869,12 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) } file->device = dev; - spin_lock_init(&file->idr_lock); - idr_init(&file->idr); - file->ucontext = NULL; - file->async_file = NULL; kref_init(&file->ref); - mutex_init(&file->mutex); - mutex_init(&file->cleanup_mutex); + mutex_init(&file->ucontext_lock); + + spin_lock_init(&file->uobjects_lock); + INIT_LIST_HEAD(&file->uobjects); + init_rwsem(&file->hw_destroy_rwsem); filp->private_data = file; kobject_get(&dev->kobj); @@ -893,6 +882,11 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask; + file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask; + + setup_ufile_idr_uobject(file); + return nonseekable_open(inode, filp); err_module: @@ -911,13 +905,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - mutex_lock(&file->cleanup_mutex); - if (file->ucontext) { - ib_uverbs_cleanup_ucontext(file, file->ucontext, false); - file->ucontext = NULL; - } - mutex_unlock(&file->cleanup_mutex); - idr_destroy(&file->idr); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); mutex_lock(&file->device->lists_mutex); if (!file->is_closed) { @@ -1006,6 +994,19 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); +static int ib_uverbs_create_uapi(struct ib_device *device, + struct ib_uverbs_device *uverbs_dev) +{ + struct uverbs_api *uapi; + + uapi = uverbs_alloc_api(device->driver_specs, device->driver_id); + if (IS_ERR(uapi)) + return PTR_ERR(uapi); + + uverbs_dev->uapi = uapi; + return 0; +} + static void ib_uverbs_add_one(struct ib_device *device) { int devnum; @@ -1048,6 +1049,9 @@ static void ib_uverbs_add_one(struct ib_device *device) rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; + if (ib_uverbs_create_uapi(device, uverbs_dev)) + goto err; + cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; @@ -1067,18 +1071,6 @@ static void ib_uverbs_add_one(struct ib_device *device) if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; - if (!device->specs_root) { - const struct uverbs_object_tree_def *default_root[] = { - uverbs_default_get_objects()}; - - uverbs_dev->specs_root = uverbs_alloc_spec_tree(1, - default_root); - if (IS_ERR(uverbs_dev->specs_root)) - goto err_class; - - device->specs_root = uverbs_dev->specs_root; - } - ib_set_client_data(device, &uverbs_client, uverbs_dev); return; @@ -1098,44 +1090,6 @@ err: return; } -static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext) -{ - struct ib_device *ib_dev = ibcontext->device; - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. - */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - down_write(&owning_mm->mmap_sem); - ib_dev->disassociate_ucontext(ibcontext); - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); -} - static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { @@ -1144,46 +1098,31 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_event event; /* Pending running commands to terminate */ - synchronize_srcu(&uverbs_dev->disassociate_srcu); + uverbs_disassociate_api_pre(uverbs_dev); event.event = IB_EVENT_DEVICE_FATAL; event.element.port_num = 0; event.device = ib_dev; mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { - struct ib_ucontext *ucontext; file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; list_del(&file->list); kref_get(&file->ref); - mutex_unlock(&uverbs_dev->lists_mutex); - - - mutex_lock(&file->cleanup_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; - mutex_unlock(&file->cleanup_mutex); - /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ucontext + /* We must release the mutex before going ahead and calling + * uverbs_cleanup_ufile, as it might end up indirectly calling + * uverbs_close, for example due to freeing the resources (e.g + * mmput). */ - if (ucontext) { - /* We must release the mutex before going ahead and - * calling disassociate_ucontext. disassociate_ucontext - * might end up indirectly calling uverbs_close, - * for example due to freeing the resources - * (e.g mmput). - */ - ib_uverbs_event_handler(&file->event_handler, &event); - ib_uverbs_disassociate_ucontext(ucontext); - mutex_lock(&file->cleanup_mutex); - ib_uverbs_cleanup_ucontext(file, ucontext, true); - mutex_unlock(&file->cleanup_mutex); - } + mutex_unlock(&uverbs_dev->lists_mutex); - mutex_lock(&uverbs_dev->lists_mutex); + ib_uverbs_event_handler(&file->event_handler, &event); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); + + mutex_lock(&uverbs_dev->lists_mutex); } while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { @@ -1205,6 +1144,8 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); } mutex_unlock(&uverbs_dev->lists_mutex); + + uverbs_disassociate_api(uverbs_dev->uapi); } static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) @@ -1232,7 +1173,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) * cdev was deleted, however active clients can still issue * commands and close their open files. */ - rcu_assign_pointer(uverbs_dev->ib_dev, NULL); ib_uverbs_free_hw_resources(uverbs_dev, device); wait_clients = 0; } @@ -1241,10 +1181,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); - if (uverbs_dev->specs_root) { - uverbs_free_spec_tree(uverbs_dev->specs_root); - device->specs_root = NULL; - } kobject_put(&uverbs_dev->kobj); } diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index bb372b4713a4..b8d715c68ca4 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst, /* TODO: No need to set this */ sa_path_set_dmac_zero(dst); - sa_path_set_ndev(dst, NULL); - sa_path_set_ifindex(dst, 0); } EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index b570acbd94af..203cc96ac6f5 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -48,14 +48,18 @@ static int uverbs_free_ah(struct ib_uobject *uobject, static int uverbs_free_flow(struct ib_uobject *uobject, enum rdma_remove_reason why) { - int ret; struct ib_flow *flow = (struct ib_flow *)uobject->object; struct ib_uflow_object *uflow = container_of(uobject, struct ib_uflow_object, uobject); + struct ib_qp *qp = flow->qp; + int ret; - ret = ib_destroy_flow(flow); - if (!ret) + ret = flow->device->destroy_flow(flow); + if (!ret) { + if (qp) + atomic_dec(&qp->usecnt); ib_uverbs_flow_resources_free(uflow->resources); + } return ret; } @@ -74,6 +78,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject, container_of(uobject, struct ib_uqp_object, uevent.uobject); int ret; + /* + * If this is a user triggered destroy then do not allow destruction + * until the user cleans up all the mcast bindings. Unlike in other + * places we forcibly clean up the mcast attachments for !DESTROY + * because the mcast attaches are not ubojects and will not be + * destroyed by anything else during cleanup processing. + */ if (why == RDMA_REMOVE_DESTROY) { if (!list_empty(&uqp->mcast_list)) return -EBUSY; @@ -82,7 +93,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp(qp); - if (ret && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(ret, why, uobject)) return ret; if (uqp->uxrcd) @@ -100,8 +111,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, int ret; ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret || why != RDMA_REMOVE_DESTROY) - kfree(ind_tbl); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + kfree(ind_tbl); return ret; } @@ -114,8 +127,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq(wq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); return ret; } @@ -129,8 +144,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int ret; ret = ib_destroy_srq(srq); - - if (ret && why == RDMA_REMOVE_DESTROY) + if (ib_is_destroy_retryable(ret, why, uobject)) return ret; if (srq_type == IB_SRQT_XRC) { @@ -152,12 +166,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; + ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); + if (ret) + return ret; + mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); - if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt)) - ret = -EBUSY; - else - ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device, - xrcd, why); + ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why); mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); return ret; @@ -167,20 +181,22 @@ static int uverbs_free_pd(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_pd *pd = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); + if (ret) + return ret; ib_dealloc_pd((struct ib_pd *)uobject->object); return 0; } -static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file, +static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, enum rdma_remove_reason why) { struct ib_uverbs_completion_event_file *comp_event_file = - container_of(uobj_file, struct ib_uverbs_completion_event_file, - uobj_file); + container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; spin_lock_irq(&event_queue->lock); @@ -194,119 +210,77 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_ return 0; }; -int uverbs_destroy_def_handler(struct ib_device *ib_dev, - struct ib_uverbs_file *file, +int uverbs_destroy_def_handler(struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { return 0; } +EXPORT_SYMBOL(uverbs_destroy_def_handler); -/* - * This spec is used in order to pass information to the hardware driver in a - * legacy way. Every verb that could get driver specific data should get this - * spec. - */ -const struct uverbs_attr_def uverbs_uhw_compat_in = - UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); -const struct uverbs_attr_def uverbs_uhw_compat_out = - UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)); - -void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata) -{ - /* - * This is for ease of conversion. The purpose is to convert all drivers - * to use uverbs_attr_bundle instead of ib_udata. - * Assume attr == 0 is input and attr == 1 is output. - */ - const struct uverbs_attr *uhw_in = - uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN); - const struct uverbs_attr *uhw_out = - uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT); - - if (!IS_ERR(uhw_in)) { - udata->inlen = uhw_in->ptr_attr.len; - if (uverbs_attr_ptr_is_inline(uhw_in)) - udata->inbuf = &uhw_in->uattr->data; - else - udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); - } else { - udata->inbuf = NULL; - udata->inlen = 0; - } - - if (!IS_ERR(uhw_out)) { - udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); - udata->outlen = uhw_out->ptr_attr.len; - } else { - udata->outbuf = NULL; - udata->outlen = 0; - } -} - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL, - &UVERBS_TYPE_ALLOC_FD(0, - sizeof(struct ib_uverbs_completion_event_file), - uverbs_hot_unplug_completion_event_file, - &uverbs_event_fops, - "[infinibandevent]", O_RDONLY)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), + uverbs_hot_unplug_completion_event_file, + &uverbs_event_fops, + "[infinibandevent]", + O_RDONLY)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, - uverbs_free_qp)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_QP, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, - uverbs_free_srq)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_SRQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), + uverbs_free_srq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), - 0, uverbs_free_flow)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), + uverbs_free_flow)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, - uverbs_free_wq)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_WQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl)); -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, - uverbs_free_xrcd)); +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_XRCD, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), + uverbs_free_xrcd)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, - /* 2 is used in order to free the PD after MRs */ - &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL); - -static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, - &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), - &UVERBS_OBJECT(UVERBS_OBJECT_PD), - &UVERBS_OBJECT(UVERBS_OBJECT_MR), - &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), - &UVERBS_OBJECT(UVERBS_OBJECT_CQ), - &UVERBS_OBJECT(UVERBS_OBJECT_QP), - &UVERBS_OBJECT(UVERBS_OBJECT_AH), - &UVERBS_OBJECT(UVERBS_OBJECT_MW), - &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), - &UVERBS_OBJECT(UVERBS_OBJECT_WQ), - &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), - &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), - &UVERBS_OBJECT(UVERBS_OBJECT_DM), - &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd)); + +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE); + +DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, + &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), + &UVERBS_OBJECT(UVERBS_OBJECT_PD), + &UVERBS_OBJECT(UVERBS_OBJECT_MR), + &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), + &UVERBS_OBJECT(UVERBS_OBJECT_CQ), + &UVERBS_OBJECT(UVERBS_OBJECT_QP), + &UVERBS_OBJECT(UVERBS_OBJECT_AH), + &UVERBS_OBJECT(UVERBS_OBJECT_MW), + &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), + &UVERBS_OBJECT(UVERBS_OBJECT_WQ), + &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), + &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), + &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), + &UVERBS_OBJECT(UVERBS_OBJECT_DM), + &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { return &uverbs_default_objects; } -EXPORT_SYMBOL_GPL(uverbs_default_get_objects); diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 03b182a684a6..a0ffdcf9a51c 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -38,20 +38,22 @@ static int uverbs_free_counters(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_counters *counters = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && - atomic_read(&counters->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); + if (ret) + return ret; return counters->device->destroy_counters(counters); } -static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); + struct ib_device *ib_dev = uobj->context->device; struct ib_counters *counters; - struct ib_uobject *uobj; int ret; /* @@ -62,7 +64,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de if (!ib_dev->create_counters) return -EOPNOTSUPP; - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); counters = ib_dev->create_counters(ib_dev, attrs); if (IS_ERR(counters)) { ret = PTR_ERR(counters); @@ -80,9 +81,8 @@ err_create_counters: return ret; } -static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_counters_read_attr read_attr = {}; const struct uverbs_attr *uattr; @@ -90,68 +90,62 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); int ret; - if (!ib_dev->read_counters) + if (!counters->device->read_counters) return -EOPNOTSUPP; if (!atomic_read(&counters->usecnt)) return -EINVAL; - ret = uverbs_copy_from(&read_attr.flags, attrs, - UVERBS_ATTR_READ_COUNTERS_FLAGS); + ret = uverbs_get_flags32(&read_attr.flags, attrs, + UVERBS_ATTR_READ_COUNTERS_FLAGS, + IB_UVERBS_READ_COUNTERS_PREFER_CACHED); if (ret) return ret; uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); - read_attr.counters_buff = kcalloc(read_attr.ncounters, - sizeof(u64), GFP_KERNEL); - if (!read_attr.counters_buff) - return -ENOMEM; - - ret = ib_dev->read_counters(counters, - &read_attr, - attrs); - if (ret) - goto err_read; + read_attr.counters_buff = uverbs_zalloc( + attrs, array_size(read_attr.ncounters, sizeof(u64))); + if (IS_ERR(read_attr.counters_buff)) + return PTR_ERR(read_attr.counters_buff); - ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, - read_attr.counters_buff, - read_attr.ncounters * sizeof(u64)); + ret = counters->device->read_counters(counters, &read_attr, attrs); + if (ret) + return ret; -err_read: - kfree(read_attr.counters_buff); - return ret; + return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, + read_attr.counters_buff, + read_attr.ncounters * sizeof(u64)); } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, - &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, - UVERBS_OBJECT_COUNTERS, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, - UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, - UVERBS_ATTR_TYPE(__u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_COUNTERS_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_READ, + UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_MIN_SIZE(0), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, + enum ib_uverbs_read_counters_flags)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), + UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); - diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 3d293d01afea..5b5f2052cd52 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -44,21 +44,26 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq(cq); - if (!ret || why != RDMA_REMOVE_DESTROY) - ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ? - container_of(ev_queue, - struct ib_uverbs_completion_event_file, - ev_queue) : NULL, - ucq); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_ucq( + uobject->context->ufile, + ev_queue ? container_of(ev_queue, + struct ib_uverbs_completion_event_file, + ev_queue) : + NULL, + ucq); return ret; } -static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct ib_ucontext *ucontext = file->ucontext; - struct ib_ucq_object *obj; + struct ib_ucq_object *obj = container_of( + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), + typeof(*obj), uobject); + struct ib_device *ib_dev = obj->uobject.context->device; struct ib_udata uhw; int ret; u64 user_handle; @@ -67,7 +72,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_uobject *ev_file_uobj; - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) + if (!ib_dev->create_cq || !ib_dev->destroy_cq) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.comp_vector, attrs, @@ -81,28 +86,26 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, if (ret) return ret; - /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ - if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs, - UVERBS_ATTR_CREATE_CQ_FLAGS))) - return -EFAULT; + ret = uverbs_get_flags32(&attr.flags, attrs, + UVERBS_ATTR_CREATE_CQ_FLAGS, + IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION | + IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN); + if (ret) + return ret; ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL); if (!IS_ERR(ev_file_uobj)) { ev_file = container_of(ev_file_uobj, struct ib_uverbs_completion_event_file, - uobj_file.uobj); + uobj); uverbs_uobject_get(ev_file_uobj); } - if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { + if (attr.comp_vector >= file->device->num_comp_vectors) { ret = -EINVAL; goto err_event_file; } - obj = container_of(uverbs_attr_get_uobject(attrs, - UVERBS_ATTR_CREATE_CQ_HANDLE), - typeof(*obj), uobject); - obj->uverbs_file = ucontext->ufile; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); @@ -111,7 +114,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, /* Temporary, only until drivers get the new uverbs_attr_bundle */ create_udata(attrs, &uhw); - cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); + cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; @@ -143,69 +146,64 @@ err_event_file: return ret; }; -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, + enum ib_uverbs_ex_create_cq_flags), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, - UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_ACCESS_READ), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &uverbs_uhw_compat_in, &uverbs_uhw_compat_out); - -static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) + UA_MANDATORY), + UVERBS_ATTR_UHW()); + +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); - struct ib_uverbs_destroy_cq_resp resp; - struct ib_ucq_object *obj; - int ret; - - if (IS_ERR(uobj)) - return PTR_ERR(uobj); - - obj = container_of(uobj, struct ib_ucq_object, uobject); - - if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) - return -EOPNOTSUPP; - - ret = rdma_explicit_destroy(uobj); - if (ret) - return ret; - - resp.comp_events_reported = obj->comp_events_reported; - resp.async_events_reported = obj->async_events_reported; + struct ib_ucq_object *obj = + container_of(uobj, struct ib_ucq_object, uobject); + struct ib_uverbs_destroy_cq_resp resp = { + .comp_events_reported = obj->comp_events_reported, + .async_events_reported = obj->async_events_reported + }; return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp, sizeof(resp)); } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, - UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ, - &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, - uverbs_free_cq), +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, + UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_CQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), + #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) - &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) + &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) #endif - ); - +); diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 8b681575b615..edc3ff7733d4 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -37,20 +37,24 @@ static int uverbs_free_dm(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_dm *dm = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); + if (ret) + return ret; return dm->device->dealloc_dm(dm); } -static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int +UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) { - struct ib_ucontext *ucontext = file->ucontext; struct ib_dm_alloc_attr attr = {}; - struct ib_uobject *uobj; + struct ib_uobject *uobj = + uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE) + ->obj_attr.uobject; + struct ib_device *ib_dev = uobj->context->device; struct ib_dm *dm; int ret; @@ -67,9 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, if (ret) return ret; - uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject; - - dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs); + dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); @@ -83,26 +85,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, return 0; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, - &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, - UVERBS_OBJECT_DM, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_DM_FREE, + UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, - /* 1 is used in order to free the DM after MRs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm), + UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index a7be51cf2e42..d8cfafe23bd9 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, enum rdma_remove_reason why) { struct ib_flow_action *action = uobject->object; + int ret; - if (why == RDMA_REMOVE_DESTROY && - atomic_read(&action->usecnt)) - return -EBUSY; + ret = ib_destroy_usecnt(&action->usecnt, why, uobject); + if (ret) + return ret; return action->device->destroy_flow_action(action); } @@ -303,12 +304,13 @@ static int parse_flow_action_esp(struct ib_device *ib_dev, return 0; } -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); + struct ib_device *ib_dev = uobj->context->device; int ret; - struct ib_uobject *uobj; struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; @@ -320,7 +322,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return ret; /* No need to check as this attribute is marked as MANDATORY */ - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); if (IS_ERR(action)) return PTR_ERR(action); @@ -334,102 +335,109 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return 0; } -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); + struct ib_flow_action *action = uobj->object; int ret; - struct ib_uobject *uobj; - struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; - if (!ib_dev->modify_flow_action_esp) + if (!action->device->modify_flow_action_esp) return -EOPNOTSUPP; - ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true); + ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr, + true); if (ret) return ret; - uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); - action = uobj->object; - if (action->type != IB_FLOW_ACTION_ESP) return -EINVAL; - return ib_dev->modify_flow_action_esp(action, - &esp_attr.hdr, - attrs); + return action->device->modify_flow_action_esp(action, &esp_attr.hdr, + attrs); } static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), - .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } }, + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT( + struct ib_uverbs_flow_action_esp_keymat_aes_gcm, + aes_key), }, }; static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - /* No need to specify any data */ - .len = 0, - } } + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), }, [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - { .ptr = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), - .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } } + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, + size), }, }; -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | - UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); - -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, - &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_WRITE, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat), - &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); - -static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY, - uverbs_destroy_def_handler, - &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_DESTROY, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION, - &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); - +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, + UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_FLOW_ACTION_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 68f7cadf088f..cf02e774303e 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -39,14 +39,18 @@ static int uverbs_free_mr(struct ib_uobject *uobject, return ib_dereg_mr((struct ib_mr *)uobject->object); } -static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_dm_mr_attr attr = {}; - struct ib_uobject *uobj; - struct ib_dm *dm; - struct ib_pd *pd; + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); + struct ib_dm *dm = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE); + struct ib_device *ib_dev = pd->device; + struct ib_mr *mr; int ret; @@ -62,8 +66,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, if (ret) return ret; - ret = uverbs_copy_from(&attr.access_flags, attrs, - UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS); + ret = uverbs_get_flags32(&attr.access_flags, attrs, + UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + IB_ACCESS_SUPPORTED); if (ret) return ret; @@ -74,12 +79,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, if (ret) return ret; - pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE); - - dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE); - - uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject; - if (attr.offset > dm->length || attr.length > dm->length || attr.length > dm->length - attr.offset) return -EINVAL; @@ -115,33 +114,36 @@ err_dereg: return ret; } -static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, - UVERBS_ACCESS_NEW, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_MR_REG, + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + enum ib_access_flags), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM, - UVERBS_ACCESS_READ, - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, - UVERBS_ATTR_TYPE(u32), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR, - /* 1 is used in order to free the MR after all the MWs */ - &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr), - &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_MR, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c new file mode 100644 index 000000000000..73ea6f0db88f --- /dev/null +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + */ +#include <rdma/uverbs_ioctl.h> +#include <rdma/rdma_user_ioctl.h> +#include <linux/bitops.h> +#include "rdma_core.h" +#include "uverbs.h" + +static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) +{ + void *elm; + int rc; + + if (key == UVERBS_API_KEY_ERR) + return ERR_PTR(-EOVERFLOW); + + elm = kzalloc(alloc_size, GFP_KERNEL); + rc = radix_tree_insert(&uapi->radix, key, elm); + if (rc) { + kfree(elm); + return ERR_PTR(rc); + } + + return elm; +} + +static int uapi_merge_method(struct uverbs_api *uapi, + struct uverbs_api_object *obj_elm, u32 obj_key, + const struct uverbs_method_def *method, + bool is_driver) +{ + u32 method_key = obj_key | uapi_key_ioctl_method(method->id); + struct uverbs_api_ioctl_method *method_elm; + unsigned int i; + + if (!method->attrs) + return 0; + + method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm)); + if (IS_ERR(method_elm)) { + if (method_elm != ERR_PTR(-EEXIST)) + return PTR_ERR(method_elm); + + /* + * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE + */ + if (WARN_ON(method->handler)) + return -EINVAL; + method_elm = radix_tree_lookup(&uapi->radix, method_key); + if (WARN_ON(!method_elm)) + return -EINVAL; + } else { + WARN_ON(!method->handler); + rcu_assign_pointer(method_elm->handler, method->handler); + if (method->handler != uverbs_destroy_def_handler) + method_elm->driver_method = is_driver; + } + + for (i = 0; i != method->num_attrs; i++) { + const struct uverbs_attr_def *attr = (*method->attrs)[i]; + struct uverbs_api_attr *attr_slot; + + if (!attr) + continue; + + /* + * ENUM_IN contains the 'ids' pointer to the driver's .rodata, + * so if it is specified by a driver then it always makes this + * into a driver method. + */ + if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN) + method_elm->driver_method |= is_driver; + + attr_slot = + uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id), + sizeof(*attr_slot)); + /* Attributes are not allowed to be modified by drivers */ + if (IS_ERR(attr_slot)) + return PTR_ERR(attr_slot); + + attr_slot->spec = attr->attr; + } + + return 0; +} + +static int uapi_merge_tree(struct uverbs_api *uapi, + const struct uverbs_object_tree_def *tree, + bool is_driver) +{ + unsigned int i, j; + int rc; + + if (!tree->objects) + return 0; + + for (i = 0; i != tree->num_objects; i++) { + const struct uverbs_object_def *obj = (*tree->objects)[i]; + struct uverbs_api_object *obj_elm; + u32 obj_key; + + if (!obj) + continue; + + obj_key = uapi_key_obj(obj->id); + obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm)); + if (IS_ERR(obj_elm)) { + if (obj_elm != ERR_PTR(-EEXIST)) + return PTR_ERR(obj_elm); + + /* This occurs when a driver uses ADD_UVERBS_METHODS */ + if (WARN_ON(obj->type_attrs)) + return -EINVAL; + obj_elm = radix_tree_lookup(&uapi->radix, obj_key); + if (WARN_ON(!obj_elm)) + return -EINVAL; + } else { + obj_elm->type_attrs = obj->type_attrs; + if (obj->type_attrs) { + obj_elm->type_class = + obj->type_attrs->type_class; + /* + * Today drivers are only permitted to use + * idr_class types. They cannot use FD types + * because we currently have no way to revoke + * the fops pointer after device + * disassociation. + */ + if (WARN_ON(is_driver && + obj->type_attrs->type_class != + &uverbs_idr_class)) + return -EINVAL; + } + } + + if (!obj->methods) + continue; + + for (j = 0; j != obj->num_methods; j++) { + const struct uverbs_method_def *method = + (*obj->methods)[j]; + if (!method) + continue; + + rc = uapi_merge_method(uapi, obj_elm, obj_key, method, + is_driver); + if (rc) + return rc; + } + } + + return 0; +} + +static int +uapi_finalize_ioctl_method(struct uverbs_api *uapi, + struct uverbs_api_ioctl_method *method_elm, + u32 method_key) +{ + struct radix_tree_iter iter; + unsigned int num_attrs = 0; + unsigned int max_bkey = 0; + bool single_uobj = false; + void __rcu **slot; + + method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN; + radix_tree_for_each_slot (slot, &uapi->radix, &iter, + uapi_key_attrs_start(method_key)) { + struct uverbs_api_attr *elm = + rcu_dereference_protected(*slot, true); + u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK; + u32 attr_bkey = uapi_bkey_attr(attr_key); + u8 type = elm->spec.type; + + if (uapi_key_attr_to_method(iter.index) != + uapi_key_attr_to_method(method_key)) + break; + + if (elm->spec.mandatory) + __set_bit(attr_bkey, method_elm->attr_mandatory); + + if (type == UVERBS_ATTR_TYPE_IDR || + type == UVERBS_ATTR_TYPE_FD) { + u8 access = elm->spec.u.obj.access; + + /* + * Verbs specs may only have one NEW/DESTROY, we don't + * have the infrastructure to abort multiple NEW's or + * cope with multiple DESTROY failure. + */ + if (access == UVERBS_ACCESS_NEW || + access == UVERBS_ACCESS_DESTROY) { + if (WARN_ON(single_uobj)) + return -EINVAL; + + single_uobj = true; + if (WARN_ON(!elm->spec.mandatory)) + return -EINVAL; + } + + if (access == UVERBS_ACCESS_DESTROY) + method_elm->destroy_bkey = attr_bkey; + } + + max_bkey = max(max_bkey, attr_bkey); + num_attrs++; + } + + method_elm->key_bitmap_len = max_bkey + 1; + WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN); + + uapi_compute_bundle_size(method_elm, num_attrs); + return 0; +} + +static int uapi_finalize(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + int rc; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (uapi_key_is_ioctl_method(iter.index)) { + rc = uapi_finalize_ioctl_method(uapi, method_elm, + iter.index); + if (rc) + return rc; + } + } + + return 0; +} + +void uverbs_destroy_api(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + if (!uapi) + return; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + kfree(rcu_dereference_protected(*slot, true)); + radix_tree_iter_delete(&uapi->radix, &iter, slot); + } +} + +struct uverbs_api *uverbs_alloc_api( + const struct uverbs_object_tree_def *const *driver_specs, + enum rdma_driver_id driver_id) +{ + struct uverbs_api *uapi; + int rc; + + uapi = kzalloc(sizeof(*uapi), GFP_KERNEL); + if (!uapi) + return ERR_PTR(-ENOMEM); + + INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL); + uapi->driver_id = driver_id; + + rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false); + if (rc) + goto err; + + for (; driver_specs && *driver_specs; driver_specs++) { + rc = uapi_merge_tree(uapi, *driver_specs, true); + if (rc) + goto err; + } + + rc = uapi_finalize(uapi); + if (rc) + goto err; + + return uapi; +err: + if (rc != -ENOMEM) + pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n", + rc); + + uverbs_destroy_api(uapi); + return ERR_PTR(rc); +} + +/* + * The pre version is done before destroying the HW objects, it only blocks + * off method access. All methods that require the ib_dev or the module data + * must test one of these assignments prior to continuing. + */ +void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev) +{ + struct uverbs_api *uapi = uverbs_dev->uapi; + struct radix_tree_iter iter; + void __rcu **slot; + + rcu_assign_pointer(uverbs_dev->ib_dev, NULL); + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_ioctl_method(iter.index)) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->driver_method) + rcu_assign_pointer(method_elm->handler, NULL); + } + } + + synchronize_srcu(&uverbs_dev->disassociate_srcu); +} + +/* + * Called when a driver disassociates from the ib_uverbs_device. The + * assumption is that the driver module will unload after. Replace everything + * related to the driver with NULL as a safety measure. + */ +void uverbs_disassociate_api(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_object(iter.index)) { + struct uverbs_api_object *object_elm = + rcu_dereference_protected(*slot, true); + + /* + * Some type_attrs are in the driver module. We don't + * bother to keep track of which since there should be + * no use of this after disassociate. + */ + object_elm->type_attrs = NULL; + } else if (uapi_key_is_attr(iter.index)) { + struct uverbs_api_attr *elm = + rcu_dereference_protected(*slot, true); + + if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN) + elm->spec.u2.enum_def.ids = NULL; + } + } +} diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9d6beb948535..6ee03d6089eb 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -326,12 +326,162 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ +/** + * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination. + * @dest: Pointer to destination ah_attr. Contents of the destination + * pointer is assumed to be invalid and attribute are overwritten. + * @src: Pointer to source ah_attr. + */ +void rdma_copy_ah_attr(struct rdma_ah_attr *dest, + const struct rdma_ah_attr *src) +{ + *dest = *src; + if (dest->grh.sgid_attr) + rdma_hold_gid_attr(dest->grh.sgid_attr); +} +EXPORT_SYMBOL(rdma_copy_ah_attr); + +/** + * rdma_replace_ah_attr - Replace valid ah_attr with new new one. + * @old: Pointer to existing ah_attr which needs to be replaced. + * old is assumed to be valid or zero'd + * @new: Pointer to the new ah_attr. + * + * rdma_replace_ah_attr() first releases any reference in the old ah_attr if + * old the ah_attr is valid; after that it copies the new attribute and holds + * the reference to the replaced ah_attr. + */ +void rdma_replace_ah_attr(struct rdma_ah_attr *old, + const struct rdma_ah_attr *new) +{ + rdma_destroy_ah_attr(old); + *old = *new; + if (old->grh.sgid_attr) + rdma_hold_gid_attr(old->grh.sgid_attr); +} +EXPORT_SYMBOL(rdma_replace_ah_attr); + +/** + * rdma_move_ah_attr - Move ah_attr pointed by source to destination. + * @dest: Pointer to destination ah_attr to copy to. + * dest is assumed to be valid or zero'd + * @src: Pointer to the new ah_attr. + * + * rdma_move_ah_attr() first releases any reference in the destination ah_attr + * if it is valid. This also transfers ownership of internal references from + * src to dest, making src invalid in the process. No new reference of the src + * ah_attr is taken. + */ +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src) +{ + rdma_destroy_ah_attr(dest); + *dest = *src; + src->grh.sgid_attr = NULL; +} +EXPORT_SYMBOL(rdma_move_ah_attr); + +/* + * Validate that the rdma_ah_attr is valid for the device before passing it + * off to the driver. + */ +static int rdma_check_ah_attr(struct ib_device *device, + struct rdma_ah_attr *ah_attr) +{ + if (!rdma_is_port_valid(device, ah_attr->port_num)) + return -EINVAL; + + if ((rdma_is_grh_required(device, ah_attr->port_num) || + ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) && + !(ah_attr->ah_flags & IB_AH_GRH)) + return -EINVAL; + + if (ah_attr->grh.sgid_attr) { + /* + * Make sure the passed sgid_attr is consistent with the + * parameters + */ + if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index || + ah_attr->grh.sgid_attr->port_num != ah_attr->port_num) + return -EINVAL; + } + return 0; +} + +/* + * If the ah requires a GRH then ensure that sgid_attr pointer is filled in. + * On success the caller is responsible to call rdma_unfill_sgid_attr(). + */ +static int rdma_fill_sgid_attr(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr **old_sgid_attr) +{ + const struct ib_gid_attr *sgid_attr; + struct ib_global_route *grh; + int ret; + + *old_sgid_attr = ah_attr->grh.sgid_attr; + + ret = rdma_check_ah_attr(device, ah_attr); + if (ret) + return ret; + + if (!(ah_attr->ah_flags & IB_AH_GRH)) + return 0; + + grh = rdma_ah_retrieve_grh(ah_attr); + if (grh->sgid_attr) + return 0; + + sgid_attr = + rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); + + /* Move ownerhip of the kref into the ah_attr */ + grh->sgid_attr = sgid_attr; + return 0; +} + +static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *old_sgid_attr) +{ + /* + * Fill didn't change anything, the caller retains ownership of + * whatever it passed + */ + if (ah_attr->grh.sgid_attr == old_sgid_attr) + return; + + /* + * Otherwise, we need to undo what rdma_fill_sgid_attr so the caller + * doesn't see any change in the rdma_ah_attr. If we get here + * old_sgid_attr is NULL. + */ + rdma_destroy_ah_attr(ah_attr); +} + +static const struct ib_gid_attr * +rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, + const struct ib_gid_attr *old_attr) +{ + if (old_attr) + rdma_put_gid_attr(old_attr); + if (ah_attr->ah_flags & IB_AH_GRH) { + rdma_hold_gid_attr(ah_attr->grh.sgid_attr); + return ah_attr->grh.sgid_attr; + } + return NULL; +} + static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) { struct ib_ah *ah; + if (!pd->device->create_ah) + return ERR_PTR(-EOPNOTSUPP); + ah = pd->device->create_ah(pd, ah_attr, udata); if (!IS_ERR(ah)) { @@ -339,15 +489,38 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->pd = pd; ah->uobject = NULL; ah->type = ah_attr->type; + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + atomic_inc(&pd->usecnt); } return ah; } +/** + * rdma_create_ah - Creates an address handle for the + * given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * + * It returns 0 on success and returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) { - return _rdma_create_ah(pd, ah_attr, NULL); + const struct ib_gid_attr *old_sgid_attr; + struct ib_ah *ah; + int ret; + + ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); + if (ret) + return ERR_PTR(ret); + + ah = _rdma_create_ah(pd, ah_attr, NULL); + + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ah; } EXPORT_SYMBOL(rdma_create_ah); @@ -368,15 +541,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata) { + const struct ib_gid_attr *old_sgid_attr; + struct ib_ah *ah; int err; + err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); + if (err) + return ERR_PTR(err); + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { err = ib_resolve_eth_dmac(pd->device, ah_attr); - if (err) - return ERR_PTR(err); + if (err) { + ah = ERR_PTR(err); + goto out; + } } - return _rdma_create_ah(pd, ah_attr, udata); + ah = _rdma_create_ah(pd, ah_attr, udata); + +out: + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ah; } EXPORT_SYMBOL(rdma_create_user_ah); @@ -455,16 +640,16 @@ static bool find_gid_index(const union ib_gid *gid, return true; } -static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, - u16 vlan_id, const union ib_gid *sgid, - enum ib_gid_type gid_type, - u16 *gid_index) +static const struct ib_gid_attr * +get_sgid_attr_from_eth(struct ib_device *device, u8 port_num, + u16 vlan_id, const union ib_gid *sgid, + enum ib_gid_type gid_type) { struct find_gid_index_context context = {.vlan_id = vlan_id, .gid_type = gid_type}; - return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, - &context, gid_index); + return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index, + &context); } int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, @@ -508,39 +693,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); static int ib_resolve_unicast_gid_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { - struct ib_gid_attr sgid_attr; - struct ib_global_route *grh; + struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr); + const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int hop_limit = 0xff; - union ib_gid sgid; - int ret; - - grh = rdma_ah_retrieve_grh(ah_attr); - - ret = ib_query_gid(device, - rdma_ah_get_port_num(ah_attr), - grh->sgid_index, - &sgid, &sgid_attr); - if (ret || !sgid_attr.ndev) { - if (!ret) - ret = -ENXIO; - return ret; - } + int ret = 0; /* If destination is link local and source GID is RoCEv1, * IP stack is not used. */ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) && - sgid_attr.gid_type == IB_GID_TYPE_ROCE) { + sgid_attr->gid_type == IB_GID_TYPE_ROCE) { rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, ah_attr->roce.dmac); - goto done; + return ret; } - ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, + ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, ah_attr->roce.dmac, - sgid_attr.ndev, &hop_limit); -done: - dev_put(sgid_attr.ndev); + sgid_attr->ndev, &hop_limit); grh->hop_limit = hop_limit; return ret; @@ -555,16 +725,18 @@ done: * as sgid and, sgid is used as dgid because sgid contains destinations * GID whom to respond to. * + * On success the caller is responsible to call rdma_destroy_ah_attr on the + * attr. */ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr) { u32 flow_class; - u16 gid_index; int ret; enum rdma_network_type net_type = RDMA_NETWORK_IB; enum ib_gid_type gid_type = IB_GID_TYPE_IB; + const struct ib_gid_attr *sgid_attr; int hoplimit = 0xff; union ib_gid dgid; union ib_gid sgid; @@ -595,72 +767,141 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; - ret = get_sgid_index_from_eth(device, port_num, - vlan_id, &dgid, - gid_type, &gid_index); - if (ret) - return ret; + sgid_attr = get_sgid_attr_from_eth(device, port_num, + vlan_id, &dgid, + gid_type); + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); flow_class = be32_to_cpu(grh->version_tclass_flow); - rdma_ah_set_grh(ah_attr, &sgid, - flow_class & 0xFFFFF, - (u8)gid_index, hoplimit, - (flow_class >> 20) & 0xFF); - return ib_resolve_unicast_gid_dmac(device, ah_attr); + rdma_move_grh_sgid_attr(ah_attr, + &sgid, + flow_class & 0xFFFFF, + hoplimit, + (flow_class >> 20) & 0xFF, + sgid_attr); + + ret = ib_resolve_unicast_gid_dmac(device, ah_attr); + if (ret) + rdma_destroy_ah_attr(ah_attr); + + return ret; } else { rdma_ah_set_dlid(ah_attr, wc->slid); rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); - if (wc->wc_flags & IB_WC_GRH) { - if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { - ret = ib_find_cached_gid_by_port(device, &dgid, - IB_GID_TYPE_IB, - port_num, NULL, - &gid_index); - if (ret) - return ret; - } else { - gid_index = 0; - } + if ((wc->wc_flags & IB_WC_GRH) == 0) + return 0; + + if (dgid.global.interface_id != + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { + sgid_attr = rdma_find_gid_by_port( + device, &dgid, IB_GID_TYPE_IB, port_num, NULL); + } else + sgid_attr = rdma_get_gid_attr(device, port_num, 0); - flow_class = be32_to_cpu(grh->version_tclass_flow); - rdma_ah_set_grh(ah_attr, &sgid, + if (IS_ERR(sgid_attr)) + return PTR_ERR(sgid_attr); + flow_class = be32_to_cpu(grh->version_tclass_flow); + rdma_move_grh_sgid_attr(ah_attr, + &sgid, flow_class & 0xFFFFF, - (u8)gid_index, hoplimit, - (flow_class >> 20) & 0xFF); - } + hoplimit, + (flow_class >> 20) & 0xFF, + sgid_attr); + return 0; } } EXPORT_SYMBOL(ib_init_ah_attr_from_wc); +/** + * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership + * of the reference + * + * @attr: Pointer to AH attribute structure + * @dgid: Destination GID + * @flow_label: Flow label + * @hop_limit: Hop limit + * @traffic_class: traffic class + * @sgid_attr: Pointer to SGID attribute + * + * This takes ownership of the sgid_attr reference. The caller must ensure + * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after + * calling this function. + */ +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, + u32 flow_label, u8 hop_limit, u8 traffic_class, + const struct ib_gid_attr *sgid_attr) +{ + rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit, + traffic_class); + attr->grh.sgid_attr = sgid_attr; +} +EXPORT_SYMBOL(rdma_move_grh_sgid_attr); + +/** + * rdma_destroy_ah_attr - Release reference to SGID attribute of + * ah attribute. + * @ah_attr: Pointer to ah attribute + * + * Release reference to the SGID attribute of the ah attribute if it is + * non NULL. It is safe to call this multiple times, and safe to call it on + * a zero initialized ah_attr. + */ +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr) +{ + if (ah_attr->grh.sgid_attr) { + rdma_put_gid_attr(ah_attr->grh.sgid_attr); + ah_attr->grh.sgid_attr = NULL; + } +} +EXPORT_SYMBOL(rdma_destroy_ah_attr); + struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num) { struct rdma_ah_attr ah_attr; + struct ib_ah *ah; int ret; ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr); if (ret) return ERR_PTR(ret); - return rdma_create_ah(pd, &ah_attr); + ah = rdma_create_ah(pd, &ah_attr); + + rdma_destroy_ah_attr(&ah_attr); + return ah; } EXPORT_SYMBOL(ib_create_ah_from_wc); int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + const struct ib_gid_attr *old_sgid_attr; + int ret; + if (ah->type != ah_attr->type) return -EINVAL; - return ah->device->modify_ah ? + ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr); + if (ret) + return ret; + + ret = ah->device->modify_ah ? ah->device->modify_ah(ah, ah_attr) : -EOPNOTSUPP; + + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr); + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return ret; } EXPORT_SYMBOL(rdma_modify_ah); int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { + ah_attr->grh.sgid_attr = NULL; + return ah->device->query_ah ? ah->device->query_ah(ah, ah_attr) : -EOPNOTSUPP; @@ -669,13 +910,17 @@ EXPORT_SYMBOL(rdma_query_ah); int rdma_destroy_ah(struct ib_ah *ah) { + const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; int ret; pd = ah->pd; ret = ah->device->destroy_ah(ah); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (sgid_attr) + rdma_put_gid_attr(sgid_attr); + } return ret; } @@ -1290,16 +1535,19 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); +/** + * ib_resolve_eth_dmac - Resolve destination mac address + * @device: Device to consider + * @ah_attr: address handle attribute which describes the + * source and destination parameters + * ib_resolve_eth_dmac() resolves destination mac address and L3 hop limit It + * returns 0 on success or appropriate error code. It initializes the + * necessary ah_attr fields when call is successful. + */ static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr) { - int ret = 0; - struct ib_global_route *grh; - - if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr))) - return -EINVAL; - - grh = rdma_ah_retrieve_grh(ah_attr); + int ret = 0; if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) { @@ -1317,6 +1565,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device, return ret; } +static bool is_qp_type_connected(const struct ib_qp *qp) +{ + return (qp->qp_type == IB_QPT_UC || + qp->qp_type == IB_QPT_RC || + qp->qp_type == IB_QPT_XRC_INI || + qp->qp_type == IB_QPT_XRC_TGT); +} + /** * IB core internal function to perform QP attributes modification. */ @@ -1324,8 +1580,53 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + const struct ib_gid_attr *old_sgid_attr_av; + const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; + if (attr_mask & IB_QP_AV) { + ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, + &old_sgid_attr_av); + if (ret) + return ret; + } + if (attr_mask & IB_QP_ALT_PATH) { + /* + * FIXME: This does not track the migration state, so if the + * user loads a new alternate path after the HW has migrated + * from primary->alternate we will keep the wrong + * references. This is OK for IB because the reference + * counting does not serve any functional purpose. + */ + ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr, + &old_sgid_attr_alt_av); + if (ret) + goto out_av; + + /* + * Today the core code can only handle alternate paths and APM + * for IB. Ban them in roce mode. + */ + if (!(rdma_protocol_ib(qp->device, + attr->alt_ah_attr.port_num) && + rdma_protocol_ib(qp->device, port))) { + ret = EINVAL; + goto out; + } + } + + /* + * If the user provided the qp_attr then we have to resolve it. Kernel + * users have to provide already resolved rdma_ah_attr's + */ + if (udata && (attr_mask & IB_QP_AV) && + attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + goto out; + } + if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n", @@ -1341,20 +1642,27 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } ret = ib_security_modify_qp(qp, attr, attr_mask, udata); - if (!ret && (attr_mask & IB_QP_PORT)) - qp->port = attr->port_num; + if (ret) + goto out; + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; + if (attr_mask & IB_QP_AV) + qp->av_sgid_attr = + rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr); + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_path_sgid_attr = rdma_update_sgid_attr( + &attr->alt_ah_attr, qp->alt_path_sgid_attr); + +out: + if (attr_mask & IB_QP_ALT_PATH) + rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); +out_av: + if (attr_mask & IB_QP_AV) + rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); return ret; } -static bool is_qp_type_connected(const struct ib_qp *qp) -{ - return (qp->qp_type == IB_QPT_UC || - qp->qp_type == IB_QPT_RC || - qp->qp_type == IB_QPT_XRC_INI || - qp->qp_type == IB_QPT_XRC_TGT); -} - /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. * @ib_qp: The QP to modify. @@ -1369,17 +1677,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp) int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { - struct ib_qp *qp = ib_qp->real_qp; - int ret; - - if (attr_mask & IB_QP_AV && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - return ret; - } - return _ib_modify_qp(qp, attr, attr_mask, udata); + return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata); } EXPORT_SYMBOL(ib_modify_qp_with_udata); @@ -1451,6 +1749,9 @@ int ib_query_qp(struct ib_qp *qp, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { + qp_attr->ah_attr.grh.sgid_attr = NULL; + qp_attr->alt_ah_attr.grh.sgid_attr = NULL; + return qp->device->query_qp ? qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -EOPNOTSUPP; @@ -1509,6 +1810,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) int ib_destroy_qp(struct ib_qp *qp) { + const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; + const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; @@ -1539,6 +1842,10 @@ int ib_destroy_qp(struct ib_qp *qp) rdma_restrack_del(&qp->res); ret = qp->device->destroy_qp(qp); if (!ret) { + if (alt_path_sgid_attr) + rdma_put_gid_attr(alt_path_sgid_attr); + if (av_sgid_attr) + rdma_put_gid_attr(av_sgid_attr); if (pd) atomic_dec(&pd->usecnt); if (scq) @@ -1977,35 +2284,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) } EXPORT_SYMBOL(ib_destroy_rwq_ind_table); -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain) -{ - struct ib_flow *flow_id; - if (!qp->device->create_flow) - return ERR_PTR(-EOPNOTSUPP); - - flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL); - if (!IS_ERR(flow_id)) { - atomic_inc(&qp->usecnt); - flow_id->qp = qp; - } - return flow_id; -} -EXPORT_SYMBOL(ib_create_flow); - -int ib_destroy_flow(struct ib_flow *flow_id) -{ - int err; - struct ib_qp *qp = flow_id->qp; - - err = qp->device->destroy_flow(flow_id); - if (!err) - atomic_dec(&qp->usecnt); - return err; -} -EXPORT_SYMBOL(ib_destroy_flow); - int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { @@ -2200,7 +2478,6 @@ static void __ib_drain_sq(struct ib_qp *qp) struct ib_cq *cq = qp->send_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe sdrain; - struct ib_send_wr *bad_swr; struct ib_rdma_wr swr = { .wr = { .next = NULL, @@ -2219,7 +2496,7 @@ static void __ib_drain_sq(struct ib_qp *qp) sdrain.cqe.done = ib_drain_qp_done; init_completion(&sdrain.done); - ret = ib_post_send(qp, &swr.wr, &bad_swr); + ret = ib_post_send(qp, &swr.wr, NULL); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; @@ -2240,7 +2517,7 @@ static void __ib_drain_rq(struct ib_qp *qp) struct ib_cq *cq = qp->recv_cq; struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe rdrain; - struct ib_recv_wr rwr = {}, *bad_rwr; + struct ib_recv_wr rwr = {}; int ret; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); @@ -2253,7 +2530,7 @@ static void __ib_drain_rq(struct ib_qp *qp) rdrain.cqe.done = ib_drain_qp_done; init_completion(&rdrain.done); - ret = ib_post_recv(qp, &rwr, &bad_rwr); + ret = ib_post_recv(qp, &rwr, NULL); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index a76e206704d4..bbfb86eb2d24 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -166,7 +166,8 @@ int bnxt_re_query_device(struct ib_device *ibdev, | IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_MEM_MGT_EXTENSIONS; - ib_attr->max_sge = dev_attr->max_qp_sges; + ib_attr->max_send_sge = dev_attr->max_qp_sges; + ib_attr->max_recv_sge = dev_attr->max_qp_sges; ib_attr->max_sge_rd = dev_attr->max_qp_sges; ib_attr->max_cq = dev_attr->max_cq; ib_attr->max_cqe = dev_attr->max_cq_wqes; @@ -243,8 +244,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, port_attr->gid_tbl_len = dev_attr->max_sgid; port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | - IB_PORT_IP_BASED_GIDS; + IB_PORT_VENDOR_CLASS_SUP; + port_attr->ip_gids = true; port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW; port_attr->bad_pkey_cntr = 0; @@ -364,8 +365,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) return rc; } -int bnxt_re_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) +int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context) { int rc; u32 tbl_idx = 0; @@ -377,7 +377,7 @@ int bnxt_re_add_gid(const union ib_gid *gid, if ((attr->ndev) && is_vlan_dev(attr->ndev)) vlan_id = vlan_dev_vlan_id(attr->ndev); - rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid, + rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid, rdev->qplib_res.netdev->dev_addr, vlan_id, true, &tbl_idx); if (rc == -EALREADY) { @@ -673,8 +673,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, int rc; u8 nw_type; - struct ib_gid_attr sgid_attr; - if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); return ERR_PTR(-EINVAL); @@ -705,20 +703,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, grh->dgid.raw) && !rdma_link_local_addr((struct in6_addr *) grh->dgid.raw)) { - union ib_gid sgid; + const struct ib_gid_attr *sgid_attr; - rc = ib_get_cached_gid(&rdev->ibdev, 1, - grh->sgid_index, &sgid, - &sgid_attr); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to query gid at index %d", - grh->sgid_index); - goto fail; - } - dev_put(sgid_attr.ndev); + sgid_attr = grh->sgid_attr; /* Get network header type for this GID */ - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4; @@ -1408,7 +1397,7 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, } if (srq_init_attr->srq_type != IB_SRQT_BASIC) { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto exit; } @@ -1530,8 +1519,8 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) return 0; } -int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); @@ -1599,9 +1588,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; enum ib_qp_state curr_qp_state, new_qp_state; int rc, entries; - int status; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; unsigned int flags; u8 nw_type; @@ -1668,6 +1654,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, if (qp_attr_mask & IB_QP_AV) { const struct ib_global_route *grh = rdma_ah_read_grh(&qp_attr->ah_attr); + const struct ib_gid_attr *sgid_attr; qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID | CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL | @@ -1691,29 +1678,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, ether_addr_copy(qp->qplib_qp.ah.dmac, qp_attr->ah_attr.roce.dmac); - status = ib_get_cached_gid(&rdev->ibdev, 1, - grh->sgid_index, - &sgid, &sgid_attr); - if (!status) { - memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr, - ETH_ALEN); - dev_put(sgid_attr.ndev); - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, - &sgid); - switch (nw_type) { - case RDMA_NETWORK_IPV4: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; - break; - case RDMA_NETWORK_IPV6: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; - break; - default: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; - break; - } + sgid_attr = qp_attr->ah_attr.grh.sgid_attr; + memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr, + ETH_ALEN); + nw_type = rdma_gid_attr_network_type(sgid_attr); + switch (nw_type) { + case RDMA_NETWORK_IPV4: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; + break; + case RDMA_NETWORK_IPV6: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; + break; + default: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; + break; } } @@ -1895,19 +1876,17 @@ out: /* Routine for sending QP1 packets for RoCE V1 an V2 */ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe, int payload_size) { - struct ib_device *ibdev = &qp->rdev->ibdev; struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, ib_ah); struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah; + const struct ib_gid_attr *sgid_attr = ah->ib_ah.sgid_attr; struct bnxt_qplib_sge sge; - union ib_gid sgid; u8 nw_type; u16 ether_type; - struct ib_gid_attr sgid_attr; union ib_gid dgid; bool is_eth = false; bool is_vlan = false; @@ -1920,22 +1899,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); - rc = ib_get_cached_gid(ibdev, 1, - qplib_ah->host_sgid_index, &sgid, - &sgid_attr); - if (rc) { - dev_err(rdev_to_dev(qp->rdev), - "Failed to query gid at index %d", - qplib_ah->host_sgid_index); - return rc; - } - if (sgid_attr.ndev) { - if (is_vlan_dev(sgid_attr.ndev)) - vlan_id = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); - } + if (is_vlan_dev(sgid_attr->ndev)) + vlan_id = vlan_dev_vlan_id(sgid_attr->ndev); /* Get network header type for this GID */ - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: nw_type = BNXT_RE_ROCEV2_IPV4_PACKET; @@ -1948,9 +1915,9 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, break; } memcpy(&dgid.raw, &qplib_ah->dgid, 16); - is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; + is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { - if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) { ip_version = 4; ether_type = ETH_P_IP; } else { @@ -1983,9 +1950,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, } if (is_grh || (ip_version == 6)) { - memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid)); + memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid_attr->gid.raw, + sizeof(sgid_attr->gid)); memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data, - sizeof(sgid)); + sizeof(sgid_attr->gid)); qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit; } @@ -1995,7 +1963,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, qp->qp1_hdr.ip4.frag_off = htons(IP_DF); qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit; - memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4); + memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4); memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4); qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr); } @@ -2080,7 +2048,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, * and the MAD datagram out to the provided SGE. */ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp, - struct ib_recv_wr *wr, + const struct ib_recv_wr *wr, struct bnxt_qplib_swqe *wqe, int payload_size) { @@ -2125,7 +2093,7 @@ static int is_ud_qp(struct bnxt_re_qp *qp) } static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_ah *ah = NULL; @@ -2163,7 +2131,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, return 0; } -static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr, +static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->opcode) { @@ -2195,7 +2163,7 @@ static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr, +static int bnxt_re_build_atomic_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->opcode) { @@ -2222,7 +2190,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, +static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; @@ -2241,7 +2209,7 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, +static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr); @@ -2283,7 +2251,7 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, } static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { /* Copy the inline data to the data field */ @@ -2313,7 +2281,7 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev, } static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { int payload_sz = 0; @@ -2345,7 +2313,7 @@ static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp) static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct bnxt_qplib_swqe wqe; int rc = 0, payload_sz = 0; @@ -2393,8 +2361,8 @@ bad: return rc; } -int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_qplib_swqe wqe; @@ -2441,7 +2409,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr, default: break; } - /* Fall thru to build the wqe */ + /* fall through */ case IB_WR_SEND_WITH_INV: rc = bnxt_re_build_send_wqe(qp, wr, &wqe); break; @@ -2493,7 +2461,7 @@ bad: static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { struct bnxt_qplib_swqe wqe; int rc = 0; @@ -2526,8 +2494,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, return rc; } -int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_qplib_swqe wqe; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 5c6414cad4af..aa33e7b82c84 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -158,8 +158,7 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str); int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey); int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context); -int bnxt_re_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context); +int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context); int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, @@ -182,8 +181,8 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int bnxt_re_destroy_srq(struct ib_srq *srq); -int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); +int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -192,10 +191,10 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int bnxt_re_destroy_qp(struct ib_qp *qp); -int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); -int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); +int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr); +int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 50d8f1fc98d5..e426b990c1dd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2354,7 +2354,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, srq = qp->srq; if (!srq) return -EINVAL; - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); dev_err(&cq->hwq.pdev->dev, @@ -2369,7 +2369,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); dev_err(&cq->hwq.pdev->dev, @@ -2437,7 +2437,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, if (!srq) return -EINVAL; - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); dev_err(&cq->hwq.pdev->dev, @@ -2452,7 +2452,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); dev_err(&cq->hwq.pdev->dev, @@ -2546,7 +2546,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, "QPLIB: FP: SRQ used but not defined??"); return -EINVAL; } - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 "); dev_err(&cq->hwq.pdev->dev, @@ -2561,7 +2561,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); dev_err(&cq->hwq.pdev->dev, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 2f3f32eaa1d5..4097f3fa25c5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -197,7 +197,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, struct bnxt_qplib_gid *gid) { - if (index > sgid_tbl->max) { + if (index >= sgid_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded SGID table max (%d)", index, sgid_tbl->max); @@ -402,7 +402,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, *pkey = 0xFFFF; return 0; } - if (index > pkey_tbl->max) { + if (index >= pkey_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded PKEY table max (%d)", index, pkey_tbl->max); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index 0a8542c20804..a098c0140580 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -32,38 +32,16 @@ #include "iwch_provider.h" #include "iwch.h" -/* - * Get one cq entry from cxio and map it to openib. - * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) +static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct iwch_qp *qhp, struct ib_wc *wc) { - struct iwch_qp *qhp = NULL; - struct t3_cqe cqe, *rd_cqe; - struct t3_wq *wq; + struct t3_wq *wq = qhp ? &qhp->wq : NULL; + struct t3_cqe cqe; u32 credit = 0; u8 cqe_flushed; u64 cookie; int ret = 1; - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); if (t3a_device(chp->rhp) && credit) { @@ -79,7 +57,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, ret = 1; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(cqe); wc->wc_flags = 0; @@ -182,8 +160,38 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from cxio and map it to openib. + * + * Returns: + * 0 EMPTY; + * 1 cqe returned + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct ib_wc *wc) +{ + struct iwch_qp *qhp; + struct t3_cqe *rd_cqe; + int ret; + + rd_cqe = cxio_next_cqe(&chp->cq); + + if (!rd_cqe) + return 0; + + qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); spin_unlock(&qhp->lock); + } else { + ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); + } return ret; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index be097c6723c0..1b9ff21aa1d5 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -61,42 +61,6 @@ #include <rdma/cxgb3-abi.h> #include "common.h" -static struct ib_ah *iwch_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) -{ - return ERR_PTR(-ENOSYS); -} - -static int iwch_ah_destroy(struct ib_ah *ah) -{ - return -ENOSYS; -} - -static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int iwch_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, - struct ib_mad_hdr *out_mad, - size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - return -ENOSYS; -} - static int iwch_dealloc_ucontext(struct ib_ucontext *context) { struct iwch_dev *rhp = to_iwch_dev(context->device); @@ -1103,7 +1067,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->max_mr_size = dev->attr.max_mr_size; props->max_qp = dev->attr.max_qps; props->max_qp_wr = dev->attr.max_wrs; - props->max_sge = dev->attr.max_sge_per_wr; + props->max_send_sge = dev->attr.max_sge_per_wr; + props->max_recv_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; @@ -1398,8 +1363,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.mmap = iwch_mmap; dev->ibdev.alloc_pd = iwch_allocate_pd; dev->ibdev.dealloc_pd = iwch_deallocate_pd; - dev->ibdev.create_ah = iwch_ah_create; - dev->ibdev.destroy_ah = iwch_ah_destroy; dev->ibdev.create_qp = iwch_create_qp; dev->ibdev.modify_qp = iwch_ib_modify_qp; dev->ibdev.destroy_qp = iwch_destroy_qp; @@ -1414,9 +1377,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dealloc_mw = iwch_dealloc_mw; dev->ibdev.alloc_mr = iwch_alloc_mr; dev->ibdev.map_mr_sg = iwch_map_mr_sg; - dev->ibdev.attach_mcast = iwch_multicast_attach; - dev->ibdev.detach_mcast = iwch_multicast_detach; - dev->ibdev.process_mad = iwch_process_mad; dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 2e38ddefea8a..8adbe9658935 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -326,10 +326,10 @@ enum iwch_qp_query_flags { }; u16 iwch_rqes_posted(struct iwch_qp *qhp); -int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); int iwch_post_zb_read(struct iwch_ep *ep); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 3871e1fd8395..c649faad63f9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -39,8 +39,8 @@ #define NO_SUPPORT -1 -static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, - u8 * flit_cnt) +static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { int i; u32 plen; @@ -84,8 +84,8 @@ static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { int i; u32 plen; @@ -125,8 +125,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { if (wr->num_sge > 1) return -EINVAL; @@ -146,8 +146,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) { struct iwch_mr *mhp = to_iwch_mr(wr->mr); int i; @@ -189,8 +189,8 @@ static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, return 0; } -static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); wqe->local_inv.reserved = 0; @@ -246,7 +246,7 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, } static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { int i, err = 0; u32 pbl_addr[T3_MAX_SGE]; @@ -286,7 +286,7 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, } static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { int i; u32 pbl_addr; @@ -348,8 +348,8 @@ static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, return 0; } -int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; u8 uninitialized_var(t3_wr_flit_cnt); @@ -463,8 +463,8 @@ out: return err; } -int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct iwch_qp *qhp; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0912fa026327..0f83cbec33f3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -587,24 +587,29 @@ static int send_flowc(struct c4iw_ep *ep) { struct fw_flowc_wr *flowc; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int i; u16 vlan = ep->l2t->vlan; int nparams; + int flowclen, flowclen16; if (WARN_ON(!skb)) return -ENOMEM; if (vlan == CPL_L2T_VLAN_NONE) - nparams = 8; - else nparams = 9; + else + nparams = 10; - flowc = __skb_put(skb, FLOWC_LEN); + flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); + flowclen16 = DIV_ROUND_UP(flowclen, 16); + flowclen = flowclen16 * 16; + + flowc = __skb_put(skb, flowclen); + memset(flowc, 0, flowclen); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(nparams)); - flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN, - 16)) | FW_WR_FLOWID_V(ep->hwtid)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | + FW_WR_FLOWID_V(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V @@ -623,21 +628,13 @@ static int send_flowc(struct c4iw_ep *ep) flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); - if (nparams == 9) { + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE; + flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale); + if (nparams == 10) { u16 pri; - pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; - flowc->mnemval[8].val = cpu_to_be32(pri); - } else { - /* Pad WR to 16 byte boundary */ - flowc->mnemval[8].mnemonic = 0; - flowc->mnemval[8].val = 0; - } - for (i = 0; i < 9; i++) { - flowc->mnemval[i].r4[0] = 0; - flowc->mnemval[i].r4[1] = 0; - flowc->mnemval[i].r4[2] = 0; + flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[9].val = cpu_to_be32(pri); } set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); @@ -1176,6 +1173,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; struct cpl_act_establish *req = cplhdr(skb); + unsigned short tcp_opt = ntohs(req->tcp_opt); unsigned int tid = GET_TID(req); unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); struct tid_info *t = dev->rdev.lldi.tids; @@ -1196,8 +1194,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); - set_emss(ep, ntohs(req->tcp_opt)); + set_emss(ep, tcp_opt); /* dealloc the atid */ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); @@ -1853,10 +1852,33 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void complete_cached_srq_buffers(struct c4iw_ep *ep, + __be32 srqidx_status) +{ + enum chip_type adapter_type; + u32 srqidx; + + adapter_type = ep->com.dev->rdev.lldi.adapter_type; + srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status)); + + /* + * If this TCB had a srq buffer cached, then we must complete + * it. For user mode, that means saving the srqidx in the + * user/kernel status page for this qp. For kernel mode, just + * synthesize the CQE now. + */ + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { + if (ep->com.qp->ibqp.uobject) + t4_set_wq_in_error(&ep->com.qp->wq, srqidx); + else + c4iw_flush_srqidx(ep->com.qp, srqidx); + } +} + static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; - struct cpl_abort_rpl_rss *rpl = cplhdr(skb); + struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb); int release = 0; unsigned int tid = GET_TID(rpl); @@ -1865,6 +1887,9 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) pr_warn("Abort rpl to freed endpoint\n"); return 0; } + + complete_cached_srq_buffers(ep, rpl->srqidx_status); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); mutex_lock(&ep->com.mutex); switch (ep->com.state) { @@ -2603,16 +2628,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_pass_establish *req = cplhdr(skb); unsigned int tid = GET_TID(req); int ret; + u16 tcp_opt = ntohs(req->tcp_opt); ep = get_ep_from_tid(dev, tid); pr_debug("ep %p tid %u\n", ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); - pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, - ntohs(req->tcp_opt)); + pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt); - set_emss(ep, ntohs(req->tcp_opt)); + set_emss(ep, tcp_opt); dst_confirm(ep->dst); mutex_lock(&ep->com.mutex); @@ -2719,28 +2745,35 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { - struct cpl_abort_req_rss *req = cplhdr(skb); + struct cpl_abort_req_rss6 *req = cplhdr(skb); struct c4iw_ep *ep; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u8 status; + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - if (cxgb_is_neg_adv(req->status)) { + status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status)); + + if (cxgb_is_neg_adv(status)) { pr_debug("Negative advice on abort- tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + ep->hwtid, status, neg_adv_str(status)); ep->stats.abort_neg_adv++; mutex_lock(&dev->rdev.stats.lock); dev->rdev.stats.neg_adv++; mutex_unlock(&dev->rdev.stats.lock); goto deref_ep; } + + complete_cached_srq_buffers(ep, req->srqidx_status); + pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); set_bit(PEER_ABORT, &ep->com.history); @@ -3444,9 +3477,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } insert_handle(dev, &dev->stid_idr, ep, ep->stid); - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) err = create_server4(dev, ep); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 2be2e1ac1b5f..6d3042794094 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, int user = (uctx != &rdev->uctx); int ret; struct sk_buff *skb; + struct c4iw_ucontext *ucontext = NULL; + + if (user) + ucontext = container_of(uctx, struct c4iw_ucontext, uctx); cq->cqid = c4iw_get_cqid(rdev, uctx); if (!cq->cqid) { @@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, dma_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, cq->memsize); + if (user && ucontext->is_32b_cqe) { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + (sizeof(*cq->queue) / 2)))->qp_err; + } else { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + sizeof(*cq->queue)))->qp_err; + } + /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + sizeof *res; @@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_IQPCIECH_V(2) | FW_RI_RES_WR_IQINTCNTTHRESH_V(0) | FW_RI_RES_WR_IQO_F | - FW_RI_RES_WR_IQESIZE_V(1)); + ((user && ucontext->is_32b_cqe) ? + FW_RI_RES_WR_IQESIZE_V(1) : + FW_RI_RES_WR_IQESIZE_V(2))); res->u.cq.iqsize = cpu_to_be16(cq->size); res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); @@ -166,7 +182,7 @@ err1: return ret; } -static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) +static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx) { struct t4_cqe cqe; @@ -179,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) CQE_SWCQE_V(1) | CQE_QPID_V(wq->sq.qid)); cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + if (srqidx) + cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx); cq->sw_queue[cq->sw_pidx] = cqe; t4_swcq_produce(cq); } @@ -191,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) pr_debug("wq %p cq %p rq.in_use %u skip count %u\n", wq, cq, wq->rq.in_use, count); while (in_use--) { - insert_recv_cqe(wq, cq); + insert_recv_cqe(wq, cq, 0); flushed++; } return flushed; @@ -442,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) pr_debug("cq %p count %d\n", cq, *count); } +static void post_pending_srq_wrs(struct t4_srq *srq) +{ + struct t4_srq_pending_wr *pwr; + u16 idx = 0; + + while (srq->pending_in_use) { + pwr = &srq->pending_wrs[srq->pending_cidx]; + srq->sw_rq[srq->pidx].wr_id = pwr->wr_id; + srq->sw_rq[srq->pidx].valid = 1; + + pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, + srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + (unsigned long long)pwr->wr_id); + + c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16); + t4_srq_consume_pending_wr(srq); + t4_srq_produce(srq, pwr->len16); + idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE); + } + + if (idx) { + t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe); + srq->queue[srq->size].status.host_wq_pidx = + srq->wq_pidx; + } +} + +static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq) +{ + int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx; + u64 wr_id; + + srq->sw_rq[rel_idx].valid = 0; + wr_id = srq->sw_rq[rel_idx].wr_id; + + if (rel_idx == srq->cidx) { + pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_consume(srq); + while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) { + pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, + srq->size, srq->ooo_count, + (unsigned long long) + srq->sw_rq[srq->cidx].wr_id); + t4_srq_consume_ooo(srq); + } + if (srq->ooo_count == 0 && srq->pending_in_use) + post_pending_srq_wrs(srq); + } else { + pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, + srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + srq->ooo_count, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_produce_ooo(srq); + } + return wr_id; +} + /* * poll_cq * @@ -459,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) * -EOVERFLOW CQ overflow detected. */ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) + u8 *cqe_flushed, u64 *cookie, u32 *credit, + struct t4_srq *srq) { int ret = 0; struct t4_cqe *hw_cqe, read_cqe; @@ -524,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (CQE_TYPE(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); ret = -EAGAIN; goto skip_cqe; } @@ -535,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (CQE_WRID_STAG(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); ret = -EAGAIN; goto skip_cqe; } @@ -560,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); } /* @@ -574,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, * then we complete this with T4_ERR_MSN and mark the wq in * error. */ - - if (t4_rq_empty(wq)) { - t4_set_wq_in_error(wq); - ret = -EAGAIN; - goto skip_cqe; - } if (unlikely(!CQE_STATUS(hw_cqe) && CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; @@ -641,11 +720,16 @@ proc_cqe: c4iw_log_wr_stats(wq, hw_cqe); t4_sq_consume(wq); } else { - pr_debug("completing rq idx %u\n", wq->rq.cidx); - *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; - if (c4iw_wr_log) - c4iw_log_wr_stats(wq, hw_cqe); - t4_rq_consume(wq); + if (!srq) { + pr_debug("completing rq idx %u\n", wq->rq.cidx); + *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; + if (c4iw_wr_log) + c4iw_log_wr_stats(wq, hw_cqe); + t4_rq_consume(wq); + } else { + *cookie = reap_srq_cqe(hw_cqe, srq); + } + wq->rq.msn++; goto skip_cqe; } @@ -668,46 +752,33 @@ skip_cqe: return ret; } -/* - * Get one cq entry from c4iw and map it to openib. - * - * Returns: - * 0 cqe returned - * -ENODATA EMPTY; - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, + struct ib_wc *wc, struct c4iw_srq *srq) { - struct c4iw_qp *qhp = NULL; - struct t4_cqe uninitialized_var(cqe), *rd_cqe; - struct t4_wq *wq; + struct t4_cqe uninitialized_var(cqe); + struct t4_wq *wq = qhp ? &qhp->wq : NULL; u32 credit = 0; u8 cqe_flushed; u64 cookie = 0; int ret; - ret = t4_next_cqe(&chp->cq, &rd_cqe); - - if (ret) - return ret; - - qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } - ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit, + srq ? &srq->wq : NULL); if (ret) goto out; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(&cqe); wc->wc_flags = 0; + /* + * Simulate a SRQ_LIMIT_REACHED HW notification if required. + */ + if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed && + srq->wq.in_use < srq->srq_limit) + c4iw_dispatch_srq_limit_reached_event(srq); + pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n", CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe), @@ -720,15 +791,32 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) wc->byte_len = CQE_LEN(&cqe); else wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || - CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { + + switch (CQE_OPCODE(&cqe)) { + case FW_RI_SEND: + wc->opcode = IB_WC_RECV; + break; + case FW_RI_SEND_WITH_INV: + case FW_RI_SEND_WITH_SE_INV: + wc->opcode = IB_WC_RECV; wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); + break; + case FW_RI_WRITE_IMMEDIATE: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->ex.imm_data = CQE_IMM_DATA(&cqe); + wc->wc_flags |= IB_WC_WITH_IMM; + break; + default: + pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + goto out; } } else { switch (CQE_OPCODE(&cqe)) { + case FW_RI_WRITE_IMMEDIATE: case FW_RI_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; @@ -819,8 +907,43 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from c4iw and map it to openib. + * + * Returns: + * 0 cqe returned + * -ENODATA EMPTY; + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +{ + struct c4iw_srq *srq = NULL; + struct c4iw_qp *qhp = NULL; + struct t4_cqe *rd_cqe; + int ret; + + ret = t4_next_cqe(&chp->cq, &rd_cqe); + + if (ret) + return ret; + + qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + srq = qhp->srq; + if (srq) + spin_lock(&srq->lock); + ret = __c4iw_poll_cq_one(chp, qhp, wc, srq); spin_unlock(&qhp->lock); + if (srq) + spin_unlock(&srq->lock); + } else { + ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL); + } return ret; } @@ -876,6 +999,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; + struct c4iw_create_cq ucmd; struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; int ret, wr_len; @@ -891,9 +1015,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (vector >= rhp->rdev.lldi.nciq) return ERR_PTR(-EINVAL); + if (ib_context) { + ucontext = to_c4iw_ucontext(ib_context); + if (udata->inlen < sizeof(ucmd)) + ucontext->is_32b_cqe = 1; + } + chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); + chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!chp->wr_waitp) { ret = -ENOMEM; @@ -908,9 +1039,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, goto err_free_wr_wait; } - if (ib_context) - ucontext = to_c4iw_ucontext(ib_context); - /* account for the status page. */ entries++; @@ -934,13 +1062,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (hwentries < 64) hwentries = 64; - memsize = hwentries * sizeof *chp->cq.queue; + memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ? + (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue)); /* * memsize must be a multiple of the page size if its a user cq. */ if (ucontext) memsize = roundup(memsize, PAGE_SIZE); + chp->cq.size = hwentries; chp->cq.memsize = memsize; chp->cq.vector = vector; @@ -971,6 +1101,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (!mm2) goto err_free_mm; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; @@ -980,9 +1111,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ucontext->key += PAGE_SIZE; uresp.gts_key = ucontext->key; ucontext->key += PAGE_SIZE; + /* communicate to the userspace that + * kernel driver supports 64B CQE + */ + uresp.flags |= C4IW_64B_CQE; + spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, - sizeof(uresp) - sizeof(uresp.reserved)); + ucontext->is_32b_cqe ? + sizeof(uresp) - sizeof(uresp.flags) : + sizeof(uresp)); if (ret) goto err_free_mm2; @@ -1019,11 +1157,6 @@ err_free_chp: return ERR_PTR(ret); } -int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ - return -ENOSYS; -} - int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; @@ -1039,3 +1172,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) spin_unlock_irqrestore(&chp->lock, flag); return ret; } + +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx) +{ + struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + unsigned long flag; + + /* locking heirarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); + spin_lock(&qhp->lock); + + /* create a SRQ RECV CQE for srqidx */ + insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx); + + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); +} diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a3c3418afd73..c13c0ba30f63 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -275,10 +275,11 @@ static int dump_qp(int id, void *p, void *data) set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin); cc = snprintf(qpd->buf + qpd->pos, space, - "rc qp sq id %u rq id %u state %u " + "rc qp sq id %u %s id %u state %u " "onchip %u ep tid %u state %u " "%pI4:%u/%u->%pI4:%u/%u\n", - qp->wq.sq.qid, qp->wq.rq.qid, + qp->wq.sq.qid, qp->srq ? "srq" : "rq", + qp->srq ? qp->srq->idx : qp->wq.rq.qid, (int)qp->attr.state, qp->wq.sq.flags & T4_SQ_ONCHIP, ep->hwtid, (int)ep->com.state, @@ -480,6 +481,9 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); + seq_printf(seq, " SRQS: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur, + dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail); seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail); @@ -530,6 +534,8 @@ static ssize_t stats_clear(struct file *file, const char __user *buf, dev->rdev.stats.pbl.fail = 0; dev->rdev.stats.rqt.max = 0; dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; dev->rdev.stats.ocqp.max = 0; dev->rdev.stats.ocqp.fail = 0; dev->rdev.stats.db_full = 0; @@ -802,7 +808,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->qpmask = rdev->lldi.udb_density - 1; rdev->cqmask = rdev->lldi.ucq_density - 1; - pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n", + pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n", pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), rdev->lldi.vr->pbl.start, @@ -811,7 +817,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.start, rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, - rdev->lldi.vr->cq.size); + rdev->lldi.vr->cq.size, + rdev->lldi.vr->srq.size); pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n", &rdev->lldi.pdev->resource[2], rdev->lldi.db_reg, rdev->lldi.gts_reg, @@ -824,10 +831,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->stats.stag.total = rdev->lldi.vr->stag.size; rdev->stats.pbl.total = rdev->lldi.vr->pbl.size; rdev->stats.rqt.total = rdev->lldi.vr->rq.size; + rdev->stats.srqt.total = rdev->lldi.vr->srq.size; rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size; rdev->stats.qid.total = rdev->lldi.vr->qp.size; - err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); + err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), + T4_MAX_NUM_PD, rdev->lldi.vr->srq.size); if (err) { pr_err("error %d initializing resources\n", err); return err; @@ -857,6 +866,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->status_page->qp_size = rdev->lldi.vr->qp.size; rdev->status_page->cq_start = rdev->lldi.vr->cq.start; rdev->status_page->cq_size = rdev->lldi.vr->cq.size; + rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support; if (c4iw_wr_log) { rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order, diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 3e9d8b277ab9..8741d23168f3 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - pr_debug("%016llx %016llx %016llx %016llx\n", + pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n", be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]), - be64_to_cpu(p[3])); + be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]), + be64_to_cpu(p[6]), be64_to_cpu(p[7])); /* * Ingress WRITE and READ_RESP errors provide diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 870649ff049c..f0fceadd0d12 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -97,6 +97,7 @@ struct c4iw_resource { struct c4iw_id_table tpt_table; struct c4iw_id_table qid_table; struct c4iw_id_table pdid_table; + struct c4iw_id_table srq_table; }; struct c4iw_qid_list { @@ -130,6 +131,8 @@ struct c4iw_stats { struct c4iw_stat stag; struct c4iw_stat pbl; struct c4iw_stat rqt; + struct c4iw_stat srqt; + struct c4iw_stat srq; struct c4iw_stat ocqp; u64 db_full; u64 db_empty; @@ -549,6 +552,7 @@ struct c4iw_qp { struct kref kref; wait_queue_head_t wait; int sq_sig_all; + struct c4iw_srq *srq; struct work_struct free_work; struct c4iw_ucontext *ucontext; struct c4iw_wr_wait *wr_waitp; @@ -559,6 +563,26 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) return container_of(ibqp, struct c4iw_qp, ibqp); } +struct c4iw_srq { + struct ib_srq ibsrq; + struct list_head db_fc_entry; + struct c4iw_dev *rhp; + struct t4_srq wq; + struct sk_buff *destroy_skb; + u32 srq_limit; + u32 pdid; + int idx; + u32 flags; + spinlock_t lock; /* protects srq */ + struct c4iw_wr_wait *wr_waitp; + bool armed; +}; + +static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct c4iw_srq, ibsrq); +} + struct c4iw_ucontext { struct ib_ucontext ibucontext; struct c4iw_dev_ucontext uctx; @@ -566,6 +590,7 @@ struct c4iw_ucontext { spinlock_t mmap_lock; struct list_head mmaps; struct kref kref; + bool is_32b_cqe; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) @@ -885,7 +910,10 @@ enum conn_pre_alloc_buffers { CN_MAX_CON_BUF }; -#define FLOWC_LEN 80 +enum { + FLOWC_LEN = offsetof(struct fw_flowc_wr, mnemval[FW_FLOWC_MNEM_MAX]) +}; + union cpl_wr_size { struct cpl_abort_req abrt_req; struct cpl_abort_rpl abrt_rpl; @@ -952,6 +980,7 @@ struct c4iw_ep { unsigned int retry_count; int snd_win; int rcv_win; + u32 snd_wscale; struct c4iw_ep_stats stats; }; @@ -988,7 +1017,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, struct c4iw_dev_ucontext *uctx); u32 c4iw_get_resource(struct c4iw_id_table *id_table); void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); -int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt); int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_pblpool_create(struct c4iw_rdev *rdev); int c4iw_rqtpool_create(struct c4iw_rdev *rdev); @@ -1007,10 +1037,10 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); int c4iw_destroy_listen(struct iw_cm_id *cm_id); @@ -1037,8 +1067,14 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata); -int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq); +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *attrs, + struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp); struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, @@ -1075,12 +1111,19 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, enum cxgb4_bar2_qtype qtype, unsigned int *pbar2_qid, u64 *pbar2_pa); +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev); +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx); extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq); +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16); +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx); +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); typedef int c4iw_restrack_func(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 1feade8bb4b3..4eda6872e617 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -58,41 +58,6 @@ static int fastreg_support = 1; module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); -static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) - -{ - return ERR_PTR(-ENOSYS); -} - -static int c4iw_ah_destroy(struct ib_ah *ah) -{ - return -ENOSYS; -} - -static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, - struct ib_mad_hdr *out_mad, - size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - return -ENOSYS; -} - void _c4iw_free_ucontext(struct kref *kref) { struct c4iw_ucontext *ucontext; @@ -342,8 +307,12 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; props->max_mr_size = T4_MAX_MR_SIZE; props->max_qp = dev->rdev.lldi.vr->qp.size / 2; + props->max_srq = dev->rdev.lldi.vr->srq.size; props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; - props->max_sge = T4_MAX_RECV_SGE; + props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth; + props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE); + props->max_recv_sge = T4_MAX_RECV_SGE; + props->max_srq_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, @@ -592,7 +561,10 @@ void c4iw_register_device(struct work_struct *work) (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); @@ -608,15 +580,15 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.mmap = c4iw_mmap; dev->ibdev.alloc_pd = c4iw_allocate_pd; dev->ibdev.dealloc_pd = c4iw_deallocate_pd; - dev->ibdev.create_ah = c4iw_ah_create; - dev->ibdev.destroy_ah = c4iw_ah_destroy; dev->ibdev.create_qp = c4iw_create_qp; dev->ibdev.modify_qp = c4iw_ib_modify_qp; dev->ibdev.query_qp = c4iw_ib_query_qp; dev->ibdev.destroy_qp = c4iw_destroy_qp; + dev->ibdev.create_srq = c4iw_create_srq; + dev->ibdev.modify_srq = c4iw_modify_srq; + dev->ibdev.destroy_srq = c4iw_destroy_srq; dev->ibdev.create_cq = c4iw_create_cq; dev->ibdev.destroy_cq = c4iw_destroy_cq; - dev->ibdev.resize_cq = c4iw_resize_cq; dev->ibdev.poll_cq = c4iw_poll_cq; dev->ibdev.get_dma_mr = c4iw_get_dma_mr; dev->ibdev.reg_user_mr = c4iw_reg_user_mr; @@ -625,12 +597,10 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.dealloc_mw = c4iw_dealloc_mw; dev->ibdev.alloc_mr = c4iw_alloc_mr; dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.attach_mcast = c4iw_multicast_attach; - dev->ibdev.detach_mcast = c4iw_multicast_detach; - dev->ibdev.process_mad = c4iw_process_mad; dev->ibdev.req_notify_cq = c4iw_arm_cq; dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; + dev->ibdev.post_srq_recv = c4iw_post_srq_recv; dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index aef53305f1c3..b3203afa3b1d 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -147,21 +147,24 @@ static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user) } static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, int has_rq) { /* * uP clears EQ contexts when the connection exits rdma mode, * so no need to post a RESET WR for these EQs. */ - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, wq->rq.queue, - dma_unmap_addr(&wq->rq, mapping)); dealloc_sq(rdev, &wq->sq); - c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); - kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); - c4iw_put_qpid(rdev, wq->rq.qid, uctx); c4iw_put_qpid(rdev, wq->sq.qid, uctx); + + if (has_rq) { + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + kfree(wq->rq.sw_rq); + c4iw_put_qpid(rdev, wq->rq.qid, uctx); + } return 0; } @@ -195,7 +198,8 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx, - struct c4iw_wr_wait *wr_waitp) + struct c4iw_wr_wait *wr_waitp, + int need_rq) { int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; @@ -209,10 +213,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (!wq->sq.qid) return -ENOMEM; - wq->rq.qid = c4iw_get_qpid(rdev, uctx); - if (!wq->rq.qid) { - ret = -ENOMEM; - goto free_sq_qid; + if (need_rq) { + wq->rq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->rq.qid) { + ret = -ENOMEM; + goto free_sq_qid; + } } if (!user) { @@ -220,25 +226,31 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, GFP_KERNEL); if (!wq->sq.sw_sq) { ret = -ENOMEM; - goto free_rq_qid; + goto free_rq_qid;//FIXME } - wq->rq.sw_rq = kcalloc(wq->rq.size, sizeof(*wq->rq.sw_rq), - GFP_KERNEL); - if (!wq->rq.sw_rq) { - ret = -ENOMEM; - goto free_sw_sq; + if (need_rq) { + wq->rq.sw_rq = kcalloc(wq->rq.size, + sizeof(*wq->rq.sw_rq), + GFP_KERNEL); + if (!wq->rq.sw_rq) { + ret = -ENOMEM; + goto free_sw_sq; + } } } - /* - * RQT must be a power of 2 and at least 16 deep. - */ - wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); - wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); - if (!wq->rq.rqt_hwaddr) { - ret = -ENOMEM; - goto free_sw_rq; + if (need_rq) { + /* + * RQT must be a power of 2 and at least 16 deep. + */ + wq->rq.rqt_size = + roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); + wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); + if (!wq->rq.rqt_hwaddr) { + ret = -ENOMEM; + goto free_sw_rq; + } } ret = alloc_sq(rdev, &wq->sq, user); @@ -247,34 +259,39 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, memset(wq->sq.queue, 0, wq->sq.memsize); dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); - wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, &(wq->rq.dma_addr), - GFP_KERNEL); - if (!wq->rq.queue) { - ret = -ENOMEM; - goto free_sq; + if (need_rq) { + wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, + &wq->rq.dma_addr, + GFP_KERNEL); + if (!wq->rq.queue) { + ret = -ENOMEM; + goto free_sq; + } + pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", + wq->sq.queue, + (unsigned long long)virt_to_phys(wq->sq.queue), + wq->rq.queue, + (unsigned long long)virt_to_phys(wq->rq.queue)); + memset(wq->rq.queue, 0, wq->rq.memsize); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); } - pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", - wq->sq.queue, - (unsigned long long)virt_to_phys(wq->sq.queue), - wq->rq.queue, - (unsigned long long)virt_to_phys(wq->rq.queue)); - memset(wq->rq.queue, 0, wq->rq.memsize); - dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, &wq->sq.bar2_qid, user ? &wq->sq.bar2_pa : NULL); - wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, - &wq->rq.bar2_qid, - user ? &wq->rq.bar2_pa : NULL); + if (need_rq) + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, + T4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); /* * User mode must have bar2 access. */ - if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { + if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) { pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n", pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); goto free_dma; @@ -285,7 +302,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; - + if (need_rq) + wr_len += sizeof(*res); skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; @@ -296,7 +314,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res_wr = __skb_put_zero(skb, wr_len); res_wr->op_nres = cpu_to_be32( FW_WR_OP_V(FW_RI_RES_WR) | - FW_RI_RES_WR_NRES_V(2) | + FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (uintptr_t)wr_waitp; @@ -327,30 +345,36 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_EQSIZE_V(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); - res++; - res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; - res->u.sqrq.op = FW_RI_RES_OP_WRITE; - /* - * eqsize is the number of 64B entries plus the status page size. - */ - eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + - rdev->hw_queue.t4_eq_status_entries; - res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( - FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */ - FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ - FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ - FW_RI_RES_WR_IQID_V(rcq->cqid)); - res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( - FW_RI_RES_WR_DCAEN_V(0) | - FW_RI_RES_WR_DCACPU_V(0) | - FW_RI_RES_WR_FBMIN_V(2) | - FW_RI_RES_WR_FBMAX_V(3) | - FW_RI_RES_WR_CIDXFTHRESHO_V(0) | - FW_RI_RES_WR_CIDXFTHRESH_V(0) | - FW_RI_RES_WR_EQSIZE_V(eqsize)); - res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); - res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + if (need_rq) { + res++; + res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size + */ + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.sqrq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + /* don't keep in chip cache */ + FW_RI_RES_WR_CPRIO_V(0) | + /* set by uP at ri_init time */ + FW_RI_RES_WR_PCIECHN_V(0) | + FW_RI_RES_WR_IQID_V(rcq->cqid)); + res->u.sqrq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + } c4iw_init_wr_wait(wr_waitp); ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__); @@ -363,26 +387,30 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, return 0; free_dma: - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, wq->rq.queue, - dma_unmap_addr(&wq->rq, mapping)); + if (need_rq) + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); free_sq: dealloc_sq(rdev, &wq->sq); free_hwaddr: - c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + if (need_rq) + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); free_sw_rq: - kfree(wq->rq.sw_rq); + if (need_rq) + kfree(wq->rq.sw_rq); free_sw_sq: kfree(wq->sq.sw_sq); free_rq_qid: - c4iw_put_qpid(rdev, wq->rq.qid, uctx); + if (need_rq) + c4iw_put_qpid(rdev, wq->rq.qid, uctx); free_sq_qid: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return ret; } static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, - struct ib_send_wr *wr, int max, u32 *plenp) + const struct ib_send_wr *wr, int max, u32 *plenp) { u8 *dstp, *srcp; u32 plen = 0; @@ -427,7 +455,12 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, { int i; u32 plen = 0; - __be64 *flitp = (__be64 *)isglp->sge; + __be64 *flitp; + + if ((__be64 *)isglp == queue_end) + isglp = (struct fw_ri_isgl *)queue_start; + + flitp = (__be64 *)isglp->sge; for (i = 0; i < num_sge; i++) { if ((plen + sg_list[i].length) < plen) @@ -452,7 +485,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, } static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + const struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; @@ -519,7 +552,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, } static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + const struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; @@ -527,7 +560,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; - wqe->write.r2 = 0; + + /* + * iWARP protocol supports 64 bit immediate data but rdma api + * limits it to 32bit. + */ + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data; + else + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0; wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { @@ -561,7 +602,58 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp, + struct ib_send_wr *wr) +{ + memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16); + memset(immdp->r1, 0, 6); + immdp->op = FW_RI_DATA_IMMD; + immdp->immdlen = 16; +} + +static void build_rdma_write_cmpl(struct t4_sq *sq, + struct fw_ri_rdma_write_cmpl_wr *wcwr, + const struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + + /* + * This code assumes the struct fields preceding the write isgl + * fit in one 64B WR slot. This is because the WQE is built + * directly in the dma queue, and wrapping is only handled + * by the code buildling sgls. IE the "fixed part" of the wr + * structs must all fit in 64B. The WQE build code should probably be + * redesigned to avoid this restriction, but for now just add + * the BUILD_BUG_ON() to catch if this WQE struct gets too big. + */ + BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64); + + wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); + wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey); + wcwr->r2 = 0; + wcwr->r3 = 0; + + /* SEND_INV SGL */ + if (wr->next->send_flags & IB_SEND_INLINE) + build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next); + else + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL); + + /* WRITE SGL */ + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen); + + size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + wcwr->plen = cpu_to_be32(plen); + *len16 = DIV_ROUND_UP(size, 16); +} + +static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) { if (wr->num_sge > 1) return -EINVAL; @@ -590,8 +682,74 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) return 0; } +static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr) +{ + bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + struct t4_swsqe *swsqe; + union t4_wr *wqe; + u16 write_wrid; + u8 len16; + u16 idx; + + /* + * The sw_sq entries still look like a WRITE and a SEND and consume + * 2 slots. The FW WR, however, will be a single uber-WR. + */ + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16); + + /* WRITE swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_RDMA_WRITE; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = write_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + write_wrid = qhp->wq.sq.pidx; + + /* just bump the sw_sq */ + qhp->wq.sq.in_use++; + if (++qhp->wq.sq.pidx == qhp->wq.sq.size) + qhp->wq.sq.pidx = 0; + + /* SEND_WITH_INV swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_SEND_WITH_INV; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = send_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->next->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0; + wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx; + + init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR, + write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16); + t4_sq_produce(&qhp->wq, len16); + idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + + t4_ring_sq_db(&qhp->wq, idx, wqe); +} + static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, - struct ib_recv_wr *wr, u8 *len16) + const struct ib_recv_wr *wr, u8 *len16) { int ret; @@ -605,8 +763,22 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr, + u8 *len16) +{ + int ret; + + ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1), + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof(wqe->recv) + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, - struct ib_reg_wr *wr, struct c4iw_mr *mhp, + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16) { __be64 *p = (__be64 *)fr->pbl; @@ -638,8 +810,8 @@ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, } static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, - bool dsgl_supported) + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, + u8 *len16, bool dsgl_supported) { struct fw_ri_immd *imdp; __be64 *p; @@ -701,7 +873,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) { wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; @@ -721,7 +894,7 @@ static void free_qp_work(struct work_struct *work) pr_debug("qhp %p ucontext %p\n", qhp, ucontext); destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq); if (ucontext) c4iw_put_ucontext(ucontext); @@ -804,6 +977,9 @@ static int ib_to_fw_opcode(int ib_opcode) case IB_WR_RDMA_WRITE: opcode = FW_RI_RDMA_WRITE; break; + case IB_WR_RDMA_WRITE_WITH_IMM: + opcode = FW_RI_WRITE_IMMEDIATE; + break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: opcode = FW_RI_READ_REQ; @@ -820,7 +996,8 @@ static int ib_to_fw_opcode(int ib_opcode) return opcode; } -static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +static int complete_sq_drain_wr(struct c4iw_qp *qhp, + const struct ib_send_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *schp; @@ -858,8 +1035,9 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) return 0; } -static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int ret = 0; @@ -874,7 +1052,8 @@ static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, return ret; } -static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +static void complete_rq_drain_wr(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *rchp; @@ -906,7 +1085,8 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } -static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) { while (wr) { complete_rq_drain_wr(qhp, wr); @@ -914,14 +1094,15 @@ static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } -int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; u8 len16 = 0; enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; + struct c4iw_dev *rhp; union t4_wr *wqe = NULL; u32 num_wrs; struct t4_swsqe *swsqe; @@ -929,6 +1110,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u16 idx = 0; qhp = to_c4iw_qp(ibqp); + rhp = qhp->rhp; spin_lock_irqsave(&qhp->lock, flag); /* @@ -946,6 +1128,30 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, *bad_wr = wr; return -ENOMEM; } + + /* + * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is + * the response for small NVMEe-oF READ requests. If the chain is + * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths + * meet the requirements of the fw_ri_write_cmpl_wr work request, + * then build and post the write_cmpl WR. If any of the tests + * below are not true, then we continue on with the tradtional WRITE + * and SEND WRs. + */ + if (qhp->rhp->rdev.lldi.write_cmpl_support && + CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >= + CHELSIO_T5 && + wr && wr->next && !wr->next->next && + wr->opcode == IB_WR_RDMA_WRITE && + wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL && + wr->next->opcode == IB_WR_SEND_WITH_INV && + wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE && + wr->next->num_sge == 1 && num_wrs >= 2) { + post_write_cmpl(qhp, wr); + spin_unlock_irqrestore(&qhp->lock, flag); + return 0; + } + while (wr) { if (num_wrs == 0) { err = -ENOMEM; @@ -973,6 +1179,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->opcode = FW_RI_SEND_WITH_INV; err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); break; + case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) { + err = -EINVAL; + break; + } + fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE; + /*FALLTHROUGH*/ case IB_WR_RDMA_WRITE: fw_opcode = FW_RI_RDMA_WRITE_WR; swsqe->opcode = FW_RI_RDMA_WRITE; @@ -983,8 +1196,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { - c4iw_invalidate_mr(qhp->rhp, - wr->sg_list[0].lkey); + c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey); fw_flags = FW_RI_RDMA_READ_INVALIDATE; } else { fw_flags = 0; @@ -1000,7 +1212,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); swsqe->opcode = FW_RI_FAST_REGISTER; - if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && + if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support && !mhp->attr.state && mhp->mpl_len <= 2) { fw_opcode = FW_RI_FR_NSMR_TPTE_WR; build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), @@ -1009,7 +1221,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_FR_NSMR_WR; err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), mhp, &len16, - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + rhp->rdev.lldi.ulptx_memwrite_dsgl); if (err) break; } @@ -1022,7 +1234,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; err = build_inv_stag(wqe, wr, &len16); - c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); + c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey); break; default: pr_warn("%s post of type=%d TBD!\n", __func__, @@ -1041,7 +1253,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->wr_id = wr->wr_id; if (c4iw_wr_log) { swsqe->sge_ts = cxgb4_read_sge_timestamp( - qhp->rhp->rdev.lldi.ports[0]); + rhp->rdev.lldi.ports[0]); swsqe->host_time = ktime_get(); } @@ -1055,7 +1267,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - if (!qhp->rhp->rdev.status_page->db_off) { + if (!rhp->rdev.status_page->db_off) { t4_ring_sq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { @@ -1065,8 +1277,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return err; } -int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct c4iw_qp *qhp; @@ -1145,6 +1357,89 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, return err; } +static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, + u64 wr_id, u8 len16) +{ + struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx]; + + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n", + __func__, srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->ooo_count, + (unsigned long long)wr_id, srq->pending_cidx, + srq->pending_pidx, srq->pending_in_use); + pwr->wr_id = wr_id; + pwr->len16 = len16; + memcpy(&pwr->wqe, wqe, len16 * 16); + t4_srq_produce_pending_wr(srq); +} + +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + union t4_recv_wr *wqe, lwqe; + struct c4iw_srq *srq; + unsigned long flag; + u8 len16 = 0; + u16 idx = 0; + int err = 0; + u32 num_wrs; + + srq = to_c4iw_srq(ibsrq); + spin_lock_irqsave(&srq->lock, flag); + num_wrs = t4_srq_avail(&srq->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&srq->lock, flag); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = &lwqe; + if (num_wrs) + err = build_srq_recv(wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = srq->wq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + + if (srq->wq.ooo_count || + srq->wq.pending_in_use || + srq->wq.sw_rq[srq->wq.pidx].valid) { + defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16); + } else { + srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id; + srq->wq.sw_rq[srq->wq.pidx].valid = 1; + c4iw_copy_wr_to_srq(&srq->wq, wqe, len16); + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n", + __func__, srq->wq.cidx, + srq->wq.pidx, srq->wq.wq_pidx, + srq->wq.in_use, + (unsigned long long)wr->wr_id); + t4_srq_produce(&srq->wq, len16); + idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + } + wr = wr->next; + num_wrs--; + } + if (idx) + t4_ring_srq_db(&srq->wq, idx, len16, wqe); + spin_unlock_irqrestore(&srq->lock, flag); + return err; +} + static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, u8 *ecode) { @@ -1321,7 +1616,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; - int rq_flushed, sq_flushed; + int rq_flushed = 0, sq_flushed; unsigned long flag; pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); @@ -1340,11 +1635,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, return; } qhp->wq.flushed = 1; - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); c4iw_flush_hw_cq(rchp, qhp); - c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + if (!qhp->srq) { + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + } if (schp != rchp) c4iw_flush_hw_cq(schp, qhp); @@ -1388,7 +1685,7 @@ static void flush_qp(struct c4iw_qp *qhp) schp = to_c4iw_cq(qhp->ibqp.send_cq); if (qhp->ibqp.uobject) { - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); @@ -1517,16 +1814,21 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); - wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + if (qhp->srq) { + wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ | + qhp->srq->idx); + } else { + wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); + wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - + rhp->rdev.lldi.vr->rq.start); + } wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq); wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq); - wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); - wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - - rhp->rdev.lldi.vr->rq.start); if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); @@ -1643,7 +1945,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1656,7 +1958,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, goto err; break; case C4IW_QP_STATE_TERMINATE: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; @@ -1673,7 +1975,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } break; case C4IW_QP_STATE_ERROR: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_ERROR); if (!internal) { abort = 1; @@ -1819,7 +2121,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - unsigned int sqsize, rqsize; + unsigned int sqsize, rqsize = 0; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; @@ -1840,11 +2142,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); - if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) - return ERR_PTR(-E2BIG); - rqsize = attrs->cap.max_recv_wr + 1; - if (rqsize < 8) - rqsize = 8; + if (!attrs->srq) { + if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return ERR_PTR(-E2BIG); + rqsize = attrs->cap.max_recv_wr + 1; + if (rqsize < 8) + rqsize = 8; + } if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) return ERR_PTR(-E2BIG); @@ -1869,19 +2173,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); qhp->wq.sq.flush_cidx = -1; - qhp->wq.rq.size = rqsize; - qhp->wq.rq.memsize = - (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * - sizeof(*qhp->wq.rq.queue); + if (!attrs->srq) { + qhp->wq.rq.size = rqsize; + qhp->wq.rq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.rq.queue); + } if (ucontext) { qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); - qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); + if (!attrs->srq) + qhp->wq.rq.memsize = + roundup(qhp->wq.rq.memsize, PAGE_SIZE); } ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, - qhp->wr_waitp); + qhp->wr_waitp, !attrs->srq); if (ret) goto err_free_wr_wait; @@ -1894,10 +2202,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; qhp->attr.sq_max_sges = attrs->cap.max_send_sge; qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + if (!attrs->srq) { + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + } qhp->attr.state = C4IW_QP_STATE_IDLE; qhp->attr.next_state = C4IW_QP_STATE_IDLE; qhp->attr.enable_rdma_read = 1; @@ -1922,21 +2232,27 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = -ENOMEM; goto err_remove_handle; } - rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); - if (!rq_key_mm) { - ret = -ENOMEM; - goto err_free_sq_key; + if (!attrs->srq) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { + ret = -ENOMEM; + goto err_free_sq_key; + } } sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); if (!sq_db_key_mm) { ret = -ENOMEM; goto err_free_rq_key; } - rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); - if (!rq_db_key_mm) { - ret = -ENOMEM; - goto err_free_sq_db_key; + if (!attrs->srq) { + rq_db_key_mm = + kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { + ret = -ENOMEM; + goto err_free_sq_db_key; + } } + memset(&uresp, 0, sizeof(uresp)); if (t4_sq_onchip(&qhp->wq.sq)) { ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), GFP_KERNEL); @@ -1945,30 +2261,35 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, goto err_free_rq_db_key; } uresp.flags = C4IW_QPF_ONCHIP; - } else - uresp.flags = 0; + } + if (rhp->rdev.lldi.write_w_imm_support) + uresp.flags |= C4IW_QPF_WRITE_W_IMM; uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; uresp.sq_memsize = qhp->wq.sq.memsize; - uresp.rqid = qhp->wq.rq.qid; - uresp.rq_size = qhp->wq.rq.size; - uresp.rq_memsize = qhp->wq.rq.memsize; + if (!attrs->srq) { + uresp.rqid = qhp->wq.rq.qid; + uresp.rq_size = qhp->wq.rq.size; + uresp.rq_memsize = qhp->wq.rq.memsize; + } spin_lock(&ucontext->mmap_lock); if (ma_sync_key_mm) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; - } else { - uresp.ma_sync_key = 0; } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; - uresp.rq_key = ucontext->key; - ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } uresp.sq_db_gts_key = ucontext->key; ucontext->key += PAGE_SIZE; - uresp.rq_db_gts_key = ucontext->key; - ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) @@ -1977,18 +2298,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, sq_key_mm->addr = qhp->wq.sq.phys_addr; sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); insert_mmap(ucontext, sq_key_mm); - rq_key_mm->key = uresp.rq_key; - rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); - rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); - insert_mmap(ucontext, rq_key_mm); + if (!attrs->srq) { + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, rq_key_mm); + } sq_db_key_mm->key = uresp.sq_db_gts_key; sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; sq_db_key_mm->len = PAGE_SIZE; insert_mmap(ucontext, sq_db_key_mm); - rq_db_key_mm->key = uresp.rq_db_gts_key; - rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa; - rq_db_key_mm->len = PAGE_SIZE; - insert_mmap(ucontext, rq_db_key_mm); + if (!attrs->srq) { + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = + (u64)(unsigned long)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, rq_db_key_mm); + } if (ma_sync_key_mm) { ma_sync_key_mm->key = uresp.ma_sync_key; ma_sync_key_mm->addr = @@ -2001,7 +2327,19 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, c4iw_get_ucontext(ucontext); qhp->ucontext = ucontext; } + if (!attrs->srq) { + qhp->wq.qp_errp = + &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err; + } else { + qhp->wq.qp_errp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err; + qhp->wq.srqidxp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx; + } + qhp->ibqp.qp_num = qhp->wq.sq.qid; + if (attrs->srq) + qhp->srq = to_c4iw_srq(attrs->srq); INIT_LIST_HEAD(&qhp->db_fc_entry); pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, @@ -2011,18 +2349,20 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, err_free_ma_sync_key: kfree(ma_sync_key_mm); err_free_rq_db_key: - kfree(rq_db_key_mm); + if (!attrs->srq) + kfree(rq_db_key_mm); err_free_sq_db_key: kfree(sq_db_key_mm); err_free_rq_key: - kfree(rq_key_mm); + if (!attrs->srq) + kfree(rq_key_mm); err_free_sq_key: kfree(sq_key_mm); err_remove_handle: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err_destroy_qp: destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq); err_free_wr_wait: c4iw_put_wr_wait(qhp->wr_waitp); err_free_qhp: @@ -2088,6 +2428,45 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq) +{ + struct ib_event event = {}; + + event.device = &srq->rhp->ibdev; + event.element.srq = &srq->ibsrq; + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + ib_dispatch_event(&event); +} + +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct c4iw_srq *srq = to_c4iw_srq(ib_srq); + int ret = 0; + + /* + * XXX 0 mask == a SW interrupt for srq_limit reached... + */ + if (udata && !srq_attr_mask) { + c4iw_dispatch_srq_limit_reached_event(srq); + goto out; + } + + /* no support for this yet */ + if (srq_attr_mask & IB_SRQ_MAX_WR) { + ret = -EINVAL; + goto out; + } + + if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) { + srq->armed = true; + srq->srq_limit = attr->srq_limit; + } +out: + return ret; +} + int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { @@ -2104,3 +2483,359 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; } + +static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + struct sk_buff *skb = srq->destroy_skb; + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + + wr_len = sizeof(*res_wr) + sizeof(*res); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_RESET; + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.eqid = cpu_to_be32(wq->qid); + + c4iw_init_wr_wait(wr_waitp); + c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + pci_unmap_addr(wq, mapping)); + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); + kfree(wq->sw_rq); + c4iw_put_qpid(rdev, wq->qid, uctx); +} + +static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + int user = (uctx != &rdev->uctx); + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + struct sk_buff *skb; + int wr_len; + int eqsize; + int ret = -ENOMEM; + + wq->qid = c4iw_get_qpid(rdev, uctx); + if (!wq->qid) + goto err; + + if (!user) { + wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq), + GFP_KERNEL); + if (!wq->sw_rq) + goto err_put_qpid; + wq->pending_wrs = kcalloc(srq->wq.size, + sizeof(*srq->wq.pending_wrs), + GFP_KERNEL); + if (!wq->pending_wrs) + goto err_free_sw_rq; + } + + wq->rqt_size = wq->size; + wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size); + if (!wq->rqt_hwaddr) + goto err_free_pending_wrs; + wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> + T4_RQT_ENTRY_SHIFT; + + wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->memsize, &wq->dma_addr, + GFP_KERNEL); + if (!wq->queue) + goto err_free_rqtpool; + + memset(wq->queue, 0, wq->memsize); + pci_unmap_addr_set(wq, mapping, wq->dma_addr); + + wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS, + &wq->bar2_qid, + user ? &wq->bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + + if (user && !wq->bar2_va) { + pr_warn(MOD "%s: srqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->qid); + ret = -EINVAL; + goto err_free_queue; + } + + /* build fw_ri_res_wr */ + wr_len = sizeof(*res_wr) + sizeof(*res); + + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + goto err_free_queue; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. + */ + eqsize = wq->size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.srq.eqid = cpu_to_be32(wq->qid); + res->u.srq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ + FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ + FW_RI_RES_WR_FETCHRO_V(0)); /* relaxed_ordering */ + res->u.srq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr); + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.pdid = cpu_to_be32(srq->pdid); + res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size); + res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr - + rdev->lldi.vr->rq.start); + + c4iw_init_wr_wait(wr_waitp); + + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__); + if (ret) + goto err_free_queue; + + pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n" + " bar2_addr %p rqt addr 0x%x size %d\n", + __func__, srq->idx, wq->qid, srq->pdid, wq->queue, + (u64)virt_to_phys(wq->queue), wq->bar2_va, + wq->rqt_hwaddr, wq->rqt_size); + + return 0; +err_free_queue: + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + pci_unmap_addr(wq, mapping)); +err_free_rqtpool: + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); +err_free_pending_wrs: + if (!user) + kfree(wq->pending_wrs); +err_free_sw_rq: + if (!user) + kfree(wq->sw_rq); +err_put_qpid: + c4iw_put_qpid(rdev, wq->qid, uctx); +err: + return ret; +} + +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) +{ + u64 *src, *dst; + + src = (u64 *)wqe; + dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE); + while (len16) { + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + len16--; + } +} + +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, + struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_pd *php; + struct c4iw_create_srq_resp uresp; + struct c4iw_ucontext *ucontext; + struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm; + int rqsize; + int ret; + int wr_len; + + pr_debug("%s ib_pd %p\n", __func__, pd); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + + if (!rhp->rdev.lldi.vr->srq.size) + return ERR_PTR(-EINVAL); + if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return ERR_PTR(-E2BIG); + if (attrs->attr.max_sge > T4_MAX_RECV_SGE) + return ERR_PTR(-E2BIG); + + /* + * SRQ RQT and RQ must be a power of 2 and at least 16 deep. + */ + rqsize = attrs->attr.max_wr + 1; + rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); + + ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!srq->wr_waitp) { + ret = -ENOMEM; + goto err_free_srq; + } + + srq->idx = c4iw_alloc_srq_idx(&rhp->rdev); + if (srq->idx < 0) { + ret = -ENOMEM; + goto err_free_wr_wait; + } + + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!srq->destroy_skb) { + ret = -ENOMEM; + goto err_free_srq_idx; + } + + srq->rhp = rhp; + srq->pdid = php->pdid; + + srq->wq.size = rqsize; + srq->wq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*srq->wq.queue); + if (ucontext) + srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE); + + ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx : + &rhp->rdev.uctx, srq->wr_waitp); + if (ret) + goto err_free_skb; + attrs->attr.max_wr = rqsize - 1; + + if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) + srq->flags = T4_SRQ_LIMIT_SUPPORT; + + ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid); + if (ret) + goto err_free_queue; + + if (udata) { + srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); + if (!srq_key_mm) { + ret = -ENOMEM; + goto err_remove_handle; + } + srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); + if (!srq_db_key_mm) { + ret = -ENOMEM; + goto err_free_srq_key_mm; + } + memset(&uresp, 0, sizeof(uresp)); + uresp.flags = srq->flags; + uresp.qid_mask = rhp->rdev.qpmask; + uresp.srqid = srq->wq.qid; + uresp.srq_size = srq->wq.size; + uresp.srq_memsize = srq->wq.memsize; + uresp.rqt_abs_idx = srq->wq.rqt_abs_idx; + spin_lock(&ucontext->mmap_lock); + uresp.srq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.srq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_free_srq_db_key_mm; + srq_key_mm->key = uresp.srq_key; + srq_key_mm->addr = virt_to_phys(srq->wq.queue); + srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize); + insert_mmap(ucontext, srq_key_mm); + srq_db_key_mm->key = uresp.srq_db_gts_key; + srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa; + srq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, srq_db_key_mm); + } + + pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n", + __func__, srq->wq.qid, srq->idx, srq->wq.size, + (unsigned long)srq->wq.memsize, attrs->attr.max_wr); + + spin_lock_init(&srq->lock); + return &srq->ibsrq; +err_free_srq_db_key_mm: + kfree(srq_db_key_mm); +err_free_srq_key_mm: + kfree(srq_key_mm); +err_remove_handle: + remove_handle(rhp, &rhp->qpidr, srq->wq.qid); +err_free_queue: + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); +err_free_skb: + if (srq->destroy_skb) + kfree_skb(srq->destroy_skb); +err_free_srq_idx: + c4iw_free_srq_idx(&rhp->rdev, srq->idx); +err_free_wr_wait: + c4iw_put_wr_wait(srq->wr_waitp); +err_free_srq: + kfree(srq); + return ERR_PTR(ret); +} + +int c4iw_destroy_srq(struct ib_srq *ibsrq) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_ucontext *ucontext; + + srq = to_c4iw_srq(ibsrq); + rhp = srq->rhp; + + pr_debug("%s id %d\n", __func__, srq->wq.qid); + + remove_handle(rhp, &rhp->qpidr, srq->wq.qid); + ucontext = ibsrq->uobject ? + to_c4iw_ucontext(ibsrq->uobject->context) : NULL; + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); + c4iw_free_srq_idx(&rhp->rdev, srq->idx); + c4iw_put_wr_wait(srq->wr_waitp); + kfree(srq); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 0ef25ae05e6f..57ed26b3cc21 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev) } /* nr_* must be power of 2 */ -int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt) { int err = 0; err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, @@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) nr_pdid, 1, 0); if (err) goto pdid_err; + if (!nr_srqt) + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + 1, 1, 0); + else + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + nr_srqt, 0, 0); + if (err) + goto srq_err; return 0; + srq_err: + c4iw_id_table_free(&rdev->resource.pdid_table); pdid_err: c4iw_id_table_free(&rdev->resource.qid_table); qid_err: @@ -371,13 +382,21 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) int c4iw_rqtpool_create(struct c4iw_rdev *rdev) { unsigned rqt_start, rqt_chunk, rqt_top; + int skip = 0; rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); if (!rdev->rqt_pool) return -ENOMEM; - rqt_start = rdev->lldi.vr->rq.start; - rqt_chunk = rdev->lldi.vr->rq.size; + /* + * If SRQs are supported, then never use the first RQE from + * the RQT region. This is because HW uses RQT index 0 as NULL. + */ + if (rdev->lldi.vr->srq.size) + skip = T4_RQT_ENTRY_SIZE; + + rqt_start = rdev->lldi.vr->rq.start + skip; + rqt_chunk = rdev->lldi.vr->rq.size - skip; rqt_top = rqt_start + rqt_chunk; while (rqt_start < rqt_top) { @@ -405,6 +424,32 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) kref_put(&rdev->rqt_kref, destroy_rqtpool); } +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev) +{ + int idx; + + idx = c4iw_id_alloc(&rdev->resource.srq_table); + mutex_lock(&rdev->stats.lock); + if (idx == -1) { + rdev->stats.srqt.fail++; + mutex_unlock(&rdev->stats.lock); + return -ENOMEM; + } + rdev->stats.srqt.cur++; + if (rdev->stats.srqt.cur > rdev->stats.srqt.max) + rdev->stats.srqt.max = rdev->stats.srqt.cur; + mutex_unlock(&rdev->stats.lock); + return idx; +} + +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx) +{ + c4iw_id_free(&rdev->resource.srq_table, idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.srqt.cur--; + mutex_unlock(&rdev->stats.lock); +} + /* * On-Chip QP Memory. */ diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 8369c7c8de83..e42021fd6fd6 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -52,12 +52,16 @@ struct t4_status_page { __be16 pidx; u8 qp_err; /* flit 1 - sw owns */ u8 db_off; - u8 pad; + u8 pad[2]; u16 host_wq_pidx; u16 host_cidx; u16 host_pidx; + u16 pad2; + u32 srqidx; }; +#define T4_RQT_ENTRY_SHIFT 6 +#define T4_RQT_ENTRY_SIZE BIT(T4_RQT_ENTRY_SHIFT) #define T4_EQ_ENTRY_SIZE 64 #define T4_SQ_NUM_SLOTS 5 @@ -87,6 +91,9 @@ static inline int t4_max_fr_depth(int use_dsgl) #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) #define T4_MAX_RECV_SGE 4 +#define T4_WRITE_CMPL_MAX_SGL 4 +#define T4_WRITE_CMPL_MAX_CQE 16 + union t4_wr { struct fw_ri_res_wr res; struct fw_ri_wr ri; @@ -97,6 +104,7 @@ union t4_wr { struct fw_ri_fr_nsmr_wr fr; struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; + struct fw_ri_rdma_write_cmpl_wr write_cmpl; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; }; @@ -179,9 +187,32 @@ struct t4_cqe { __be32 wrid_hi; __be32 wrid_low; } gen; + struct { + __be32 stag; + __be32 msn; + __be32 reserved; + __be32 abs_rqe_idx; + } srcqe; + struct { + __be32 mo; + __be32 msn; + /* + * Use union for immediate data to be consistent with + * stack's 32 bit data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; + } imm_data_rcqe; + u64 drain_cookie; + __be64 flits[3]; } u; - __be64 reserved; + __be64 reserved[3]; __be64 bits_type_ts; }; @@ -237,6 +268,9 @@ struct t4_cqe { /* used for RQ completion processing */ #define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) #define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) +#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx)) +#define CQE_IMM_DATA(x)( \ + (x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32) /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) @@ -320,6 +354,7 @@ struct t4_swrqe { u64 wr_id; ktime_t host_time; u64 sge_ts; + int valid; }; struct t4_rq { @@ -349,8 +384,98 @@ struct t4_wq { void __iomem *db; struct c4iw_rdev *rdev; int flushed; + u8 *qp_errp; + u32 *srqidxp; +}; + +struct t4_srq_pending_wr { + u64 wr_id; + union t4_recv_wr wqe; + u8 len16; +}; + +struct t4_srq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DECLARE_PCI_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + void __iomem *bar2_va; + u64 bar2_pa; + size_t memsize; + u32 bar2_qid; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u32 rqt_abs_idx; + u16 rqt_size; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; + u16 in_use; + struct t4_srq_pending_wr *pending_wrs; + u16 pending_cidx; + u16 pending_pidx; + u16 pending_in_use; + u16 ooo_count; }; +static inline u32 t4_srq_avail(struct t4_srq *srq) +{ + return srq->size - 1 - srq->in_use; +} + +static inline void t4_srq_produce(struct t4_srq *srq, u8 len16) +{ + srq->in_use++; + if (++srq->pidx == srq->size) + srq->pidx = 0; + srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS) + srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS; + srq->queue[srq->size].status.host_pidx = srq->pidx; +} + +static inline void t4_srq_produce_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use++; + srq->in_use++; + if (++srq->pending_pidx == srq->size) + srq->pending_pidx = 0; +} + +static inline void t4_srq_consume_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use--; + srq->in_use--; + if (++srq->pending_cidx == srq->size) + srq->pending_cidx = 0; +} + +static inline void t4_srq_produce_ooo(struct t4_srq *srq) +{ + srq->in_use--; + srq->ooo_count++; +} + +static inline void t4_srq_consume_ooo(struct t4_srq *srq) +{ + srq->cidx++; + if (srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; + srq->ooo_count--; +} + +static inline void t4_srq_consume(struct t4_srq *srq) +{ + srq->in_use--; + if (++srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; +} + static inline int t4_rqes_posted(struct t4_wq *wq) { return wq->rq.in_use; @@ -384,7 +509,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) static inline void t4_rq_consume(struct t4_wq *wq) { wq->rq.in_use--; - wq->rq.msn++; if (++wq->rq.cidx == wq->rq.size) wq->rq.cidx = 0; } @@ -464,6 +588,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src) } } +static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16, + union t4_recv_wr *wqe) +{ + /* Flush host queue memory writes. */ + wmb(); + if (inc == 1 && srq->bar2_qid == 0 && wqe) { + pr_debug("%s : WC srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe); + } else { + pr_debug("%s: DB srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid), + srq->bar2_va + SGE_UDB_KDOORBELL); + } + /* Flush user doorbell area writes. */ + wmb(); +} + static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) { @@ -515,12 +658,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, static inline int t4_wq_in_error(struct t4_wq *wq) { - return wq->rq.queue[wq->rq.size].status.qp_err; + return *wq->qp_errp; } -static inline void t4_set_wq_in_error(struct t4_wq *wq) +static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx) { - wq->rq.queue[wq->rq.size].status.qp_err = 1; + if (srqidx) + *wq->srqidxp = srqidx; + *wq->qp_errp = 1; } static inline void t4_disable_wq_db(struct t4_wq *wq) @@ -565,6 +710,7 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + u8 *qp_errp; unsigned long flags; }; @@ -698,18 +844,18 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe) static inline int t4_cq_in_error(struct t4_cq *cq) { - return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err; + return *cq->qp_errp; } static inline void t4_set_cq_in_error(struct t4_cq *cq) { - ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; + *cq->qp_errp = 1; } #endif struct t4_dev_status_page { u8 db_off; - u8 pad1; + u8 write_cmpl_supported; u16 pad2; u32 pad3; u64 qp_start; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 58c531db4f4a..cbdb300a4794 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -50,7 +50,8 @@ enum fw_ri_wr_opcode { FW_RI_BYPASS = 0xd, FW_RI_RECEIVE = 0xe, - FW_RI_SGE_EC_CR_RETURN = 0xf + FW_RI_SGE_EC_CR_RETURN = 0xf, + FW_RI_WRITE_IMMEDIATE = FW_RI_RDMA_INIT }; enum fw_ri_wr_flags { @@ -59,7 +60,8 @@ enum fw_ri_wr_flags { FW_RI_SOLICITED_EVENT_FLAG = 0x04, FW_RI_READ_FENCE_FLAG = 0x08, FW_RI_LOCAL_FENCE_FLAG = 0x10, - FW_RI_RDMA_READ_INVALIDATE = 0x20 + FW_RI_RDMA_READ_INVALIDATE = 0x20, + FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40 }; enum fw_ri_mpa_attrs { @@ -263,6 +265,7 @@ enum fw_ri_res_type { FW_RI_RES_TYPE_SQ, FW_RI_RES_TYPE_RQ, FW_RI_RES_TYPE_CQ, + FW_RI_RES_TYPE_SRQ, }; enum fw_ri_res_op { @@ -296,6 +299,20 @@ struct fw_ri_res { __be32 r6_lo; __be64 r7; } cq; + struct fw_ri_res_srq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + __be32 srqid; + __be32 pdid; + __be32 hwsrqsize; + __be32 hwsrqaddr; + } srq; } u; }; @@ -531,7 +548,17 @@ struct fw_ri_rdma_write_wr { __u16 wrid; __u8 r1[3]; __u8 len16; - __be64 r2; + /* + * Use union for immediate data to be consistent with stack's 32 bit + * data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; __be32 plen; __be32 stag_sink; __be64 to_sink; @@ -568,6 +595,37 @@ struct fw_ri_send_wr { #define FW_RI_SEND_WR_SENDOP_G(x) \ (((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M) +struct fw_ri_rdma_write_cmpl_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 r2; + __u8 flags_send; + __u16 wrid_send; + __be32 stag_inv; + __be32 plen; + __be32 stag_sink; + __be64 to_sink; + union fw_ri_cmpl { + struct fw_ri_immd_cmpl { + __u8 op; + __u8 r1[6]; + __u8 immdlen; + __u8 data[16]; + } immd_src; + struct fw_ri_isgl isgl_src; + } u_cmpl; + __be64 r3; +#ifndef C99_NOT_SUPPORTED + union fw_ri_write { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + struct fw_ri_rdma_read_wr { __u8 opcode; __u8 flags; @@ -707,6 +765,10 @@ enum fw_ri_init_p2ptype { FW_RI_INIT_P2PTYPE_DISABLED = 0xf, }; +enum fw_ri_init_rqeqid_srq { + FW_RI_INIT_RQEQID_SRQ = 1 << 31, +}; + struct fw_ri_wr { __be32 op_compl; __be32 flowid_len16; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 6deb101cdd43..2c19bf772451 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8143,8 +8143,15 @@ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source) } } -/* +/** + * is_rcv_avail_int() - User receive context available IRQ handler + * @dd: valid dd + * @source: logical IRQ source (offset from IS_RCVAVAIL_START) + * * RX block receive available interrupt. Source is < 160. + * + * This is the general interrupt handler for user (PSM) receive contexts, + * and can only be used for non-threaded IRQs. */ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) { @@ -8154,12 +8161,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { - /* Check for non-user contexts, including vnic */ - if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic) - rcd->do_interrupt(rcd, 0); - else - handle_user_interrupt(rcd); - + handle_user_interrupt(rcd); hfi1_rcd_put(rcd); return; /* OK */ } @@ -8173,8 +8175,14 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) err_detail, source); } -/* +/** + * is_rcv_urgent_int() - User receive context urgent IRQ handler + * @dd: valid dd + * @source: logical IRQ source (ofse from IS_RCVURGENT_START) + * * RX block receive urgent interrupt. Source is < 160. + * + * NOTE: kernel receive contexts specifically do NOT enable this IRQ. */ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) { @@ -8184,11 +8192,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { - /* only pay attention to user urgent interrupts */ - if (source >= dd->first_dyn_alloc_ctxt && - !rcd->is_vnic) - handle_user_interrupt(rcd); - + handle_user_interrupt(rcd); hfi1_rcd_put(rcd); return; /* OK */ } @@ -8260,9 +8264,14 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source) dd_dev_err(dd, "invalid interrupt source %u\n", source); } -/* - * General interrupt handler. This is able to correctly handle - * all interrupts in case INTx is used. +/** + * gerneral_interrupt() - General interrupt handler + * @irq: MSIx IRQ vector + * @data: hfi1 devdata + * + * This is able to correctly handle all non-threaded interrupts. Receive + * context DATA IRQs are threaded and are not supported by this handler. + * */ static irqreturn_t general_interrupt(int irq, void *data) { @@ -10130,7 +10139,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << SEND_CTXT_CHECK_SLID_VALUE_SHIFT); - for (i = 0; i < dd->chip_send_contexts; i++) { + for (i = 0; i < chip_send_contexts(dd); i++) { hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x", i, (u32)sreg); write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg); @@ -11857,7 +11866,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, * sequence numbers could land exactly on the same spot. * E.g. a rcd restart before the receive header wrapped. */ - memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size); + memset(rcd->rcvhdrq, 0, rcvhdrq_size(rcd)); /* starting timeout */ rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr; @@ -11952,9 +11961,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; - rcd->rcvctrl = rcvctrl; hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); - write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl); + write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); /* work around sticky RcvCtxtStatus.BlockedRHQFull */ if (did_enable && @@ -12042,7 +12050,7 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) } else if (entry->flags & CNTR_SDMA) { hfi1_cdbg(CNTR, "\t Per SDMA Engine\n"); - for (j = 0; j < dd->chip_sdma_engines; + for (j = 0; j < chip_sdma_engines(dd); j++) { val = entry->rw_cntr(entry, dd, j, @@ -12418,6 +12426,7 @@ static int init_cntrs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; const char *bit_type_32 = ",32"; const int bit_type_32_sz = strlen(bit_type_32); + u32 sdma_engines = chip_sdma_engines(dd); /* set up the stats timer; the add_timer is done at the end */ timer_setup(&dd->synth_stats_timer, update_synth_timer, 0); @@ -12450,7 +12459,7 @@ static int init_cntrs(struct hfi1_devdata *dd) } } else if (dev_cntrs[i].flags & CNTR_SDMA) { dev_cntrs[i].offset = dd->ndevcntrs; - for (j = 0; j < dd->chip_sdma_engines; j++) { + for (j = 0; j < sdma_engines; j++) { snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); sz += strlen(name); @@ -12507,7 +12516,7 @@ static int init_cntrs(struct hfi1_devdata *dd) *p++ = '\n'; } } else if (dev_cntrs[i].flags & CNTR_SDMA) { - for (j = 0; j < dd->chip_sdma_engines; j++) { + for (j = 0; j < sdma_engines; j++) { snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); memcpy(p, name, strlen(name)); @@ -13020,9 +13029,9 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0); write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0); write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0); - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0); write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0); @@ -13030,48 +13039,30 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); } -/* Move to pcie.c? */ -static void disable_intx(struct pci_dev *pdev) -{ - pci_intx(pdev, 0); -} - /** * hfi1_clean_up_interrupts() - Free all IRQ resources * @dd: valid device data data structure * - * Free the MSI or INTx IRQs and assoicated PCI resources, - * if they have been allocated. + * Free the MSIx and assoicated PCI resources, if they have been allocated. */ void hfi1_clean_up_interrupts(struct hfi1_devdata *dd) { int i; + struct hfi1_msix_entry *me = dd->msix_entries; /* remove irqs - must happen before disabling/turning off */ - if (dd->num_msix_entries) { - /* MSI-X */ - struct hfi1_msix_entry *me = dd->msix_entries; - - for (i = 0; i < dd->num_msix_entries; i++, me++) { - if (!me->arg) /* => no irq, no affinity */ - continue; - hfi1_put_irq_affinity(dd, me); - pci_free_irq(dd->pcidev, i, me->arg); - } - - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; - } else { - /* INTx */ - if (dd->requested_intx_irq) { - pci_free_irq(dd->pcidev, 0, dd); - dd->requested_intx_irq = 0; - } - disable_intx(dd->pcidev); + for (i = 0; i < dd->num_msix_entries; i++, me++) { + if (!me->arg) /* => no irq, no affinity */ + continue; + hfi1_put_irq_affinity(dd, me); + pci_free_irq(dd->pcidev, i, me->arg); } + /* clean structures */ + kfree(dd->msix_entries); + dd->msix_entries = NULL; + dd->num_msix_entries = 0; + pci_free_irq_vectors(dd->pcidev); } @@ -13121,20 +13112,6 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd, msix_intr); } -static int request_intx_irq(struct hfi1_devdata *dd) -{ - int ret; - - ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd, - DRIVER_NAME "_%d", dd->unit); - if (ret) - dd_dev_err(dd, "unable to request INTx interrupt, err %d\n", - ret); - else - dd->requested_intx_irq = 1; - return ret; -} - static int request_msix_irqs(struct hfi1_devdata *dd) { int first_general, last_general; @@ -13253,11 +13230,6 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) { int i; - if (!dd->num_msix_entries) { - synchronize_irq(pci_irq_vector(dd->pcidev, 0)); - return; - } - for (i = 0; i < dd->vnic.num_ctxt; i++) { struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; @@ -13346,7 +13318,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) { u32 total; int ret, request; - int single_interrupt = 0; /* we expect to have all the interrupts */ /* * Interrupt count: @@ -13363,17 +13334,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) if (request < 0) { ret = request; goto fail; - } else if (request == 0) { - /* using INTx */ - /* dd->num_msix_entries already zero */ - single_interrupt = 1; - dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n"); - } else if (request < total) { - /* using MSI-X, with reduced interrupts */ - dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n", - total, request); - ret = -EINVAL; - goto fail; } else { dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), GFP_KERNEL); @@ -13394,10 +13354,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) /* reset general handler mask, chip MSI-X mappings */ reset_interrupts(dd); - if (single_interrupt) - ret = request_intx_irq(dd); - else - ret = request_msix_irqs(dd); + ret = request_msix_irqs(dd); if (ret) goto fail; @@ -13429,6 +13386,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int qos_rmt_count; int user_rmt_reduced; u32 n_usr_ctxts; + u32 send_contexts = chip_send_contexts(dd); + u32 rcv_contexts = chip_rcv_contexts(dd); /* * Kernel receive contexts: @@ -13450,16 +13409,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * Every kernel receive context needs an ACK send context. * one send context is allocated for each VL{0-7} and VL15 */ - if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { + if (num_kernel_contexts > (send_contexts - num_vls - 1)) { dd_dev_err(dd, "Reducing # kernel rcv contexts to: %d, from %lu\n", - (int)(dd->chip_send_contexts - num_vls - 1), + send_contexts - num_vls - 1, num_kernel_contexts); - num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; + num_kernel_contexts = send_contexts - num_vls - 1; } /* Accommodate VNIC contexts if possible */ - if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) { + if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) { dd_dev_err(dd, "No receive contexts available for VNIC\n"); num_vnic_contexts = 0; } @@ -13477,13 +13436,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * Adjust the counts given a global max. */ - if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) { + if (total_contexts + n_usr_ctxts > rcv_contexts) { dd_dev_err(dd, "Reducing # user receive contexts to: %d, from %u\n", - (int)(dd->chip_rcv_contexts - total_contexts), + rcv_contexts - total_contexts, n_usr_ctxts); /* recalculate */ - n_usr_ctxts = dd->chip_rcv_contexts - total_contexts; + n_usr_ctxts = rcv_contexts - total_contexts; } /* each user context requires an entry in the RMT */ @@ -13509,7 +13468,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd->freectxts = n_usr_ctxts; dd_dev_info(dd, "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", - (int)dd->chip_rcv_contexts, + rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, dd->num_vnic_contexts, @@ -13527,7 +13486,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * contexts. */ dd->rcv_entries.group_size = RCV_INCREMENT; - ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size; + ngroups = chip_rcv_array_count(dd) / dd->rcv_entries.group_size; dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts; dd->rcv_entries.nctxt_extra = ngroups - (dd->num_rcv_contexts * dd->rcv_entries.ngroups); @@ -13552,7 +13511,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd_dev_info( dd, "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n", - dd->chip_send_contexts, + send_contexts, dd->num_send_contexts, dd->sc_sizes[SC_KERNEL].count, dd->sc_sizes[SC_ACK].count, @@ -13610,7 +13569,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) write_csr(dd, CCE_INT_MAP + (8 * i), 0); /* SendCtxtCreditReturnAddr */ - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); /* PIO Send buffers */ @@ -13623,7 +13582,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* RcvHdrAddr */ /* RcvHdrTailAddr */ /* RcvTidFlowTable */ - for (i = 0; i < dd->chip_rcv_contexts; i++) { + for (i = 0; i < chip_rcv_contexts(dd); i++) { write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0); write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0); for (j = 0; j < RXE_NUM_TID_FLOWS; j++) @@ -13631,7 +13590,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) } /* RcvArray */ - for (i = 0; i < dd->chip_rcv_array_count; i++) + for (i = 0; i < chip_rcv_array_count(dd); i++) hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0); /* RcvQPMapTable */ @@ -13789,7 +13748,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0); for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++) write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0); - for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++) + for (i = 0; i < chip_send_contexts(dd) / NUM_CONTEXTS_PER_SET; i++) write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0); for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++) write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0); @@ -13817,7 +13776,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) /* * TXE Per-Context CSRs */ - for (i = 0; i < dd->chip_send_contexts; i++) { + for (i = 0; i < chip_send_contexts(dd); i++) { write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0); write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); @@ -13835,7 +13794,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) /* * TXE Per-SDMA CSRs */ - for (i = 0; i < dd->chip_sdma_engines; i++) { + for (i = 0; i < chip_sdma_engines(dd); i++) { write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* SEND_DMA_STATUS read-only */ write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0); @@ -13968,7 +13927,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) /* * RXE Kernel and User Per-Context CSRs */ - for (i = 0; i < dd->chip_rcv_contexts; i++) { + for (i = 0; i < chip_rcv_contexts(dd); i++) { /* kernel */ write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0); /* RCV_CTXT_STATUS read-only */ @@ -14084,13 +14043,13 @@ static int init_chip(struct hfi1_devdata *dd) /* disable send contexts and SDMA engines */ write_csr(dd, SEND_CTRL, 0); - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* disable port (turn off RXE inbound traffic) and contexts */ write_csr(dd, RCV_CTRL, 0); - for (i = 0; i < dd->chip_rcv_contexts; i++) + for (i = 0; i < chip_rcv_contexts(dd); i++) write_csr(dd, RCV_CTXT_CTRL, 0); /* mask all interrupt sources */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) @@ -14709,9 +14668,9 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull); /* enable all per-context and per-SDMA engine errors */ - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull); /* set the local CU to AU mapping */ @@ -14979,11 +14938,13 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, "Functional simulator" }; struct pci_dev *parent = pdev->bus->self; + u32 sdma_engines; dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * sizeof(struct hfi1_pportdata)); if (IS_ERR(dd)) goto bail; + sdma_engines = chip_sdma_engines(dd); ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) { int vl; @@ -15081,11 +15042,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* give a reasonable active value, will be set on link up */ dd->pport->link_speed_active = OPA_LINK_SPEED_25G; - dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS); - dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS); - dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES); - dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE); - dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE); /* fix up link widths for emulation _p */ ppd = dd->pport; if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) { @@ -15096,11 +15052,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, OPA_LINK_WIDTH_1X; } /* insure num_vls isn't larger than number of sdma engines */ - if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) { + if (HFI1_CAP_IS_KSET(SDMA) && num_vls > sdma_engines) { dd_dev_err(dd, "num_vls %u too large, using %u VLs\n", - num_vls, dd->chip_sdma_engines); - num_vls = dd->chip_sdma_engines; - ppd->vls_supported = dd->chip_sdma_engines; + num_vls, sdma_engines); + num_vls = sdma_engines; + ppd->vls_supported = sdma_engines; ppd->vls_operational = ppd->vls_supported; } @@ -15216,13 +15172,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, */ aspm_init(dd); - dd->rcvhdrsize = DEFAULT_RCVHDRSIZE; - /* - * rcd[0] is guaranteed to be valid by this point. Also, all - * context are using the same value, as per the module parameter. - */ - dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32); - ret = init_pervl_scs(dd); if (ret) goto bail_cleanup; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index fdf389e46e19..36b04d6300e5 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -656,6 +656,36 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt, write_csr(dd, offset0 + (0x1000 * ctxt), value); } +static inline u32 chip_rcv_contexts(struct hfi1_devdata *dd) +{ + return read_csr(dd, RCV_CONTEXTS); +} + +static inline u32 chip_send_contexts(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_CONTEXTS); +} + +static inline u32 chip_sdma_engines(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_DMA_ENGINES); +} + +static inline u32 chip_pio_mem_size(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_PIO_MEM_SIZE); +} + +static inline u32 chip_sdma_mem_size(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_DMA_MEM_SIZE); +} + +static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd) +{ + return read_csr(dd, RCV_ARRAY_CNT); +} + u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, u32 dw_len); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 94dca95db04f..a41f85558312 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -208,25 +208,25 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf, (offset * RCV_BUF_BLOCK_SIZE)); } -static inline void *hfi1_get_header(struct hfi1_devdata *dd, +static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - return (void *)(rhf_addr - dd->rhf_offset + offset); + return (void *)(rhf_addr - rcd->rhf_offset + offset); } -static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, +static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { - return (struct ib_header *)hfi1_get_header(dd, rhf_addr); + return (struct ib_header *)hfi1_get_header(rcd, rhf_addr); } static inline struct hfi1_16b_header - *hfi1_get_16B_header(struct hfi1_devdata *dd, + *hfi1_get_16B_header(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { - return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr); + return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr); } /* @@ -591,13 +591,12 @@ static void __prescan_rxq(struct hfi1_packet *packet) init_ps_mdata(&mdata, packet); while (1) { - struct hfi1_devdata *dd = rcd->dd; struct hfi1_ibport *ibp = rcd_to_iport(rcd); __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + - dd->rhf_offset; + packet->rcd->rhf_offset; struct rvt_qp *qp; struct ib_header *hdr; - struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; int is_ecn = 0; @@ -612,7 +611,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - packet->hdr = hfi1_get_msgheader(dd, rhf_addr); + packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr); hdr = packet->hdr; lnh = ib_get_lnh(hdr); @@ -718,7 +717,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) ret = check_max_packet(packet, thread); packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + - packet->rcd->dd->rhf_offset; + packet->rcd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); return ret; @@ -757,7 +756,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) * crashing down. There is no need to eat another * comparison in this performance critical code. */ - packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet); + packet->rcd->rhf_rcv_function_map[packet->etype](packet); packet->numpkt++; /* Set up for the next packet */ @@ -768,7 +767,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) ret = check_max_packet(packet, thread); packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + - packet->rcd->dd->rhf_offset; + packet->rcd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); return ret; @@ -949,12 +948,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, u8 sc = SC15_PACKET; if (etype == RHF_RCV_TYPE_IB) { - struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd, packet->rhf_addr); sc = hfi1_9B_get_sc5(hdr, packet->rhf); } else if (etype == RHF_RCV_TYPE_BYPASS) { struct hfi1_16b_header *hdr = hfi1_get_16B_header( - packet->rcd->dd, + packet->rcd, packet->rhf_addr); sc = hfi1_16B_get_sc(hdr); } @@ -1034,7 +1033,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) packet.rhqoff += packet.rsize; packet.rhf_addr = (__le32 *)rcd->rcvhdrq + packet.rhqoff + - dd->rhf_offset; + rcd->rhf_offset; packet.rhf = rhf_to_cpu(packet.rhf_addr); } else if (skip_pkt) { @@ -1384,7 +1383,7 @@ bail: static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) { packet->hdr = (struct hfi1_ib_message_header *) - hfi1_get_msgheader(packet->rcd->dd, + hfi1_get_msgheader(packet->rcd, packet->rhf_addr); packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; } @@ -1485,7 +1484,7 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) u8 l4; packet->hdr = (struct hfi1_16b_header *) - hfi1_get_16B_header(packet->rcd->dd, + hfi1_get_16B_header(packet->rcd, packet->rhf_addr); l4 = hfi1_16B_get_l4(packet->hdr); if (l4 == OPA_16B_L4_IB_LOCAL) { @@ -1575,7 +1574,7 @@ void handle_eflags(struct hfi1_packet *packet) * The following functions are called by the interrupt handler. They are type * specific handlers for each packet type. */ -int process_receive_ib(struct hfi1_packet *packet) +static int process_receive_ib(struct hfi1_packet *packet) { if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; @@ -1607,7 +1606,7 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) return false; } -int process_receive_bypass(struct hfi1_packet *packet) +static int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; @@ -1649,7 +1648,7 @@ int process_receive_bypass(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int process_receive_error(struct hfi1_packet *packet) +static int process_receive_error(struct hfi1_packet *packet) { /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( @@ -1668,7 +1667,7 @@ int process_receive_error(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int kdeth_process_expected(struct hfi1_packet *packet) +static int kdeth_process_expected(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) @@ -1682,7 +1681,7 @@ int kdeth_process_expected(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int kdeth_process_eager(struct hfi1_packet *packet) +static int kdeth_process_eager(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) @@ -1695,7 +1694,7 @@ int kdeth_process_eager(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int process_receive_invalid(struct hfi1_packet *packet) +static int process_receive_invalid(struct hfi1_packet *packet) { dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n", rhf_rcv_type(packet->rhf)); @@ -1719,9 +1718,8 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) init_ps_mdata(&mdata, &packet); while (1) { - struct hfi1_devdata *dd = rcd->dd; __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + - dd->rhf_offset; + rcd->rhf_offset; struct ib_header *hdr; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn; @@ -1738,7 +1736,7 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) if (etype > RHF_RCV_TYPE_IB) goto next; - packet.hdr = hfi1_get_msgheader(dd, rhf_addr); + packet.hdr = hfi1_get_msgheader(rcd, rhf_addr); hdr = packet.hdr; lnh = be16_to_cpu(hdr->lrh[0]) & 3; @@ -1760,3 +1758,14 @@ next: update_ps_mdata(&mdata, rcd); } } + +const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = { + [RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected, + [RHF_RCV_TYPE_EAGER] = kdeth_process_eager, + [RHF_RCV_TYPE_IB] = process_receive_ib, + [RHF_RCV_TYPE_ERROR] = process_receive_error, + [RHF_RCV_TYPE_BYPASS] = process_receive_bypass, + [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, +}; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 0fc4aa9455c3..1fc75647e47b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -411,7 +411,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) mapio = 1; break; case RCV_HDRQ: - memlen = uctxt->rcvhdrq_size; + memlen = rcvhdrq_size(uctxt); memvirt = uctxt->rcvhdrq; break; case RCV_EGRBUF: { @@ -521,7 +521,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) break; case SUBCTXT_RCV_HDRQ: memaddr = (u64)uctxt->subctxt_rcvhdr_base; - memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt; + memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt; flags |= VM_IO | VM_DONTEXPAND; vmf = 1; break; @@ -985,7 +985,11 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, * sub contexts. * This has to be done here so the rest of the sub-contexts find the * proper base context. + * NOTE: _set_bit() can be used here because the context creation is + * protected by the mutex (rather than the spin_lock), and will be the + * very first instance of this context. */ + __set_bit(0, uctxt->in_use_ctxts); if (uinfo->subctxt_cnt) init_subctxts(uctxt, uinfo); uctxt->userversion = uinfo->userversion; @@ -1040,7 +1044,7 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) return -ENOMEM; /* We can take the size of the RcvHdr Queue from the master */ - uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size * + uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) * num_subctxts); if (!uctxt->subctxt_rcvhdr_base) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4ab8b5bfbed1..d9470317983f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -169,12 +169,6 @@ extern const struct pci_error_handlers hfi1_pci_err_handler; struct hfi1_opcode_stats_perctx; struct ctxt_eager_bufs { - ssize_t size; /* total size of eager buffers */ - u32 count; /* size of buffers array */ - u32 numbufs; /* number of buffers allocated */ - u32 alloced; /* number of rcvarray entries used */ - u32 rcvtid_size; /* size of each eager rcv tid */ - u32 threshold; /* head update threshold */ struct eager_buffer { void *addr; dma_addr_t dma; @@ -184,6 +178,12 @@ struct ctxt_eager_bufs { void *addr; dma_addr_t dma; } *rcvtids; + u32 size; /* total size of eager buffers */ + u32 rcvtid_size; /* size of each eager rcv tid */ + u16 count; /* size of buffers array */ + u16 numbufs; /* number of buffers allocated */ + u16 alloced; /* number of rcvarray entries used */ + u16 threshold; /* head update threshold */ }; struct exp_tid_set { @@ -191,43 +191,84 @@ struct exp_tid_set { u32 count; }; +typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); struct hfi1_ctxtdata { - /* shadow the ctxt's RcvCtrl register */ - u64 rcvctrl; /* rcvhdrq base, needs mmap before useful */ void *rcvhdrq; /* kernel virtual address where hdrqtail is updated */ volatile __le64 *rcvhdrtail_kvaddr; - /* when waiting for rcv or pioavail */ - wait_queue_head_t wait; - /* rcvhdrq size (for freeing) */ - size_t rcvhdrq_size; + /* so functions that need physical port can get it easily */ + struct hfi1_pportdata *ppd; + /* so file ops can get at unit */ + struct hfi1_devdata *dd; + /* this receive context's assigned PIO ACK send context */ + struct send_context *sc; + /* per context recv functions */ + const rhf_rcv_function_ptr *rhf_rcv_function_map; + /* + * The interrupt handler for a particular receive context can vary + * throughout it's lifetime. This is not a lock protected data member so + * it must be updated atomically and the prev and new value must always + * be valid. Worst case is we process an extra interrupt and up to 64 + * packets with the wrong interrupt handler. + */ + int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + /* verbs rx_stats per rcd */ + struct hfi1_opcode_stats_perctx *opstats; + /* clear interrupt mask */ + u64 imask; + /* ctxt rcvhdrq head offset */ + u32 head; /* number of rcvhdrq entries */ u16 rcvhdrq_cnt; + u8 ireg; /* clear interrupt register */ + /* receive packet sequence counter */ + u8 seq_cnt; /* size of each of the rcvhdrq entries */ - u16 rcvhdrqentsize; + u8 rcvhdrqentsize; + /* offset of RHF within receive header entry */ + u8 rhf_offset; + /* dynamic receive available interrupt timeout */ + u8 rcvavail_timeout; + /* Indicates that this is vnic context */ + bool is_vnic; + /* vnic queue index this context is mapped to */ + u8 vnic_q_idx; + /* Is ASPM interrupt supported for this context */ + bool aspm_intr_supported; + /* ASPM state (enabled/disabled) for this context */ + bool aspm_enabled; + /* Is ASPM processing enabled for this context (in intr context) */ + bool aspm_intr_enable; + struct ctxt_eager_bufs egrbufs; + /* QPs waiting for context processing */ + struct list_head qp_wait_list; + /* tid allocation lists */ + struct exp_tid_set tid_group_list; + struct exp_tid_set tid_used_list; + struct exp_tid_set tid_full_list; + + /* Timer for re-enabling ASPM if interrupt activity quiets down */ + struct timer_list aspm_timer; + /* per-context configuration flags */ + unsigned long flags; + /* array of tid_groups */ + struct tid_group *groups; /* mmap of hdrq, must fit in 44 bits */ dma_addr_t rcvhdrq_dma; dma_addr_t rcvhdrqtailaddr_dma; - struct ctxt_eager_bufs egrbufs; - /* this receive context's assigned PIO ACK send context */ - struct send_context *sc; - - /* dynamic receive available interrupt timeout */ - u32 rcvavail_timeout; + /* Last interrupt timestamp */ + ktime_t aspm_ts_last_intr; + /* Last timestamp at which we scheduled a timer for this context */ + ktime_t aspm_ts_timer_sched; + /* Lock to serialize between intr, timer intr and user threads */ + spinlock_t aspm_lock; /* Reference count the base context usage */ struct kref kref; - - /* Device context index */ - u16 ctxt; - /* - * non-zero if ctxt can be shared, and defines the maximum number of - * sub-contexts for this device context. - */ - u16 subctxt_cnt; - /* non-zero if ctxt is being shared. */ - u16 subctxt_id; - u8 uuid[16]; + /* numa node of this context */ + int numa_id; + /* associated msix interrupt. */ + s16 msix_intr; /* job key */ u16 jkey; /* number of RcvArray groups for this context. */ @@ -238,87 +279,59 @@ struct hfi1_ctxtdata { u16 expected_count; /* index of first expected TID entry. */ u16 expected_base; - /* array of tid_groups */ - struct tid_group *groups; - - struct exp_tid_set tid_group_list; - struct exp_tid_set tid_used_list; - struct exp_tid_set tid_full_list; + /* Device context index */ + u8 ctxt; - /* lock protecting all Expected TID data of user contexts */ + /* PSM Specific fields */ + /* lock protecting all Expected TID data */ struct mutex exp_mutex; - /* per-context configuration flags */ - unsigned long flags; - /* per-context event flags for fileops/intr communication */ - unsigned long event_flags; - /* total number of polled urgent packets */ - u32 urgent; - /* saved total number of polled urgent packets for poll edge trigger */ - u32 urgent_poll; + /* when waiting for rcv or pioavail */ + wait_queue_head_t wait; + /* uuid from PSM */ + u8 uuid[16]; /* same size as task_struct .comm[], command that opened context */ char comm[TASK_COMM_LEN]; - /* so file ops can get at unit */ - struct hfi1_devdata *dd; - /* so functions that need physical port can get it easily */ - struct hfi1_pportdata *ppd; - /* associated msix interrupt */ - u32 msix_intr; + /* Bitmask of in use context(s) */ + DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); + /* per-context event flags for fileops/intr communication */ + unsigned long event_flags; /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ void *subctxt_uregbase; /* An array of pages for the eager receive buffers * N */ void *subctxt_rcvegrbuf; /* An array of pages for the eager header queue entries * N */ void *subctxt_rcvhdr_base; - /* Bitmask of in use context(s) */ - DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); - /* The version of the library which opened this ctxt */ - u32 userversion; + /* total number of polled urgent packets */ + u32 urgent; + /* saved total number of polled urgent packets for poll edge trigger */ + u32 urgent_poll; /* Type of packets or conditions we want to poll for */ u16 poll_type; - /* receive packet sequence counter */ - u8 seq_cnt; - /* ctxt rcvhdrq head offset */ - u32 head; - /* QPs waiting for context processing */ - struct list_head qp_wait_list; - /* interrupt handling */ - u64 imask; /* clear interrupt mask */ - int ireg; /* clear interrupt register */ - int numa_id; /* numa node of this context */ - /* verbs rx_stats per rcd */ - struct hfi1_opcode_stats_perctx *opstats; - - /* Is ASPM interrupt supported for this context */ - bool aspm_intr_supported; - /* ASPM state (enabled/disabled) for this context */ - bool aspm_enabled; - /* Timer for re-enabling ASPM if interrupt activity quietens down */ - struct timer_list aspm_timer; - /* Lock to serialize between intr, timer intr and user threads */ - spinlock_t aspm_lock; - /* Is ASPM processing enabled for this context (in intr context) */ - bool aspm_intr_enable; - /* Last interrupt timestamp */ - ktime_t aspm_ts_last_intr; - /* Last timestamp at which we scheduled a timer for this context */ - ktime_t aspm_ts_timer_sched; - + /* non-zero if ctxt is being shared. */ + u16 subctxt_id; + /* The version of the library which opened this ctxt */ + u32 userversion; /* - * The interrupt handler for a particular receive context can vary - * throughout it's lifetime. This is not a lock protected data member so - * it must be updated atomically and the prev and new value must always - * be valid. Worst case is we process an extra interrupt and up to 64 - * packets with the wrong interrupt handler. + * non-zero if ctxt can be shared, and defines the maximum number of + * sub-contexts for this device context. */ - int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); - - /* Indicates that this is vnic context */ - bool is_vnic; + u8 subctxt_cnt; - /* vnic queue index this context is mapped to */ - u8 vnic_q_idx; }; +/** + * rcvhdrq_size - return total size in bytes for header queue + * @rcd: the receive context + * + * rcvhdrqentsize is in DWs, so we have to convert to bytes + * + */ +static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd) +{ + return PAGE_ALIGN(rcd->rcvhdrq_cnt * + rcd->rcvhdrqentsize * sizeof(u32)); +} + /* * Represents a single packet at a high level. Put commonly computed things in * here so we do not have to keep doing them over and over. The rule of thumb is @@ -897,12 +910,11 @@ struct hfi1_pportdata { u64 vl_xmit_flit_cnt[C_VL_COUNT + 1]; }; -typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); - typedef void (*opcode_handler)(struct hfi1_packet *packet); typedef void (*hfi1_make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct rvt_swqe *wqe); +extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[]; /* return values for the RHF receive functions */ @@ -1046,8 +1058,6 @@ struct hfi1_devdata { dma_addr_t sdma_pad_phys; /* for deallocation */ size_t sdma_heads_size; - /* number from the chip */ - u32 chip_sdma_engines; /* num used */ u32 num_sdma; /* array of engines sized by num_sdma */ @@ -1102,8 +1112,6 @@ struct hfi1_devdata { /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; - u32 freezelen; /* max length of freezemsg */ - u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ spinlock_t uctxt_lock; /* protect rcd changes */ @@ -1130,25 +1138,6 @@ struct hfi1_devdata { /* Base GUID for device (network order) */ u64 base_guid; - /* these are the "32 bit" regs */ - - /* value we put in kr_rcvhdrsize */ - u32 rcvhdrsize; - /* number of receive contexts the chip supports */ - u32 chip_rcv_contexts; - /* number of receive array entries */ - u32 chip_rcv_array_count; - /* number of PIO send contexts the chip supports */ - u32 chip_send_contexts; - /* number of bytes in the PIO memory buffer */ - u32 chip_pio_mem_size; - /* number of bytes in the SDMA memory buffer */ - u32 chip_sdma_mem_size; - - /* size of each rcvegrbuffer */ - u32 rcvegrbufsize; - /* log2 of above */ - u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; u8 dc_shutdown; @@ -1221,9 +1210,6 @@ struct hfi1_devdata { u32 num_msix_entries; u32 first_dyn_msix_idx; - /* INTx information */ - u32 requested_intx_irq; /* did we request one? */ - /* general interrupt: mask of handled interrupts */ u64 gi_mask[CCE_NUM_INT_CSRS]; @@ -1289,8 +1275,6 @@ struct hfi1_devdata { u64 sw_cce_err_status_aggregate; /* Software counter that aggregates all bypass packet rcv errors */ u64 sw_rcv_bypass_packet_errors; - /* receive interrupt function */ - rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; /* Save the enabled LCB error bits */ u64 lcb_err_en; @@ -1329,10 +1313,7 @@ struct hfi1_devdata { /* seqlock for sc2vl */ seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; u64 sc2vl[4]; - /* receive interrupt functions */ - rhf_rcv_function_ptr *rhf_rcv_function_map; u64 __percpu *rcv_limit; - u16 rhf_offset; /* offset of RHF within receive header entry */ /* adding a new field here would make it part of this cacheline */ /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ @@ -1471,7 +1452,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, /* calculate the current RHF address */ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) { - return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset; + return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset; } int hfi1_reset_device(int); @@ -2021,12 +2002,6 @@ static inline void flush_wc(void) } void handle_eflags(struct hfi1_packet *packet); -int process_receive_ib(struct hfi1_packet *packet); -int process_receive_bypass(struct hfi1_packet *packet); -int process_receive_error(struct hfi1_packet *packet); -int kdeth_process_expected(struct hfi1_packet *packet); -int kdeth_process_eager(struct hfi1_packet *packet); -int process_receive_invalid(struct hfi1_packet *packet); void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd); /* global module parameter variables */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f110842b91f5..758d273c32cf 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -364,9 +364,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, hfi1_exp_tid_group_init(rcd); rcd->ppd = ppd; rcd->dd = dd; - __set_bit(0, rcd->in_use_ctxts); rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; + rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; mutex_init(&rcd->exp_mutex); @@ -404,6 +404,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->rcvhdrq_cnt = rcvhdrcnt; rcd->rcvhdrqentsize = hfi1_hdrq_entsize; + rcd->rhf_offset = + rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32); /* * Simple Eager buffer allocation: we have already pre-allocated * the number of RcvArray entry groups. Each ctxtdata structure @@ -853,24 +855,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) struct hfi1_ctxtdata *rcd; struct hfi1_pportdata *ppd; - /* Set up recv low level handlers */ - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EXPECTED] = - kdeth_process_expected; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EAGER] = - kdeth_process_eager; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_IB] = process_receive_ib; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_ERROR] = - process_receive_error; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_BYPASS] = - process_receive_bypass; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID5] = - process_receive_invalid; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID6] = - process_receive_invalid; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID7] = - process_receive_invalid; - dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions; - /* Set up send low level handlers */ dd->process_pio_send = hfi1_verbs_send_pio; dd->process_dma_send = hfi1_verbs_send_dma; @@ -936,7 +920,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) } /* Allocate enough memory for user event notification. */ - len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * + len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS * sizeof(*dd->events)); dd->events = vmalloc_user(len); if (!dd->events) @@ -948,9 +932,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) dd->status = vmalloc_user(PAGE_SIZE); if (!dd->status) dd_dev_err(dd, "Failed to allocate dev status page\n"); - else - dd->freezelen = PAGE_SIZE - (sizeof(*dd->status) - - sizeof(dd->status->freezemsg)); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (dd->status) @@ -1144,7 +1125,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) return; if (rcd->rcvhdrq) { - dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, + dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd), rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; if (rcd->rcvhdrtail_kvaddr) { @@ -1855,12 +1836,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrq) { gfp_t gfp_flags; - /* - * rcvhdrqentsize is in DWs, so we have to convert to bytes - * (* sizeof(u32)). - */ - amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * - sizeof(u32)); + amt = rcvhdrq_size(rcd); if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic) gfp_flags = GFP_KERNEL; @@ -1885,8 +1861,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrtail_kvaddr) goto bail_free; } - - rcd->rcvhdrq_size = amt; } /* * These values are per-context: @@ -1902,7 +1876,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg); - reg = (dd->rcvhdrsize & RCV_HDR_SIZE_HDR_SIZE_MASK) + reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) << RCV_HDR_SIZE_HDR_SIZE_SHIFT; write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg); @@ -1938,9 +1912,9 @@ bail: int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) { struct hfi1_devdata *dd = rcd->dd; - u32 max_entries, egrtop, alloced_bytes = 0, idx = 0; + u32 max_entries, egrtop, alloced_bytes = 0; gfp_t gfp_flags; - u16 order; + u16 order, idx = 0; int ret = 0; u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index baf7c324f7b8..eec83757d55f 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -157,6 +157,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) unsigned long len; resource_size_t addr; int ret = 0; + u32 rcv_array_count; addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); @@ -186,9 +187,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) goto nomem; } - dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); - dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); - dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; + rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); + dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count); + dd->base2_start = RCV_ARRAY + rcv_array_count * 8; dd->kregbase2 = ioremap_nocache( addr + dd->base2_start, @@ -214,13 +215,13 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) * to write an entire cacheline worth of entries in one shot. */ dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, - dd->chip_rcv_array_count * 8); + rcv_array_count * 8); if (!dd->rcvarray_wc) { dd_dev_err(dd, "WC mapping of receive array failed\n"); goto nomem; } dd_dev_info(dd, "WC RcvArray: %p for %x\n", - dd->rcvarray_wc, dd->chip_rcv_array_count * 8); + dd->rcvarray_wc, rcv_array_count * 8); dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; @@ -346,15 +347,13 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * Returns: * - actual number of interrupts allocated or - * - 0 if fell back to INTx. * - error */ int request_msix(struct hfi1_devdata *dd, u32 msireq) { int nvec; - nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, - PCI_IRQ_MSIX | PCI_IRQ_LEGACY); + nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX); if (nvec < 0) { dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); return nvec; @@ -362,10 +361,6 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) tune_pcie_caps(dd); - /* check for legacy IRQ */ - if (nvec == 1 && !dd->pcidev->msix_enabled) - return 0; - return nvec; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9cac15d10c4f..c2c1cba5b23b 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -226,7 +226,7 @@ static const char *sc_type_name(int index) int init_sc_pools_and_sizes(struct hfi1_devdata *dd) { struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; - int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1; + int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; int total_contexts = 0; int fixed_blocks; int pool_blocks; @@ -343,8 +343,8 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd) sc_type_name(i), count); return -EINVAL; } - if (total_contexts + count > dd->chip_send_contexts) - count = dd->chip_send_contexts - total_contexts; + if (total_contexts + count > chip_send_contexts(dd)) + count = chip_send_contexts(dd) - total_contexts; total_contexts += count; @@ -507,7 +507,7 @@ static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, if (sci->type == type && sci->allocated == 0) { sci->allocated = 1; /* use a 1:1 mapping, but make them non-equal */ - context = dd->chip_send_contexts - index - 1; + context = chip_send_contexts(dd) - index - 1; dd->hw_to_sw[context] = index; *sw_index = index; *hw_context = context; @@ -1618,11 +1618,11 @@ static void sc_piobufavail(struct send_context *sc) /* Wake up the most starved one first */ if (n) hfi1_qp_wakeup(qps[max_idx], - RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); for (i = 0; i < n; i++) if (i != max_idx) hfi1_qp_wakeup(qps[i], - RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1697d96151bd..9b1e84a6b1cc 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -273,7 +273,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PATH_MIG_STATE && attr->path_mig_state == IB_MIG_MIGRATED && qp->s_mig_state == IB_MIG_ARMED) { - qp->s_flags |= RVT_S_AHG_CLEAR; + qp->s_flags |= HFI1_S_AHG_CLEAR; priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); @@ -717,7 +717,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->remote_ah_attr = qp->alt_ah_attr; qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr); qp->s_pkey_index = qp->s_alt_pkey_index; - qp->s_flags |= RVT_S_AHG_CLEAR; + qp->s_flags |= HFI1_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); qp_set_16b(qp); diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index b2d4cba8d15b..078cff7560b6 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -1,7 +1,7 @@ #ifndef _QP_H #define _QP_H /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -70,6 +70,26 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) } /* + * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK + * + * HFI1_S_AHG_VALID - ahg header valid on chip + * HFI1_S_AHG_CLEAR - have send engine clear ahg state + * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain + * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1 + */ +#define HFI1_S_AHG_VALID 0x80000000 +#define HFI1_S_AHG_CLEAR 0x40000000 +#define HFI1_S_WAIT_PIO_DRAIN 0x20000000 +#define HFI1_S_MIN_BIT_MASK 0x01000000 + +/* + * overload wait defines + */ + +#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN) +#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) + +/* * free_ahg - clear ahg from QP */ static inline void clear_ahg(struct rvt_qp *qp) @@ -77,7 +97,7 @@ static inline void clear_ahg(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; priv->s_ahg->ahgcount = 0; - qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); + qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR); if (priv->s_sde && qp->s_ahgidx >= 0) sdma_ahg_free(priv->s_sde, qp->s_ahgidx); qp->s_ahgidx = -1; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index f15c93102081..9bd63abb2dfe 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -241,7 +241,7 @@ bail: smp_wmb(); qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING - | RVT_S_AHG_VALID); + | HFI1_S_AHG_VALID); return 0; } @@ -1024,7 +1024,7 @@ done: if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) qp->s_flags |= RVT_S_WAIT_PSN; - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; } /* diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index ef4c566e206f..5f56f3c1b4c4 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -194,7 +194,7 @@ static void ruc_loopback(struct rvt_qp *sqp) spin_lock_irqsave(&sqp->s_lock, flags); /* Return if we are already busy processing a work request. */ - if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || + if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) || !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) goto unlock; @@ -533,9 +533,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) { struct hfi1_qp_priv *priv = qp->priv; - if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR)) + if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR)) clear_ahg(qp); - if (!(qp->s_flags & RVT_S_AHG_VALID)) { + if (!(qp->s_flags & HFI1_S_AHG_VALID)) { /* first middle that needs copy */ if (qp->s_ahgidx < 0) qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); @@ -544,7 +544,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; /* save to protect a change in another thread */ priv->s_ahg->ahgidx = qp->s_ahgidx; - qp->s_flags |= RVT_S_AHG_VALID; + qp->s_flags |= HFI1_S_AHG_VALID; } } else { /* subsequent middle after valid */ @@ -650,7 +650,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; bth0 |= pkey; bth0 |= extra_bytes << 20; @@ -727,7 +727,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; bth0 |= pkey; bth0 |= extra_bytes << 20; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 7fb350b87b49..88e326d6cc49 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1351,7 +1351,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) struct hfi1_pportdata *ppd = dd->pport + port; u32 per_sdma_credits; uint idle_cnt = sdma_idle_cnt; - size_t num_engines = dd->chip_sdma_engines; + size_t num_engines = chip_sdma_engines(dd); int ret = -ENOMEM; if (!HFI1_CAP_IS_KSET(SDMA)) { @@ -1360,18 +1360,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } if (mod_num_sdma && /* can't exceed chip support */ - mod_num_sdma <= dd->chip_sdma_engines && + mod_num_sdma <= chip_sdma_engines(dd) && /* count must be >= vls */ mod_num_sdma >= num_vls) num_engines = mod_num_sdma; dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma); - dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines); + dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd)); dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n", - dd->chip_sdma_mem_size); + chip_sdma_mem_size(dd)); per_sdma_credits = - dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE); + chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE); /* set up freeze waitqueue */ init_waitqueue_head(&dd->sdma_unfreeze_wq); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 08991874c0e2..13374c727b14 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1007,7 +1007,7 @@ static int pio_wait(struct rvt_qp *qp, int was_empty; dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); - dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); + dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN); qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, @@ -1376,7 +1376,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) return pio_wait(qp, ps->s_txreq->psc, ps, - RVT_S_WAIT_PIO_DRAIN); + HFI1_S_WAIT_PIO_DRAIN); return sr(qp, ps, 0); } @@ -1410,7 +1410,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; rdi->dparms.props.max_qp = hfi1_max_qps; rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; - rdi->dparms.props.max_sge = hfi1_max_sges; + rdi->dparms.props.max_send_sge = hfi1_max_sges; + rdi->dparms.props.max_recv_sge = hfi1_max_sges; rdi->dparms.props.max_sge_rd = hfi1_max_sges; rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; @@ -1497,15 +1498,6 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); - /* - * sm_lid of 0xFFFF needs special handling so that it can - * be differentiated from a permissve LID of 0xFFFF. - * We set the grh_required flag here so the SA can program - * the DGID in the address handle appropriately - */ - if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)) - props->grh_required = true; - return 0; } @@ -1892,7 +1884,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->process_mad = hfi1_process_mad; ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; - strncpy(ibdev->node_desc, init_utsname()->nodename, + strlcpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); /* diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 616fc9b6fad8..c643d80c5a53 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -120,8 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, uctxt->seq_cnt = 1; uctxt->is_vnic = true; - if (dd->num_msix_entries) - hfi1_set_vnic_msix_info(uctxt); + hfi1_set_vnic_msix_info(uctxt); hfi1_stats.sps_ctxts++; dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); @@ -136,8 +135,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); flush_wc(); - if (dd->num_msix_entries) - hfi1_reset_vnic_msix_info(uctxt); + hfi1_reset_vnic_msix_info(uctxt); /* * Disable receive context and interrupt available, reset all @@ -818,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->chip_sdma_engines, dd->num_vnic_contexts); + chip_sdma_engines(dd), dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = dd->chip_sdma_engines; + vinfo->num_tx_q = chip_sdma_engines(dd); vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d74928621559..0d96c5bb38cd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -44,13 +44,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); + const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; - struct ib_gid_attr gid_attr; struct hns_roce_ah *ah; u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - union ib_gid sgid; - int ret; ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) @@ -59,18 +57,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - /* Get source gid */ - ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &gid_attr); - if (ret) { - dev_err(dev, "get sgid failed! ret = %d\n", ret); - kfree(ah); - return ERR_PTR(ret); - } - - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - dev_put(gid_attr.ndev); + gid_attr = ah_attr->grh.sgid_attr; + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & @@ -108,7 +97,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate); rdma_ah_set_grh(ah_attr, NULL, (le32_to_cpu(ah->av.sl_tclass_flowlabel) & - HNS_ROCE_FLOW_LABLE_MASK), ah->av.gid_index, + HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index, ah->av.hop_limit, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >> HNS_ROCE_TCLASS_SHIFT)); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 319cb74aebaf..93d4b4ec002d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -382,15 +382,6 @@ #define ROCEE_VF_EQ_DB_CFG0_REG 0x238 #define ROCEE_VF_EQ_DB_CFG1_REG 0x23C -#define ROCEE_VF_SMAC_CFG0_REG 0x12000 -#define ROCEE_VF_SMAC_CFG1_REG 0x12004 - -#define ROCEE_VF_SGID_CFG0_REG 0x10000 -#define ROCEE_VF_SGID_CFG1_REG 0x10004 -#define ROCEE_VF_SGID_CFG2_REG 0x10008 -#define ROCEE_VF_SGID_CFG3_REG 0x1000c -#define ROCEE_VF_SGID_CFG4_REG 0x10010 - #define ROCEE_VF_ABN_INT_CFG_REG 0x13000 #define ROCEE_VF_ABN_INT_ST_REG 0x13004 #define ROCEE_VF_ABN_INT_EN_REG 0x13008 diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index ebee2782a573..e2f93c1ce86a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -41,6 +41,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, found: db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); + page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK; + db->virt_addr = sg_virt(page->umem->sg_head.sgl); db->u.user_page = page; refcount_inc(&page->refcount); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 31221d506d9a..9a24fd0ee3e7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -76,7 +76,7 @@ /* 4G/4K = 1M */ #define HNS_ROCE_SL_SHIFT 28 #define HNS_ROCE_TCLASS_SHIFT 20 -#define HNS_ROCE_FLOW_LABLE_MASK 0xfffff +#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff #define HNS_ROCE_MAX_PORTS 6 #define HNS_ROCE_MAX_GID_NUM 16 @@ -110,6 +110,7 @@ enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, }; enum { @@ -190,7 +191,8 @@ enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), - HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3) + HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), + HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), }; enum hns_roce_mtt_type { @@ -385,6 +387,7 @@ struct hns_roce_db { struct hns_roce_user_db_page *user_page; } u; dma_addr_t dma; + void *virt_addr; int index; int order; }; @@ -524,7 +527,9 @@ struct hns_roce_qp { struct hns_roce_buf hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; + struct hns_roce_db sdb; u8 rdb_en; + u8 sdb_en; u32 doorbell_qpn; __le32 sq_signal_bits; u32 sq_next_wqe; @@ -579,22 +584,22 @@ struct hns_roce_ceqe { }; struct hns_roce_aeqe { - u32 asyn; + __le32 asyn; union { struct { - u32 qp; + __le32 qp; u32 rsv0; u32 rsv1; } qp_event; struct { - u32 cq; + __le32 cq; u32 rsv0; u32 rsv1; } cq_event; struct { - u32 ceqe; + __le32 ceqe; u32 rsv0; u32 rsv1; } ce_event; @@ -641,6 +646,8 @@ struct hns_roce_eq { int shift; dma_addr_t cur_eqe_ba; dma_addr_t nxt_eqe_ba; + int event_type; + int sub_type; }; struct hns_roce_eq_table { @@ -720,10 +727,21 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 sl_num; + u32 tsq_buf_pg_sz; + u32 tpq_buf_pg_sz; u32 chunk_sz; /* chunk size in non multihop mode*/ u64 flags; }; +struct hns_roce_work { + struct hns_roce_dev *hr_dev; + struct work_struct work; + u32 qpn; + int event_type; + int sub_type; +}; + struct hns_roce_hw { int (*reset)(struct hns_roce_dev *hr_dev, bool enable); int (*cmq_init)(struct hns_roce_dev *hr_dev); @@ -736,7 +754,7 @@ struct hns_roce_hw { u16 token, int event); int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, - union ib_gid *gid, const struct ib_gid_attr *attr); + const union ib_gid *gid, const struct ib_gid_attr *attr); int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); @@ -760,10 +778,10 @@ struct hns_roce_hw { int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state); int (*destroy_qp)(struct ib_qp *ibqp); - int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); - int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); + int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); + int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); @@ -816,6 +834,7 @@ struct hns_roce_dev { u32 tptr_size; /*only for hw v1*/ const struct hns_roce_hw *hw; void *priv; + struct workqueue_struct *irq_workq; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) @@ -864,7 +883,7 @@ static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) return container_of(hr_qp, struct hns_roce_sqp, hr_qp); } -static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest) +static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { __raw_writeq(*(u64 *) val, dest); } @@ -982,7 +1001,7 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, int cnt); -__be32 send_ieth(struct ib_send_wr *wr); +__be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 63b5b3edabcb..f6faefed96e8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -170,7 +170,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, case 3: mhop->l2_idx = table_idx & (chunk_ba_num - 1); mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1); - mhop->l0_idx = table_idx / chunk_ba_num / chunk_ba_num; + mhop->l0_idx = (table_idx / chunk_ba_num) / chunk_ba_num; break; case 2: mhop->l1_idx = table_idx & (chunk_ba_num - 1); @@ -342,7 +342,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, } else { break; } - msleep(HW_SYNC_SLEEP_TIME_INTERVAL); + mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); } bt_cmd_l = (u32)bt_ba; @@ -494,6 +494,9 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, step_idx = 1; } else if (hop_num == HNS_ROCE_HOP_NUM_0) { step_idx = 0; + } else { + ret = -EINVAL; + goto err_dma_alloc_l1; } /* set HEM base address to hardware */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 8444234ed092..081aa91fc162 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -58,8 +58,9 @@ static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr, rseg->len = 0; } -static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int hns_roce_v1_post_send(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); @@ -173,12 +174,14 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_FLOW_LABEL_M, - UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0); + UD_SEND_WQE_U32_36_FLOW_LABEL_S, + ah->av.sl_tclass_flowlabel & + HNS_ROCE_FLOW_LABEL_MASK); roce_set_field(ud_sq_wqe->u32_36, - UD_SEND_WQE_U32_36_PRIORITY_M, - UD_SEND_WQE_U32_36_PRIORITY_S, - ah->av.sl_tclass_flowlabel >> - HNS_ROCE_SL_SHIFT); + UD_SEND_WQE_U32_36_PRIORITY_M, + UD_SEND_WQE_U32_36_PRIORITY_S, + le32_to_cpu(ah->av.sl_tclass_flowlabel) >> + HNS_ROCE_SL_SHIFT); roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_SGID_INDEX_M, UD_SEND_WQE_U32_36_SGID_INDEX_S, @@ -191,7 +194,9 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ah->av.hop_limit); roce_set_field(ud_sq_wqe->u32_40, UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M, - UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0); + UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, + ah->av.sl_tclass_flowlabel >> + HNS_ROCE_TCLASS_SHIFT); memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN); @@ -333,7 +338,7 @@ out: doorbell[0] = le32_to_cpu(sq_db.u32_4); doorbell[1] = le32_to_cpu(sq_db.u32_8); - hns_roce_write64_k(doorbell, qp->sq.db_reg_l); + hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l); qp->sq_next_wqe = ind; } @@ -342,14 +347,15 @@ out: return ret; } -static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int hns_roce_v1_post_recv(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int ret = 0; int nreq = 0; int ind = 0; int i = 0; - u32 reg_val = 0; + u32 reg_val; unsigned long flags = 0; struct hns_roce_rq_wqe_ctrl *ctrl = NULL; struct hns_roce_wqe_data_seg *scat = NULL; @@ -402,14 +408,18 @@ out: wmb(); if (ibqp->qp_type == IB_QPT_GSI) { + __le32 tmp; + /* SW update GSI rq header */ reg_val = roce_read(to_hr_dev(ibqp->device), ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port); - roce_set_field(reg_val, + tmp = cpu_to_le32(reg_val); + roce_set_field(tmp, ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, hr_qp->rq.head); + reg_val = le32_to_cpu(tmp); roce_write(to_hr_dev(ibqp->device), ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val); @@ -430,7 +440,8 @@ out: doorbell[0] = le32_to_cpu(rq_db.u32_4); doorbell[1] = le32_to_cpu(rq_db.u32_8); - hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); + hns_roce_write64_k((__le32 *)doorbell, + hr_qp->rq.db_reg_l); } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -441,51 +452,63 @@ out: static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, int sdb_mode, int odb_mode) { + __le32 tmp; u32 val; val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); - roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); + roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, u32 odb_mode) { + __le32 tmp; u32 val; /* Configure SDB/ODB extend mode */ val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); - roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); + roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept, u32 sdb_alful) { + __le32 tmp; u32 val; /* Configure SDB */ val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG); - roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful); - roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, + roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val); } static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, u32 odb_alful) { + __le32 tmp; u32 val; /* Configure ODB */ val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG); - roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful); - roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, + roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val); } @@ -496,6 +519,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; dma_addr_t sdb_dma_addr; + __le32 tmp; u32 val; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -511,7 +535,8 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, /* Configure extend SDB depth */ val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG); - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S, db->ext_db->esdb_dep); /* @@ -519,8 +544,9 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. */ - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); @@ -535,6 +561,7 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; dma_addr_t odb_dma_addr; + __le32 tmp; u32 val; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -550,12 +577,14 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, /* Configure extend ODB depth */ val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG); - roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S, db->ext_db->eodb_dep); - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S, db->ext_db->eodb_dep); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val); dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep); @@ -762,6 +791,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); if (!free_mr->mr_free_qp[i]) { dev_err(dev, "Create loop qp failed!\n"); + ret = -ENOMEM; goto create_lp_qp_failed; } hr_qp = free_mr->mr_free_qp[i]; @@ -831,7 +861,7 @@ alloc_pd_failed: if (hns_roce_ib_destroy_cq(cq)) dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); - return -EINVAL; + return ret; } static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) @@ -969,7 +999,8 @@ static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp) { struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); struct device *dev = &hr_dev->pdev->dev; - struct ib_send_wr send_wr, *bad_wr; + struct ib_send_wr send_wr; + const struct ib_send_wr *bad_wr; int ret; memset(&send_wr, 0, sizeof(send_wr)); @@ -1161,9 +1192,10 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev) static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) { int ret; + u32 val; + __le32 tmp; int raq_shift = 0; dma_addr_t addr; - u32 val; struct hns_roce_v1_priv *priv; struct hns_roce_raq_table *raq; struct device *dev = &hr_dev->pdev->dev; @@ -1189,46 +1221,54 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) /* Configure raq_shift */ raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY); val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG); - roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift); /* * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. */ - roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S, raq->e_raq_buf->map >> 44); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val); dev_dbg(dev, "Configure raq_shift 0x%x.\n", val); /* Configure raq threshold */ val = roce_read(hr_dev, ROCEE_RAQ_WL_REG); - roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, ROCEE_RAQ_WL_ROCEE_RAQ_WL_S, HNS_ROCE_V1_EXT_RAQ_WF); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_RAQ_WL_REG, val); dev_dbg(dev, "Configure raq_wl 0x%x.\n", val); /* Enable extend raq */ val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG); - roce_set_field(val, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S, POL_TIME_INTERVAL_VAL); - roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); - roce_set_field(val, + roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); + roce_set_field(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S, 2); - roce_set_bit(val, + roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val); dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val); /* Enable raq drop */ val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val); @@ -1255,20 +1295,25 @@ static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) { + __le32 tmp; u32 val; if (enable_flag) { val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); /* Open all ports */ - roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, ROCEE_GLB_CFG_ROCEE_PORT_ST_S, ALL_PORT_VAL_OPEN); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } else { val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); /* Close all ports */ - roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } } @@ -1498,13 +1543,11 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; - hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); - hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, - ROCEE_VENDOR_PART_ID_REG)); - hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_L_REG)) | - ((u64)le32_to_cpu(roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_H_REG)) << 32); + hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG); + hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG); + hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) | + ((u64)roce_read(hr_dev, + ROCEE_SYS_IMAGE_GUID_H_REG) << 32); hr_dev->hw_rev = HNS_ROCE_HW_VER1; caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; @@ -1557,8 +1600,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM; caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM; - caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, - ROCEE_ACK_DELAY_REG)); + caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG); caps->max_mtu = IB_MTU_2048; return 0; @@ -1568,21 +1610,25 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) { int ret; u32 val; + __le32 tmp; struct device *dev = &hr_dev->pdev->dev; /* DMAE user config */ val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG); - roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf); - roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, + roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S, 1 << PAGES_SHIFT_16); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val); val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG); - roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf); - roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, + roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S, 1 << PAGES_SHIFT_16); @@ -1668,6 +1714,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG); unsigned long end; u32 val = 0; + __le32 tmp; end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; while (hns_roce_v1_cmd_pending(hr_dev)) { @@ -1679,15 +1726,17 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, cond_resched(); } - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, op); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, + roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event); + roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M, ROCEE_MB6_ROCEE_MB_TOKEN_S, token); + val = le32_to_cpu(tmp); writeq(in_param, hcr + 0); writeq(out_param, hcr + 2); writel(in_modifier, hcr + 4); @@ -1717,7 +1766,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, return -ETIMEDOUT; } - status = le32_to_cpu((__force __be32) + status = le32_to_cpu((__force __le32) __raw_readl(hcr + HCR_STATUS_OFFSET)); if ((status & STATUS_MASK) != 0x1) { dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status); @@ -1728,7 +1777,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, } static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid, + int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { u32 *p = NULL; @@ -1760,6 +1809,7 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, { u32 reg_smac_l; u16 reg_smac_h; + __le32 tmp; u16 *p_h; u32 *p; u32 val; @@ -1784,10 +1834,12 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, val = roce_read(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); + tmp = cpu_to_le32(val); p_h = (u16 *)(&addr[4]); reg_smac_h = *p_h; - roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M, + roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M, ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); @@ -1797,12 +1849,15 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu) { + __le32 tmp; u32 val; val = roce_read(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); - roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); } @@ -1848,9 +1903,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M, MPT_BYTE_12_MW_BIND_COUNTER_S, 0); - mpt_entry->virt_addr_l = (u32)mr->iova; - mpt_entry->virt_addr_h = (u32)(mr->iova >> 32); - mpt_entry->length = (u32)mr->size; + mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova); + mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32)); + mpt_entry->length = cpu_to_le32((u32)mr->size); roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M, MPT_BYTE_28_PD_S, mr->pd); @@ -1885,64 +1940,59 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->mpt_byte_36, MPT_BYTE_36_PA0_H_M, MPT_BYTE_36_PA0_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + (u32)(pages[i] >> PAGES_SHIFT_32)); break; case 1: roce_set_field(mpt_entry->mpt_byte_36, MPT_BYTE_36_PA1_L_M, - MPT_BYTE_36_PA1_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_36_PA1_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_40, MPT_BYTE_40_PA1_H_M, MPT_BYTE_40_PA1_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + (u32)(pages[i] >> PAGES_SHIFT_24)); break; case 2: roce_set_field(mpt_entry->mpt_byte_40, MPT_BYTE_40_PA2_L_M, - MPT_BYTE_40_PA2_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_40_PA2_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_44, MPT_BYTE_44_PA2_H_M, MPT_BYTE_44_PA2_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + (u32)(pages[i] >> PAGES_SHIFT_16)); break; case 3: roce_set_field(mpt_entry->mpt_byte_44, MPT_BYTE_44_PA3_L_M, - MPT_BYTE_44_PA3_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_44_PA3_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_48, MPT_BYTE_48_PA3_H_M, MPT_BYTE_48_PA3_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8))); + (u32)(pages[i] >> PAGES_SHIFT_8)); break; case 4: mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_56, MPT_BYTE_56_PA4_H_M, MPT_BYTE_56_PA4_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + (u32)(pages[i] >> PAGES_SHIFT_32)); break; case 5: roce_set_field(mpt_entry->mpt_byte_56, MPT_BYTE_56_PA5_L_M, - MPT_BYTE_56_PA5_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_56_PA5_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_60, MPT_BYTE_60_PA5_H_M, MPT_BYTE_60_PA5_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + (u32)(pages[i] >> PAGES_SHIFT_24)); break; case 6: roce_set_field(mpt_entry->mpt_byte_60, MPT_BYTE_60_PA6_L_M, - MPT_BYTE_60_PA6_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_60_PA6_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_PA6_H_M, MPT_BYTE_64_PA6_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + (u32)(pages[i] >> PAGES_SHIFT_16)); break; default: break; @@ -1951,7 +2001,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, free_page((unsigned long) pages); - mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr); + mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr)); roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, MPT_BYTE_12_PBL_ADDR_H_S, @@ -1982,9 +2032,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { - u32 doorbell[2]; + __le32 doorbell[2]; - doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1); + doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1)); doorbell[1] = 0; roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, @@ -2081,10 +2131,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID); roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M, CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn); - cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4); - cq_context->cq_bt_l = (u32)dma_handle; - cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l); + cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M, @@ -2096,15 +2144,12 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, ilog2((unsigned int)nent)); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); - cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12); - cq_context->cur_cqe_ba0_l = (u32)(mtts[0]); - cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l); + cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M, - CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, - cpu_to_le32((mtts[0]) >> 32)); + CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32); /* Dedicated hardware, directly set 0 */ roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M, @@ -2118,9 +2163,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, tptr_dma_addr >> 44); - cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20); - cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12); + cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12)); roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, @@ -2138,7 +2182,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); - cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32); } static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) @@ -2151,7 +2194,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, { struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; - u32 doorbell[2]; + __le32 doorbell[2]; notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; @@ -2159,7 +2202,8 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, * flags = 0; Notification Flag = 1, next * flags = 1; Notification Flag = 0, solocited */ - doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); + doorbell[0] = + cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1)); roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); @@ -2416,7 +2460,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; struct hns_roce_v1_priv *priv; unsigned long end = 0, flags = 0; - uint32_t bt_cmd_val[2] = {0}; + __le32 bt_cmd_val[2] = {0}; void __iomem *bt_cmd; u64 bt_ba = 0; @@ -2468,7 +2512,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, msleep(HW_SYNC_SLEEP_TIME_INTERVAL); } - bt_cmd_val[0] = (uint32_t)bt_ba; + bt_cmd_val[0] = (__le32)bt_ba; roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32); hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); @@ -2569,10 +2613,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_sqp_context *context; struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 0; + u32 __iomem *addr; int rq_pa_start; + __le32 tmp; u32 reg_val; u64 *mtts; - u32 __iomem *addr; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -2598,7 +2643,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); - context->sq_rq_bt_l = (u32)(dma_handle); + context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, @@ -2610,7 +2655,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); + le32_to_cpu(hr_qp->sq_signal_bits)); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, 1); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, @@ -2624,7 +2669,8 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + context->cur_rq_wqe_ba_l = + cpu_to_le32((u32)(mtts[rq_pa_start])); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, @@ -2643,7 +2689,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn); - context->cur_sq_wqe_ba_l = (u32)mtts[0]; + context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, @@ -2658,23 +2704,25 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context)); - writel(context->qp1c_bytes_4, addr); - writel(context->sq_rq_bt_l, addr + 1); - writel(context->qp1c_bytes_12, addr + 2); - writel(context->qp1c_bytes_16, addr + 3); - writel(context->qp1c_bytes_20, addr + 4); - writel(context->cur_rq_wqe_ba_l, addr + 5); - writel(context->qp1c_bytes_28, addr + 6); - writel(context->qp1c_bytes_32, addr + 7); - writel(context->cur_sq_wqe_ba_l, addr + 8); - writel(context->qp1c_bytes_40, addr + 9); + writel(le32_to_cpu(context->qp1c_bytes_4), addr); + writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1); + writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2); + writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3); + writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4); + writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5); + writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6); + writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7); + writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8); + writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9); } /* Modify QP1C status */ reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context)); - roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, + tmp = cpu_to_le32(reg_val); + roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state); + reg_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context), reg_val); @@ -2712,7 +2760,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; - uint32_t doorbell[2] = {0}; + __le32 doorbell[2] = {0}; int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; @@ -2887,7 +2935,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dmac = (u8 *)attr->ah_attr.roce.dmac; - context->sq_rq_bt_l = (u32)(dma_handle); + context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, @@ -2899,7 +2947,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S, attr->min_rnr_timer); - context->irrl_ba_l = (u32)(dma_handle_2); + context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2)); roce_set_field(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M, QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S, @@ -2913,7 +2961,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); + le32_to_cpu(hr_qp->sq_signal_bits)); port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; @@ -2991,7 +3039,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + context->cur_rq_wqe_ba_l = + cpu_to_le32((u32)(mtts[rq_pa_start])); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, @@ -3071,7 +3120,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; } - context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, @@ -3219,7 +3268,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); - context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, @@ -3386,16 +3435,16 @@ static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, addr = ROCEE_QP1C_CFG0_0_REG + hr_qp->port * sizeof(struct hns_roce_sqp_context); - context.qp1c_bytes_4 = roce_read(hr_dev, addr); - context.sq_rq_bt_l = roce_read(hr_dev, addr + 1); - context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2); - context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3); - context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4); - context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5); - context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6); - context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7); - context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8); - context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9); + context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr)); + context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1)); + context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2)); + context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3)); + context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4)); + context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5)); + context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6)); + context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7)); + context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8)); + context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9)); hr_qp->state = roce_get_field(context.qp1c_bytes_4, QP1C_BYTES_4_QP_STATE_M, @@ -3557,7 +3606,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); - qp_attr->rnr_retry = context->rnr_retry; + qp_attr->rnr_retry = (u8)context->rnr_retry; done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -3595,42 +3644,47 @@ static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev, u32 *old_send, u32 *old_retry, u32 *tsp_st, u32 *success_flags) { + __le32 *old_send_tmp, *old_retry_tmp; u32 sdb_retry_cnt; u32 sdb_send_ptr; u32 cur_cnt, old_cnt; + __le32 tmp, tmp1; u32 send_ptr; sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - cur_cnt = roce_get_field(sdb_send_ptr, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + tmp = cpu_to_le32(sdb_send_ptr); + tmp1 = cpu_to_le32(sdb_retry_cnt); + cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + + old_send_tmp = (__le32 *)old_send; + old_retry_tmp = (__le32 *)old_retry; if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { - old_cnt = roce_get_field(*old_send, + old_cnt = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(*old_retry, + roce_get_field(*old_retry_tmp, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) *success_flags = 1; } else { - old_cnt = roce_get_field(*old_send, + old_cnt = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { *success_flags = 1; } else { - send_ptr = roce_get_field(*old_send, + send_ptr = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, + roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - roce_set_field(*old_send, + roce_set_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, send_ptr); @@ -3646,11 +3700,14 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, { struct device *dev = &hr_dev->pdev->dev; u32 sdb_send_ptr, old_send; + __le32 sdb_issue_ptr_tmp; + __le32 sdb_send_ptr_tmp; u32 success_flags = 0; unsigned long end; u32 old_retry; u32 inv_cnt; u32 tsp_st; + __le32 tmp; if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || *wait_stage < HNS_ROCE_V1_DB_STAGE1) { @@ -3679,10 +3736,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, ROCEE_SDB_SEND_PTR_REG); } - if (roce_get_field(sdb_issue_ptr, + sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr); + sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr); + if (roce_get_field(sdb_issue_ptr_tmp, ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == - roce_get_field(sdb_send_ptr, + roce_get_field(sdb_send_ptr_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); @@ -3690,7 +3749,8 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, do { tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); - if (roce_get_bit(tsp_st, + tmp = cpu_to_le32(tsp_st); + if (roce_get_bit(tmp, ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; return 0; @@ -3699,8 +3759,9 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, if (!time_before(jiffies, end)) { dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" "issue 0x%x send 0x%x.\n", - hr_qp->qpn, sdb_issue_ptr, - sdb_send_ptr); + hr_qp->qpn, + le32_to_cpu(sdb_issue_ptr_tmp), + le32_to_cpu(sdb_send_ptr_tmp)); return 0; } @@ -4102,9 +4163,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; u32 cqn; - cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, + cqn = roce_get_field(aeqe->event.cq_event.cq, HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)); + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -4340,6 +4401,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) u32 aeshift_val; u32 ceshift_val; u32 cemask_val; + __le32 tmp; int i; /* @@ -4348,30 +4410,34 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) * interrupt, mask irq, clear irq, cancel mask operation */ aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); + tmp = cpu_to_le32(aeshift_val); /* AEQE overflow */ - if (roce_get_bit(aeshift_val, + if (roce_get_bit(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) { dev_warn(dev, "AEQ overflow!\n"); /* Set mask */ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(caepaemask_val, - ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(caepaemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_ENABLE); + caepaemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); /* Clear int state(INT_WC : write 1 clear) */ caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG); - roce_set_bit(caepaest_val, - ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + tmp = cpu_to_le32(caepaest_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + caepaest_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val); /* Clear mask */ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(caepaemask_val, - ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(caepaemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_DISABLE); + caepaemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); } @@ -4379,8 +4445,9 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG + i * CEQ_REG_OFFSET); + tmp = cpu_to_le32(ceshift_val); - if (roce_get_bit(ceshift_val, + if (roce_get_bit(tmp, ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) { dev_warn(dev, "CEQ[%d] almost overflow!\n", i); int_work++; @@ -4389,9 +4456,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cemask_val = roce_read(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cemask_val, + tmp = cpu_to_le32(cemask_val); + roce_set_bit(tmp, ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_ENABLE); + cemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET, cemask_val); @@ -4399,9 +4468,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cealmovf_val = roce_read(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cealmovf_val, + tmp = cpu_to_le32(cealmovf_val); + roce_set_bit(tmp, ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S, 1); + cealmovf_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + i * CEQ_REG_OFFSET, cealmovf_val); @@ -4409,9 +4480,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cemask_val = roce_read(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cemask_val, + tmp = cpu_to_le32(cemask_val); + roce_set_bit(tmp, ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_DISABLE); + cemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET, cemask_val); } @@ -4435,13 +4508,16 @@ static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev) { u32 aemask_val; int masken = 0; + __le32 tmp; int i; /* AEQ INT */ aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(aemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, masken); - roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + aemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val); /* CEQ INT */ @@ -4473,20 +4549,24 @@ static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num, int enable_flag) { void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num]; + __le32 tmp; u32 val; val = readl(eqc); + tmp = cpu_to_le32(val); if (enable_flag) - roce_set_field(val, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_VALID); else - roce_set_field(val, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_INVALID); + + val = le32_to_cpu(tmp); writel(val, eqc); } @@ -4499,6 +4579,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, u32 eqconsindx_val = 0; u32 eqcuridx_val = 0; u32 eqshift_val = 0; + __le32 tmp2 = 0; + __le32 tmp1 = 0; + __le32 tmp = 0; int num_bas; int ret; int i; @@ -4530,14 +4613,13 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE); } eq->cons_index = 0; - roce_set_field(eqshift_val, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_INVALID); - roce_set_field(eqshift_val, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S, eq->log_entries); + eqshift_val = le32_to_cpu(tmp); writel(eqshift_val, eqc); /* Configure eq extended address 12~44bit */ @@ -4549,18 +4631,18 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. */ - roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, + roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S, eq->buf_list[0].map >> 44); - roce_set_field(eqcuridx_val, - ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, + roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); + eqcuridx_val = le32_to_cpu(tmp1); writel(eqcuridx_val, eqc + 8); /* Configure eq consumer index */ - roce_set_field(eqconsindx_val, - ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, + roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); + eqconsindx_val = le32_to_cpu(tmp2); writel(eqconsindx_val, eqc + 0xc); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index e9a2717ea7cd..66440147d9eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -260,7 +260,7 @@ struct hns_roce_cqe { __le32 cqe_byte_4; union { __le32 r_key; - __be32 immediate_data; + __le32 immediate_data; }; __le32 byte_cnt; __le32 cqe_byte_16; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a6e11be0ea0f..0218c0f8c2a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -36,6 +36,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <net/addrconf.h> +#include <rdma/ib_addr.h> #include <rdma/ib_umem.h> #include "hnae3.h" @@ -53,7 +54,7 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, dseg->len = cpu_to_le32(sg->length); } -static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, +static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_ind) { struct hns_roce_v2_wqe_data_seg *dseg; @@ -100,10 +101,10 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, } } -static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, +static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, - struct ib_send_wr **bad_wr) + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; @@ -164,23 +165,30 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, return 0; } -static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state); + +static int hns_roce_v2_post_send(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); - struct hns_roce_v2_wqe_data_seg *dseg; struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; + struct ib_qp_attr attr; unsigned int sge_ind = 0; unsigned int owner_bit; unsigned long flags; unsigned int ind; void *wqe = NULL; bool loopback; + int attr_mask; u32 tmp_len; int ret = 0; u8 *smac; @@ -273,7 +281,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: - ud_sq_wqe->immtdata = wr->ex.imm_data; + ud_sq_wqe->immtdata = + cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); break; default: ud_sq_wqe->immtdata = 0; @@ -330,14 +339,13 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, V2_UD_SEND_WQE_BYTE_36_TCLASS_S, - 0); - roce_set_field(ud_sq_wqe->byte_36, - V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, - 0); + ah->av.sl_tclass_flowlabel >> + HNS_ROCE_TCLASS_SHIFT); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, 0); + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, + ah->av.sl_tclass_flowlabel & + HNS_ROCE_FLOW_LABEL_MASK); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, V2_UD_SEND_WQE_BYTE_40_SL_S, @@ -371,7 +379,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->immtdata = wr->ex.imm_data; + rc_sq_wqe->immtdata = + cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); break; case IB_WR_SEND_WITH_INV: rc_sq_wqe->inv_key = @@ -485,7 +494,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } wqe += sizeof(struct hns_roce_v2_rc_send_wqe); - dseg = wqe; ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe, &sge_ind, bad_wr); @@ -523,6 +531,19 @@ out: qp->sq_next_wqe = ind; qp->next_sge = sge_ind; + + if (qp->state == IB_QPS_ERR) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + + ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask, + qp->state, IB_QPS_ERR); + if (ret) { + spin_unlock_irqrestore(&qp->sq.lock, flags); + *bad_wr = wr; + return ret; + } + } } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -530,16 +551,19 @@ out: return ret; } -static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int hns_roce_v2_post_recv(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_rinl_sge *sge_list; struct device *dev = hr_dev->dev; + struct ib_qp_attr attr; unsigned long flags; void *wqe = NULL; + int attr_mask; int ret = 0; int nreq; int ind; @@ -608,6 +632,20 @@ out: wmb(); *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; + + if (hr_qp->state == IB_QPS_ERR) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, + attr_mask, hr_qp->state, + IB_QPS_ERR); + if (ret) { + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); + *bad_wr = wr; + return ret; + } + } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -702,8 +740,8 @@ static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev) int ret; /* Setup the queue entries for command queue */ - priv->cmq.csq.desc_num = 1024; - priv->cmq.crq.desc_num = 1024; + priv->cmq.csq.desc_num = CMD_CSQ_DESC_NUM; + priv->cmq.crq.desc_num = CMD_CRQ_DESC_NUM; /* Setup the lock for command queue */ spin_lock_init(&priv->cmq.csq.lock); @@ -925,7 +963,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[2]; - struct hns_roce_pf_res *res; + struct hns_roce_pf_res_a *req_a; + struct hns_roce_pf_res_b *req_b; int ret; int i; @@ -943,21 +982,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) if (ret) return ret; - res = (struct hns_roce_pf_res *)desc[0].data; + req_a = (struct hns_roce_pf_res_a *)desc[0].data; + req_b = (struct hns_roce_pf_res_b *)desc[1].data; - hr_dev->caps.qpc_bt_num = roce_get_field(res->qpc_bt_idx_num, + hr_dev->caps.qpc_bt_num = roce_get_field(req_a->qpc_bt_idx_num, PF_RES_DATA_1_PF_QPC_BT_NUM_M, PF_RES_DATA_1_PF_QPC_BT_NUM_S); - hr_dev->caps.srqc_bt_num = roce_get_field(res->srqc_bt_idx_num, + hr_dev->caps.srqc_bt_num = roce_get_field(req_a->srqc_bt_idx_num, PF_RES_DATA_2_PF_SRQC_BT_NUM_M, PF_RES_DATA_2_PF_SRQC_BT_NUM_S); - hr_dev->caps.cqc_bt_num = roce_get_field(res->cqc_bt_idx_num, + hr_dev->caps.cqc_bt_num = roce_get_field(req_a->cqc_bt_idx_num, PF_RES_DATA_3_PF_CQC_BT_NUM_M, PF_RES_DATA_3_PF_CQC_BT_NUM_S); - hr_dev->caps.mpt_bt_num = roce_get_field(res->mpt_bt_idx_num, + hr_dev->caps.mpt_bt_num = roce_get_field(req_a->mpt_bt_idx_num, PF_RES_DATA_4_PF_MPT_BT_NUM_M, PF_RES_DATA_4_PF_MPT_BT_NUM_S); + hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num, + PF_RES_DATA_3_PF_SL_NUM_M, + PF_RES_DATA_3_PF_SL_NUM_S); + return 0; } @@ -1203,12 +1247,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->eqe_ba_pg_sz = 0; caps->eqe_buf_pg_sz = 0; caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; + caps->tsq_buf_pg_sz = 0; caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | HNS_ROCE_CAP_FLAG_RQ_INLINE | - HNS_ROCE_CAP_FLAG_RECORD_DB; + HNS_ROCE_CAP_FLAG_RECORD_DB | + HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; caps->pkey_table_len[0] = 1; caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; @@ -1224,6 +1270,228 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } +static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, + enum hns_roce_link_table_type type) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cfg_llm_a *req_a = + (struct hns_roce_cfg_llm_a *)desc[0].data; + struct hns_roce_cfg_llm_b *req_b = + (struct hns_roce_cfg_llm_b *)desc[1].data; + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_link_table *link_tbl; + struct hns_roce_link_table_entry *entry; + enum hns_roce_opcode_type opcode; + u32 page_num; + int i; + + switch (type) { + case TSQ_LINK_TABLE: + link_tbl = &priv->tsq; + opcode = HNS_ROCE_OPC_CFG_EXT_LLM; + break; + case TPQ_LINK_TABLE: + link_tbl = &priv->tpq; + opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM; + break; + default: + return -EINVAL; + } + + page_num = link_tbl->npages; + entry = link_tbl->table.buf; + memset(req_a, 0, sizeof(*req_a)); + memset(req_b, 0, sizeof(*req_b)); + + for (i = 0; i < 2; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); + + if (i == 0) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + if (i == 0) { + req_a->base_addr_l = link_tbl->table.map & 0xffffffff; + req_a->base_addr_h = (link_tbl->table.map >> 32) & + 0xffffffff; + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_QUE_DEPTH_M, + CFG_LLM_QUE_DEPTH_S, + link_tbl->npages); + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_QUE_PGSZ_M, + CFG_LLM_QUE_PGSZ_S, + link_tbl->pg_sz); + req_a->head_ba_l = entry[0].blk_ba0; + req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr; + roce_set_field(req_a->head_ptr, + CFG_LLM_HEAD_PTR_M, + CFG_LLM_HEAD_PTR_S, 0); + } else { + req_b->tail_ba_l = entry[page_num - 1].blk_ba0; + roce_set_field(req_b->tail_ba_h, + CFG_LLM_TAIL_BA_H_M, + CFG_LLM_TAIL_BA_H_S, + entry[page_num - 1].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_BA1_M); + roce_set_field(req_b->tail_ptr, + CFG_LLM_TAIL_PTR_M, + CFG_LLM_TAIL_PTR_S, + (entry[page_num - 2].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> + HNS_ROCE_LINK_TABLE_NXT_PTR_S); + } + } + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1); + + return hns_roce_cmq_send(hr_dev, desc, 2); +} + +static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, + enum hns_roce_link_table_type type) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_link_table *link_tbl; + struct hns_roce_link_table_entry *entry; + struct device *dev = hr_dev->dev; + u32 buf_chk_sz; + dma_addr_t t; + int func_num = 1; + int pg_num_a; + int pg_num_b; + int pg_num; + int size; + int i; + + switch (type) { + case TSQ_LINK_TABLE: + link_tbl = &priv->tsq; + buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT); + pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz; + pg_num_b = hr_dev->caps.sl_num * 4 + 2; + break; + case TPQ_LINK_TABLE: + link_tbl = &priv->tpq; + buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT); + pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz; + pg_num_b = 2 * 4 * func_num + 2; + break; + default: + return -EINVAL; + } + + pg_num = max(pg_num_a, pg_num_b); + size = pg_num * sizeof(struct hns_roce_link_table_entry); + + link_tbl->table.buf = dma_alloc_coherent(dev, size, + &link_tbl->table.map, + GFP_KERNEL); + if (!link_tbl->table.buf) + goto out; + + link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list), + GFP_KERNEL); + if (!link_tbl->pg_list) + goto err_kcalloc_failed; + + entry = link_tbl->table.buf; + for (i = 0; i < pg_num; ++i) { + link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz, + &t, GFP_KERNEL); + if (!link_tbl->pg_list[i].buf) + goto err_alloc_buf_failed; + + link_tbl->pg_list[i].map = t; + memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz); + + entry[i].blk_ba0 = (t >> 12) & 0xffffffff; + roce_set_field(entry[i].blk_ba1_nxt_ptr, + HNS_ROCE_LINK_TABLE_BA1_M, + HNS_ROCE_LINK_TABLE_BA1_S, + t >> 44); + + if (i < (pg_num - 1)) + roce_set_field(entry[i].blk_ba1_nxt_ptr, + HNS_ROCE_LINK_TABLE_NXT_PTR_M, + HNS_ROCE_LINK_TABLE_NXT_PTR_S, + i + 1); + } + link_tbl->npages = pg_num; + link_tbl->pg_sz = buf_chk_sz; + + return hns_roce_config_link_table(hr_dev, type); + +err_alloc_buf_failed: + for (i -= 1; i >= 0; i--) + dma_free_coherent(dev, buf_chk_sz, + link_tbl->pg_list[i].buf, + link_tbl->pg_list[i].map); + kfree(link_tbl->pg_list); + +err_kcalloc_failed: + dma_free_coherent(dev, size, link_tbl->table.buf, + link_tbl->table.map); + +out: + return -ENOMEM; +} + +static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, + struct hns_roce_link_table *link_tbl) +{ + struct device *dev = hr_dev->dev; + int size; + int i; + + size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry); + + for (i = 0; i < link_tbl->npages; ++i) + if (link_tbl->pg_list[i].buf) + dma_free_coherent(dev, link_tbl->pg_sz, + link_tbl->pg_list[i].buf, + link_tbl->pg_list[i].map); + kfree(link_tbl->pg_list); + + dma_free_coherent(dev, size, link_tbl->table.buf, + link_tbl->table.map); +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + int ret; + + /* TSQ includes SQ doorbell and ack doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + return 0; + +err_tpq_init_failed: + hns_roce_free_link_table(hr_dev, &priv->tsq); + + return ret; +} + +static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + + hns_roce_free_link_table(hr_dev, &priv->tpq); + hns_roce_free_link_table(hr_dev, &priv->tsq); +} + static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) { u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); @@ -1307,13 +1575,45 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_sgid_tb *sgid_tb = + (struct hns_roce_cfg_sgid_tb *)desc.data; + u32 *p; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); + + roce_set_field(sgid_tb->table_idx_rsv, + CFG_SGID_TB_TABLE_IDX_M, + CFG_SGID_TB_TABLE_IDX_S, gid_index); + roce_set_field(sgid_tb->vf_sgid_type_rsv, + CFG_SGID_TB_VF_SGID_TYPE_M, + CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); + + p = (u32 *)&gid->raw[0]; + sgid_tb->vf_sgid_l = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[4]; + sgid_tb->vf_sgid_ml = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[8]; + sgid_tb->vf_sgid_mh = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[0xc]; + sgid_tb->vf_sgid_h = cpu_to_le32(*p); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid, + int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; - u32 *p; - u32 val; + int ret; if (!gid || !attr) return -EINVAL; @@ -1328,49 +1628,37 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; } - p = (u32 *)&gid->raw[0]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG0_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[4]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG1_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[8]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG2_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[0xc]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG3_REG + - 0x20 * gid_index); - - val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index); - roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M, - ROCEE_VF_SGID_CFG4_SGID_TYPE_S, sgid_type); - - roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val); + ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) + dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret); - return 0; + return ret; } static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) { + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_smac_tb *smac_tb = + (struct hns_roce_cfg_smac_tb *)desc.data; u16 reg_smac_h; u32 reg_smac_l; - u32 val; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false); reg_smac_l = *(u32 *)(&addr[0]); - roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_VF_SMAC_CFG0_REG + - 0x08 * phy_port); - val = roce_read(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port); + reg_smac_h = *(u16 *)(&addr[4]); - reg_smac_h = *(u16 *)(&addr[4]); - roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M, - ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h); - roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); + memset(smac_tb, 0, sizeof(*smac_tb)); + roce_set_field(smac_tb->tb_idx_rsv, + CFG_SMAC_TB_IDX_M, + CFG_SMAC_TB_IDX_S, phy_port); + roce_set_field(smac_tb->vf_smac_h_rsv, + CFG_SMAC_TB_VF_SMAC_H_M, + CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); + smac_tb->vf_smac_l = reg_smac_l; - return 0; + return hns_roce_cmq_send(hr_dev, &desc, 1); } static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, @@ -1758,6 +2046,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; struct hns_roce_wq *wq; + struct ib_qp_attr attr; + int attr_mask; int is_send; u16 wqe_ctr; u32 opcode; @@ -1844,8 +2134,17 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, break; } - /* CQE status error, directly return */ - if (wc->status != IB_WC_SUCCESS) + /* flush cqe if wc status is error, excluding flush error */ + if ((wc->status != IB_WC_SUCCESS) && + (wc->status != IB_WC_WR_FLUSH_ERR)) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp, + &attr, attr_mask, + (*cur_qp)->state, IB_QPS_ERR); + } + + if (wc->status == IB_WC_WR_FLUSH_ERR) return 0; if (is_send) { @@ -1931,7 +2230,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cqe->immtdata; + wc->ex.imm_data = + cpu_to_be32(le32_to_cpu(cqe->immtdata)); break; case HNS_ROCE_V2_OPCODE_SEND: wc->opcode = IB_WC_RECV; @@ -1940,7 +2240,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM: wc->opcode = IB_WC_RECV; wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cqe->immtdata; + wc->ex.imm_data = + cpu_to_be32(le32_to_cpu(cqe->immtdata)); break; case HNS_ROCE_V2_OPCODE_SEND_WITH_INV: wc->opcode = IB_WC_RECV; @@ -2273,10 +2574,10 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); /* No VLAN need to set 0xFFF */ - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, - V2_QPC_BYTE_24_VLAN_IDX_S, 0xfff); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, - V2_QPC_BYTE_24_VLAN_IDX_S, 0); + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0xfff); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0); /* * Set some fields in context to zero, Because the default values @@ -2886,21 +3187,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, - V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, - V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, - V2_QPC_BYTE_28_FL_S, grh->flow_label); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, - V2_QPC_BYTE_28_FL_S, 0); - - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, 0); - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); @@ -2911,9 +3197,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, 0); - memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); - memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); - roce_set_field(context->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head); @@ -2952,12 +3235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_168_LP_SGEN_INI_M, V2_QPC_BYTE_168_LP_SGEN_INI_S, 0); - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - return 0; } @@ -3135,13 +3412,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_28_AT_S, 0); } - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, - rdma_ah_get_sl(&attr->ah_attr)); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, @@ -3224,9 +3494,114 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, ; } else { dev_err(dev, "Illegal state for QP!\n"); + ret = -EINVAL; goto out; } + /* When QP state is err, SQ and RQ WQE should be flushed */ + if (new_state == IB_QPS_ERR) { + roce_set_field(context->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, + hr_qp->sq.head); + roce_set_field(qpc_mask->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); + roce_set_field(context->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, + hr_qp->rq.head); + roce_set_field(qpc_mask->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + } + + if (attr_mask & IB_QP_AV) { + const struct ib_global_route *grh = + rdma_ah_read_grh(&attr->ah_attr); + const struct ib_gid_attr *gid_attr = NULL; + u8 src_mac[ETH_ALEN]; + int is_roce_protocol; + u16 vlan = 0xffff; + u8 ib_port; + u8 hr_port; + + ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : + hr_qp->port + 1; + hr_port = ib_port - 1; + is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) && + rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; + + if (is_roce_protocol) { + gid_attr = attr->ah_attr.grh.sgid_attr; + vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); + memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + } + + roce_set_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, vlan); + roce_set_field(qpc_mask->byte_24_mtu_tc, + V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0); + + if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) { + dev_err(hr_dev->dev, + "sgid_index(%u) too large. max is %d\n", + grh->sgid_index, + hr_dev->caps.gid_table_len[hr_port]); + ret = -EINVAL; + goto out; + } + + if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) { + dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n"); + ret = -EINVAL; + goto out; + } + + roce_set_field(context->byte_52_udpspn_dmac, + V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, + (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ? + 0 : 0x12b7); + + roce_set_field(qpc_mask->byte_52_udpspn_dmac, + V2_QPC_BYTE_52_UDPSPN_M, + V2_QPC_BYTE_52_UDPSPN_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index); + + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S, 0); + + roce_set_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); + roce_set_field(qpc_mask->byte_24_mtu_tc, + V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, 0); + + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, grh->traffic_class); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, grh->flow_label); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, 0); + memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); + memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, + rdma_ah_get_sl(&attr->ah_attr)); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, 0); + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + } + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); @@ -3497,6 +3872,11 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); if (is_user) { + if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) + hns_roce_db_unmap_user( + to_hr_ucontext(hr_qp->ibqp.uobject->context), + &hr_qp->sdb); + if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) hns_roce_db_unmap_user( to_hr_ucontext(hr_qp->ibqp.uobject->context), @@ -3579,6 +3959,74 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) return ret; } +static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn) +{ + struct hns_roce_qp *hr_qp; + struct ib_qp_attr attr; + int attr_mask; + int ret; + + hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (!hr_qp) { + dev_warn(hr_dev->dev, "no hr_qp can be found!\n"); + return; + } + + if (hr_qp->ibqp.uobject) { + if (hr_qp->sdb_en == 1) { + hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + } else { + dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n"); + return; + } + } + + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask, + hr_qp->state, IB_QPS_ERR); + if (ret) + dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n", + qpn); +} + +static void hns_roce_irq_work_handle(struct work_struct *work) +{ + struct hns_roce_work *irq_work = + container_of(work, struct hns_roce_work, work); + u32 qpn = irq_work->qpn; + + switch (irq_work->event_type) { + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + break; + default: + break; + } + + kfree(irq_work); +} + +static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq, u32 qpn) +{ + struct hns_roce_work *irq_work; + + irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC); + if (!irq_work) + return; + + INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); + irq_work->hr_dev = hr_dev; + irq_work->qpn = qpn; + irq_work->event_type = eq->event_type; + irq_work->sub_type = eq->sub_type; + queue_work(hr_dev->irq_workq, &(irq_work->work)); +} + static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { u32 doorbell[2]; @@ -3681,14 +4129,9 @@ static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev, static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe, - int event_type) + int event_type, u32 qpn) { struct device *dev = hr_dev->dev; - u32 qpn; - - qpn = roce_get_field(aeqe->event.qp_event.qp, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_COMM_EST: @@ -3715,14 +4158,9 @@ static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe, - int event_type) + int event_type, u32 cqn) { struct device *dev = hr_dev->dev; - u32 cqn; - - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -3787,6 +4225,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe; int aeqe_found = 0; int event_type; + int sub_type; + u32 qpn; + u32 cqn; while ((aeqe = next_aeqe_sw_v2(eq))) { @@ -3798,6 +4239,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, event_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_EVENT_TYPE_M, HNS_ROCE_V2_AEQE_EVENT_TYPE_S); + sub_type = roce_get_field(aeqe->asyn, + HNS_ROCE_V2_AEQE_SUB_TYPE_M, + HNS_ROCE_V2_AEQE_SUB_TYPE_S); + qpn = roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + cqn = roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -3811,7 +4261,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type); + hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type, + qpn); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: @@ -3820,7 +4271,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type); + hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type, + cqn); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: dev_warn(dev, "DB overflow.\n"); @@ -3843,6 +4295,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, break; }; + eq->event_type = event_type; + eq->sub_type = sub_type; ++eq->cons_index; aeqe_found = 1; @@ -3850,6 +4304,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, dev_warn(dev, "cons_index overflow, set back to 0.\n"); eq->cons_index = 0; } + hns_roce_v2_init_irq_work(hr_dev, eq, qpn); } set_eq_cons_index_v2(eq); @@ -4052,15 +4507,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev, u32 bt_chk_sz; u32 mhop_num; int eqe_alloc; - int ba_num; int i = 0; int j = 0; mhop_num = hr_dev->caps.eqe_hop_num; buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); - ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) / - buf_chk_sz; /* hop_num = 0 */ if (mhop_num == HNS_ROCE_HOP_NUM_0) { @@ -4669,6 +5121,13 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) } } + hr_dev->irq_workq = + create_singlethread_workqueue("hns_roce_irq_workqueue"); + if (!hr_dev->irq_workq) { + dev_err(dev, "Create irq workqueue failed!\n"); + goto err_request_irq_fail; + } + return 0; err_request_irq_fail: @@ -4719,12 +5178,17 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(hr_dev->irq_names[i]); kfree(eq_table->eq); + + flush_workqueue(hr_dev->irq_workq); + destroy_workqueue(hr_dev->irq_workq); } static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, .hw_profile = hns_roce_v2_profile, + .hw_init = hns_roce_v2_init, + .hw_exit = hns_roce_v2_exit, .post_mbox = hns_roce_v2_post_mbox, .chk_mbox = hns_roce_v2_chk_mbox, .set_gid = hns_roce_v2_set_gid, @@ -4749,6 +5213,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, /* required last entry */ {0, } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index d47675f365c7..14aa308befef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -112,6 +112,9 @@ (step_idx == 1 && hop_num == 1) || \ (step_idx == 2 && hop_num == 2)) +#define CMD_CSQ_DESC_NUM 1024 +#define CMD_CRQ_DESC_NUM 1024 + enum { NO_ARMED = 0x0, REG_NXT_CEQE = 0x2, @@ -203,6 +206,10 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, + HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, + HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, + HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, + HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, }; @@ -447,8 +454,8 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_24_TC_S 8 #define V2_QPC_BYTE_24_TC_M GENMASK(15, 8) -#define V2_QPC_BYTE_24_VLAN_IDX_S 16 -#define V2_QPC_BYTE_24_VLAN_IDX_M GENMASK(27, 16) +#define V2_QPC_BYTE_24_VLAN_ID_S 16 +#define V2_QPC_BYTE_24_VLAN_ID_M GENMASK(27, 16) #define V2_QPC_BYTE_24_MTU_S 28 #define V2_QPC_BYTE_24_MTU_M GENMASK(31, 28) @@ -768,7 +775,7 @@ struct hns_roce_v2_cqe { __le32 byte_4; union { __le32 rkey; - __be32 immtdata; + __le32 immtdata; }; __le32 byte_12; __le32 byte_16; @@ -926,7 +933,7 @@ struct hns_roce_v2_cq_db { struct hns_roce_v2_ud_send_wqe { __le32 byte_4; __le32 msg_len; - __be32 immtdata; + __le32 immtdata; __le32 byte_16; __le32 byte_20; __le32 byte_24; @@ -1012,7 +1019,7 @@ struct hns_roce_v2_rc_send_wqe { __le32 msg_len; union { __le32 inv_key; - __be32 immtdata; + __le32 immtdata; }; __le32 byte_16; __le32 byte_20; @@ -1061,6 +1068,40 @@ struct hns_roce_query_version { __le32 rsv[5]; }; +struct hns_roce_cfg_llm_a { + __le32 base_addr_l; + __le32 base_addr_h; + __le32 depth_pgsz_init_en; + __le32 head_ba_l; + __le32 head_ba_h_nxtptr; + __le32 head_ptr; +}; + +#define CFG_LLM_QUE_DEPTH_S 0 +#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0) + +#define CFG_LLM_QUE_PGSZ_S 16 +#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16) + +#define CFG_LLM_INIT_EN_S 20 +#define CFG_LLM_INIT_EN_M GENMASK(20, 20) + +#define CFG_LLM_HEAD_PTR_S 0 +#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0) + +struct hns_roce_cfg_llm_b { + __le32 tail_ba_l; + __le32 tail_ba_h; + __le32 tail_ptr; + __le32 rsv[3]; +}; + +#define CFG_LLM_TAIL_BA_H_S 0 +#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0) + +#define CFG_LLM_TAIL_PTR_S 0 +#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0) + struct hns_roce_cfg_global_param { __le32 time_cfg_udp_port; __le32 rsv[5]; @@ -1072,7 +1113,7 @@ struct hns_roce_cfg_global_param { #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16 #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16) -struct hns_roce_pf_res { +struct hns_roce_pf_res_a { __le32 rsv; __le32 qpc_bt_idx_num; __le32 srqc_bt_idx_num; @@ -1111,6 +1152,32 @@ struct hns_roce_pf_res { #define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16 #define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16) +struct hns_roce_pf_res_b { + __le32 rsv0; + __le32 smac_idx_num; + __le32 sgid_idx_num; + __le32 qid_idx_sl_num; + __le32 rsv[2]; +}; + +#define PF_RES_DATA_1_PF_SMAC_IDX_S 0 +#define PF_RES_DATA_1_PF_SMAC_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_1_PF_SMAC_NUM_S 8 +#define PF_RES_DATA_1_PF_SMAC_NUM_M GENMASK(16, 8) + +#define PF_RES_DATA_2_PF_SGID_IDX_S 0 +#define PF_RES_DATA_2_PF_SGID_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_2_PF_SGID_NUM_S 8 +#define PF_RES_DATA_2_PF_SGID_NUM_M GENMASK(16, 8) + +#define PF_RES_DATA_3_PF_QID_IDX_S 0 +#define PF_RES_DATA_3_PF_QID_IDX_M GENMASK(9, 0) + +#define PF_RES_DATA_3_PF_SL_NUM_S 16 +#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16) + struct hns_roce_vf_res_a { __le32 vf_id; __le32 vf_qpc_bt_idx_num; @@ -1179,13 +1246,6 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) -/* Reg field definition */ -#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0 -#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0) - -#define ROCEE_VF_SGID_CFG4_SGID_TYPE_S 0 -#define ROCEE_VF_SGID_CFG4_SGID_TYPE_M GENMASK(1, 0) - struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1230,6 +1290,32 @@ struct hns_roce_cfg_bt_attr { #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8 #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8) +struct hns_roce_cfg_sgid_tb { + __le32 table_idx_rsv; + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_rsv; +}; +#define CFG_SGID_TB_TABLE_IDX_S 0 +#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) + +#define CFG_SGID_TB_VF_SGID_TYPE_S 0 +#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) + +struct hns_roce_cfg_smac_tb { + __le32 tb_idx_rsv; + __le32 vf_smac_l; + __le32 vf_smac_h_rsv; + __le32 rsv[3]; +}; +#define CFG_SMAC_TB_IDX_S 0 +#define CFG_SMAC_TB_IDX_M GENMASK(7, 0) + +#define CFG_SMAC_TB_VF_SMAC_H_S 0 +#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) + struct hns_roce_cmq_desc { __le16 opcode; __le16 flag; @@ -1276,8 +1362,32 @@ struct hns_roce_v2_cmq { u16 last_status; }; +enum hns_roce_link_table_type { + TSQ_LINK_TABLE, + TPQ_LINK_TABLE, +}; + +struct hns_roce_link_table { + struct hns_roce_buf_list table; + struct hns_roce_buf_list *pg_list; + u32 npages; + u32 pg_sz; +}; + +struct hns_roce_link_table_entry { + u32 blk_ba0; + u32 blk_ba1_nxt_ptr; +}; +#define HNS_ROCE_LINK_TABLE_BA1_S 0 +#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0) + +#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20 +#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20) + struct hns_roce_v2_priv { struct hns_roce_v2_cmq cmq; + struct hns_roce_link_table tsq; + struct hns_roce_link_table tpq; }; struct hns_roce_eq_context { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 21b901cfa2d6..c5cae9a38c04 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -74,8 +74,7 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) return hr_dev->hw->set_mac(hr_dev, phy_port, addr); } -static int hns_roce_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) +static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); u8 port = attr->port_num - 1; @@ -87,8 +86,7 @@ static int hns_roce_add_gid(const union ib_gid *gid, spin_lock_irqsave(&hr_dev->iboe.lock, flags); - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, - (union ib_gid *)gid, attr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); @@ -208,7 +206,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_qp_wr = hr_dev->caps.max_wqes; props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_RC_RNR_NAK_GEN; - props->max_sge = max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg); + props->max_send_sge = hr_dev->caps.max_sq_sg; + props->max_recv_sge = hr_dev->caps.max_rq_sg; props->max_sge_rd = 1; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; @@ -535,6 +534,9 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); + ib_dev->uverbs_ex_cmd_mask |= + (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + /* HCA||device||port */ ib_dev->modify_device = hns_roce_modify_device; ib_dev->query_device = hns_roce_query_device; @@ -887,8 +889,7 @@ error_failed_cmd_init: error_failed_cmq_init: if (hr_dev->hw->reset) { - ret = hr_dev->hw->reset(hr_dev, false); - if (ret) + if (hr_dev->hw->reset(hr_dev, false)) dev_err(dev, "Dereset RoCE engine failed!\n"); } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index b9f2c871ff9a..e11c149da04d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -37,7 +37,7 @@ static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) { - return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn); + return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn) ? -ENOMEM : 0; } static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index baaf906f7c2e..efb7e961ca65 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -115,7 +115,10 @@ static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt, { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base); + return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, + base) ? + -ENOMEM : + 0; } enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) @@ -489,6 +492,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_TGT) + return 0; + + return 1; +} + static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || @@ -613,6 +624,23 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_mtt; } + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && + (udata->inlen >= sizeof(ucmd)) && + (udata->outlen >= sizeof(resp)) && + hns_roce_qp_has_sq(init_attr)) { + ret = hns_roce_db_map_user( + to_hr_ucontext(ib_pd->uobject->context), + ucmd.sdb_addr, &hr_qp->sdb); + if (ret) { + dev_err(dev, "sq record doorbell map failed!\n"); + goto err_mtt; + } + + /* indicate kernel supports sq record db */ + resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB; + hr_qp->sdb_en = 1; + } + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp)) && hns_roce_qp_has_rq(init_attr)) { @@ -621,7 +649,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ucmd.db_addr, &hr_qp->rdb); if (ret) { dev_err(dev, "rq record doorbell map failed!\n"); - goto err_mtt; + goto err_sq_dbmap; } } } else { @@ -734,7 +762,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { - /* indicate kernel supports record db */ + /* indicate kernel supports rq record db */ resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (ret) @@ -770,6 +798,16 @@ err_wrid: kfree(hr_qp->rq.wrid); } +err_sq_dbmap: + if (ib_pd->uobject) + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && + (udata->inlen >= sizeof(ucmd)) && + (udata->outlen >= sizeof(resp)) && + hns_roce_qp_has_sq(init_attr)) + hns_roce_db_unmap_user( + to_hr_ucontext(ib_pd->uobject->context), + &hr_qp->sdb); + err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); @@ -903,6 +941,17 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + if (ibqp->uobject && + (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { + if (hr_qp->sdb_en == 1) { + hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + } else { + dev_warn(dev, "flush cqe is not supported in userspace!\n"); + goto out; + } + } + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, IB_LINK_LAYER_ETHERNET)) { dev_err(dev, "ib_modify_qp_is_ok failed\n"); diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig index 2962979c06e9..d867ef1ac72a 100644 --- a/drivers/infiniband/hw/i40iw/Kconfig +++ b/drivers/infiniband/hw/i40iw/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_I40IW tristate "Intel(R) Ethernet X722 iWARP Driver" depends on INET && I40E + depends on IPV6 || !IPV6 depends on PCI select GENERIC_ALLOCATOR ---help--- diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 7b2655128b9f..423818a7d333 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -57,6 +57,7 @@ #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/ip_fib.h> +#include <net/secure_seq.h> #include <net/tcp.h> #include <asm/checksum.h> @@ -2164,7 +2165,6 @@ static struct i40iw_cm_node *i40iw_make_cm_node( struct i40iw_cm_listener *listener) { struct i40iw_cm_node *cm_node; - struct timespec ts; int oldarpindex; int arpindex; struct net_device *netdev = iwdev->netdev; @@ -2214,10 +2214,26 @@ static struct i40iw_cm_node *i40iw_make_cm_node( cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; - ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; - cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) : - (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6); + if (cm_node->ipv4) { + cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]), + htonl(cm_node->rem_addr[0]), + htons(cm_node->loc_port), + htons(cm_node->rem_port)); + cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4; + } else if (IS_ENABLED(CONFIG_IPV6)) { + __be32 loc[4] = { + htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]), + htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3]) + }; + __be32 rem[4] = { + htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]), + htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3]) + }; + cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem, + htons(cm_node->loc_port), + htons(cm_node->rem_port)); + cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6; + } cm_node->iwdev = iwdev; cm_node->dev = &iwdev->sc_dev; diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 2836c5420d60..55a1fbf0e670 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -435,45 +435,24 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) } /** - * i40iw_manage_apbvt - add or delete tcp port + * i40iw_cqp_manage_abvpt_cmd - send cqp command manage abpvt * @iwdev: iwarp device * @accel_local_port: port for apbvt * @add_port: add or delete port */ -int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port) +static enum i40iw_status_code +i40iw_cqp_manage_abvpt_cmd(struct i40iw_device *iwdev, + u16 accel_local_port, + bool add_port) { struct i40iw_apbvt_info *info; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; - unsigned long flags; - struct i40iw_cm_core *cm_core = &iwdev->cm_core; - enum i40iw_status_code status = 0; - bool in_use; - - /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to - * protect against race where add APBVT CQP can race ahead of the delete - * APBVT for same port. - */ - spin_lock_irqsave(&cm_core->apbvt_lock, flags); - - if (!add_port) { - in_use = i40iw_port_in_use(cm_core, accel_local_port); - if (in_use) - goto exit; - clear_bit(accel_local_port, cm_core->ports_in_use); - } else { - in_use = test_and_set_bit(accel_local_port, - cm_core->ports_in_use); - spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); - if (in_use) - return 0; - } + enum i40iw_status_code status; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port); - if (!cqp_request) { - status = -ENOMEM; - goto exit; - } + if (!cqp_request) + return I40IW_ERR_NO_MEMORY; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; @@ -489,14 +468,54 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad status = i40iw_handle_cqp_op(iwdev, cqp_request); if (status) i40iw_pr_err("CQP-OP Manage APBVT entry fail"); -exit: - if (!add_port) - spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return status; } /** + * i40iw_manage_apbvt - add or delete tcp port + * @iwdev: iwarp device + * @accel_local_port: port for apbvt + * @add_port: add or delete port + */ +enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev, + u16 accel_local_port, + bool add_port) +{ + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + enum i40iw_status_code status; + unsigned long flags; + bool in_use; + + /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to + * protect against race where add APBVT CQP can race ahead of the delete + * APBVT for same port. + */ + if (add_port) { + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + in_use = __test_and_set_bit(accel_local_port, + cm_core->ports_in_use); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + if (in_use) + return 0; + return i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port, + true); + } else { + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + in_use = i40iw_port_in_use(cm_core, accel_local_port); + if (in_use) { + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return 0; + } + __clear_bit(accel_local_port, cm_core->ports_in_use); + status = i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port, + false); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return status; + } +} + +/** * i40iw_manage_arp_cache - manage hw arp cache * @iwdev: iwarp device * @mac_addr: mac address ptr diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 68679ad4c6da..e2e6c74a7452 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -71,7 +71,8 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; props->max_qp = iwdev->max_qp - iwdev->used_qps; props->max_qp_wr = I40IW_MAX_QP_WRS; - props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; props->max_cq = iwdev->max_cq - iwdev->used_cqs; props->max_cqe = iwdev->max_cqe; props->max_mr = iwdev->max_mr - iwdev->used_mrs; @@ -1409,6 +1410,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) struct vm_area_struct *vma; struct hstate *h; + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); if (vma && is_vm_hugetlb_page(vma)) { h = hstate_vma(vma); @@ -1417,6 +1419,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) iwmr->page_msk = huge_page_mask(h); } } + up_read(¤t->mm->mmap_sem); } /** @@ -2198,8 +2201,8 @@ static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, in * @bad_wr: return of bad wr if err */ static int i40iw_post_send(struct ib_qp *ibqp, - struct ib_send_wr *ib_wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *ib_wr, + const struct ib_send_wr **bad_wr) { struct i40iw_qp *iwqp; struct i40iw_qp_uk *ukqp; @@ -2374,9 +2377,8 @@ out: * @ib_wr: work request for receive * @bad_wr: bad wr caused an error */ -static int i40iw_post_recv(struct ib_qp *ibqp, - struct ib_recv_wr *ib_wr, - struct ib_recv_wr **bad_wr) +static int i40iw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, + const struct ib_recv_wr **bad_wr) { struct i40iw_qp *iwqp; struct i40iw_qp_uk *ukqp; @@ -2701,21 +2703,6 @@ static int i40iw_query_gid(struct ib_device *ibdev, } /** - * i40iw_modify_port Modify port properties - * @ibdev: device pointer from stack - * @port: port number - * @port_modify_mask: mask for port modifications - * @props: port properties - */ -static int i40iw_modify_port(struct ib_device *ibdev, - u8 port, - int port_modify_mask, - struct ib_port_modify *props) -{ - return -ENOSYS; -} - -/** * i40iw_query_pkey - Query partition key * @ibdev: device pointer from stack * @port: port number @@ -2732,28 +2719,6 @@ static int i40iw_query_pkey(struct ib_device *ibdev, } /** - * i40iw_create_ah - create address handle - * @ibpd: ptr of pd - * @ah_attr: address handle attributes - */ -static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd, - struct rdma_ah_attr *attr, - struct ib_udata *udata) - -{ - return ERR_PTR(-ENOSYS); -} - -/** - * i40iw_destroy_ah - Destroy address handle - * @ah: pointer to address handle - */ -static int i40iw_destroy_ah(struct ib_ah *ah) -{ - return -ENOSYS; -} - -/** * i40iw_get_vector_affinity - report IRQ affinity mask * @ibdev: IB device * @comp_vector: completion vector index @@ -2820,7 +2785,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; iwibdev->ibdev.query_port = i40iw_query_port; - iwibdev->ibdev.modify_port = i40iw_modify_port; iwibdev->ibdev.query_pkey = i40iw_query_pkey; iwibdev->ibdev.query_gid = i40iw_query_gid; iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext; @@ -2840,8 +2804,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; iwibdev->ibdev.query_device = i40iw_query_device; - iwibdev->ibdev.create_ah = i40iw_create_ah; - iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; iwibdev->ibdev.drain_sq = i40iw_drain_sq; iwibdev->ibdev.drain_rq = i40iw_drain_rq; iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 9345d5b546d1..e9e3a6f390db 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -82,12 +82,11 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct mlx4_ib_ah *ah) { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + const struct ib_gid_attr *gid_attr; struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; struct in6_addr in6; u16 vlan_tag = 0xffff; - union ib_gid sgid; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); int ret; @@ -96,25 +95,30 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, is_mcast = 1; memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); - ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &gid_attr); - if (ret) - return ERR_PTR(ret); eth_zero_addr(ah->av.eth.s_mac); - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); + + /* + * If sgid_attr is NULL we are being called by mlx4_ib_create_ah_slave + * and we are directly creating an AV for a slave's gid_index. + */ + gid_attr = ah_attr->grh.sgid_attr; + if (gid_attr) { + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); + memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); + if (ret < 0) + return ERR_PTR(ret); + ah->av.eth.gid_index = ret; + } else { + /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */ + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + } + if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 24)); - ret = mlx4_ib_gid_index_to_real_index(ibdev, - rdma_ah_get_port_num(ah_attr), - grh->sgid_index); - if (ret < 0) - return ERR_PTR(ret); - ah->av.eth.gid_index = ret; ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = grh->hop_limit; if (rdma_ah_get_static_rate(ah_attr)) { @@ -173,6 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, return create_ib_ah(pd, ah_attr, ah); /* never fails */ } +/* AH's created via this call must be free'd by mlx4_ib_destroy_ah. */ +struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, + u16 vlan_tag) +{ + struct rdma_ah_attr slave_attr = *ah_attr; + struct mlx4_ib_ah *mah; + struct ib_ah *ah; + + slave_attr.grh.sgid_attr = NULL; + slave_attr.grh.sgid_index = slave_sgid_index; + ah = mlx4_ib_create_ah(pd, &slave_attr, NULL); + if (IS_ERR(ah)) + return ah; + + ah->device = pd->device; + ah->pd = pd; + ah->type = ah_attr->type; + mah = to_mah(ah); + + /* get rid of force-loopback bit */ + mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); + + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) + memcpy(mah->av.eth.s_mac, s_mac, 6); + + if (vlan_tag < 0x1000) + vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; + mah->av.eth.vlan = cpu_to_be16(vlan_tag); + + return ah; +} + int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 90a3e2642c2e..e5466d786bb1 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -506,7 +506,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, { struct ib_sge list; struct ib_ud_wr wr; - struct ib_send_wr *bad_wr; + const struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *tun_ctx; struct mlx4_ib_demux_pv_qp *tun_qp; struct mlx4_rcv_tunnel_mad *tun_mad; @@ -1310,7 +1310,8 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, int index) { struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_recv_wr recv_wr; + const struct ib_recv_wr *bad_recv_wr; int size; size = (tun_qp->qp->qp_type == IB_QPT_UD) ? @@ -1361,19 +1362,16 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, { struct ib_sge list; struct ib_ud_wr wr; - struct ib_send_wr *bad_wr; + const struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *sqp_ctx; struct mlx4_ib_demux_pv_qp *sqp; struct mlx4_mad_snd_buf *sqp_mad; struct ib_ah *ah; struct ib_qp *send_qp = NULL; - struct ib_global_route *grh; unsigned wire_tx_ix = 0; int ret = 0; u16 wire_pkey_ix; int src_qpnum; - u8 sgid_index; - sqp_ctx = dev->sriov.sqps[port-1]; @@ -1394,16 +1392,11 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, send_qp = sqp->qp; /* create ah */ - grh = rdma_ah_retrieve_grh(attr); - sgid_index = grh->sgid_index; - grh->sgid_index = 0; - ah = rdma_create_ah(sqp_ctx->pd, attr); + ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, attr, + rdma_ah_retrieve_grh(attr)->sgid_index, + s_mac, vlan_id); if (IS_ERR(ah)) return -ENOMEM; - grh->sgid_index = sgid_index; - to_mah(ah)->av.ib.gid_index = sgid_index; - /* get rid of force-loopback bit */ - to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) @@ -1445,12 +1438,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.wr.num_sge = 1; wr.wr.opcode = IB_WR_SEND; wr.wr.send_flags = IB_SEND_SIGNALED; - if (s_mac) - memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); - if (vlan_id < 0x1000) - vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13; - to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id); - ret = ib_post_send(send_qp, &wr.wr, &bad_wr); if (!ret) @@ -1461,7 +1448,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - rdma_destroy_ah(ah); + mlx4_ib_destroy_ah(ah); return ret; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4ec519afc45b..ca0f1ee26091 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -246,9 +246,7 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return mlx4_ib_update_gids_v1(gids, ibdev, port_num); } -static int mlx4_ib_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) +static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) { struct mlx4_ib_dev *ibdev = to_mdev(attr->device); struct mlx4_ib_iboe *iboe = &ibdev->iboe; @@ -271,8 +269,9 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { - if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) && - (port_gid_table->gids[i].gid_type == attr->gid_type)) { + if (!memcmp(&port_gid_table->gids[i].gid, + &attr->gid, sizeof(attr->gid)) && + port_gid_table->gids[i].gid_type == attr->gid_type) { found = i; break; } @@ -289,7 +288,8 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, ret = -ENOMEM; } else { *context = port_gid_table->gids[free].ctx; - memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); + memcpy(&port_gid_table->gids[free].gid, + &attr->gid, sizeof(attr->gid)); port_gid_table->gids[free].gid_type = attr->gid_type; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; @@ -380,17 +380,15 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) } int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, - u8 port_num, int index) + const struct ib_gid_attr *attr) { struct mlx4_ib_iboe *iboe = &ibdev->iboe; struct gid_cache_context *ctx = NULL; - union ib_gid gid; struct mlx4_port_gid_table *port_gid_table; int real_index = -EINVAL; int i; - int ret; unsigned long flags; - struct ib_gid_attr attr; + u8 port_num = attr->port_num; if (port_num > MLX4_MAX_PORTS) return -EINVAL; @@ -399,21 +397,15 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, port_num = 1; if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) - return index; - - ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr); - if (ret) - return ret; - - if (attr.ndev) - dev_put(attr.ndev); + return attr->index; spin_lock_irqsave(&iboe->lock, flags); port_gid_table = &iboe->gids[port_num - 1]; for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) - if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) && - attr.gid_type == port_gid_table->gids[i].gid_type) { + if (!memcmp(&port_gid_table->gids[i].gid, + &attr->gid, sizeof(attr->gid)) && + attr->gid_type == port_gid_table->gids[i].gid_type) { ctx = port_gid_table->gids[i].ctx; break; } @@ -525,8 +517,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; - props->max_sge = min(dev->dev->caps.max_sq_sg, - dev->dev->caps.max_rq_sg); + props->max_send_sge = dev->dev->caps.max_sq_sg; + props->max_recv_sge = dev->dev->caps.max_rq_sg; props->max_sge_rd = MLX4_MAX_SGE_RD; props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; @@ -770,7 +762,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, IB_WIDTH_4X : IB_WIDTH_1X; props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? IB_SPEED_FDR : IB_SPEED_QDR; - props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->port_cap_flags = IB_PORT_CM_SUP; + props->ip_gids = true; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; @@ -2709,6 +2702,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; + ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq; + ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; ibdev->ib_dev.create_cq = mlx4_ib_create_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7b1429917aba..e10dccc7958f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -322,7 +322,6 @@ struct mlx4_ib_qp { u32 doorbell_qpn; __be32 sq_signal_bits; unsigned sq_next_wqe; - int sq_max_wqes_per_wr; int sq_spare_wqes; struct mlx4_ib_wq sq; @@ -760,6 +759,10 @@ void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); +struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, + u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); @@ -771,21 +774,23 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mlx4_ib_destroy_srq(struct ib_srq *srq); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); -int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_destroy_qp(struct ib_qp *qp); +void mlx4_ib_drain_sq(struct ib_qp *qp); +void mlx4_ib_drain_rq(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, @@ -900,7 +905,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, - u8 port_num, int index); + const struct ib_gid_attr *attr); void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, int port); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 3b8045fd23ed..6dd3cd2c2f80 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -204,91 +204,26 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) /* * Stamp a SQ WQE so that it is invalid if prefetched by marking the - * first four bytes of every 64 byte chunk with - * 0x7FFFFFF | (invalid_ownership_value << 31). - * - * When the max work request size is less than or equal to the WQE - * basic block size, as an optimization, we can stamp all WQEs with - * 0xffffffff, and skip the very first chunk of each WQE. + * first four bytes of every 64 byte chunk with 0xffffffff, except for + * the very first chunk of the WQE. */ -static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) +static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n) { __be32 *wqe; int i; int s; - int ind; void *buf; - __be32 stamp; struct mlx4_wqe_ctrl_seg *ctrl; - if (qp->sq_max_wqes_per_wr > 1) { - s = roundup(size, 1U << qp->sq.wqe_shift); - for (i = 0; i < s; i += 64) { - ind = (i >> qp->sq.wqe_shift) + n; - stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : - cpu_to_be32(0xffffffff); - buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1)); - *wqe = stamp; - } - } else { - ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); - s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4; - for (i = 64; i < s; i += 64) { - wqe = buf + i; - *wqe = cpu_to_be32(0xffffffff); - } + buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + ctrl = (struct mlx4_wqe_ctrl_seg *)buf; + s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4; + for (i = 64; i < s; i += 64) { + wqe = buf + i; + *wqe = cpu_to_be32(0xffffffff); } } -static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) -{ - struct mlx4_wqe_ctrl_seg *ctrl; - struct mlx4_wqe_inline_seg *inl; - void *wqe; - int s; - - ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); - s = sizeof(struct mlx4_wqe_ctrl_seg); - - if (qp->ibqp.qp_type == IB_QPT_UD) { - struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl; - struct mlx4_av *av = (struct mlx4_av *)dgram->av; - memset(dgram, 0, sizeof *dgram); - av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); - s += sizeof(struct mlx4_wqe_datagram_seg); - } - - /* Pad the remainder of the WQE with an inline data segment. */ - if (size > s) { - inl = wqe + s; - inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); - } - ctrl->srcrb_flags = 0; - ctrl->qpn_vlan.fence_size = size / 16; - /* - * Make sure descriptor is fully written before setting ownership bit - * (because HW can start executing as soon as we do). - */ - wmb(); - - ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | - (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); - - stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); -} - -/* Post NOP WQE to prevent wrap-around in the middle of WR */ -static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) -{ - unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); - if (unlikely(s < qp->sq_max_wqes_per_wr)) { - post_nop_wqe(qp, ind, s << qp->sq.wqe_shift); - ind += s; - } - return ind; -} - static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) { struct ib_event event; @@ -433,8 +368,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp, - bool shrink_wqe) + enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) { int s; @@ -461,70 +395,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, if (s > dev->dev->caps.max_sq_desc_sz) return -EINVAL; + qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); + /* - * Hermon supports shrinking WQEs, such that a single work - * request can include multiple units of 1 << wqe_shift. This - * way, work requests can differ in size, and do not have to - * be a power of 2 in size, saving memory and speeding up send - * WR posting. Unfortunately, if we do this then the - * wqe_index field in CQEs can't be used to look up the WR ID - * anymore, so we do this only if selective signaling is off. - * - * Further, on 32-bit platforms, we can't use vmap() to make - * the QP buffer virtually contiguous. Thus we have to use - * constant-sized WRs to make sure a WR is always fully within - * a single page-sized chunk. - * - * Finally, we use NOP work requests to pad the end of the - * work queue, to avoid wrap-around in the middle of WR. We - * set NEC bit to avoid getting completions with error for - * these NOP WRs, but since NEC is only supported starting - * with firmware 2.2.232, we use constant-sized WRs for older - * firmware. - * - * And, since MLX QPs only support SEND, we use constant-sized - * WRs in this case. - * - * We look for the smallest value of wqe_shift such that the - * resulting number of wqes does not exceed device - * capabilities. - * - * We set WQE size to at least 64 bytes, this way stamping - * invalidates each WQE. + * We need to leave 2 KB + 1 WR of headroom in the SQ to + * allow HW to prefetch. */ - if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && - qp->sq_signal_bits && BITS_PER_LONG == 64 && - type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && - !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | - MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) - qp->sq.wqe_shift = ilog2(64); - else - qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); - - for (;;) { - qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); - - /* - * We need to leave 2 KB + 1 WR of headroom in the SQ to - * allow HW to prefetch. - */ - qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr; - qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr * - qp->sq_max_wqes_per_wr + - qp->sq_spare_wqes); - - if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes) - break; - - if (qp->sq_max_wqes_per_wr <= 1) - return -EINVAL; - - ++qp->sq.wqe_shift; - } - - qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, - (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - - send_wqe_overhead(type, qp->flags)) / + qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + + qp->sq_spare_wqes); + + qp->sq.max_gs = + (min(dev->dev->caps.max_sq_desc_sz, + (1 << qp->sq.wqe_shift)) - + send_wqe_overhead(type, qp->flags)) / sizeof (struct mlx4_wqe_data_seg); qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + @@ -538,7 +422,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } cap->max_send_wr = qp->sq.max_post = - (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; + qp->sq.wqe_cnt - qp->sq_spare_wqes; cap->max_send_sge = min(qp->sq.max_gs, min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)); @@ -977,7 +861,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, { int qpn; int err; - struct ib_qp_cap backup_cap; struct mlx4_ib_sqp *sqp = NULL; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; @@ -1178,9 +1061,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } - memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap)); - err = set_kernel_sq_size(dev, &init_attr->cap, - qp_type, qp, true); + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); if (err) goto err; @@ -1192,20 +1073,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, *qp->db.db = 0; } - if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size, + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { - memcpy(&init_attr->cap, &backup_cap, - sizeof(backup_cap)); - err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, - qp, false); - if (err) - goto err_db; - - if (mlx4_buf_alloc(dev->dev, qp->buf_size, - PAGE_SIZE * 2, &qp->buf)) { - err = -ENOMEM; - goto err_db; - } + err = -ENOMEM; + goto err_db; } err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, @@ -1859,8 +1730,7 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah); int real_sgid_index = - mlx4_ib_gid_index_to_real_index(dev, port, - grh->sgid_index); + mlx4_ib_gid_index_to_real_index(dev, grh->sgid_attr); if (real_sgid_index < 0) return real_sgid_index; @@ -2176,6 +2046,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, { struct ib_uobject *ibuobject; struct ib_srq *ibsrq; + const struct ib_gid_attr *gid_attr = NULL; struct ib_rwq_ind_table *rwq_ind_tbl; enum ib_qp_type qp_type; struct mlx4_ib_dev *dev; @@ -2356,29 +2227,17 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (attr_mask & IB_QP_AV) { u8 port_num = mlx4_is_bonded(dev->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - union ib_gid gid; - struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; - int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; if (is_eth) { - int index = - rdma_ah_read_grh(&attr->ah_attr)->sgid_index; - - status = ib_get_cached_gid(&dev->ib_dev, port_num, - index, &gid, &gid_attr); - if (!status) { - vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); - memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); - } + gid_attr = attr->ah_attr.grh.sgid_attr; + vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); + memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN); } - if (status) - goto out; if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, port_num, vlan, smac)) @@ -2389,7 +2248,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (is_eth && (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { - u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); + u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type); if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { err = -EINVAL; @@ -2594,11 +2453,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); - if (qp->sq_max_wqes_per_wr == 1) - ctrl->qpn_vlan.fence_size = - 1 << (qp->sq.wqe_shift - 4); - - stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); + ctrl->qpn_vlan.fence_size = + 1 << (qp->sq.wqe_shift - 4); + stamp_send_wqe(qp, i); } } @@ -2937,7 +2794,7 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) } static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, - struct ib_ud_wr *wr, + const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); @@ -3085,7 +2942,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, } #define MLX4_ROCEV2_QP1_SPORT 0xC000 -static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, +static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; @@ -3181,10 +3038,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; } else { - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + sqp->ud_header.grh.source_gid = + ah->ibah.sgid_attr->gid; } } memcpy(sqp->ud_header.grh.destination_gid.raw, @@ -3369,7 +3224,7 @@ static __be32 convert_access(int acc) } static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, - struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { struct mlx4_ib_mr *mr = to_mmr(wr->mr); @@ -3399,7 +3254,7 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, } static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->swap); @@ -3415,7 +3270,7 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, } static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { aseg->swap_add = cpu_to_be64(wr->swap); aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); @@ -3424,7 +3279,7 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->remote_qpn); @@ -3435,7 +3290,7 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_ud_wr *wr, + const struct ib_ud_wr *wr, enum mlx4_ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->ah)->av; @@ -3457,7 +3312,8 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } -static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) +static void build_tunnel_header(const struct ib_ud_wr *wr, void *wqe, + unsigned *mlx_seg_len) { struct mlx4_wqe_inline_seg *inl = wqe; struct mlx4_ib_tunnel_header hdr; @@ -3540,9 +3396,9 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, - struct mlx4_ib_qp *qp, unsigned *lso_seg_len, - __be32 *lso_hdr_sz, __be32 *blh) +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, + const struct ib_ud_wr *wr, struct mlx4_ib_qp *qp, + unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) { unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); @@ -3560,7 +3416,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, return 0; } -static __be32 send_ieth(struct ib_send_wr *wr) +static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: @@ -3582,8 +3438,8 @@ static void add_zero_len_inline(void *wqe) inl->byte_count = cpu_to_be32(1 << 31); } -int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); void *wqe; @@ -3593,7 +3449,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int err = 0; unsigned ind; - int uninitialized_var(stamp); int uninitialized_var(size); unsigned uninitialized_var(seglen); __be32 dummy; @@ -3623,7 +3478,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR && + !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -3865,22 +3721,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; - stamp = ind + qp->sq_spare_wqes; - ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); - /* * We can improve latency by not stamping the last * send queue WQE until after ringing the doorbell, so * only stamp here if there are still more WQEs to post. - * - * Same optimization applies to padding with NOP wqe - * in case of WQE shrinking (used to prevent wrap-around - * in the middle of WR). */ - if (wr->next) { - stamp_send_wqe(qp, stamp, size * 16); - ind = pad_wraparound(qp, ind); - } + if (wr->next) + stamp_send_wqe(qp, ind + qp->sq_spare_wqes); + ind++; } out: @@ -3902,9 +3750,8 @@ out: */ mmiowb(); - stamp_send_wqe(qp, stamp, size * 16); + stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1); - ind = pad_wraparound(qp, ind); qp->sq_next_wqe = ind; } @@ -3913,8 +3760,14 @@ out: return err; } -int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + return _mlx4_ib_post_send(ibqp, wr, bad_wr, false); +} + +static int _mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_wqe_data_seg *scat; @@ -3929,7 +3782,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR && + !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4000,6 +3854,12 @@ out: return err; } +int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false); +} + static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) { switch (mlx4_state) { @@ -4047,9 +3907,9 @@ static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev, u8 port_num = path->sched_queue & 0x40 ? 2 : 1; memset(ah_attr, 0, sizeof(*ah_attr)); - ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (port_num == 0 || port_num > dev->caps.num_ports) return; + ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) | @@ -4465,3 +4325,132 @@ int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) kfree(ib_rwq_ind_tbl); return 0; } + +struct mlx4_ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void mlx4_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx4_ib_drain_cqe *cqe = container_of(wc->wr_cqe, + struct mlx4_ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* This function returns only once the drained WR was completed */ +static void handle_drain_completion(struct ib_cq *cq, + struct mlx4_ib_drain_cqe *sdrain, + struct mlx4_ib_dev *dev) +{ + struct mlx4_dev *mdev = dev->dev; + + if (cq->poll_ctx == IB_POLL_DIRECT) { + while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0) + ib_process_cq_direct(cq, -1); + return; + } + + if (mdev->persist->state == MLX4_DEVICE_STATE_INTERNAL_ERROR) { + struct mlx4_ib_cq *mcq = to_mcq(cq); + bool triggered = false; + unsigned long flags; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + /* Make sure that the CQ handler won't run if wasn't run yet */ + if (!mcq->mcq.reset_notify_added) + mcq->mcq.reset_notify_added = 1; + else + triggered = true; + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + if (triggered) { + /* Wait for any scheduled/running task to be ended */ + switch (cq->poll_ctx) { + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + irq_poll_enable(&cq->iop); + break; + case IB_POLL_WORKQUEUE: + cancel_work_sync(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + } + + /* Run the CQ handler - this makes sure that the drain WR will + * be processed if wasn't processed yet. + */ + mcq->mcq.comp(&mcq->mcq); + } + + wait_for_completion(&sdrain->done); +} + +void mlx4_ib_drain_sq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->send_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx4_ib_drain_cqe sdrain; + const struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; + int ret; + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_dev *mdev = dev->dev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + sdrain.cqe.done = mlx4_ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = _mlx4_ib_post_send(qp, &swr.wr, &bad_swr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &sdrain, dev); +} + +void mlx4_ib_drain_rq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->recv_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx4_ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}; + const struct ib_recv_wr *bad_rwr; + int ret; + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_dev *mdev = dev->dev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = mlx4_ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = _mlx4_ib_post_recv(qp, &rwr, &bad_rwr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &rdrain, dev); +} diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index ebee56cbc0e2..3731b31c3653 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -307,8 +307,8 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) spin_unlock(&srq->lock); } -int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx4_ib_srq *srq = to_msrq(ibsrq); struct mlx4_wqe_srq_next_seg *next; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index d42b922bede8..b8e4b15e2674 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,3 +3,5 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o +mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o +mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index e6bde32a83f3..ffd03bf1a71e 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -37,7 +37,6 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, struct rdma_ah_attr *ah_attr) { enum ib_gid_type gid_type; - int err; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -53,18 +52,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { - err = mlx5_get_roce_gid_type(dev, ah_attr->port_num, - ah_attr->grh.sgid_index, - &gid_type); - if (err) - return ERR_PTR(err); + gid_type = ah_attr->grh.sgid_attr->gid_type; memcpy(ah->av.rmac, ah_attr->roce.dmac, sizeof(ah_attr->roce.dmac)); ah->av.udp_sport = - mlx5_get_roce_udp_sport(dev, - rdma_ah_get_port_num(ah_attr), - rdma_ah_read_grh(ah_attr)->sgid_index); + mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr); ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) #define MLX5_ECN_ENABLED BIT(1) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ccc0b5d06a7d..c84fef9a8a08 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -185,3 +185,15 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) return err; } + +int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) +{ + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, + 0, 0); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 98ea4648c655..88cbb1c41703 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -41,6 +41,7 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); +int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 985fa2637390..7e4e358a4fd8 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -359,9 +359,6 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count, int ret; char lbuf[11]; - if (*pos) - return 0; - ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var); if (ret) return ret; @@ -370,11 +367,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count, if (ret < 0) return ret; - if (copy_to_user(buf, lbuf, ret)) - return -EFAULT; - - *pos += ret; - return ret; + return simple_read_from_buffer(buf, count, pos, lbuf, ret); } static const struct file_operations dbg_cc_fops = { diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index ad39d64b8108..088205d7f1a1 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1184,7 +1184,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) - return -ENOSYS; + return -EOPNOTSUPP; if (cq_period > MLX5_MAX_CQ_PERIOD) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c new file mode 100644 index 000000000000..ac116d63e466 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -0,0 +1,1119 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_umem.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) +struct devx_obj { + struct mlx5_core_dev *mdev; + u32 obj_id; + u32 dinlen; /* destroy inbox length */ + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; +}; + +struct devx_umem { + struct mlx5_core_dev *mdev; + struct ib_umem *umem; + u32 page_offset; + int page_shift; + int ncont; + u32 dinlen; + u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; +}; + +struct devx_umem_reg_cmd { + void *in; + u32 inlen; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; +}; + +static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) +{ + return to_mucontext(ib_uverbs_get_ucontext(file)); +} + +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +{ + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u64 general_obj_types; + void *hdr; + int err; + + hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); + + general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) || + !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) + return -EINVAL; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return 0; +} + +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid); + + mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + switch (opcode) { + case MLX5_CMD_OP_DESTROY_TIR: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, + obj_id); + return true; + + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, + table_id); + return true; + default: + return false; + } +} + +static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + u32 obj_id; + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + break; + case MLX5_CMD_OP_QUERY_MKEY: + obj_id = MLX5_GET(query_mkey_in, in, mkey_index); + break; + case MLX5_CMD_OP_QUERY_CQ: + obj_id = MLX5_GET(query_cq_in, in, cqn); + break; + case MLX5_CMD_OP_MODIFY_CQ: + obj_id = MLX5_GET(modify_cq_in, in, cqn); + break; + case MLX5_CMD_OP_QUERY_SQ: + obj_id = MLX5_GET(query_sq_in, in, sqn); + break; + case MLX5_CMD_OP_MODIFY_SQ: + obj_id = MLX5_GET(modify_sq_in, in, sqn); + break; + case MLX5_CMD_OP_QUERY_RQ: + obj_id = MLX5_GET(query_rq_in, in, rqn); + break; + case MLX5_CMD_OP_MODIFY_RQ: + obj_id = MLX5_GET(modify_rq_in, in, rqn); + break; + case MLX5_CMD_OP_QUERY_RMP: + obj_id = MLX5_GET(query_rmp_in, in, rmpn); + break; + case MLX5_CMD_OP_MODIFY_RMP: + obj_id = MLX5_GET(modify_rmp_in, in, rmpn); + break; + case MLX5_CMD_OP_QUERY_RQT: + obj_id = MLX5_GET(query_rqt_in, in, rqtn); + break; + case MLX5_CMD_OP_MODIFY_RQT: + obj_id = MLX5_GET(modify_rqt_in, in, rqtn); + break; + case MLX5_CMD_OP_QUERY_TIR: + obj_id = MLX5_GET(query_tir_in, in, tirn); + break; + case MLX5_CMD_OP_MODIFY_TIR: + obj_id = MLX5_GET(modify_tir_in, in, tirn); + break; + case MLX5_CMD_OP_QUERY_TIS: + obj_id = MLX5_GET(query_tis_in, in, tisn); + break; + case MLX5_CMD_OP_MODIFY_TIS: + obj_id = MLX5_GET(modify_tis_in, in, tisn); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + obj_id = MLX5_GET(query_flow_table_in, in, table_id); + break; + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + obj_id = MLX5_GET(modify_flow_table_in, in, table_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + obj_id = MLX5_GET(query_flow_group_in, in, group_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + obj_id = MLX5_GET(query_fte_in, in, flow_index); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + obj_id = MLX5_GET(set_fte_in, in, flow_index); + break; + case MLX5_CMD_OP_QUERY_Q_COUNTER: + obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id); + break; + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + break; + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + obj_id = MLX5_GET(query_scheduling_element_in, in, + scheduling_element_id); + break; + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + obj_id = MLX5_GET(modify_scheduling_element_in, in, + scheduling_element_id); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + break; + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + break; + case MLX5_CMD_OP_QUERY_QP: + obj_id = MLX5_GET(query_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RST2INIT_QP: + obj_id = MLX5_GET(rst2init_qp_in, in, qpn); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + obj_id = MLX5_GET(init2rtr_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RTS2RTS_QP: + obj_id = MLX5_GET(rts2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_SQERR2RTS_QP: + obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_2ERR_QP: + obj_id = MLX5_GET(qp_2err_in, in, qpn); + break; + case MLX5_CMD_OP_2RST_QP: + obj_id = MLX5_GET(qp_2rst_in, in, qpn); + break; + case MLX5_CMD_OP_QUERY_DCT: + obj_id = MLX5_GET(query_dct_in, in, dctn); + break; + case MLX5_CMD_OP_QUERY_XRQ: + obj_id = MLX5_GET(query_xrq_in, in, xrqn); + break; + case MLX5_CMD_OP_QUERY_XRC_SRQ: + obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn); + break; + case MLX5_CMD_OP_ARM_XRC_SRQ: + obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn); + break; + case MLX5_CMD_OP_QUERY_SRQ: + obj_id = MLX5_GET(query_srq_in, in, srqn); + break; + case MLX5_CMD_OP_ARM_RQ: + obj_id = MLX5_GET(arm_rq_in, in, srq_number); + break; + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + obj_id = MLX5_GET(drain_dct_in, in, dctn); + break; + case MLX5_CMD_OP_ARM_XRQ: + obj_id = MLX5_GET(arm_xrq_in, in, xrqn); + break; + default: + return false; + } + + if (obj_id == obj->obj_id) + return true; + + return false; +} + +static bool devx_is_obj_create_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_CREATE_XRQ: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_ALLOC_XRCD: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + if (op_mod == 0) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_obj_modify_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_MODIFY_RQT: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_ARM_XRQ: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + + if (op_mod == 1) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_obj_query_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_QUERY_XRQ: + return true; + default: + return false; + } +} + +static bool devx_is_general_cmd(void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + return true; + default: + return false; + } +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + int user_vector; + int dev_eqn; + unsigned int irqn; + int err; + + if (uverbs_copy_from(&user_vector, attrs, + MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) + return -EFAULT; + + c = devx_ufile2uctx(file); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + if (err < 0) + return err; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + &dev_eqn, sizeof(dev_eqn))) + return -EFAULT; + + return 0; +} + +/* + *Security note: + * The hardware protection mechanism works like this: Each device object that + * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in + * the device specification manual) upon its creation. Then upon doorbell, + * hardware fetches the object context for which the doorbell was rang, and + * validates that the UAR through which the DB was rang matches the UAR ID + * of the object. + * If no match the doorbell is silently ignored by the hardware. Of course, + * the user cannot ring a doorbell on a UAR that was not mapped to it. + * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command + * mailboxes (except tagging them with UID), we expose to the user its UAR + * ID, so it can embed it in these objects in the expected specification + * format. So the only thing the user can do is hurt itself by creating a + * QP/SQ/CQ with a UAR ID other than his, and then in this case other users + * may ring a doorbell on its objects. + * The consequence of that will be that another user can schedule a QP/SQ + * of the buggy user for execution (just insert it to the hardware schedule + * queue or arm its CQ for event generation), no further harm is expected. + */ +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + u32 user_idx; + s32 dev_idx; + + c = devx_ufile2uctx(file); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + if (uverbs_copy_from(&user_idx, attrs, + MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX)) + return -EFAULT; + + dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true); + if (dev_idx < 0) + return dev_idx; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + &dev_idx, sizeof(dev_idx))) + return -EFAULT; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + void *cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); + void *cmd_out; + int err; + + c = devx_ufile2uctx(file); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + if (!c->devx_uid) + return -EPERM; + + /* Only white list of some general HCA commands are allowed for this method. */ + if (!devx_is_general_cmd(cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, + cmd_out_len); +} + +static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, + u32 *dinlen, + u32 *obj_id) +{ + u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); + + switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + break; + + case MLX5_CMD_OP_CREATE_MKEY: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + break; + case MLX5_CMD_OP_CREATE_CQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + break; + case MLX5_CMD_OP_ALLOC_PD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + break; + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + break; + case MLX5_CMD_OP_CREATE_RMP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + break; + case MLX5_CMD_OP_CREATE_SQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + break; + case MLX5_CMD_OP_CREATE_RQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + break; + case MLX5_CMD_OP_CREATE_RQT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + break; + case MLX5_CMD_OP_CREATE_TIR: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + break; + case MLX5_CMD_OP_CREATE_TIS: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + break; + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + break; + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); + *obj_id = MLX5_GET(create_flow_table_out, out, table_id); + MLX5_SET(destroy_flow_table_in, din, other_vport, + MLX5_GET(create_flow_table_in, in, other_vport)); + MLX5_SET(destroy_flow_table_in, din, vport_number, + MLX5_GET(create_flow_table_in, in, vport_number)); + MLX5_SET(destroy_flow_table_in, din, table_type, + MLX5_GET(create_flow_table_in, in, table_type)); + MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + break; + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); + *obj_id = MLX5_GET(create_flow_group_out, out, group_id); + MLX5_SET(destroy_flow_group_in, din, other_vport, + MLX5_GET(create_flow_group_in, in, other_vport)); + MLX5_SET(destroy_flow_group_in, din, vport_number, + MLX5_GET(create_flow_group_in, in, vport_number)); + MLX5_SET(destroy_flow_group_in, din, table_type, + MLX5_GET(create_flow_group_in, in, table_type)); + MLX5_SET(destroy_flow_group_in, din, table_id, + MLX5_GET(create_flow_group_in, in, table_id)); + MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); + *obj_id = MLX5_GET(set_fte_in, in, flow_index); + MLX5_SET(delete_fte_in, din, other_vport, + MLX5_GET(set_fte_in, in, other_vport)); + MLX5_SET(delete_fte_in, din, vport_number, + MLX5_GET(set_fte_in, in, vport_number)); + MLX5_SET(delete_fte_in, din, table_type, + MLX5_GET(set_fte_in, in, table_type)); + MLX5_SET(delete_fte_in, din, table_id, + MLX5_GET(set_fte_in, in, table_id)); + MLX5_SET(delete_fte_in, din, flow_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + break; + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + break; + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + break; + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + break; + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); + *obj_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_hierarchy, + MLX5_GET(create_scheduling_element_in, in, + scheduling_hierarchy)); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_element_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); + *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); + *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + break; + case MLX5_CMD_OP_CREATE_QP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + break; + case MLX5_CMD_OP_CREATE_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + break; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + break; + case MLX5_CMD_OP_CREATE_DCT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + break; + case MLX5_CMD_OP_CREATE_XRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + break; + case MLX5_CMD_OP_ATTACH_TO_MCG: + *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); + MLX5_SET(detach_from_mcg_in, din, qpn, + MLX5_GET(attach_to_mcg_in, in, qpn)); + memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), + MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), + MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + break; + case MLX5_CMD_OP_ALLOC_XRCD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + break; + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + break; + } +} + +static int devx_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct devx_obj *obj = uobject->object; + int ret; + + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + kfree(obj); + return ret; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); + void *cmd_out; + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + struct devx_obj *obj; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_create_cmd(cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto obj_free; + + uobj->object = obj; + obj->mdev = dev->mdev; + devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id); + WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); + if (err) + goto obj_free; + + return 0; + +obj_free: + kfree(obj); + return err; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct devx_obj *obj = uobj->object; + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_modify_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(obj, cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(obj->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + cmd_out, cmd_out_len); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct devx_obj *obj = uobj->object; + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(obj, cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(obj->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + cmd_out, cmd_out_len); +} + +static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, + struct uverbs_attr_bundle *attrs, + struct devx_umem *obj) +{ + u64 addr; + size_t size; + u32 access; + int npages; + int err; + u32 page_mask; + + if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || + uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) + return -EFAULT; + + err = uverbs_get_flags32(&access, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + IB_ACCESS_SUPPORTED); + if (err) + return err; + + err = ib_check_mr_access(access); + if (err) + return err; + + obj->umem = ib_umem_get(ucontext, addr, size, access, 0); + if (IS_ERR(obj->umem)) + return PTR_ERR(obj->umem); + + mlx5_ib_cont_pages(obj->umem, obj->umem->address, + MLX5_MKEY_PAGE_SHIFT_MASK, &npages, + &obj->page_shift, &obj->ncont, NULL); + + if (!npages) { + ib_umem_release(obj->umem); + return -EINVAL; + } + + page_mask = (1 << obj->page_shift) - 1; + obj->page_offset = obj->umem->address & page_mask; + + return 0; +} + +static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, + struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + + (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + cmd->in = uverbs_zalloc(attrs, cmd->inlen); + return PTR_ERR_OR_ZERO(cmd->in); +} + +static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, + struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + void *umem; + __be64 *mtt; + + umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); + mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); + + MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM); + MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); + MLX5_SET(umem, umem, log_page_size, obj->page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, obj->page_offset); + mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | + MLX5_IB_MTT_READ); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct devx_umem_reg_cmd cmd; + struct devx_umem *obj; + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE); + u32 obj_id; + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + int err; + + if (!c->devx_uid) + return -EPERM; + + obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + err = devx_umem_get(dev, &c->ibucontext, attrs, obj); + if (err) + goto err_obj_free; + + err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + if (err) + goto err_umem_release; + + devx_umem_reg_cmd_build(dev, obj, &cmd); + + MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, + sizeof(cmd.out)); + if (err) + goto err_umem_release; + + obj->mdev = dev->mdev; + uobj->object = obj; + devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id); + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id)); + if (err) + goto err_umem_destroy; + + return 0; + +err_umem_destroy: + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out)); +err_umem_release: + ib_umem_release(obj->umem); +err_obj_free: + kfree(obj); + return err; +} + +static int devx_umem_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct devx_umem *obj = uobject->object; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + int err; + + err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (ib_is_destroy_retryable(err, why, uobject)) + return err; + + ib_umem_release(obj->umem); + kfree(obj); + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_UMEM_REG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + enum ib_access_flags), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DEVX_UMEM_DEREG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_EQN, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_UAR, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OTHER, + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DEVX_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_MODIFY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + +DECLARE_UVERBS_OBJECT_TREE(devx_objects, + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); + +const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) +{ + return &devx_objects; +} diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c new file mode 100644 index 000000000000..1a29f47f836e --- /dev/null +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_umem.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { + [MLX5_IB_FLOW_TYPE_NORMAL] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + .u.ptr = { + .len = sizeof(u16), /* data is priority */ + .min_len = sizeof(u16), + } + }, + [MLX5_IB_FLOW_TYPE_SNIFFER] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, +}; + +static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_handler *flow_handler; + struct mlx5_ib_flow_matcher *fs_matcher; + void *devx_obj; + int dest_id, dest_type; + void *cmd_in; + int inlen; + bool dest_devx, dest_qp; + struct ib_qp *qp = NULL; + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + dest_devx = + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + dest_qp = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + + if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)) + return -EINVAL; + + if (dest_devx) { + devx_obj = uverbs_attr_get_obj( + attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + if (IS_ERR(devx_obj)) + return PTR_ERR(devx_obj); + + /* Verify that the given DEVX object is a flow + * steering destination. + */ + if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) + return -EINVAL; + } else { + struct mlx5_ib_qp *mqp; + + qp = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + if (qp->qp_type != IB_QPT_RAW_PACKET) + return -EINVAL; + + mqp = to_mqp(qp); + if (mqp->flags & MLX5_IB_QP_RSS) + dest_id = mqp->rss_qp.tirn; + else + dest_id = mqp->raw_packet_qp.rq.tirn; + dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } + + if (dev->rep) + return -ENOTSUPP; + + cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + inlen = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen, + dest_id, dest_type); + if (IS_ERR(flow_handler)) + return PTR_ERR(flow_handler); + + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev); + + return 0; +} + +static int flow_matcher_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct mlx5_ib_flow_matcher *obj = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); + if (ret) + return ret; + + kfree(obj); + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_flow_matcher *obj; + int err; + + obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->mask_len = uverbs_attr_get_len( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + err = uverbs_copy_from(&obj->matcher_mask, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + if (err) + goto end; + + obj->flow_type = uverbs_attr_get_enum_id( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + + if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { + err = uverbs_copy_from(&obj->priority, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + if (err) + goto end; + } + + err = uverbs_copy_from(&obj->match_criteria_enable, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); + if (err) + goto end; + + uobj->object = obj; + obj->mdev = dev->mdev; + atomic_set(&obj->usecnt, 0); + return 0; + +end: + kfree(obj); + return err; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_CREATE_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, + UVERBS_OBJECT_QP, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DESTROY_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_fs, + UVERBS_OBJECT_FLOW, + &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), + &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_MATCHER_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + mlx5_ib_flow_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + UVERBS_ATTR_TYPE(u8), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); + +DECLARE_UVERBS_OBJECT_TREE(flow_objects, + &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER)); + +int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +{ + int i = 0; + + root[i++] = &flow_objects; + root[i++] = &mlx5_ib_fs; + + return i; +} diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 79e6309460dc..4950df3f71b6 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -477,8 +477,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) return gsi->tx_qps[qp_index]; } -int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); struct ib_qp *tx_qp; @@ -522,8 +522,8 @@ err: return ret; } -int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b3ba9a222550..c414f3809e5c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size); @@ -510,12 +510,11 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, vlan_id, port_num); } -static int mlx5_ib_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, +static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, __always_unused void **context) { return set_roce_addr(to_mdev(attr->device), attr->port_num, - attr->index, gid, attr); + attr->index, &attr->gid, attr); } static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, @@ -525,41 +524,15 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, attr->index, NULL, NULL); } -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, - int index) +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr) { - struct ib_gid_attr attr; - union ib_gid gid; - - if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) - return 0; - - dev_put(attr.ndev); - - if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return 0; return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); } -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, - int index, enum ib_gid_type *gid_type) -{ - struct ib_gid_attr attr; - union ib_gid gid; - int ret; - - ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr); - if (ret) - return ret; - - dev_put(attr.ndev); - - *gid_type = attr.gid_type; - - return 0; -} - static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) @@ -915,7 +888,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg); - props->max_sge = min(max_rq_sg, max_sq_sg); + props->max_send_sge = max_sq_sg; + props->max_recv_sge = max_rq_sg; props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; @@ -1246,7 +1220,6 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->qkey_viol_cntr = rep->qkey_violation_counter; props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; - props->grh_required = rep->grh_required; err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) @@ -1585,31 +1558,26 @@ error: return err; } -static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +static void deallocate_uars(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) { struct mlx5_bfreg_info *bfregi; - int err; int i; bfregi = &context->bfregi; - for (i = 0; i < bfregi->num_sys_pages; i++) { + for (i = 0; i < bfregi->num_sys_pages; i++) if (i < bfregi->num_static_sys_pages || - bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) { - err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); - if (err) { - mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err); - return err; - } - } - } - - return 0; + bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) + mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); } static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) { int err; + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return 0; + err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); if (err) return err; @@ -1631,6 +1599,9 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) { + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return; + mlx5_core_dealloc_transport_domain(dev->mdev, tdn); if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || @@ -1660,6 +1631,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int err; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); + u32 dump_fill_mkey; bool lib_uar_4k; if (!dev->ib_active) @@ -1676,8 +1648,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) return ERR_PTR(err); - if (req.flags) - return ERR_PTR(-EINVAL); + if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX) + return ERR_PTR(-EOPNOTSUPP); if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) return ERR_PTR(-EOPNOTSUPP); @@ -1755,10 +1727,26 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { - err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); + if (err) + goto out_uars; + + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { + /* Block DEVX on Infiniband as of SELinux */ + if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) { + err = -EPERM; + goto out_td; + } + + err = mlx5_ib_devx_create(dev, context); if (err) - goto out_uars; + goto out_td; + } + + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); + if (err) + goto out_mdev; } INIT_LIST_HEAD(&context->vma_private_list); @@ -1819,9 +1807,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length += sizeof(resp.num_dyn_bfregs); } + if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) { + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + resp.dump_fill_mkey = dump_fill_mkey; + resp.comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; + } + resp.response_length += sizeof(resp.dump_fill_mkey); + } + err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - goto out_td; + goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; @@ -1831,9 +1828,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, return &context->ibucontext; +out_mdev: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) + mlx5_ib_devx_destroy(dev, context); out_td: - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); out_uars: deallocate_uars(dev, context); @@ -1856,9 +1855,11 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; + if (context->devx_uid) + mlx5_ib_devx_destroy(dev, context); + bfregi = &context->bfregi; - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); deallocate_uars(dev, context); kfree(bfregi->sys_pages); @@ -2040,7 +2041,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, struct mlx5_bfreg_info *bfregi = &context->bfregi; int err; unsigned long idx; - phys_addr_t pfn, pa; + phys_addr_t pfn; pgprot_t prot; u32 bfreg_dyn_idx = 0; u32 uar_index; @@ -2131,8 +2132,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, goto err; } - pa = pfn << PAGE_SHIFT; - err = mlx5_ib_set_vma_data(vma, context); if (err) goto err; @@ -2699,7 +2698,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, IPPROTO_GRE); MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, - 0xffff); + ntohs(ib_spec->gre.mask.protocol)); MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, ntohs(ib_spec->gre.val.protocol)); @@ -2979,11 +2978,11 @@ static void counters_clear_description(struct ib_counters *counters) static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) { - struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); struct mlx5_ib_flow_handler *handler = container_of(flow_id, struct mlx5_ib_flow_handler, ibflow); struct mlx5_ib_flow_handler *iter, *tmp; + struct mlx5_ib_dev *dev = handler->dev; mutex_lock(&dev->flow_db->lock); @@ -3001,6 +3000,8 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) counters_clear_description(handler->ibcounters); mutex_unlock(&dev->flow_db->lock); + if (handler->flow_matcher) + atomic_dec(&handler->flow_matcher->usecnt); kfree(handler); return 0; @@ -3021,6 +3022,26 @@ enum flow_table_type { #define MLX5_FS_MAX_TYPES 6 #define MLX5_FS_MAX_ENTRIES BIT(16) + +static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, + struct mlx5_ib_flow_prio *prio, + int priority, + int num_entries, int num_groups) +{ + struct mlx5_flow_table *ft; + + ft = mlx5_create_auto_grouped_flow_table(ns, priority, + num_entries, + num_groups, + 0, 0); + if (IS_ERR(ft)) + return ERR_CAST(ft); + + prio->flow_table = ft; + prio->refcount = 0; + return prio; +} + static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, struct ib_flow_attr *flow_attr, enum flow_table_type ft_type) @@ -3033,7 +3054,6 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int num_entries; int num_groups; int priority; - int err = 0; max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); @@ -3083,21 +3103,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return ERR_PTR(-ENOMEM); ft = prio->flow_table; - if (!ft) { - ft = mlx5_create_auto_grouped_flow_table(ns, priority, - num_entries, - num_groups, - 0, 0); - - if (!IS_ERR(ft)) { - prio->refcount = 0; - prio->flow_table = ft; - } else { - err = PTR_ERR(ft); - } - } + if (!ft) + return _get_prio(ns, prio, priority, num_entries, num_groups); - return err ? ERR_PTR(err) : prio; + return prio; } static void set_underlay_qp(struct mlx5_ib_dev *dev, @@ -3356,6 +3365,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ft_prio->refcount++; handler->prio = ft_prio; + handler->dev = dev; ft_prio->flow_table = ft; free: @@ -3648,6 +3658,189 @@ free_ucmd: return ERR_PTR(err); } +static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev, + int priority, bool mcast) +{ + int max_table_size; + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + if (max_table_size < MLX5_FS_MAX_ENTRIES) + return ERR_PTR(-ENOMEM); + + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(priority, false); + + ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); + if (!ns) + return ERR_PTR(-ENOTSUPP); + + prio = &dev->flow_db->prios[priority]; + + if (prio->flow_table) + return prio; + + return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, + MLX5_FS_MAX_TYPES); +} + +static struct mlx5_ib_flow_handler * +_create_raw_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct mlx5_flow_destination *dst, + struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen) +{ + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft = ft_prio->flow_table; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + memcpy(spec->match_value, cmd_in, inlen); + memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, + fs_matcher->mask_len); + spec->match_criteria_enable = fs_matcher->match_criteria_enable; + + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + handler->rule = mlx5_add_flow_rules(ft, spec, + &flow_act, dst, 1); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + ft_prio->flow_table = ft; + +free: + if (err) + kfree(handler); + kvfree(spec); + return err ? ERR_PTR(err) : handler; +} + +static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, + void *match_v) +{ + void *match_c; + void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; + void *dmac, *dmac_mask; + void *ipv4, *ipv4_mask; + + if (!(fs_matcher->match_criteria_enable & + (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) + return false; + + match_c = fs_matcher->matcher_mask.match_params; + match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + + dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dmac_47_16); + dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dmac_47_16); + + if (is_multicast_ether_addr(dmac) && + is_multicast_ether_addr(dmac_mask)) + return true; + + ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + if (ipv4_is_multicast(*(__be32 *)(ipv4)) && + ipv4_is_multicast(*(__be32 *)(ipv4_mask))) + return true; + + return false; +} + +struct mlx5_ib_flow_handler * +mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen, int dest_id, + int dest_type) +{ + struct mlx5_flow_destination *dst; + struct mlx5_ib_flow_prio *ft_prio; + int priority = fs_matcher->priority; + struct mlx5_ib_flow_handler *handler; + bool mcast; + int err; + + if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOMEM); + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mcast = raw_fs_is_multicast(fs_matcher, cmd_in); + mutex_lock(&dev->flow_db->lock); + + ft_prio = _get_flow_table(dev, priority, mcast); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + dst->type = dest_type; + dst->tir_num = dest_id; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst->ft_num = dest_id; + } + + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in, + inlen); + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + atomic_inc(&fs_matcher->usecnt); + handler->flow_matcher = fs_matcher; + + kfree(dst); + + return handler; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + + return ERR_PTR(err); +} + static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) { u32 flags = 0; @@ -3672,12 +3865,11 @@ mlx5_ib_create_flow_action_esp(struct ib_device *device, u64 flags; int err = 0; - if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs, - MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS))) - return ERR_PTR(-EFAULT); - - if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1)) - return ERR_PTR(-EOPNOTSUPP); + err = uverbs_get_flags64( + &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); + if (err) + return ERR_PTR(err); flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); @@ -4466,7 +4658,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) cancel_work_sync(&devr->ports[port].pkey_change_work); } -static u32 get_core_cap_flags(struct ib_device *ibdev) +static u32 get_core_cap_flags(struct ib_device *ibdev, + struct mlx5_hca_vport_context *rep) { struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); @@ -4475,11 +4668,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev) bool raw_support = !mlx5_core_mp_enabled(dev->mdev); u32 ret = 0; + if (rep->grh_required) + ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED; + if (ll == IB_LINK_LAYER_INFINIBAND) - return RDMA_CORE_PORT_IBA_IB; + return ret | RDMA_CORE_PORT_IBA_IB; if (raw_support) - ret = RDMA_CORE_PORT_RAW_PACKET; + ret |= RDMA_CORE_PORT_RAW_PACKET; if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) return ret; @@ -4502,17 +4698,23 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); + struct mlx5_hca_vport_context rep = {0}; int err; - immutable->core_cap_flags = get_core_cap_flags(ibdev); - err = ib_query_port(ibdev, port_num, &attr); if (err) return err; + if (ll == IB_LINK_LAYER_INFINIBAND) { + err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0, + &rep); + if (err) + return err; + } + immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = get_core_cap_flags(ibdev); + immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep); if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) immutable->max_mad_size = IB_MGMT_MAD_SIZE; @@ -4610,7 +4812,7 @@ static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) } } -static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num) +static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; @@ -4689,12 +4891,21 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(req_cqe_flush_error), }; +#define INIT_EXT_PPCNT_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} + +static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { + INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), +}; + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { int i; for (i = 0; i < dev->num_ports; i++) { - if (dev->port[i].cnts.set_id) + if (dev->port[i].cnts.set_id_valid) mlx5_core_dealloc_q_counter(dev->mdev, dev->port[i].cnts.set_id); kfree(dev->port[i].cnts.names); @@ -4724,7 +4935,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); num_counters += ARRAY_SIZE(cong_cnts); } - + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); + num_counters += ARRAY_SIZE(ext_ppcnt_cnts); + } cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); if (!cnts->names) return -ENOMEM; @@ -4781,6 +4995,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, offsets[j] = cong_cnts[i].offset; } } + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { + names[j] = ext_ppcnt_cnts[i].name; + offsets[j] = ext_ppcnt_cnts[i].offset; + } + } } static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) @@ -4826,7 +5047,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, return rdma_alloc_hw_stats_struct(port->cnts.names, port->cnts.num_q_counters + - port->cnts.num_cong_counters, + port->cnts.num_cong_counters + + port->cnts.num_ext_ppcnt_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -4859,6 +5081,34 @@ free: return ret; } +static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_port *port, + struct rdma_hw_stats *stats) +{ + int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int ret, i; + void *out; + + out = kvzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); + if (ret) + goto free; + + for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) { + stats->value[i + offset] = + be64_to_cpup((__be64 *)(out + + port->cnts.offsets[i + offset])); + } + +free: + kvfree(out); + return ret; +} + static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) @@ -4872,13 +5122,21 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, if (!stats) return -EINVAL; - num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters; + num_counters = port->cnts.num_q_counters + + port->cnts.num_cong_counters + + port->cnts.num_ext_ppcnt_counters; /* q_counters are per IB device, query the master mdev */ ret = mlx5_ib_query_q_counters(dev->mdev, port, stats); if (ret) return ret; + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats); + if (ret) + return ret; + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); @@ -4905,11 +5163,6 @@ done: return num_counters; } -static void mlx5_ib_free_rdma_netdev(struct net_device *netdev) -{ - return mlx5_rdma_netdev_free(netdev); -} - static struct net_device* mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, u8 port_num, @@ -4919,17 +5172,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, void (*setup)(struct net_device *)) { struct net_device *netdev; - struct rdma_netdev *rn; if (type != RDMA_NETDEV_IPOIB) return ERR_PTR(-EOPNOTSUPP); netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, name, setup); - if (likely(!IS_ERR_OR_NULL(netdev))) { - rn = netdev_priv(netdev); - rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev; - } return netdev; } @@ -5127,8 +5375,8 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, spin_lock(&ibdev->port[port_num].mp.mpi_lock); if (ibdev->port[port_num].mp.mpi) { - mlx5_ib_warn(ibdev, "port %d already affiliated.\n", - port_num + 1); + mlx5_ib_dbg(ibdev, "port %d already affiliated.\n", + port_num + 1); spin_unlock(&ibdev->port[port_num].mp.mpi_lock); return false; } @@ -5263,45 +5511,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM, - UVERBS_METHOD_DM_ALLOC, - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - UVERBS_ATTR_TYPE(u16), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_dm, + UVERBS_OBJECT_DM, + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY)); + +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_flow_action, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + enum mlx5_ib_uapi_flow_action_flags)); -#define NUM_TREES 2 static int populate_specs_root(struct mlx5_ib_dev *dev) { - const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { - uverbs_default_get_objects()}; - size_t num_trees = 1; + const struct uverbs_object_tree_def **trees = dev->driver_trees; + size_t num_trees = 0; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && - !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) - default_root[num_trees++] = &mlx5_ib_flow_action; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + trees[num_trees++] = &mlx5_ib_flow_action; - if (MLX5_CAP_DEV_MEM(dev->mdev, memic) && - !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) - default_root[num_trees++] = &mlx5_ib_dm; + if (MLX5_CAP_DEV_MEM(dev->mdev, memic)) + trees[num_trees++] = &mlx5_ib_dm; - dev->ib_dev.specs_root = - uverbs_alloc_spec_tree(num_trees, default_root); + if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) + trees[num_trees++] = mlx5_ib_get_devx_tree(); - return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root); -} + num_trees += mlx5_ib_get_flow_trees(trees + num_trees); -static void depopulate_specs_root(struct mlx5_ib_dev *dev) -{ - uverbs_free_spec_tree(dev->ib_dev.specs_root); + WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees)); + trees[num_trees] = NULL; + dev->ib_dev.driver_specs = trees; + + return 0; } static int mlx5_ib_read_counters(struct ib_counters *counters, @@ -5552,6 +5802,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.modify_qp = mlx5_ib_modify_qp; dev->ib_dev.query_qp = mlx5_ib_query_qp; dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; + dev->ib_dev.drain_sq = mlx5_ib_drain_sq; + dev->ib_dev.drain_rq = mlx5_ib_drain_rq; dev->ib_dev.post_send = mlx5_ib_post_send; dev->ib_dev.post_recv = mlx5_ib_post_recv; dev->ib_dev.create_cq = mlx5_ib_create_cq; @@ -5649,9 +5901,9 @@ int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) return 0; } -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, - u8 port_num) +static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { + u8 port_num; int i; for (i = 0; i < dev->num_ports; i++) { @@ -5674,6 +5926,8 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + return mlx5_add_netdev_notifier(dev, port_num); } @@ -5690,14 +5944,12 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) enum rdma_link_layer ll; int port_type_cap; int err = 0; - u8 port_num; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) - err = mlx5_ib_stage_common_roce_init(dev, port_num); + err = mlx5_ib_stage_common_roce_init(dev); return err; } @@ -5712,19 +5964,17 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num; int err; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_ib_stage_common_roce_init(dev, port_num); + err = mlx5_ib_stage_common_roce_init(dev); if (err) return err; - err = mlx5_enable_eth(dev, port_num); + err = mlx5_enable_eth(dev); if (err) goto cleanup; } @@ -5741,9 +5991,7 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -5842,11 +6090,6 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) return ib_register_device(&dev->ib_dev, NULL); } -static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev) -{ - depopulate_specs_root(dev); -} - void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); @@ -5915,8 +6158,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device((struct ib_device *)dev); } -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); - void *__mlx5_ib_add(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile) { @@ -5983,7 +6224,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_SPECS, mlx5_ib_stage_populate_specs, - mlx5_ib_stage_depopulate_specs), + NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6031,7 +6272,7 @@ static const struct mlx5_ib_profile nic_rep_profile = { mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_SPECS, mlx5_ib_stage_populate_specs, - mlx5_ib_stage_depopulate_specs), + NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6046,7 +6287,7 @@ static const struct mlx5_ib_profile nic_rep_profile = { mlx5_ib_stage_rep_reg_cleanup), }; -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) { struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; @@ -6080,8 +6321,6 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) if (!bound) { list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n"); - } else { - mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1); } mutex_unlock(&mlx5_ib_multiport_mutex); @@ -6099,11 +6338,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) { - u8 port_num = mlx5_core_native_port_num(mdev) - 1; - - return mlx5_ib_add_slave_port(mdev, port_num); - } + if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) + return mlx5_ib_add_slave_port(mdev); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); if (!dev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d89c8fe626f6..320d4dfe8c2f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -46,6 +46,7 @@ #include <rdma/ib_user_verbs.h> #include <rdma/mlx5-abi.h> #include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -78,12 +79,6 @@ enum { MLX5_REQ_SCAT_DATA64_CQE = 0x22, }; -enum mlx5_ib_latency_class { - MLX5_IB_LATENCY_CLASS_LOW, - MLX5_IB_LATENCY_CLASS_MEDIUM, - MLX5_IB_LATENCY_CLASS_HIGH, -}; - enum mlx5_ib_mad_ifc_flags { MLX5_MAD_IFC_IGNORE_MKEY = 1, MLX5_MAD_IFC_IGNORE_BKEY = 2, @@ -143,6 +138,7 @@ struct mlx5_ib_ucontext { u64 lib_caps; DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); + u16 devx_uid; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -176,6 +172,18 @@ struct mlx5_ib_flow_handler { struct mlx5_ib_flow_prio *prio; struct mlx5_flow_handle *rule; struct ib_counters *ibcounters; + struct mlx5_ib_dev *dev; + struct mlx5_ib_flow_matcher *flow_matcher; +}; + +struct mlx5_ib_flow_matcher { + struct mlx5_ib_match_params matcher_mask; + int mask_len; + enum mlx5_ib_flow_type flow_type; + u16 priority; + struct mlx5_core_dev *mdev; + atomic_t usecnt; + u8 match_criteria_enable; }; struct mlx5_ib_flow_db { @@ -461,7 +469,7 @@ struct mlx5_umr_wr { u32 mkey; }; -static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) +static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr) { return container_of(wr, struct mlx5_umr_wr, wr); } @@ -665,6 +673,7 @@ struct mlx5_ib_counters { size_t *offsets; u32 num_q_counters; u32 num_cong_counters; + u32 num_ext_ppcnt_counters; u16 set_id; bool set_id_valid; }; @@ -851,6 +860,7 @@ to_mcounters(struct ib_counters *ibcntrs) struct mlx5_ib_dev { struct ib_device ib_dev; + const struct uverbs_object_tree_def *driver_trees[6]; struct mlx5_core_dev *mdev; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; @@ -1004,8 +1014,8 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); int mlx5_ib_destroy_srq(struct ib_srq *srq); -int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -1014,10 +1024,12 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx5_ib_destroy_qp(struct ib_qp *qp); -int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +void mlx5_ib_drain_sq(struct ib_qp *qp); +void mlx5_ib_drain_rq(struct ib_qp *qp); +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, @@ -1183,10 +1195,8 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, - int index); -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, - int index, enum ib_gid_type *gid_type); +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); @@ -1200,10 +1210,10 @@ int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); @@ -1217,6 +1227,36 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context); +const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); +struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( + struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen, int dest_id, int dest_type); +bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); +int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); +#else +static inline int +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; }; +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) {} +static inline const struct uverbs_object_tree_def * +mlx5_ib_get_devx_tree(void) { return NULL; } +static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, + int *dest_type) +{ + return false; +} +static inline int +mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +{ + return 0; +} +#endif static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -1318,4 +1358,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, unsigned long mlx5_ib_get_xlt_emergency_page(void); void mlx5_ib_put_xlt_emergency_page(void); +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, u32 bfregn, + bool dyn_bfreg); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 90a9c461cedc..9fb1d9cb9401 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -271,16 +271,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf, { struct mlx5_cache_ent *ent = filp->private_data; struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20]; + char lbuf[20] = {0}; u32 var; int err; int c; - if (copy_from_user(lbuf, buf, sizeof(lbuf))) + count = min(count, sizeof(lbuf) - 1); + if (copy_from_user(lbuf, buf, count)) return -EFAULT; c = order2idx(dev, ent->order); - lbuf[sizeof(lbuf) - 1] = 0; if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -310,19 +310,11 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - if (*pos) - return 0; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); if (err < 0) return err; - if (copy_to_user(buf, lbuf, err)) - return -EFAULT; - - *pos += err; - - return err; + return simple_read_from_buffer(buf, count, pos, lbuf, err); } static const struct file_operations size_fops = { @@ -337,16 +329,16 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, { struct mlx5_cache_ent *ent = filp->private_data; struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20]; + char lbuf[20] = {0}; u32 var; int err; int c; - if (copy_from_user(lbuf, buf, sizeof(lbuf))) + count = min(count, sizeof(lbuf) - 1); + if (copy_from_user(lbuf, buf, count)) return -EFAULT; c = order2idx(dev, ent->order); - lbuf[sizeof(lbuf) - 1] = 0; if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -372,19 +364,11 @@ static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - if (*pos) - return 0; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); if (err < 0) return err; - if (copy_to_user(buf, lbuf, err)) - return -EFAULT; - - *pos += err; - - return err; + return simple_read_from_buffer(buf, count, pos, lbuf, err); } static const struct file_operations limit_fops = { @@ -914,7 +898,7 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, struct mlx5_umr_wr *umrwr) { struct umr_common *umrc = &dev->umrc; - struct ib_send_wr *bad; + const struct ib_send_wr *bad; int err; struct mlx5_ib_umr_context umr_context; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a4f1f638509f..6cba2a02d11b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -563,32 +563,21 @@ static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev, } static int alloc_bfreg(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, - enum mlx5_ib_latency_class lat) + struct mlx5_bfreg_info *bfregi) { - int bfregn = -EINVAL; + int bfregn = -ENOMEM; mutex_lock(&bfregi->lock); - switch (lat) { - case MLX5_IB_LATENCY_CLASS_LOW: + if (bfregi->ver >= 2) { + bfregn = alloc_high_class_bfreg(dev, bfregi); + if (bfregn < 0) + bfregn = alloc_med_class_bfreg(dev, bfregi); + } + + if (bfregn < 0) { BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1); bfregn = 0; bfregi->count[bfregn]++; - break; - - case MLX5_IB_LATENCY_CLASS_MEDIUM: - if (bfregi->ver < 2) - bfregn = -ENOMEM; - else - bfregn = alloc_med_class_bfreg(dev, bfregi); - break; - - case MLX5_IB_LATENCY_CLASS_HIGH: - if (bfregi->ver < 2) - bfregn = -ENOMEM; - else - bfregn = alloc_high_class_bfreg(dev, bfregi); - break; } mutex_unlock(&bfregi->lock); @@ -641,13 +630,13 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq); -static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, int bfregn, - bool dyn_bfreg) +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, u32 bfregn, + bool dyn_bfreg) { - int bfregs_per_sys_page; - int index_of_sys_page; - int offset; + unsigned int bfregs_per_sys_page; + u32 index_of_sys_page; + u32 offset; bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * MLX5_NON_FP_BFREGS_PER_UAR; @@ -655,6 +644,10 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, if (dyn_bfreg) { index_of_sys_page += bfregi->num_static_sys_pages; + + if (index_of_sys_page >= bfregi->num_sys_pages) + return -EINVAL; + if (bfregn > bfregi->num_dyn_bfregs || bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) { mlx5_ib_dbg(dev, "Invalid dynamic uar index\n"); @@ -819,21 +812,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, bfregn = MLX5_CROSS_CHANNEL_BFREG; } else { - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH); - if (bfregn < 0) { - mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n"); - mlx5_ib_dbg(dev, "reverting to medium latency\n"); - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM); - if (bfregn < 0) { - mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n"); - mlx5_ib_dbg(dev, "reverting to high latency\n"); - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW); - if (bfregn < 0) { - mlx5_ib_warn(dev, "bfreg allocation failed\n"); - return bfregn; - } - } - } + bfregn = alloc_bfreg(dev, &context->bfregi); + if (bfregn < 0) + return bfregn; } mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index); @@ -1626,7 +1607,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_create_qp_resp resp; + struct mlx5_ib_create_qp_resp resp = {}; struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; @@ -2555,18 +2536,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) { if (!(ah_flags & IB_AH_GRH)) return -EINVAL; - err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index, - &gid_type); - if (err) - return err; + memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_XRC_INI || qp->ibqp.qp_type == IB_QPT_XRC_TGT) - path->udp_sport = mlx5_get_roce_udp_sport(dev, port, - grh->sgid_index); + path->udp_sport = + mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr); path->dci_cfi_prio_sl = (sl & 0x7) << 4; + gid_type = ah->grh.sgid_attr->gid_type; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f; } else { @@ -3529,7 +3508,7 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, } static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, - struct ib_send_wr *wr, void *qend, + const struct ib_send_wr *wr, void *qend, struct mlx5_ib_qp *qp, int *size) { void *seg = eseg; @@ -3582,7 +3561,7 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, } static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); @@ -3730,9 +3709,9 @@ static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) static int set_reg_umr_segment(struct mlx5_ib_dev *dev, struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int atomic) + const struct ib_send_wr *wr, int atomic) { - struct mlx5_umr_wr *umrwr = umr_wr(wr); + const struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(umr, 0, sizeof(*umr)); @@ -3803,9 +3782,10 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) seg->status = MLX5_MKEY_STATUS_FREE; } -static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) +static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, + const struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = umr_wr(wr); + const struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) @@ -3854,7 +3834,7 @@ static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, seg += mr_list_size; } -static __be32 send_ieth(struct ib_send_wr *wr) +static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: @@ -3886,7 +3866,7 @@ static u8 wq_sig(void *wqe) return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); } -static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, +static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, void *wqe, int *sz) { struct mlx5_wqe_inline_seg *seg; @@ -4032,7 +4012,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(struct ib_sig_handover_wr *wr, +static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, struct mlx5_ib_qp *qp, void **seg, int *size) { struct ib_sig_attrs *sig_attrs = wr->sig_attrs; @@ -4134,7 +4114,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - struct ib_sig_handover_wr *wr, u32 size, + const struct ib_sig_handover_wr *wr, u32 size, u32 length, u32 pdn) { struct ib_mr *sig_mr = wr->sig_mr; @@ -4165,10 +4145,10 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, } -static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, - void **seg, int *size) +static int set_sig_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size) { - struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); + const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); u32 pdn = get_pd(qp)->pdn; u32 xlt_size; @@ -4243,7 +4223,7 @@ static int set_psv_wr(struct ib_sig_domain *domain, } static int set_reg_wr(struct mlx5_ib_qp *qp, - struct ib_reg_wr *wr, + const struct ib_reg_wr *wr, void **seg, int *size) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); @@ -4314,10 +4294,10 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } } -static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, +static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, - struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq) + const struct ib_send_wr *wr, unsigned *idx, + int *size, int nreq, bool send_signaled, bool solicited) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; @@ -4328,10 +4308,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); (*ctrl)->fm_ce_se = qp->sq_signal_bits | - (wr->send_flags & IB_SEND_SIGNALED ? - MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - MLX5_WQE_CTRL_SOLICITED : 0); + (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; @@ -4339,6 +4317,16 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, return 0; } +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned *idx, + int *size, int nreq) +{ + return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq, + wr->send_flags & IB_SEND_SIGNALED, + wr->send_flags & IB_SEND_SOLICITED); +} + static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, @@ -4360,9 +4348,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.w_list[idx].next = qp->sq.cur_post; } - -int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain) { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4393,7 +4380,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4498,10 +4485,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * SET_PSV WQEs are not signaled and solicited * on error */ - wr->send_flags &= ~IB_SEND_SIGNALED; - wr->send_flags |= IB_SEND_SOLICITED; - err = begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, nreq); + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, nreq, false, true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4520,8 +4505,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, MLX5_OPCODE_SET_PSV); - err = begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, nreq); + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, nreq, false, true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4690,13 +4675,19 @@ out: return err; } +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + return _mlx5_ib_post_send(ibqp, wr, bad_wr, false); +} + static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) { sig->signature = calc_sig(sig, size); } -int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain) { struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; @@ -4714,7 +4705,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4776,6 +4767,12 @@ out: return err; } +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false); +} + static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) { switch (mlx5_state) { @@ -5365,7 +5362,9 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev, rwq->wqe_count = ucmd->rq_wqe_count; rwq->wqe_shift = ucmd->rq_wqe_shift; - rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift); + if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size)) + return -EINVAL; + rwq->log_rq_stride = rwq->wqe_shift; rwq->log_rq_size = ilog2(rwq->wqe_count); return 0; @@ -5697,3 +5696,132 @@ out: kvfree(in); return err; } + +struct mlx5_ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe, + struct mlx5_ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* This function returns only once the drained WR was completed */ +static void handle_drain_completion(struct ib_cq *cq, + struct mlx5_ib_drain_cqe *sdrain, + struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + if (cq->poll_ctx == IB_POLL_DIRECT) { + while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0) + ib_process_cq_direct(cq, -1); + return; + } + + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + struct mlx5_ib_cq *mcq = to_mcq(cq); + bool triggered = false; + unsigned long flags; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + /* Make sure that the CQ handler won't run if wasn't run yet */ + if (!mcq->mcq.reset_notify_added) + mcq->mcq.reset_notify_added = 1; + else + triggered = true; + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + if (triggered) { + /* Wait for any scheduled/running task to be ended */ + switch (cq->poll_ctx) { + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + irq_poll_enable(&cq->iop); + break; + case IB_POLL_WORKQUEUE: + cancel_work_sync(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + } + + /* Run the CQ handler - this makes sure that the drain WR will + * be processed if wasn't processed yet. + */ + mcq->mcq.comp(&mcq->mcq); + } + + wait_for_completion(&sdrain->done); +} + +void mlx5_ib_drain_sq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->send_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx5_ib_drain_cqe sdrain; + const struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; + int ret; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_core_dev *mdev = dev->mdev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + sdrain.cqe.done = mlx5_ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &sdrain, dev); +} + +void mlx5_ib_drain_rq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->recv_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx5_ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}; + const struct ib_recv_wr *bad_rwr; + int ret; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_core_dev *mdev = dev->mdev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = mlx5_ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &rdrain, dev); +} diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index f5de5adc9b1a..d359fecf7a5b 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -446,8 +446,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) spin_unlock(&srq->lock); } -int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx5_ib_srq *srq = to_msrq(ibsrq); struct mlx5_wqe_srq_next_seg *next; diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index e7f6223e9c60..0823c0bc7e73 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -281,10 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, header->grh.flow_label = ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); header->grh.hop_limit = ah->av->hop_limit; - ib_get_cached_gid(&dev->ib_dev, - be32_to_cpu(ah->av->port_pd) >> 24, - ah->av->gid_index % dev->limits.gid_table_len, - &header->grh.source_gid, NULL); + header->grh.source_gid = ah->ibah.sgid_attr->gid; memcpy(header->grh.destination_gid.raw, ah->av->dgid, 16); } diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 5508afbf1c67..220a3e4717a3 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -519,10 +519,10 @@ int mthca_max_srq_sge(struct mthca_dev *dev); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); -int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); @@ -530,14 +530,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m struct ib_qp_init_attr *qp_init_attr); int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, int index, int *dbd, __be32 *new_wqe); int mthca_alloc_qp(struct mthca_dev *dev, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 541f237965c7..0d3473b4596e 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -96,8 +96,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr props->page_size_cap = mdev->limits.page_size_cap; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; props->max_qp_wr = mdev->limits.max_wqes; - props->max_sge = mdev->limits.max_sg; - props->max_sge_rd = props->max_sge; + props->max_send_sge = mdev->limits.max_sg; + props->max_recv_sge = mdev->limits.max_sg; + props->max_sge_rd = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; @@ -448,7 +449,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, int err; if (init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); srq = kmalloc(sizeof *srq, GFP_KERNEL); if (!srq) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index af1c49d70b89..3d37f2373d63 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1488,7 +1488,7 @@ void mthca_free_qp(struct mthca_dev *dev, /* Create UD header for an MLX send and build a data segment for it */ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, struct ib_ud_wr *wr, + int ind, const struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { @@ -1581,7 +1581,7 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, } static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->swap); @@ -1594,7 +1594,7 @@ static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, } static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { useg->lkey = cpu_to_be32(to_mah(wr->ah)->key); useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma); @@ -1604,15 +1604,15 @@ static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, } static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE); useg->dqpn = cpu_to_be32(wr->remote_qpn); useg->qkey = cpu_to_be32(wr->remote_qkey); } -int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -1814,8 +1814,8 @@ out: return err; } -int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -1925,8 +1925,8 @@ out: return err; } -int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -2165,8 +2165,8 @@ out: return err; } -int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index f79732bc73b4..9a3fc6fb0d7e 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -472,8 +472,8 @@ void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) spin_unlock(&srq->lock); } -int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); @@ -572,8 +572,8 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, return err; } -int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 00c27291dc26..bedaa02749fb 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -159,7 +159,7 @@ do { \ #define NES_EVENT_TIMEOUT 1200000 #else -#define nes_debug(level, fmt, args...) +#define nes_debug(level, fmt, args...) no_printk(fmt, ##args) #define assert(expr) do {} while (0) #define NES_EVENT_TIMEOUT 100000 diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6cdfbf8c5674..2b67ace5b614 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -58,6 +58,7 @@ #include <net/neighbour.h> #include <net/route.h> #include <net/ip_fib.h> +#include <net/secure_seq.h> #include <net/tcp.h> #include <linux/fcntl.h> @@ -1445,7 +1446,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, struct nes_cm_listener *listener) { struct nes_cm_node *cm_node; - struct timespec ts; int oldarpindex = 0; int arpindex = 0; struct nes_device *nesdev; @@ -1496,8 +1496,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> NES_CM_DEFAULT_RCV_WND_SCALE; - ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); + cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr), + htonl(cm_node->rem_addr), + htons(cm_node->loc_port), + htons(cm_node->rem_port)); cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; cm_node->tcp_cntxt.rcv_nxt = 0; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 18a7de1c3923..bd0675d8f298 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -70,8 +70,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); static void nes_terminate_start_timer(struct nes_qp *nesqp); -#ifdef CONFIG_INFINIBAND_NES_DEBUG -static unsigned char *nes_iwarp_state_str[] = { +static const char *const nes_iwarp_state_str[] = { "Non-Existent", "Idle", "RTS", @@ -82,7 +81,7 @@ static unsigned char *nes_iwarp_state_str[] = { "RSVD2", }; -static unsigned char *nes_tcp_state_str[] = { +static const char *const nes_tcp_state_str[] = { "Non-Existent", "Closed", "Listen", @@ -100,7 +99,6 @@ static unsigned char *nes_tcp_state_str[] = { "RSVD3", "RSVD4", }; -#endif static inline void print_ip(struct nes_cm_node *cm_node) { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 32f26556c808..6940c7215961 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -436,7 +436,8 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop props->max_mr_size = 0x80000000; props->max_qp = nesibdev->max_qp; props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; - props->max_sge = nesdev->nesadapter->max_sge; + props->max_send_sge = nesdev->nesadapter->max_sge; + props->max_recv_sge = nesdev->nesadapter->max_sge; props->max_cq = nesibdev->max_cq; props->max_cqe = nesdev->nesadapter->max_cqe; props->max_mr = nesibdev->max_mr; @@ -754,26 +755,6 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) /** - * nes_create_ah - */ -static struct ib_ah *nes_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) -{ - return ERR_PTR(-ENOSYS); -} - - -/** - * nes_destroy_ah - */ -static int nes_destroy_ah(struct ib_ah *ah) -{ - return -ENOSYS; -} - - -/** * nes_get_encoded_size */ static inline u8 nes_get_encoded_size(int *size) @@ -3004,42 +2985,9 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return err; } - -/** - * nes_muticast_attach - */ -static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - - -/** - * nes_multicast_detach - */ -static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - - -/** - * nes_process_mad - */ -static int nes_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - static inline void -fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey) +fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr, + u32 uselkey) { int sge_index; int total_payload_length = 0; @@ -3065,8 +3013,8 @@ fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselke /** * nes_post_send */ -static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, - struct ib_send_wr **bad_wr) +static int nes_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr, + const struct ib_send_wr **bad_wr) { u64 u64temp; unsigned long flags = 0; @@ -3327,8 +3275,8 @@ out: /** * nes_post_recv */ -static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, - struct ib_recv_wr **bad_wr) +static int nes_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, + const struct ib_recv_wr **bad_wr) { u64 u64temp; unsigned long flags = 0; @@ -3735,8 +3683,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.mmap = nes_mmap; nesibdev->ibdev.alloc_pd = nes_alloc_pd; nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; - nesibdev->ibdev.create_ah = nes_create_ah; - nesibdev->ibdev.destroy_ah = nes_destroy_ah; nesibdev->ibdev.create_qp = nes_create_qp; nesibdev->ibdev.modify_qp = nes_modify_qp; nesibdev->ibdev.query_qp = nes_query_qp; @@ -3753,10 +3699,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.alloc_mr = nes_alloc_mr; nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; - nesibdev->ibdev.attach_mcast = nes_multicast_attach; - nesibdev->ibdev.detach_mcast = nes_multicast_detach; - nesibdev->ibdev.process_mad = nes_process_mad; - nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; nesibdev->ibdev.post_send = nes_post_send; nesibdev->ibdev.post_recv = nes_post_recv; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 3897b64532e1..58188fe5aed2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type) } static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, - struct rdma_ah_attr *attr, union ib_gid *sgid, + struct rdma_ah_attr *attr, const union ib_gid *sgid, int pdid, bool *isvlan, u16 vlan_tag) { int status; @@ -164,17 +164,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, struct ocrdma_ah *ah; bool isvlan = false; u16 vlan_tag = 0xffff; - struct ib_gid_attr sgid_attr; + const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - const struct ib_global_route *grh; - union ib_gid sgid; if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH)) return ERR_PTR(-EINVAL); - grh = rdma_ah_read_grh(attr); if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); @@ -186,20 +183,15 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, if (status) goto av_err; - status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid, - &sgid_attr); - if (status) { - pr_err("%s(): Failed to query sgid, status = %d\n", - __func__, status); - goto av_conf_err; - } - if (is_vlan_dev(sgid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); + sgid_attr = attr->grh.sgid_attr; + if (is_vlan_dev(sgid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev); + /* Get network header type for this GID */ - ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + ah->hdr_type = rdma_gid_attr_network_type(sgid_attr); - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); + status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id, + &isvlan, vlan_tag); if (status) goto av_conf_err; @@ -262,12 +254,6 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -int ocrdma_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) -{ - /* modify_ah is unsupported */ - return -ENOSYS; -} - int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 1a65c47945aa..c0c32c9b80ae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -55,7 +55,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); int ocrdma_destroy_ah(struct ib_ah *ah); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int ocrdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, int process_mad_flags, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 6c136e5017fe..e578281471af 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1365,8 +1365,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & OCRDMA_HBA_ATTRB_PTNUM_MASK) >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; - strncpy(dev->model_number, - hba_attribs->controller_model_number, 31); + strlcpy(dev->model_number, + hba_attribs->controller_model_number, + sizeof(dev->model_number)); } dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa); free_mqe: @@ -2494,8 +2495,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, { int status; struct rdma_ah_attr *ah_attr = &attrs->ah_attr; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; + const struct ib_gid_attr *sgid_attr; u32 vlan_id = 0xFFFF; u8 mac_addr[6], hdr_type; union { @@ -2525,25 +2525,23 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0], sizeof(cmd->params.dgid)); - status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, - &sgid, &sgid_attr); - if (!status) { - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); - memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(sgid_attr.ndev); - } + sgid_attr = ah_attr->grh.sgid_attr; + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); + memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN); qp->sgid_idx = grh->sgid_index; - memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); + memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0], + sizeof(cmd->params.sgid)); status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]); if (status) return status; + cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | (mac_addr[2] << 16) | (mac_addr[3] << 24); - hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + hdr_type = rdma_gid_attr_network_type(sgid_attr); if (hdr_type == RDMA_NETWORK_IPV4) { - rdma_gid2ip(&sgid_addr._sockaddr, &sgid); + rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid); rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid); memcpy(&cmd->params.dgid[0], &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 5962c0ed9847..7832ee3e0c84 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -176,7 +176,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.create_ah = ocrdma_create_ah; dev->ibdev.destroy_ah = ocrdma_destroy_ah; dev->ibdev.query_ah = ocrdma_query_ah; - dev->ibdev.modify_ah = ocrdma_modify_ah; dev->ibdev.poll_cq = ocrdma_poll_cq; dev->ibdev.post_send = ocrdma_post_send; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 82e20fc32890..c158ca9fde6d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -89,7 +89,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_send_sge = dev->attr.max_send_sge; + attr->max_recv_sge = dev->attr.max_recv_sge; attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; @@ -196,11 +197,10 @@ int ocrdma_query_port(struct ib_device *ibdev, props->sm_lid = 0; props->sm_sl = 0; props->state = port_state; - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | - IB_PORT_IP_BASED_GIDS; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP; + props->ip_gids = true; props->gid_tbl_len = OCRDMA_MAX_SGID; props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; @@ -1774,13 +1774,13 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) * protect against proessing in-flight CQEs for this QP. */ spin_lock_irqsave(&qp->sq_cq->cq_lock, flags); - if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) { spin_lock(&qp->rq_cq->cq_lock); - - ocrdma_del_qpn_map(dev, qp); - - if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + ocrdma_del_qpn_map(dev, qp); spin_unlock(&qp->rq_cq->cq_lock); + } else { + ocrdma_del_qpn_map(dev, qp); + } spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags); if (!pd->uctx) { @@ -1953,7 +1953,7 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq) /* unprivileged verbs and their support functions. */ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct ocrdma_ewqe_ud_hdr *ud_hdr = (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); @@ -2000,7 +2000,7 @@ static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge) static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, struct ocrdma_sge *sge, - struct ib_send_wr *wr, u32 wqe_size) + const struct ib_send_wr *wr, u32 wqe_size) { int i; char *dpp_addr; @@ -2038,7 +2038,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, } static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { int status; struct ocrdma_sge *sge; @@ -2057,7 +2057,7 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, } static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { int status; struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); @@ -2075,7 +2075,7 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, } static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); struct ocrdma_sge *sge = ext_rw + 1; @@ -2105,7 +2105,7 @@ static int get_encoded_page_size(int pg_sz) static int ocrdma_build_reg(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { u64 fbo; struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); @@ -2166,8 +2166,8 @@ static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) iowrite32(val, qp->sq_db); } -int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int status = 0; struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); @@ -2278,8 +2278,8 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) iowrite32(val, qp->rq_db); } -static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, - u16 tag) +static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, + const struct ib_recv_wr *wr, u16 tag) { u32 wqe_size = 0; struct ocrdma_sge *sge; @@ -2299,8 +2299,8 @@ static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, ocrdma_cpu_to_le32(rqe, wqe_size); } -int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int status = 0; unsigned long flags; @@ -2369,8 +2369,8 @@ static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET); } -int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int status = 0; unsigned long flags; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 9a9971708646..b69cfdce7970 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -43,10 +43,10 @@ #ifndef __OCRDMA_VERBS_H__ #define __OCRDMA_VERBS_H__ -int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *, - struct ib_send_wr **bad_wr); -int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, - struct ib_recv_wr **bad_wr); +int ocrdma_post_send(struct ib_qp *, const struct ib_send_wr *, + const struct ib_send_wr **bad_wr); +int ocrdma_post_recv(struct ib_qp *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_wr); int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); @@ -100,8 +100,8 @@ int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); int ocrdma_destroy_srq(struct ib_srq *); -int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *, - struct ib_recv_wr **bad_recv_wr); +int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_recv_wr); int ocrdma_dereg_mr(struct ib_mr *); struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ad22b32bbd9c..a0af6d424aed 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -191,6 +191,11 @@ static int qedr_register_device(struct qedr_dev *dev) QEDR_UVERBS(MODIFY_QP) | QEDR_UVERBS(QUERY_QP) | QEDR_UVERBS(DESTROY_QP) | + QEDR_UVERBS(CREATE_SRQ) | + QEDR_UVERBS(DESTROY_SRQ) | + QEDR_UVERBS(QUERY_SRQ) | + QEDR_UVERBS(MODIFY_SRQ) | + QEDR_UVERBS(POST_SRQ_RECV) | QEDR_UVERBS(REG_MR) | QEDR_UVERBS(DEREG_MR) | QEDR_UVERBS(POLL_CQ) | @@ -229,6 +234,11 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.query_qp = qedr_query_qp; dev->ibdev.destroy_qp = qedr_destroy_qp; + dev->ibdev.create_srq = qedr_create_srq; + dev->ibdev.destroy_srq = qedr_destroy_srq; + dev->ibdev.modify_srq = qedr_modify_srq; + dev->ibdev.query_srq = qedr_query_srq; + dev->ibdev.post_srq_recv = qedr_post_srq_recv; dev->ibdev.query_pkey = qedr_query_pkey; dev->ibdev.create_ah = qedr_create_ah; @@ -325,8 +335,8 @@ static int qedr_alloc_resources(struct qedr_dev *dev) spin_lock_init(&dev->sgid_lock); if (IS_IWARP(dev)) { - spin_lock_init(&dev->idr_lock); - idr_init(&dev->qpidr); + spin_lock_init(&dev->qpidr.idr_lock); + idr_init(&dev->qpidr.idr); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } @@ -653,42 +663,70 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) #define EVENT_TYPE_NOT_DEFINED 0 #define EVENT_TYPE_CQ 1 #define EVENT_TYPE_QP 2 +#define EVENT_TYPE_SRQ 3 struct qedr_dev *dev = (struct qedr_dev *)context; struct regpair *async_handle = (struct regpair *)fw_handle; u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo; u8 event_type = EVENT_TYPE_NOT_DEFINED; struct ib_event event; + struct ib_srq *ibsrq; + struct qedr_srq *srq; + unsigned long flags; struct ib_cq *ibcq; struct ib_qp *ibqp; struct qedr_cq *cq; struct qedr_qp *qp; + u16 srq_id; - switch (e_code) { - case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: - event.event = IB_EVENT_CQ_ERR; - event_type = EVENT_TYPE_CQ; - break; - case ROCE_ASYNC_EVENT_SQ_DRAINED: - event.event = IB_EVENT_SQ_DRAINED; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: - event.event = IB_EVENT_QP_FATAL; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: - event.event = IB_EVENT_QP_REQ_ERR; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: - event.event = IB_EVENT_QP_ACCESS_ERR; - event_type = EVENT_TYPE_QP; - break; - default: + if (IS_ROCE(dev)) { + switch (e_code) { + case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; + case ROCE_ASYNC_EVENT_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: + event.event = IB_EVENT_QP_FATAL; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: + event.event = IB_EVENT_QP_REQ_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_SRQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + event_type = EVENT_TYPE_SRQ; + break; + case ROCE_ASYNC_EVENT_SRQ_EMPTY: + event.event = IB_EVENT_SRQ_ERR; + event_type = EVENT_TYPE_SRQ; + break; + default: + DP_ERR(dev, "unsupported event %d on handle=%llx\n", + e_code, roce_handle64); + } + } else { + switch (e_code) { + case QED_IWARP_EVENT_SRQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + event_type = EVENT_TYPE_SRQ; + break; + case QED_IWARP_EVENT_SRQ_EMPTY: + event.event = IB_EVENT_SRQ_ERR; + event_type = EVENT_TYPE_SRQ; + break; + default: DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code, roce_handle64); + } } - switch (event_type) { case EVENT_TYPE_CQ: cq = (struct qedr_cq *)(uintptr_t)roce_handle64; @@ -722,6 +760,25 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) } DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp); break; + case EVENT_TYPE_SRQ: + srq_id = (u16)roce_handle64; + spin_lock_irqsave(&dev->srqidr.idr_lock, flags); + srq = idr_find(&dev->srqidr.idr, srq_id); + if (srq) { + ibsrq = &srq->ibsrq; + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + ibsrq->event_handler(&event, + ibsrq->srq_context); + } + } else { + DP_NOTICE(dev, + "SRQ event with NULL pointer ibsrq. Handle=%llx\n", + roce_handle64); + } + spin_unlock_irqrestore(&dev->srqidr.idr_lock, flags); + DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); default: break; } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 86d4511e0d75..a2d708dceb8d 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -58,6 +58,7 @@ #define QEDR_MSG_RQ " RQ" #define QEDR_MSG_SQ " SQ" #define QEDR_MSG_QP " QP" +#define QEDR_MSG_SRQ " SRQ" #define QEDR_MSG_GSI " GSI" #define QEDR_MSG_IWARP " IW" @@ -122,6 +123,11 @@ struct qedr_device_attr { #define QEDR_ENET_STATE_BIT (0) +struct qedr_idr { + spinlock_t idr_lock; /* Protect idr data-structure */ + struct idr idr; +}; + struct qedr_dev { struct ib_device ibdev; struct qed_dev *cdev; @@ -165,8 +171,8 @@ struct qedr_dev { struct qedr_cq *gsi_rqcq; struct qedr_qp *gsi_qp; enum qed_rdma_type rdma_type; - spinlock_t idr_lock; /* Protect qpidr data-structure */ - struct idr qpidr; + struct qedr_idr qpidr; + struct qedr_idr srqidr; struct workqueue_struct *iwarp_wq; u16 iwarp_max_mtu; @@ -337,6 +343,34 @@ struct qedr_qp_hwq_info { qed_chain_get_capacity(p_info->pbl) \ } while (0) +struct qedr_srq_hwq_info { + u32 max_sges; + u32 max_wr; + struct qed_chain pbl; + u64 p_phys_addr_tbl; + u32 wqe_prod; + u32 sge_prod; + u32 wr_prod_cnt; + u32 wr_cons_cnt; + u32 num_elems; + + u32 *virt_prod_pair_addr; + dma_addr_t phy_prod_pair_addr; +}; + +struct qedr_srq { + struct ib_srq ibsrq; + struct qedr_dev *dev; + + struct qedr_userq usrq; + struct qedr_srq_hwq_info hw_srq; + struct ib_umem *prod_umem; + u16 srq_id; + u32 srq_limit; + /* lock to protect srq recv post */ + spinlock_t lock; +}; + enum qedr_qp_err_bitmap { QEDR_QP_ERR_SQ_FULL = 1, QEDR_QP_ERR_RQ_FULL = 2, @@ -538,4 +572,9 @@ static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr) { return container_of(ibmr, struct qedr_mr, ibmr); } + +static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct qedr_srq, ibsrq); +} #endif diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h index 7e1f7021396a..228dd7d49622 100644 --- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h +++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h @@ -161,12 +161,23 @@ struct rdma_rq_sge { #define RDMA_RQ_SGE_L_KEY_HI_SHIFT 29 }; +struct rdma_srq_wqe_header { + struct regpair wr_id; + u8 num_sges /* number of SGEs in WQE */; + u8 reserved2[7]; +}; + struct rdma_srq_sge { struct regpair addr; __le32 length; __le32 l_key; }; +union rdma_srq_elm { + struct rdma_srq_wqe_header header; + struct rdma_srq_sge sge; +}; + /* Rdma doorbell data for flags update */ struct rdma_pwm_flags_data { __le16 icid; /* internal CID */ diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 26dc374787f7..505fa3648762 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -491,7 +491,7 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = idr_find(&dev->qpidr, conn_param->qpn); + qp = idr_find(&dev->qpidr.idr, conn_param->qpn); laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; @@ -679,7 +679,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = idr_find(&dev->qpidr, conn_param->qpn); + qp = idr_find(&dev->qpidr.idr, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; @@ -737,9 +737,9 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) struct qedr_qp *qp = get_qedr_qp(ibqp); if (atomic_dec_and_test(&qp->refcnt)) { - spin_lock_irq(&qp->dev->idr_lock); - idr_remove(&qp->dev->qpidr, qp->qp_id); - spin_unlock_irq(&qp->dev->idr_lock); + spin_lock_irq(&qp->dev->qpidr.idr_lock); + idr_remove(&qp->dev->qpidr.idr, qp->qp_id); + spin_unlock_irq(&qp->dev->qpidr.idr_lock); kfree(qp); } } @@ -748,5 +748,5 @@ struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) { struct qedr_dev *dev = get_qedr_dev(ibdev); - return idr_find(&dev->qpidr, qpn); + return idr_find(&dev->qpidr.idr, qpn); } diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 0f14e687bb91..85578887421b 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -380,18 +380,17 @@ int qedr_destroy_gsi_qp(struct qedr_dev *dev) #define QEDR_GSI_QPN (1) static inline int qedr_gsi_build_header(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_send_wr *swr, + const struct ib_send_wr *swr, struct ib_ud_header *udh, int *roce_mode) { bool has_vlan = false, has_grh_ipv6 = true; struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - union ib_gid sgid; + const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int send_size = 0; u16 vlan_id = 0; u16 ether_type; - struct ib_gid_attr sgid_attr; int rc; int ip_ver = 0; @@ -402,28 +401,16 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, for (i = 0; i < swr->num_sge; ++i) send_size += swr->sg_list[i].length; - rc = ib_get_cached_gid(qp->ibqp.device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &sgid_attr); - if (rc) { - DP_ERR(dev, - "gsi post send: failed to get cached GID (port=%d, ix=%d)\n", - rdma_ah_get_port_num(ah_attr), - grh->sgid_index); - return rc; - } - - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); if (vlan_id < VLAN_CFI_MASK) has_vlan = true; - dev_put(sgid_attr.ndev); - - has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); + has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); if (!has_udp) { /* RoCE v1 */ ether_type = ETH_P_IBOE; *roce_mode = ROCE_V1; - } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) { /* RoCE v2 IPv4 */ ip_ver = 4; ether_type = ETH_P_IP; @@ -471,7 +458,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, udh->grh.flow_label = grh->flow_label; udh->grh.hop_limit = grh->hop_limit; udh->grh.destination_gid = grh->dgid; - memcpy(&udh->grh.source_gid.raw, &sgid.raw, + memcpy(&udh->grh.source_gid.raw, sgid_attr->gid.raw, sizeof(udh->grh.source_gid.raw)); } else { /* IPv4 header */ @@ -482,7 +469,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, udh->ip4.frag_off = htons(IP_DF); udh->ip4.ttl = grh->hop_limit; - ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw); + ipv4_addr = qedr_get_ipv4_from_gid(sgid_attr->gid.raw); udh->ip4.saddr = ipv4_addr; ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); udh->ip4.daddr = ipv4_addr; @@ -501,7 +488,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, static inline int qedr_gsi_build_packet(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_send_wr *swr, + const struct ib_send_wr *swr, struct qed_roce_ll2_packet **p_packet) { u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE]; @@ -550,8 +537,8 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev, return 0; } -int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qed_roce_ll2_packet *pkt = NULL; struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -620,8 +607,8 @@ err: return rc; } -int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h index a55916323ea9..d46dcd3f6424 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.h +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.h @@ -46,10 +46,10 @@ static inline u32 qedr_get_ipv4_from_gid(const u8 *gid) /* RDMA CM */ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); +int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs, struct qedr_qp *qp); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f07b8df96f43..8cc3df24e04e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,10 @@ #include <rdma/qedr-abi.h> #include "qedr_roce_cm.h" +#define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) +#define RDMA_MAX_SGE_PER_SRQ (4) +#define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1) + #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, @@ -84,6 +88,19 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, return 0; } +int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_device_attr *qattr = &dev->attr; + struct qedr_srq *srq = get_qedr_srq(ibsrq); + + srq_attr->srq_limit = srq->srq_limit; + srq_attr->max_wr = qattr->max_srq_wr; + srq_attr->max_sge = qattr->max_sge; + + return 0; +} + int qedr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *udata) { @@ -112,7 +129,8 @@ int qedr_query_device(struct ib_device *ibdev, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = qattr->max_sge; + attr->max_send_sge = qattr->max_sge; + attr->max_recv_sge = qattr->max_sge; attr->max_sge_rd = qattr->max_sge; attr->max_cq = qattr->max_cq; attr->max_cqe = qattr->max_cqe; @@ -224,7 +242,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->lmc = 0; attr->sm_lid = 0; attr->sm_sl = 0; - attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; + attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { attr->gid_tbl_len = 1; attr->pkey_tbl_len = 1; @@ -1075,27 +1093,19 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, struct qed_rdma_modify_qp_in_params *qp_params) { + const struct ib_gid_attr *gid_attr; enum rdma_network_type nw_type; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); - union ib_gid gid; u32 ipv4_addr; - int rc = 0; int i; - rc = ib_get_cached_gid(ibqp->device, - rdma_ah_get_port_num(&attr->ah_attr), - grh->sgid_index, &gid, &gid_attr); - if (rc) - return rc; - - qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); + gid_attr = grh->sgid_attr; + qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev); - dev_put(gid_attr.ndev); - nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); + nw_type = rdma_gid_attr_network_type(gid_attr); switch (nw_type) { case RDMA_NETWORK_IPV6: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], &grh->dgid, @@ -1105,7 +1115,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); break; case RDMA_NETWORK_IB: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], &grh->dgid, @@ -1115,7 +1125,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, case RDMA_NETWORK_IPV4: memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); - ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); + ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw); qp_params->sgid.ipv4_addr = ipv4_addr; ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); @@ -1189,6 +1199,21 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, return 0; } +static int qedr_copy_srq_uresp(struct qedr_dev *dev, + struct qedr_srq *srq, struct ib_udata *udata) +{ + struct qedr_create_srq_uresp uresp = {}; + int rc; + + uresp.srq_id = srq->srq_id; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + DP_ERR(dev, "create srq: problem copying data to user space\n"); + + return rc; +} + static void qedr_copy_rq_uresp(struct qedr_dev *dev, struct qedr_create_qp_uresp *uresp, struct qedr_qp *qp) @@ -1255,13 +1280,18 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->state = QED_ROCE_QP_STATE_RESET; qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; qp->sq_cq = get_qedr_cq(attrs->send_cq); - qp->rq_cq = get_qedr_cq(attrs->recv_cq); qp->dev = dev; - qp->rq.max_sges = attrs->cap.max_recv_sge; - DP_DEBUG(dev, QEDR_MSG_QP, - "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", - qp->rq.max_sges, qp->rq_cq->icid); + if (attrs->srq) { + qp->srq = get_qedr_srq(attrs->srq); + } else { + qp->rq_cq = get_qedr_cq(attrs->recv_cq); + qp->rq.max_sges = attrs->cap.max_recv_sge; + DP_DEBUG(dev, QEDR_MSG_QP, + "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", + qp->rq.max_sges, qp->rq_cq->icid); + } + DP_DEBUG(dev, QEDR_MSG_QP, "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n", pd->pd_id, qp->qp_type, qp->max_inline_data, @@ -1276,9 +1306,303 @@ static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid + 1; - qp->rq.db = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); - qp->rq.db_data.data.icid = qp->icid; + if (!qp->srq) { + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; + } +} + +static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev, + struct ib_srq_init_attr *attrs, + struct ib_udata *udata) +{ + struct qedr_device_attr *qattr = &dev->attr; + + if (attrs->attr.max_wr > qattr->max_srq_wr) { + DP_ERR(dev, + "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n", + attrs->attr.max_wr, qattr->max_srq_wr); + return -EINVAL; + } + + if (attrs->attr.max_sge > qattr->max_sge) { + DP_ERR(dev, + "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n", + attrs->attr.max_sge, qattr->max_sge); + return -EINVAL; + } + + return 0; +} + +static void qedr_free_srq_user_params(struct qedr_srq *srq) +{ + qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + ib_umem_release(srq->prod_umem); +} + +static void qedr_free_srq_kernel_params(struct qedr_srq *srq) +{ + struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq; + struct qedr_dev *dev = srq->dev; + + dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl); + + dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers), + hw_srq->virt_prod_pair_addr, + hw_srq->phy_prod_pair_addr); +} + +static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx, + struct qedr_srq *srq, + struct qedr_create_srq_ureq *ureq, + int access, int dmasync) +{ + struct scatterlist *sg; + int rc; + + rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr, + ureq->srq_len, access, dmasync, 1); + if (rc) + return rc; + + srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr, + sizeof(struct rdma_srq_producers), + access, dmasync); + if (IS_ERR(srq->prod_umem)) { + qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + DP_ERR(srq->dev, + "create srq: failed ib_umem_get for producer, got %ld\n", + PTR_ERR(srq->prod_umem)); + return PTR_ERR(srq->prod_umem); + } + + sg = srq->prod_umem->sg_head.sgl; + srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg); + + return 0; +} + +static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq, + struct qedr_dev *dev, + struct ib_srq_init_attr *init_attr) +{ + struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq; + dma_addr_t phy_prod_pair_addr; + u32 num_elems; + void *va; + int rc; + + va = dma_alloc_coherent(&dev->pdev->dev, + sizeof(struct rdma_srq_producers), + &phy_prod_pair_addr, GFP_KERNEL); + if (!va) { + DP_ERR(dev, + "create srq: failed to allocate dma memory for producer\n"); + return -ENOMEM; + } + + hw_srq->phy_prod_pair_addr = phy_prod_pair_addr; + hw_srq->virt_prod_pair_addr = va; + + num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE; + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U32, + num_elems, + QEDR_SRQ_WQE_ELEM_SIZE, + &hw_srq->pbl, NULL); + if (rc) + goto err0; + + hw_srq->num_elems = num_elems; + + return 0; + +err0: + dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers), + va, phy_prod_pair_addr); + return rc; +} + +static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, + void *ptr, u32 id); +static void qedr_idr_remove(struct qedr_dev *dev, + struct qedr_idr *qidr, u32 id); + +struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct qed_rdma_destroy_srq_in_params destroy_in_params; + struct qed_rdma_create_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qed_rdma_create_srq_out_params out_params; + struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_create_srq_ureq ureq = {}; + u64 pbl_base_addr, phy_prod_pair_addr; + struct ib_ucontext *ib_ctx = NULL; + struct qedr_srq_hwq_info *hw_srq; + struct qedr_ucontext *ctx = NULL; + u32 page_cnt, page_size; + struct qedr_srq *srq; + int rc = 0; + + DP_DEBUG(dev, QEDR_MSG_QP, + "create SRQ called from %s (pd %p)\n", + (udata) ? "User lib" : "kernel", pd); + + rc = qedr_check_srq_params(ibpd, dev, init_attr, udata); + if (rc) + return ERR_PTR(-EINVAL); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->dev = dev; + hw_srq = &srq->hw_srq; + spin_lock_init(&srq->lock); + + hw_srq->max_wr = init_attr->attr.max_wr; + hw_srq->max_sges = init_attr->attr.max_sge; + + if (udata && ibpd->uobject && ibpd->uobject->context) { + ib_ctx = ibpd->uobject->context; + ctx = get_qedr_ucontext(ib_ctx); + + if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + DP_ERR(dev, + "create srq: problem copying data from user space\n"); + goto err0; + } + + rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0); + if (rc) + goto err0; + + page_cnt = srq->usrq.pbl_info.num_pbes; + pbl_base_addr = srq->usrq.pbl_tbl->pa; + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = BIT(srq->usrq.umem->page_shift); + } else { + struct qed_chain *pbl; + + rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr); + if (rc) + goto err0; + + pbl = &hw_srq->pbl; + page_cnt = qed_chain_get_page_cnt(pbl); + pbl_base_addr = qed_chain_get_pbl_phys(pbl); + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = QED_CHAIN_PAGE_SIZE; + } + + in_params.pd_id = pd->pd_id; + in_params.pbl_base_addr = pbl_base_addr; + in_params.prod_pair_addr = phy_prod_pair_addr; + in_params.num_pages = page_cnt; + in_params.page_size = page_size; + + rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params); + if (rc) + goto err1; + + srq->srq_id = out_params.srq_id; + + if (udata) { + rc = qedr_copy_srq_uresp(dev, srq, udata); + if (rc) + goto err2; + } + + rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id); + if (rc) + goto err2; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "create srq: created srq with srq_id=0x%0x\n", srq->srq_id); + return &srq->ibsrq; + +err2: + destroy_in_params.srq_id = srq->srq_id; + + dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params); +err1: + if (udata) + qedr_free_srq_user_params(srq); + else + qedr_free_srq_kernel_params(srq); +err0: + kfree(srq); + + return ERR_PTR(-EFAULT); +} + +int qedr_destroy_srq(struct ib_srq *ibsrq) +{ + struct qed_rdma_destroy_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_srq *srq = get_qedr_srq(ibsrq); + + qedr_idr_remove(dev, &dev->srqidr, srq->srq_id); + in_params.srq_id = srq->srq_id; + dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); + + if (ibsrq->pd->uobject) + qedr_free_srq_user_params(srq); + else + qedr_free_srq_kernel_params(srq); + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "destroy srq: destroyed srq with srq_id=0x%0x\n", + srq->srq_id); + kfree(srq); + + return 0; +} + +int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct qed_rdma_modify_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_srq *srq = get_qedr_srq(ibsrq); + int rc; + + if (attr_mask & IB_SRQ_MAX_WR) { + DP_ERR(dev, + "modify srq: invalid attribute mask=0x%x specified for %p\n", + attr_mask, srq); + return -EINVAL; + } + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->hw_srq.max_wr) { + DP_ERR(dev, + "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n", + attr->srq_limit, srq->hw_srq.max_wr); + return -EINVAL; + } + + in_params.srq_id = srq->srq_id; + in_params.wqe_limit = attr->srq_limit; + rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params); + if (rc) + return rc; + } + + srq->srq_limit = attr->srq_limit; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id); + + return 0; } static inline void @@ -1299,9 +1623,17 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev, params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; params->stats_queue = 0; - params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; params->srq_id = 0; params->use_srq = false; + + if (!qp->srq) { + params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; + + } else { + params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; + params->srq_id = qp->srq->srq_id; + params->use_srq = true; + } } static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) @@ -1318,32 +1650,27 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); } -static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id) +static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, + void *ptr, u32 id) { int rc; - if (!rdma_protocol_iwarp(&dev->ibdev, 1)) - return 0; - idr_preload(GFP_KERNEL); - spin_lock_irq(&dev->idr_lock); + spin_lock_irq(&qidr->idr_lock); - rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC); + rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC); - spin_unlock_irq(&dev->idr_lock); + spin_unlock_irq(&qidr->idr_lock); idr_preload_end(); return rc < 0 ? rc : 0; } -static void qedr_idr_remove(struct qedr_dev *dev, u32 id) +static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id) { - if (!rdma_protocol_iwarp(&dev->ibdev, 1)) - return; - - spin_lock_irq(&dev->idr_lock); - idr_remove(&dev->qpidr, id); - spin_unlock_irq(&dev->idr_lock); + spin_lock_irq(&qidr->idr_lock); + idr_remove(&qidr->idr, id); + spin_unlock_irq(&qidr->idr_lock); } static inline void @@ -1356,9 +1683,10 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev, qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, &qp->usq.pbl_info, FW_PAGE_SHIFT); - - qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; - qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + if (!qp->srq) { + qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; + qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + } qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, &qp->urq.pbl_info, FW_PAGE_SHIFT); @@ -1404,11 +1732,13 @@ static int qedr_create_user_qp(struct qedr_dev *dev, if (rc) return rc; - /* RQ - read access only (0), dma sync not required (0) */ - rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, 0, 0, alloc_and_init); - if (rc) - return rc; + if (!qp->srq) { + /* RQ - read access only (0), dma sync not required (0) */ + rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, + ureq.rq_len, 0, 0, alloc_and_init); + if (rc) + return rc; + } memset(&in_params, 0, sizeof(in_params)); qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); @@ -1416,8 +1746,10 @@ static int qedr_create_user_qp(struct qedr_dev *dev, in_params.qp_handle_hi = ureq.qp_handle_hi; in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; - in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; - in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; + if (!qp->srq) { + in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; + in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; + } qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); @@ -1679,16 +2011,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, if (rc) return ERR_PTR(rc); - if (attrs->srq) - return ERR_PTR(-EINVAL); - DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", udata ? "user library" : "kernel", attrs->event_handler, pd, get_qedr_cq(attrs->send_cq), get_qedr_cq(attrs->send_cq)->icid, get_qedr_cq(attrs->recv_cq), - get_qedr_cq(attrs->recv_cq)->icid); + attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0); qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) { @@ -1715,9 +2044,11 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; - rc = qedr_idr_add(dev, qp, qp->qp_id); - if (rc) - goto err; + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id); + if (rc) + goto err; + } return &qp->ibqp; @@ -2289,8 +2620,9 @@ int qedr_destroy_qp(struct ib_qp *ibqp) qedr_free_qp_resources(dev, qp); - if (atomic_dec_and_test(&qp->refcnt)) { - qedr_idr_remove(dev, qp->qp_id); + if (atomic_dec_and_test(&qp->refcnt) && + rdma_protocol_iwarp(&dev->ibdev, 1)) { + qedr_idr_remove(dev, &dev->qpidr, qp->qp_id); kfree(qp); } return rc; @@ -2305,7 +2637,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, if (!ah) return ERR_PTR(-ENOMEM); - ah->attr = *attr; + rdma_copy_ah_attr(&ah->attr, attr); return &ah->ibah; } @@ -2314,6 +2646,7 @@ int qedr_destroy_ah(struct ib_ah *ibah) { struct qedr_ah *ah = get_qedr_ah(ibah); + rdma_destroy_ah_attr(&ah->attr); kfree(ah); return 0; } @@ -2705,9 +3038,9 @@ static void swap_wqe_data64(u64 *p) static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, struct qedr_qp *qp, u8 *wqe_size, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr, u8 *bits, - u8 bit) + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, + u8 *bits, u8 bit) { u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); char *seg_prt, *wqe; @@ -2790,7 +3123,7 @@ static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, } while (0) static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { u32 data_size = 0; int i; @@ -2814,8 +3147,8 @@ static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev, struct qedr_qp *qp, struct rdma_sq_rdma_wqe_1st *rwqe, struct rdma_sq_rdma_wqe_2nd *rwqe2, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr); @@ -2837,8 +3170,8 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, struct qedr_qp *qp, struct rdma_sq_send_wqe_1st *swqe, struct rdma_sq_send_wqe_2st *swqe2, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { memset(swqe2, 0, sizeof(*swqe2)); if (wr->send_flags & IB_SEND_INLINE) { @@ -2854,7 +3187,7 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, static int qedr_prepare_reg(struct qedr_qp *qp, struct rdma_sq_fmr_wqe_1st *fwqe1, - struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { struct qedr_mr *mr = get_qedr_mr(wr->mr); struct rdma_sq_fmr_wqe_2nd *fwqe2; @@ -2916,7 +3249,8 @@ static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) } } -static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) +static inline bool qedr_can_post_send(struct qedr_qp *qp, + const struct ib_send_wr *wr) { int wq_is_full, err_wr, pbl_is_full; struct qedr_dev *dev = qp->dev; @@ -2953,8 +3287,8 @@ static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) return true; } -static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -3168,8 +3502,8 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return rc; } -int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -3234,8 +3568,104 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return rc; } -int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) +{ + u32 used; + + /* Calculate number of elements used based on producer + * count and consumer count and subtract it from max + * work request supported so that we get elements left. + */ + used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + + return hw_srq->max_wr - used; +} + +int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct qedr_srq *srq = get_qedr_srq(ibsrq); + struct qedr_srq_hwq_info *hw_srq; + struct qedr_dev *dev = srq->dev; + struct qed_chain *pbl; + unsigned long flags; + int status = 0; + u32 num_sge; + u32 offset; + + spin_lock_irqsave(&srq->lock, flags); + + hw_srq = &srq->hw_srq; + pbl = &srq->hw_srq.pbl; + while (wr) { + struct rdma_srq_wqe_header *hdr; + int i; + + if (!qedr_srq_elem_left(hw_srq) || + wr->num_sge > srq->hw_srq.max_sges) { + DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", + hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + wr->num_sge, srq->hw_srq.max_sges); + status = -ENOMEM; + *bad_wr = wr; + break; + } + + hdr = qed_chain_produce(pbl); + num_sge = wr->num_sge; + /* Set number of sge and work request id in header */ + SRQ_HDR_SET(hdr, wr->wr_id, num_sge); + + srq->hw_srq.wr_prod_cnt++; + hw_srq->wqe_prod++; + hw_srq->sge_prod++; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n", + wr->num_sge, hw_srq->wqe_prod, wr->wr_id); + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl); + + /* Set SGE length, lkey and address */ + SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr, + wr->sg_list[i].length, wr->sg_list[i].lkey); + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "[%d]: len %d key %x addr %x:%x\n", + i, srq_sge->length, srq_sge->l_key, + srq_sge->addr.hi, srq_sge->addr.lo); + hw_srq->sge_prod++; + } + + /* Flush WQE and SGE information before + * updating producer. + */ + wmb(); + + /* SRQ producer is 8 bytes. Need to update SGE producer index + * in first 4 bytes and need to update WQE producer in + * next 4 bytes. + */ + *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; + offset = offsetof(struct rdma_srq_producers, wqe_prod); + *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = + hw_srq->wqe_prod; + + /* Flush producer after updating it. */ + wmb(); + wr = wr->next; + } + + DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n", + qed_chain_get_elem_left(pbl)); + spin_unlock_irqrestore(&srq->lock, flags); + + return status; +} + +int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct qedr_qp *qp = get_qedr_qp(ibqp); struct qedr_dev *dev = qp->dev; @@ -3625,6 +4055,31 @@ static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, wc->wr_id = wr_id; } +static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + struct qedr_srq *srq = qp->srq; + u64 wr_id; + + wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi), + le32_to_cpu(resp->srq_wr_id.lo), u64); + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->wr_id = wr_id; + wc->byte_len = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + wc->wr_id = wr_id; + } else { + __process_resp_one(dev, qp, cq, wc, resp, wr_id); + } + srq->hw_srq.wr_cons_cnt++; + + return 1; +} static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, struct qedr_cq *cq, struct ib_wc *wc, struct rdma_cqe_responder *resp) @@ -3674,6 +4129,19 @@ static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp, } } +static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, int num_entries, + struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + int cnt; + + cnt = process_resp_one_srq(dev, qp, cq, wc, resp); + consume_cqe(cq); + + return cnt; +} + static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp, struct qedr_cq *cq, int num_entries, struct ib_wc *wc, struct rdma_cqe_responder *resp, @@ -3751,6 +4219,11 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc, &cqe->resp, &update); break; + case RDMA_CQE_TYPE_RESPONDER_SRQ: + cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries, + wc, &cqe->resp); + update = 1; + break; case RDMA_CQE_TYPE_INVALID: default: DP_ERR(dev, "Error: invalid CQE type = %d\n", diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 2c57e4c592a6..0b7d0124b16c 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -66,6 +66,15 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp); +struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *attr, + struct ib_udata *udata); +int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); +int qedr_destroy_srq(struct ib_srq *ibsrq); +int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, struct ib_udata *udata); int qedr_destroy_ah(struct ib_ah *ibah); @@ -82,10 +91,10 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); -int qedr_post_send(struct ib_qp *, struct ib_send_wr *, - struct ib_send_wr **bad_wr); -int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *, - struct ib_recv_wr **bad_wr); +int qedr_post_send(struct ib_qp *, const struct ib_send_wr *, + const struct ib_send_wr **bad_wr); +int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_wr); int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 14b4057a2b8f..41babbc0db58 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1489,7 +1489,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_mr_size = ~0ULL; rdi->dparms.props.max_qp = ib_qib_max_qps; rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs; - rdi->dparms.props.max_sge = ib_qib_max_sges; + rdi->dparms.props.max_send_sge = ib_qib_max_sges; + rdi->dparms.props.max_recv_sge = ib_qib_max_sges; rdi->dparms.props.max_sge_rd = ib_qib_max_sges; rdi->dparms.props.max_cq = ib_qib_max_cqs; rdi->dparms.props.max_cqe = ib_qib_max_cqes; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index f9a46768a19a..666613eef88f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -78,9 +78,6 @@ struct qib_verbs_txreq; #define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) -/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ -#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) /* Values for set/get portinfo VLCap OperationalVLs */ @@ -314,7 +311,7 @@ void qib_rc_rnr_retry(unsigned long arg); void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); -int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); +int qib_post_ud_send(struct rvt_qp *qp, const struct ib_send_wr *wr); void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig index 29ab11c34f3f..d1dae2af4ca9 100644 --- a/drivers/infiniband/hw/usnic/Kconfig +++ b/drivers/infiniband/hw/usnic/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_USNIC tristate "Verbs support for Cisco VIC" depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU + depends on INFINIBAND_USER_ACCESS select ENIC select NET_VENDOR_CISCO select PCI_IOV - select INFINIBAND_USER_ACCESS ---help--- This is a low-level driver for Cisco's Virtual Interface Cards (VICs), including the VIC 1240 and 1280 cards. diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 995a26b65156..7875883621f4 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -92,8 +92,8 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev) ufdev->pdev = pdev; ufdev->netdev = pci_get_drvdata(pdev); spin_lock_init(&ufdev->lock); - strncpy(ufdev->name, netdev_name(ufdev->netdev), - sizeof(ufdev->name) - 1); + BUILD_BUG_ON(sizeof(ufdev->name) != sizeof(ufdev->netdev->name)); + strcpy(ufdev->name, ufdev->netdev->name); return ufdev; } diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index 0b2cc4e79707..f0b71d593da5 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -57,7 +57,7 @@ struct usnic_fwd_dev { char mac[ETH_ALEN]; unsigned int mtu; __be32 inaddr; - char name[IFNAMSIZ+1]; + char name[IFNAMSIZ]; }; struct usnic_fwd_flow { diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a688a5669168..9973ac893635 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -666,7 +666,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr) usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); - usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); + usnic_uiom_reg_release(mr->umem, ibmr->uobject->context); kfree(mr); return 0; } @@ -771,15 +771,15 @@ int usnic_ib_destroy_ah(struct ib_ah *ah) return -EINVAL; } -int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { usnic_dbg("\n"); return -EINVAL; } -int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { usnic_dbg("\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 1fda94425116..2a2c9beb715f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -80,10 +80,10 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct ib_udata *udata); int usnic_ib_destroy_ah(struct ib_ah *ah); -int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int usnic_ib_req_notify_cq(struct ib_cq *cq, diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 4381c0a9a873..9dd39daa602b 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -41,6 +41,7 @@ #include <linux/workqueue.h> #include <linux/list.h> #include <linux/pci.h> +#include <rdma/ib_verbs.h> #include "usnic_log.h" #include "usnic_uiom.h" @@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) for_each_sg(chunk->page_list, sg, chunk->nents, i) { page = sg_page(sg); pa = sg_phys(sg); - if (dirty) + if (!PageDirty(page) && dirty) set_page_dirty_lock(page); put_page(page); usnic_dbg("pa: %pa\n", &pa); @@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, dma_addr_t pa; unsigned int gup_flags; + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. + */ + if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size)) + return -EINVAL; + + if (!size) + return -EINVAL; + if (!can_do_mlock()) return -EPERM; @@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, down_write(¤t->mm->mmap_sem); - locked = npages + current->mm->locked_vm; + locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = 0; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), gup_flags, page_list, NULL); @@ -186,7 +197,7 @@ out: if (ret < 0) usnic_uiom_put_pages(chunk_list, 0); else - current->mm->locked_vm = locked; + current->mm->pinned_vm = locked; up_write(¤t->mm->mmap_sem); free_page((unsigned long) page_list); @@ -420,18 +431,22 @@ out_free_uiomr: return ERR_PTR(err); } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext) { + struct task_struct *task; struct mm_struct *mm; unsigned long diff; __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); - mm = get_task_mm(current); - if (!mm) { - kfree(uiomr); - return; - } + task = get_pid_task(ucontext->tgid, PIDTYPE_PID); + if (!task) + goto out; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; @@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue. */ - if (closing) { + if (ucontext->closing) { if (!down_write_trylock(&mm->mmap_sem)) { INIT_WORK(&uiomr->work, usnic_uiom_reg_account); uiomr->mm = mm; @@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) } else down_write(&mm->mmap_sem); - current->mm->locked_vm -= diff; + mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); +out: kfree(uiomr); } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 431efe4143f4..8c096acff123 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -39,6 +39,8 @@ #include "usnic_uiom_interval_tree.h" +struct ib_ucontext; + #define USNIC_UIOM_READ (1) #define USNIC_UIOM_WRITE (2) @@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs); struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext); int usnic_uiom_init(char *drv_name); void usnic_uiom_fini(void); #endif /* USNIC_UIOM_H_ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 44cb1cfba417..42b8685c997e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -378,11 +378,6 @@ static inline enum ib_port_speed pvrdma_port_speed_to_ib( return (enum ib_port_speed)speed; } -static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask) -{ - return attr_mask; -} - static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask) { return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index f95b97646c25..0f004c737620 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -278,19 +278,6 @@ int pvrdma_destroy_cq(struct ib_cq *cq) return ret; } -/** - * pvrdma_modify_cq - modify the CQ moderation parameters - * @ibcq: the CQ to modify - * @cq_count: number of CQEs that will trigger an event - * @cq_period: max period of time in usec before triggering an event - * - * @return: -EOPNOTSUPP as CQ resize is not supported. - */ -int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) { return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr( @@ -428,16 +415,3 @@ int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) /* Ensure we do not return errors from poll_cq */ return npolled; } - -/** - * pvrdma_resize_cq - resize CQ - * @ibcq: the completion queue - * @entries: CQ entries - * @udata: user data - * - * @return: -EOPNOTSUPP as CQ resize is not supported. - */ -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) -{ - return -EOPNOTSUPP; -} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 0be33a81bbe6..a5719899f49a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -62,9 +62,7 @@ static DEFINE_MUTEX(pvrdma_device_list_lock); static LIST_HEAD(pvrdma_device_list); static struct workqueue_struct *event_wq; -static int pvrdma_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context); +static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context); static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t show_hca(struct device *device, struct device_attribute *attr, @@ -216,8 +214,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.post_send = pvrdma_post_send; dev->ib_dev.post_recv = pvrdma_post_recv; dev->ib_dev.create_cq = pvrdma_create_cq; - dev->ib_dev.modify_cq = pvrdma_modify_cq; - dev->ib_dev.resize_cq = pvrdma_resize_cq; dev->ib_dev.destroy_cq = pvrdma_destroy_cq; dev->ib_dev.poll_cq = pvrdma_poll_cq; dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; @@ -261,7 +257,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.modify_srq = pvrdma_modify_srq; dev->ib_dev.query_srq = pvrdma_query_srq; dev->ib_dev.destroy_srq = pvrdma_destroy_srq; - dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv; dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, sizeof(struct pvrdma_srq *), @@ -650,13 +645,11 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, return 0; } -static int pvrdma_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) +static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context) { struct pvrdma_dev *dev = to_vdev(attr->device); - return pvrdma_add_gid_at_index(dev, gid, + return pvrdma_add_gid_at_index(dev, &attr->gid, ib_gid_type_to_pvrdma(attr->gid_type), attr->index); } @@ -699,8 +692,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) } static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, + struct net_device *ndev, unsigned long event) { + struct pci_dev *pdev_net; + unsigned int slot; + switch (event) { case NETDEV_REBOOT: case NETDEV_DOWN: @@ -718,6 +715,24 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, else pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; + case NETDEV_UNREGISTER: + dev_put(dev->netdev); + dev->netdev = NULL; + break; + case NETDEV_REGISTER: + /* vmxnet3 will have same bus, slot. But func will be 0 */ + slot = PCI_SLOT(dev->pdev->devfn); + pdev_net = pci_get_slot(dev->pdev->bus, + PCI_DEVFN(slot, 0)); + if ((dev->netdev == NULL) && + (pci_get_drvdata(pdev_net) == ndev)) { + /* this is our netdev */ + dev->netdev = ndev; + dev_hold(ndev); + } + pci_dev_put(pdev_net); + break; + default: dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", event, dev->ib_dev.name); @@ -734,8 +749,11 @@ static void pvrdma_netdevice_event_work(struct work_struct *work) mutex_lock(&pvrdma_device_list_lock); list_for_each_entry(dev, &pvrdma_device_list, device_link) { - if (dev->netdev == netdev_work->event_netdev) { - pvrdma_netdevice_event_handle(dev, netdev_work->event); + if ((netdev_work->event == NETDEV_REGISTER) || + (dev->netdev == netdev_work->event_netdev)) { + pvrdma_netdevice_event_handle(dev, + netdev_work->event_netdev, + netdev_work->event); break; } } @@ -968,6 +986,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, ret = -ENODEV; goto err_free_cq_ring; } + dev_hold(dev->netdev); dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); @@ -1040,6 +1059,10 @@ err_free_intrs: pvrdma_free_irq(dev); pci_free_irq_vectors(pdev); err_free_cq_ring: + if (dev->netdev) { + dev_put(dev->netdev); + dev->netdev = NULL; + } pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); err_free_async_ring: pvrdma_page_dir_cleanup(dev, &dev->async_pdir); @@ -1079,6 +1102,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) flush_workqueue(event_wq); + if (dev->netdev) { + dev_put(dev->netdev); + dev->netdev = NULL; + } + /* Unregister ib device */ ib_unregister_device(&dev->ib_dev); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index eb5b1065ec08..60083c0363a5 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -599,7 +599,8 @@ static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n) qp->rq.offset + n * qp->rq.wqe_size); } -static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr) +static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, + const struct ib_reg_wr *wr) { struct pvrdma_user_mr *mr = to_vmr(wr->mr); @@ -623,8 +624,8 @@ static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr) * * @return: 0 on success, otherwise errno returned. */ -int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct pvrdma_qp *qp = to_vqp(ibqp); struct pvrdma_dev *dev = to_vdev(ibqp->device); @@ -827,8 +828,8 @@ out: * * @return: 0 on success, otherwise errno returned. */ -int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct pvrdma_dev *dev = to_vdev(ibqp->device); unsigned long flags; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index af235967a9c2..dc0ce877c7a3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -52,13 +52,6 @@ #include "pvrdma.h" -int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - /* No support for kernel clients. */ - return -EOPNOTSUPP; -} - /** * pvrdma_query_srq - query shared receive queue * @ibsrq: the shared receive queue to query diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index a51463cd2f37..b65d10b0a875 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -82,7 +82,8 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_qp = dev->dsr->caps.max_qp; props->max_qp_wr = dev->dsr->caps.max_qp_wr; props->device_cap_flags = dev->dsr->caps.device_cap_flags; - props->max_sge = dev->dsr->caps.max_sge; + props->max_send_sge = dev->dsr->caps.max_sge; + props->max_recv_sge = dev->dsr->caps.max_sge; props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, dev->dsr->caps.max_sge_rd); props->max_srq = dev->dsr->caps.max_srq; @@ -154,7 +155,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port, props->gid_tbl_len = resp->attrs.gid_tbl_len; props->port_cap_flags = pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); - props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; props->max_msg_sz = resp->attrs.max_msg_sz; props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index b7b25728a7e5..b2e3ab50cb08 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -412,15 +412,10 @@ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, - struct ib_udata *udata); struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, - struct ib_udata *udata); int pvrdma_destroy_cq(struct ib_cq *cq); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); @@ -435,8 +430,6 @@ int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int pvrdma_destroy_srq(struct ib_srq *srq); -int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, @@ -446,9 +439,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int pvrdma_destroy_qp(struct ib_qp *qp); -int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); #endif /* __PVRDMA_VERBS_H__ */ diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ba3639a0d77c..89ec0f64abfc 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -120,7 +120,8 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, dev->n_ahs_allocated++; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - ah->attr = *ah_attr; + rdma_copy_ah_attr(&ah->attr, ah_attr); + atomic_set(&ah->refcount, 0); if (dev->driver_f.notify_new_ah) @@ -148,6 +149,7 @@ int rvt_destroy_ah(struct ib_ah *ibah) dev->n_ahs_allocated--; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + rdma_destroy_ah_attr(&ah->attr); kfree(ah); return 0; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 41183bd665ca..5ce403c6cddb 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -780,14 +780,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!rdi) return ERR_PTR(-EINVAL); - if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || init_attr->create_flags) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { - if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || + if (init_attr->cap.max_recv_sge > + rdi->dparms.props.max_recv_sge || init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); @@ -1336,13 +1337,13 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->qp_access_flags = attr->qp_access_flags; if (attr_mask & IB_QP_AV) { - qp->remote_ah_attr = attr->ah_attr; + rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr); qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr); qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); } if (attr_mask & IB_QP_ALT_PATH) { - qp->alt_ah_attr = attr->alt_ah_attr; + rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr); qp->s_alt_pkey_index = attr->alt_pkey_index; } @@ -1459,6 +1460,8 @@ int rvt_destroy_qp(struct ib_qp *ibqp) vfree(qp->s_wq); rdi->driver_f.qp_priv_free(rdi, qp); kfree(qp->s_ack_queue); + rdma_destroy_ah_attr(&qp->remote_ah_attr); + rdma_destroy_ah_attr(&qp->alt_ah_attr); kfree(qp); return 0; } @@ -1535,8 +1538,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * * Return: 0 on success otherwise errno */ -int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_rwq *wq = qp->r_rq.wq; @@ -1617,7 +1620,7 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, static inline int rvt_qp_valid_operation( struct rvt_qp *qp, const struct rvt_operation_params *post_parms, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { int len; @@ -1714,7 +1717,7 @@ static inline int rvt_qp_is_avail( * @wr: the work request to send */ static int rvt_post_one_wr(struct rvt_qp *qp, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, int *call_send) { struct rvt_swqe *wqe; @@ -1888,8 +1891,8 @@ bail_inval_free: * * Return: 0 on success else errno */ -int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); @@ -1945,8 +1948,8 @@ bail: * * Return: 0 on success else errno */ -int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_rwq *wq; diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 8409f80d5f25..264811fdc530 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -60,10 +60,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int rvt_destroy_qp(struct ib_qp *ibqp); int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); -int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); #endif /* DEF_RVTQP_H */ diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 3707952b4364..78e06fc456c5 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -82,7 +82,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq *ret; if (srq_init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 7121e1b1eb89..10999fa69281 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -91,7 +91,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; - rxe->attr.max_sge = RXE_MAX_SGE; + rxe->attr.max_send_sge = RXE_MAX_SGE; + rxe->attr.max_recv_sge = RXE_MAX_SGE; rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; rxe->attr.max_cq = RXE_MAX_CQ; rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 7f1ae364088a..26fe8d7dbc55 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -55,29 +55,41 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr) { + const struct ib_global_route *grh = rdma_ah_read_grh(attr); + memset(av, 0, sizeof(*av)); - memcpy(&av->grh, rdma_ah_read_grh(attr), - sizeof(*rdma_ah_read_grh(attr))); + memcpy(av->grh.dgid.raw, grh->dgid.raw, sizeof(grh->dgid.raw)); + av->grh.flow_label = grh->flow_label; + av->grh.sgid_index = grh->sgid_index; + av->grh.hop_limit = grh->hop_limit; + av->grh.traffic_class = grh->traffic_class; av->port_num = port_num; } void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr) { + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + attr->type = RDMA_AH_ATTR_TYPE_ROCE; - memcpy(rdma_ah_retrieve_grh(attr), &av->grh, sizeof(av->grh)); + + memcpy(grh->dgid.raw, av->grh.dgid.raw, sizeof(av->grh.dgid.raw)); + grh->flow_label = av->grh.flow_label; + grh->sgid_index = av->grh.sgid_index; + grh->hop_limit = av->grh.hop_limit; + grh->traffic_class = av->grh.traffic_class; + rdma_ah_set_ah_flags(attr, IB_AH_GRH); rdma_ah_set_port_num(attr, av->port_num); } -void rxe_av_fill_ip_info(struct rxe_av *av, - struct rdma_ah_attr *attr, - struct ib_gid_attr *sgid_attr, - union ib_gid *sgid) +void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) { - rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid); + const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr; + + rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &rdma_ah_read_grh(attr)->dgid); - av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); + av->network_type = rdma_gid_attr_network_type(sgid_attr); } struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 98d470d1f3fc..83311dd07019 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -276,6 +276,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (wqe->wr.opcode != IB_WR_RDMA_READ && wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + wqe->status = IB_WC_FATAL_ERR; return COMPST_ERROR; } reset_retry_counters(qp); diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index a51ece596c43..87d14f7ef21b 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -43,10 +43,7 @@ void rxe_av_from_attr(u8 port_num, struct rxe_av *av, void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); -void rxe_av_fill_ip_info(struct rxe_av *av, - struct rdma_ah_attr *attr, - struct ib_gid_attr *sgid_attr, - union ib_gid *sgid); +void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 59ec6d918ed4..8094cbaa54a9 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -182,39 +182,19 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, #endif -/* - * Derive the net_device from the av. - * For physical devices, this will just return rxe->ndev. - * But for VLAN devices, it will return the vlan dev. - * Caller should dev_put() the returned net_device. - */ -static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe, - int port_num, - struct rxe_av *av) -{ - union ib_gid gid; - struct ib_gid_attr attr; - struct net_device *ndev = rxe->ndev; - - if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index, - &gid, &attr) == 0 && - attr.ndev && attr.ndev != ndev) - ndev = attr.ndev; - else - /* Only to ensure that caller may call dev_put() */ - dev_hold(ndev); - - return ndev; -} - static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_av *av) { + const struct ib_gid_attr *attr; struct dst_entry *dst = NULL; struct net_device *ndev; - ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av); + attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num, + av->grh.sgid_index); + if (IS_ERR(attr)) + return NULL; + ndev = attr->ndev; if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); @@ -243,9 +223,13 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, rt6_get_cookie((struct rt6_info *)dst); #endif } - } - dev_put(ndev); + if (dst && (qp_type(qp) == IB_QPT_RC)) { + dst_hold(dst); + sk_dst_set(qp->sk->sk, dst); + } + } + rdma_put_gid_attr(attr); return dst; } @@ -418,11 +402,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, av->grh.traffic_class, av->grh.hop_limit, df, xnet); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_set(qp->sk->sk, dst); - else - dst_release(dst); - + dst_release(dst); return 0; } @@ -450,11 +430,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, av->grh.traffic_class, av->grh.hop_limit); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_set(qp->sk->sk, dst); - else - dst_release(dst); - + dst_release(dst); return 0; } @@ -536,9 +512,13 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, unsigned int hdr_len; struct sk_buff *skb; struct net_device *ndev; + const struct ib_gid_attr *attr; const int port_num = 1; - ndev = rxe_netdev_from_av(rxe, port_num, av); + attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index); + if (IS_ERR(attr)) + return NULL; + ndev = attr->ndev; if (av->network_type == RDMA_NETWORK_IPV4) hdr_len = ETH_HLEN + sizeof(struct udphdr) + @@ -550,10 +530,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), GFP_ATOMIC); - if (unlikely(!skb)) { - dev_put(ndev); - return NULL; - } + if (unlikely(!skb)) + goto out; skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); @@ -568,7 +546,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, pkt->hdr = skb_put_zero(skb, paylen); pkt->mask |= RXE_GRH_MASK; - dev_put(ndev); +out: + rdma_put_gid_attr(attr); return skb; } diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 1b596fbbe251..4555510d86c4 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -83,7 +83,7 @@ enum rxe_device_param { RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, RXE_MAX_LOG_CQE = 15, - RXE_MAX_MR = 2 * 1024, + RXE_MAX_MR = 256 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, RXE_MAX_EE_RD_ATOM = 0, diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b9f7aa1114b2..c58452daffc7 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -49,9 +49,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, goto err1; } - if (cap->max_send_sge > rxe->attr.max_sge) { + if (cap->max_send_sge > rxe->attr.max_send_sge) { pr_warn("invalid send sge = %d > %d\n", - cap->max_send_sge, rxe->attr.max_sge); + cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } @@ -62,9 +62,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, goto err1; } - if (cap->max_recv_sge > rxe->attr.max_sge) { + if (cap->max_recv_sge > rxe->attr.max_recv_sge) { pr_warn("invalid recv sge = %d > %d\n", - cap->max_recv_sge, rxe->attr.max_sge); + cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } @@ -580,9 +580,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata) { int err; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - union ib_gid sgid; - struct ib_gid_attr sgid_attr; if (mask & IB_QP_MAX_QP_RD_ATOMIC) { int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); @@ -623,30 +620,14 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.qkey = attr->qkey; if (mask & IB_QP_AV) { - ib_get_cached_gid(&rxe->ib_dev, 1, - rdma_ah_read_grh(&attr->ah_attr)->sgid_index, - &sgid, &sgid_attr); rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr); - rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr, - &sgid_attr, &sgid); - if (sgid_attr.ndev) - dev_put(sgid_attr.ndev); + rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr); } if (mask & IB_QP_ALT_PATH) { - u8 sgid_index = - rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index; - - ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index, - &sgid, &sgid_attr); - rxe_av_from_attr(attr->alt_port_num, &qp->alt_av, &attr->alt_ah_attr); - rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr, - &sgid_attr, &sgid); - if (sgid_attr.ndev) - dev_put(sgid_attr.ndev); - + rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr); qp->attr.alt_port_num = attr->alt_port_num; qp->attr.alt_pkey_index = attr->alt_pkey_index; qp->attr.alt_timeout = attr->alt_timeout; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index dfba44a40f0b..d30dbac24583 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -225,9 +225,14 @@ static int hdr_check(struct rxe_pkt_info *pkt) goto err1; } + if (unlikely(qpn == 0)) { + pr_warn_once("QP 0 not supported"); + goto err1; + } + if (qpn != IB_MULTICAST_QPN) { - index = (qpn == 0) ? port->qp_smi_index : - ((qpn == 1) ? port->qp_gsi_index : qpn); + index = (qpn == 1) ? port->qp_gsi_index : qpn; + qp = rxe_pool_get_index(&rxe->qp_pool, index); if (unlikely(!qp)) { pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); @@ -256,8 +261,7 @@ static int hdr_check(struct rxe_pkt_info *pkt) return 0; err2: - if (qp) - rxe_drop_ref(qp); + rxe_drop_ref(qp); err1: return -EINVAL; } @@ -328,6 +332,7 @@ err1: static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; @@ -339,9 +344,14 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; } - return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, - IB_GID_TYPE_ROCE_UDP_ENCAP, - 1, skb->dev, NULL); + gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid, + IB_GID_TYPE_ROCE_UDP_ENCAP, + 1, skb->dev); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + + rdma_put_gid_attr(gid_attr); + return 0; } /* rxe_rcv is called from the interface driver */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5b57de30dee4..aa5833318372 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -884,6 +884,11 @@ static enum resp_states do_complete(struct rxe_qp *qp, else wc->network_hdr_type = RDMA_NETWORK_IPV6; + if (is_vlan_dev(skb->dev)) { + wc->wc_flags |= IB_WC_WITH_VLAN; + wc->vlan_id = vlan_dev_vlan_id(skb->dev); + } + if (pkt->mask & RXE_IMMDT_MASK) { wc->wc_flags |= IB_WC_WITH_IMM; wc->ex.imm_data = immdt_imm(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9deafc3aa6af..f5b1e0ad6142 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -222,25 +222,11 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd) return 0; } -static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, - struct rxe_av *av) +static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, + struct rxe_av *av) { - int err; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; - - err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr), - rdma_ah_read_grh(attr)->sgid_index, &sgid, - &sgid_attr); - if (err) { - pr_err("Failed to query sgid. err = %d\n", err); - return err; - } - rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr); - rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid); - dev_put(sgid_attr.ndev); - return 0; + rxe_av_fill_ip_info(av, attr); } static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, @@ -255,28 +241,17 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, err = rxe_av_chk_attr(rxe, attr); if (err) - goto err1; + return ERR_PTR(err); ah = rxe_alloc(&rxe->ah_pool); - if (!ah) { - err = -ENOMEM; - goto err1; - } + if (!ah) + return ERR_PTR(-ENOMEM); rxe_add_ref(pd); ah->pd = pd; - err = rxe_init_av(rxe, attr, &ah->av); - if (err) - goto err2; - + rxe_init_av(rxe, attr, &ah->av); return &ah->ibah; - -err2: - rxe_drop_ref(pd); - rxe_drop_ref(ah); -err1: - return ERR_PTR(err); } static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) @@ -289,10 +264,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) if (err) return err; - err = rxe_init_av(rxe, attr, &ah->av); - if (err) - return err; - + rxe_init_av(rxe, attr, &ah->av); return 0; } @@ -315,7 +287,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah) return 0; } -static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr) +static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) { int err; int i; @@ -466,8 +438,8 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq) return 0; } -static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; unsigned long flags; @@ -582,7 +554,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp) return 0; } -static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr, +static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length) { int num_sge = ibwr->num_sge; @@ -610,7 +582,7 @@ err1: } static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, - struct ib_send_wr *ibwr) + const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; wr->num_sge = ibwr->num_sge; @@ -665,7 +637,7 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, } } -static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, +static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { @@ -713,7 +685,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, return 0; } -static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, +static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, u32 length) { int err; @@ -754,8 +726,8 @@ err1: return err; } -static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; unsigned int mask; @@ -797,8 +769,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, return err; } -static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct rxe_qp *qp = to_rqp(ibqp); @@ -820,8 +792,8 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return rxe_post_send_kernel(qp, wr, bad_wr); } -static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct rxe_qp *qp = to_rqp(ibqp); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a50b062ed13e..1abe3c62f106 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -91,11 +91,9 @@ enum { IPOIB_STOP_REAPER = 7, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, - IPOIB_STOP_NEIGH_GC = 11, IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_FLAG_DEV_ADDR_SET = 13, IPOIB_FLAG_DEV_ADDR_CTRL = 14, - IPOIB_FLAG_GOING_DOWN = 15, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -252,11 +250,11 @@ struct ipoib_cm_tx { struct ipoib_neigh *neigh; struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; + unsigned int tx_head; + unsigned int tx_tail; unsigned long flags; u32 mtu; - unsigned max_send_sge; + unsigned int max_send_sge; }; struct ipoib_cm_rx_buf { @@ -325,15 +323,22 @@ struct ipoib_dev_priv { spinlock_t lock; struct net_device *dev; + void (*next_priv_destructor)(struct net_device *dev); struct napi_struct send_napi; struct napi_struct recv_napi; unsigned long flags; + /* + * This protects access to the child_intfs list. + * To READ from child_intfs the RTNL or vlan_rwsem read side must be + * held. To WRITE RTNL and the vlan_rwsem write side must be held (in + * that order) This lock exists because we have a few contexts where + * we need the child_intfs, but do not want to grab the RTNL. + */ struct rw_semaphore vlan_rwsem; struct mutex mcast_mutex; - struct mutex sysfs_mutex; struct rb_root path_tree; struct list_head path_list; @@ -373,8 +378,8 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; + unsigned int tx_head; + unsigned int tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_ud_wr tx_wr; struct ib_wc send_wc[MAX_SEND_CQE]; @@ -404,7 +409,7 @@ struct ipoib_dev_priv { #endif u64 hca_caps; struct ipoib_ethtool_st ethtool; - unsigned max_send_sge; + unsigned int max_send_sge; bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; }; @@ -414,7 +419,7 @@ struct ipoib_ah { struct ib_ah *ah; struct list_head list; struct kref ref; - unsigned last_send; + unsigned int last_send; int valid; }; @@ -483,6 +488,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) kref_put(&ah->ref, ipoib_free_ah); } int ipoib_open(struct net_device *dev); +void ipoib_intf_free(struct net_device *dev); int ipoib_add_pkey_attr(struct net_device *dev); int ipoib_add_umcast_attr(struct net_device *dev); @@ -510,9 +516,6 @@ void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); void ipoib_pkey_dev_check_presence(struct net_device *dev); -int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_dev_cleanup(struct net_device *dev); - void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_carrier_on_task(struct work_struct *work); void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); @@ -600,7 +603,6 @@ void ipoib_pkey_open(struct ipoib_dev_priv *priv); void ipoib_drain_cq(struct net_device *dev); void ipoib_set_ethtool_ops(struct net_device *dev); -void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca); #define IPOIB_FLAGS_RC 0x80 #define IPOIB_FLAGS_UC 0x40 @@ -729,7 +731,7 @@ void ipoib_cm_dev_stop(struct net_device *dev) static inline int ipoib_cm_dev_init(struct net_device *dev) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6535d9beb24d..ea01b8dd2be6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -78,7 +78,7 @@ static struct ib_send_wr ipoib_cm_rx_drain_wr = { }; static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event); + const struct ib_cm_event *event); static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, u64 mapping[IPOIB_CM_RX_SG]) @@ -94,7 +94,6 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int i, ret; priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; @@ -102,7 +101,7 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; - ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); + ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, @@ -120,7 +119,6 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, struct ib_sge *sge, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int i, ret; wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; @@ -128,7 +126,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, for (i = 0; i < IPOIB_CM_RX_SG; ++i) sge[i].addr = rx->rx_ring[id].mapping[i]; - ret = ib_post_recv(rx->qp, wr, &bad_wr); + ret = ib_post_recv(rx->qp, wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, @@ -212,7 +210,6 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev, static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) { - struct ib_send_wr *bad_wr; struct ipoib_cm_rx *p; /* We only reserved 1 extra slot in CQ for drain WRs, so @@ -227,7 +224,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) */ p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID; - if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) + if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL)) ipoib_warn(priv, "failed to post drain wr\n"); list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); @@ -275,7 +272,7 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, static int ipoib_cm_modify_rx_qp(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, - unsigned psn) + unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_attr qp_attr; @@ -363,7 +360,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i if (!rx->rx_ring) return -ENOMEM; - t = kmalloc(sizeof *t, GFP_KERNEL); + t = kmalloc(sizeof(*t), GFP_KERNEL); if (!t) { ret = -ENOMEM; goto err_free_1; @@ -421,8 +418,9 @@ err_free_1: } static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, - struct ib_qp *qp, struct ib_cm_req_event_param *req, - unsigned psn) + struct ib_qp *qp, + const struct ib_cm_req_event_param *req, + unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_data data = {}; @@ -432,7 +430,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); rep.private_data = &data; - rep.private_data_len = sizeof data; + rep.private_data_len = sizeof(data); rep.flow_control = 0; rep.rnr_retry_count = req->rnr_retry_count; rep.srq = ipoib_cm_has_srq(dev); @@ -441,16 +439,17 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, return ib_send_cm_rep(cm_id, &rep); } -static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct net_device *dev = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_rx *p; - unsigned psn; + unsigned int psn; int ret; ipoib_dbg(priv, "REQ arrived\n"); - p = kzalloc(sizeof *p, GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; p->dev = dev; @@ -503,7 +502,7 @@ err_qp: } static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ipoib_cm_rx *p; struct ipoib_dev_priv *priv; @@ -547,7 +546,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, 0, PAGE_SIZE); --skb_shinfo(skb)->nr_frags; } else { - size = min(length, (unsigned) PAGE_SIZE); + size = min_t(unsigned int, length, PAGE_SIZE); skb_frag_size_set(frag, size); skb->data_len += size; @@ -641,8 +640,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } } - frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, - (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; + frags = PAGE_ALIGN(wc->byte_len - + min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) / + PAGE_SIZE; newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping, GFP_ATOMIC); @@ -657,7 +657,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); - memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); + memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping)); ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); @@ -698,13 +698,11 @@ static inline int post_send(struct ipoib_dev_priv *priv, unsigned int wr_id, struct ipoib_tx_buf *tx_req) { - struct ib_send_wr *bad_wr; - ipoib_build_sge(priv, tx_req); priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM; - return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr); + return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL); } void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) @@ -712,7 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_tx_buf *tx_req; int rc; - unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb); + unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb); if (unlikely(skb->len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -982,7 +980,8 @@ void ipoib_cm_dev_stop(struct net_device *dev) cancel_delayed_work(&priv->cm.stale_task); } -static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct ipoib_cm_tx *p = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(p->dev); @@ -1068,8 +1067,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) - attr.cap.max_send_sge = - min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); + attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, + MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); tx->max_send_sge = attr.cap.max_send_sge; @@ -1094,7 +1093,7 @@ static int ipoib_cm_send_req(struct net_device *dev, req.qp_num = qp->qp_num; req.qp_type = qp->qp_type; req.private_data = &data; - req.private_data_len = sizeof data; + req.private_data_len = sizeof(data); req.flow_control = 0; req.starting_psn = 0; /* FIXME */ @@ -1152,7 +1151,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, ret = -ENOMEM; goto err_tx; } - memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring)); p->qp = ipoib_cm_create_tx_qp(p->dev, p); memalloc_noio_restore(noio_flag); @@ -1248,7 +1247,7 @@ timeout: } static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ipoib_cm_tx *tx = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(tx->dev); @@ -1305,7 +1304,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_tx *tx; - tx = kzalloc(sizeof *tx, GFP_ATOMIC); + tx = kzalloc(sizeof(*tx), GFP_ATOMIC); if (!tx) return NULL; @@ -1370,7 +1369,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh->daddr + QPN_AND_OPTIONS_OFFSET); goto free_neigh; } - memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); + memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec)); spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); @@ -1428,7 +1427,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) struct net_device *dev = priv->dev; struct sk_buff *skb; unsigned long flags; - unsigned mtu = priv->mcast_mtu; + unsigned int mtu = priv->mcast_mtu; netif_tx_lock_bh(dev); spin_lock_irqsave(&priv->lock, flags); @@ -1518,19 +1517,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, { struct net_device *dev = to_net_dev(d); int ret; - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags)) - return -EPERM; - - if (!mutex_trylock(&priv->sysfs_mutex)) - return restart_syscall(); if (!rtnl_trylock()) { - mutex_unlock(&priv->sysfs_mutex); return restart_syscall(); } + if (dev->reg_state != NETREG_REGISTERED) { + rtnl_unlock(); + return -EPERM; + } + ret = ipoib_set_mode(dev, buf); /* The assumption is that the function ipoib_set_mode returned @@ -1539,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, */ if (ret != -EBUSY) rtnl_unlock(); - mutex_unlock(&priv->sysfs_mutex); return (!ret || ret == -EBUSY) ? count : ret; } @@ -1564,7 +1559,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); if (IS_ERR(priv->cm.srq)) { - if (PTR_ERR(priv->cm.srq) != -ENOSYS) + if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP) pr_warn("%s: failed to allocate SRQ, error %ld\n", priv->ca->name, PTR_ERR(priv->cm.srq)); priv->cm.srq = NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 2706bf26cbac..83429925dfc6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -102,7 +102,7 @@ static int ipoib_set_coalesce(struct net_device *dev, ret = rdma_set_cq_moderation(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); - if (ret && ret != -ENOSYS) { + if (ret && ret != -EOPNOTSUPP) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index ea302b054601..178488028734 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -262,15 +262,15 @@ static const struct file_operations ipoib_path_fops = { void ipoib_create_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - char name[IFNAMSIZ + sizeof "_path"]; + char name[IFNAMSIZ + sizeof("_path")]; - snprintf(name, sizeof name, "%s_mcg", dev->name); + snprintf(name, sizeof(name), "%s_mcg", dev->name); priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_mcg_fops); if (!priv->mcg_dentry) ipoib_warn(priv, "failed to create mcg debug file\n"); - snprintf(name, sizeof name, "%s_path", dev->name); + snprintf(name, sizeof(name), "%s_path", dev->name); priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_path_fops); if (!priv->path_dentry) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f47f9ace1f48..9006a13af1de 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -40,6 +40,7 @@ #include <linux/ip.h> #include <linux/tcp.h> +#include <rdma/ib_cache.h> #include "ipoib.h" @@ -57,7 +58,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ipoib_ah *ah; struct ib_ah *vah; - ah = kmalloc(sizeof *ah, GFP_KERNEL); + ah = kmalloc(sizeof(*ah), GFP_KERNEL); if (!ah) return ERR_PTR(-ENOMEM); @@ -100,7 +101,6 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int ret; priv->rx_wr.wr_id = id | IPOIB_OP_RECV; @@ -108,7 +108,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; - ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping); @@ -202,7 +202,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } memcpy(mapping, priv->rx_ring[wr_id].mapping, - IPOIB_UD_RX_SG * sizeof *mapping); + IPOIB_UD_RX_SG * sizeof(*mapping)); /* * If we can't allocate a new RX buffer, dump @@ -541,7 +541,6 @@ static inline int post_send(struct ipoib_dev_priv *priv, struct ipoib_tx_buf *tx_req, void *head, int hlen) { - struct ib_send_wr *bad_wr; struct sk_buff *skb = tx_req->skb; ipoib_build_sge(priv, tx_req); @@ -558,7 +557,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, } else priv->tx_wr.wr.opcode = IB_WR_SEND; - return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr); + return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL); } int ipoib_send(struct net_device *dev, struct sk_buff *skb, @@ -568,7 +567,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; int hlen, rc; void *phead; - unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb); + unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb); if (skb_is_gso(skb)) { hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); @@ -1069,7 +1068,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) bool ret = false; netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); - if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) + if (rdma_query_gid(priv->ca, priv->port, 0, &gid0)) return false; netif_addr_lock_bh(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 26cde95bc0f3..e3d28f9ad9c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -215,11 +215,6 @@ static int ipoib_stop(struct net_device *dev) return 0; } -static void ipoib_uninit(struct net_device *dev) -{ - ipoib_dev_cleanup(dev); -} - static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -634,7 +629,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) { struct ipoib_path_iter *iter; - iter = kmalloc(sizeof *iter, GFP_KERNEL); + iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return NULL; @@ -770,8 +765,10 @@ static void path_rec_completion(int status, struct rdma_ah_attr av; if (!ib_init_ah_attr_from_path(priv->ca, priv->port, - pathrec, &av)) + pathrec, &av, NULL)) { ah = ipoib_create_ah(dev, priv->pd, &av); + rdma_destroy_ah_attr(&av); + } } spin_lock_irqsave(&priv->lock, flags); @@ -883,7 +880,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) if (!priv->broadcast) return NULL; - path = kzalloc(sizeof *path, GFP_ATOMIC); + path = kzalloc(sizeof(*path), GFP_ATOMIC); if (!path) return NULL; @@ -1199,11 +1196,13 @@ static void ipoib_timeout(struct net_device *dev) static int ipoib_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - const void *daddr, const void *saddr, unsigned len) + const void *daddr, + const void *saddr, + unsigned int len) { struct ipoib_header *header; - header = skb_push(skb, sizeof *header); + header = skb_push(skb, sizeof(*header)); header->proto = htons(type); header->reserved = 0; @@ -1306,9 +1305,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) int i; LIST_HEAD(remove_list); - if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - return; - spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, @@ -1320,9 +1316,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) /* neigh is obsolete if it was idle for two GC periods */ dt = 2 * arp_tbl.gc_interval; neigh_obsolete = jiffies - dt; - /* handle possible race condition */ - if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - goto out_unlock; for (i = 0; i < htbl->size; i++) { struct ipoib_neigh *neigh; @@ -1360,9 +1353,8 @@ static void ipoib_reap_neigh(struct work_struct *work) __ipoib_reap_neigh(priv); - if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - queue_delayed_work(priv->wq, &priv->neigh_reap_task, - arp_tbl.gc_interval); + queue_delayed_work(priv->wq, &priv->neigh_reap_task, + arp_tbl.gc_interval); } @@ -1371,7 +1363,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, { struct ipoib_neigh *neigh; - neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); + neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC); if (!neigh) return NULL; @@ -1524,9 +1516,8 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); if (!htbl) return -ENOMEM; - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); size = roundup_pow_of_two(arp_tbl.gc_thresh3); - buckets = kcalloc(size, sizeof(*buckets), GFP_KERNEL); + buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL); if (!buckets) { kfree(htbl); return -ENOMEM; @@ -1539,7 +1530,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) atomic_set(&ntbl->entries, 0); /* start garbage collection */ - clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); queue_delayed_work(priv->wq, &priv->neigh_reap_task, arp_tbl.gc_interval); @@ -1554,7 +1544,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct ipoib_neigh __rcu **buckets = htbl->buckets; struct ipoib_neigh_table *ntbl = htbl->ntbl; - kfree(buckets); + kvfree(buckets); kfree(htbl); complete(&ntbl->deleted); } @@ -1649,15 +1639,11 @@ out_unlock: static void ipoib_neigh_hash_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - int stopped; ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); init_completion(&priv->ntbl.deleted); - /* Stop GC if called at init fail need to cancel work */ - stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - if (!stopped) - cancel_delayed_work(&priv->neigh_reap_task); + cancel_delayed_work_sync(&priv->neigh_reap_task); ipoib_flush_neighs(priv); @@ -1755,13 +1741,11 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd); } -int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) +static int ipoib_dev_init(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); int ret = -ENOMEM; - priv->ca = ca; - priv->port = port; priv->qp = NULL; /* @@ -1777,7 +1761,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* create pd, which used both for control and datapath*/ priv->pd = ib_alloc_pd(priv->ca, 0); if (IS_ERR(priv->pd)) { - pr_warn("%s: failed to allocate PD\n", ca->name); + pr_warn("%s: failed to allocate PD\n", priv->ca->name); goto clean_wq; } @@ -1787,7 +1771,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out_free_pd; } - if (ipoib_neigh_hash_init(priv) < 0) { + ret = ipoib_neigh_hash_init(priv); + if (ret) { pr_warn("%s failed to init neigh hash\n", dev->name); goto out_dev_uninit; } @@ -1796,12 +1781,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) if (ipoib_ib_dev_open(dev)) { pr_warn("%s failed to open device\n", dev->name); ret = -ENODEV; - goto out_dev_uninit; + goto out_hash_uninit; } } return 0; +out_hash_uninit: + ipoib_neigh_hash_uninit(dev); + out_dev_uninit: ipoib_ib_dev_cleanup(dev); @@ -1821,21 +1809,151 @@ out: return ret; } -void ipoib_dev_cleanup(struct net_device *dev) +/* + * This must be called before doing an unregister_netdev on a parent device to + * shutdown the IB event handler. + */ +static void ipoib_parent_unregister_pre(struct net_device *ndev) { - struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv; - LIST_HEAD(head); + struct ipoib_dev_priv *priv = ipoib_priv(ndev); - ASSERT_RTNL(); + /* + * ipoib_set_mac checks netif_running before pushing work, clearing + * running ensures the it will not add more work. + */ + rtnl_lock(); + dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); + rtnl_unlock(); - /* Delete any child interfaces first */ - list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { - /* Stop GC on child */ - set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); - cancel_delayed_work(&cpriv->neigh_reap_task); - unregister_netdevice_queue(cpriv->dev, &head); + /* ipoib_event() cannot be running once this returns */ + ib_unregister_event_handler(&priv->event_handler); + + /* + * Work on the queue grabs the rtnl lock, so this cannot be done while + * also holding it. + */ + flush_workqueue(ipoib_workqueue); +} + +static void ipoib_set_dev_features(struct ipoib_dev_priv *priv) +{ + priv->hca_caps = priv->ca->attrs.device_cap_flags; + + if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { + priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; + + if (priv->hca_caps & IB_DEVICE_UD_TSO) + priv->dev->hw_features |= NETIF_F_TSO; + + priv->dev->features |= priv->dev->hw_features; + } +} + +static int ipoib_parent_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + struct ib_port_attr attr; + int result; + + result = ib_query_port(priv->ca, priv->port, &attr); + if (result) { + pr_warn("%s: ib_query_port %d failed\n", priv->ca->name, + priv->port); + return result; + } + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + + result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey); + if (result) { + pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n", + priv->ca->name, priv->port, result); + return result; } - unregister_netdevice_many(&head); + + result = rdma_query_gid(priv->ca, priv->port, 0, &priv->local_gid); + if (result) { + pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n", + priv->ca->name, priv->port, result); + return result; + } + memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, + sizeof(union ib_gid)); + + SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); + priv->dev->dev_id = priv->port - 1; + + return 0; +} + +static void ipoib_child_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + dev_hold(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_add_tail(&priv->list, &ppriv->child_intfs); + up_write(&ppriv->vlan_rwsem); + + priv->max_ib_mtu = ppriv->max_ib_mtu; + set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); + memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); + memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); +} + +static int ipoib_ndo_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + int rc; + + if (priv->parent) { + ipoib_child_init(ndev); + } else { + rc = ipoib_parent_init(ndev); + if (rc) + return rc; + } + + /* MTU will be reset when mcast join happens */ + ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = ndev->mtu; + ndev->max_mtu = IPOIB_CM_MTU; + + ndev->neigh_priv_len = sizeof(struct ipoib_neigh); + + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + priv->pkey |= 0x8000; + + ndev->broadcast[8] = priv->pkey >> 8; + ndev->broadcast[9] = priv->pkey & 0xff; + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); + + ipoib_set_dev_features(priv); + + rc = ipoib_dev_init(ndev); + if (rc) { + pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n", + priv->ca->name, priv->dev->name, priv->port, rc); + } + + return 0; +} + +static void ipoib_ndo_uninit(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + ASSERT_RTNL(); + + /* + * ipoib_remove_one guarantees the children are removed before the + * parent, and that is the only place where a parent can be removed. + */ + WARN_ON(!list_empty(&priv->child_intfs)); ipoib_neigh_hash_uninit(dev); @@ -1847,6 +1965,16 @@ void ipoib_dev_cleanup(struct net_device *dev) destroy_workqueue(priv->wq); priv->wq = NULL; } + + if (priv->parent) { + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_del(&priv->list); + up_write(&ppriv->vlan_rwsem); + + dev_put(priv->parent); + } } static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state) @@ -1894,7 +2022,8 @@ static const struct header_ops ipoib_header_ops = { }; static const struct net_device_ops ipoib_netdev_ops_pf = { - .ndo_uninit = ipoib_uninit, + .ndo_init = ipoib_ndo_init, + .ndo_uninit = ipoib_ndo_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, .ndo_change_mtu = ipoib_change_mtu, @@ -1913,7 +2042,8 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { }; static const struct net_device_ops ipoib_netdev_ops_vf = { - .ndo_uninit = ipoib_uninit, + .ndo_init = ipoib_ndo_init, + .ndo_uninit = ipoib_ndo_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, .ndo_change_mtu = ipoib_change_mtu, @@ -1945,6 +2075,13 @@ void ipoib_setup_common(struct net_device *dev) netif_keep_dst(dev); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); + + /* + * unregister_netdev always frees the netdev, we use this mode + * consistently to unify all the various unregister paths, including + * those connected to rtnl_link_ops which require it. + */ + dev->needs_free_netdev = true; } static void ipoib_build_priv(struct net_device *dev) @@ -1955,7 +2092,6 @@ static void ipoib_build_priv(struct net_device *dev) spin_lock_init(&priv->lock); init_rwsem(&priv->vlan_rwsem); mutex_init(&priv->mcast_mutex); - mutex_init(&priv->sysfs_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); @@ -1999,9 +2135,7 @@ static struct net_device rn->send = ipoib_send; rn->attach_mcast = ipoib_mcast_attach; rn->detach_mcast = ipoib_mcast_detach; - rn->free_rdma_netdev = free_netdev; rn->hca = hca; - dev->netdev_ops = &ipoib_netdev_default_pf; return dev; @@ -2039,6 +2173,9 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, if (!priv) return NULL; + priv->ca = hca; + priv->port = port; + dev = ipoib_get_netdev(hca, port, name); if (!dev) goto free_priv; @@ -2053,6 +2190,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, rn = netdev_priv(dev); rn->clnt_priv = priv; + + /* + * Only the child register_netdev flows can handle priv_destructor + * being set, so we force it to NULL here and handle manually until it + * is safe to turn on. + */ + priv->next_priv_destructor = dev->priv_destructor; + dev->priv_destructor = NULL; + ipoib_build_priv(dev); return priv; @@ -2061,6 +2207,27 @@ free_priv: return NULL; } +void ipoib_intf_free(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); + + dev->priv_destructor = priv->next_priv_destructor; + if (dev->priv_destructor) + dev->priv_destructor(dev); + + /* + * There are some error flows around register_netdev failing that may + * attempt to call priv_destructor twice, prevent that from happening. + */ + dev->priv_destructor = NULL; + + /* unregister/destroy is very complicated. Make bugs more obvious. */ + rn->clnt_priv = NULL; + + kfree(priv); +} + static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, char *buf) { @@ -2186,12 +2353,6 @@ static ssize_t create_child(struct device *dev, if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000) return -EINVAL; - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. - */ - pkey |= 0x8000; - ret = ipoib_vlan_add(to_net_dev(dev), pkey); return ret ? ret : count; @@ -2223,87 +2384,19 @@ int ipoib_add_pkey_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_pkey); } -void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) -{ - priv->hca_caps = hca->attrs.device_cap_flags; - - if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { - priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; - - if (priv->hca_caps & IB_DEVICE_UD_TSO) - priv->dev->hw_features |= NETIF_F_TSO; - - priv->dev->features |= priv->dev->hw_features; - } -} - static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; - struct ib_port_attr attr; - struct rdma_netdev *rn; - int result = -ENOMEM; + struct net_device *ndev; + int result; priv = ipoib_intf_alloc(hca, port, format); if (!priv) { pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port); - goto alloc_mem_failed; - } - - SET_NETDEV_DEV(priv->dev, hca->dev.parent); - priv->dev->dev_id = port - 1; - - result = ib_query_port(hca, port, &attr); - if (result) { - pr_warn("%s: ib_query_port %d failed\n", hca->name, port); - goto device_init_failed; - } - - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); - - /* MTU will be reset when mcast join happens */ - priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); - priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; - priv->dev->max_mtu = IPOIB_CM_MTU; - - priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh); - - result = ib_query_pkey(hca, port, 0, &priv->pkey); - if (result) { - pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; - } - - ipoib_set_dev_features(priv, hca); - - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. - */ - priv->pkey |= 0x8000; - - priv->dev->broadcast[8] = priv->pkey >> 8; - priv->dev->broadcast[9] = priv->pkey & 0xff; - - result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); - if (result) { - pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; - } - - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, - sizeof(union ib_gid)); - set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - - result = ipoib_dev_init(priv->dev, hca, port); - if (result) { - pr_warn("%s: failed to initialize port %d (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; + return ERR_PTR(-ENOMEM); } + ndev = priv->dev; INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); @@ -2312,46 +2405,43 @@ static struct net_device *ipoib_add_port(const char *format, /* call event handler to ensure pkey in sync */ queue_work(ipoib_workqueue, &priv->flush_heavy); - result = register_netdev(priv->dev); + result = register_netdev(ndev); if (result) { pr_warn("%s: couldn't register ipoib port %d; error %d\n", hca->name, port, result); - goto register_failed; + + ipoib_parent_unregister_pre(ndev); + ipoib_intf_free(ndev); + free_netdev(ndev); + + return ERR_PTR(result); } - result = -ENOMEM; - if (ipoib_cm_add_mode_attr(priv->dev)) + /* + * We cannot set priv_destructor before register_netdev because we + * need priv to be always valid during the error flow to execute + * ipoib_parent_unregister_pre(). Instead handle it manually and only + * enter priv_destructor mode once we are completely registered. + */ + ndev->priv_destructor = ipoib_intf_free; + + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) + if (device_create_file(&ndev->dev, &dev_attr_create_child)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) + if (device_create_file(&ndev->dev, &dev_attr_delete_child)) goto sysfs_failed; - return priv->dev; + return ndev; sysfs_failed: - unregister_netdev(priv->dev); - -register_failed: - ib_unregister_event_handler(&priv->event_handler); - flush_workqueue(ipoib_workqueue); - /* Stop GC if started before flush */ - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - cancel_delayed_work(&priv->neigh_reap_task); - flush_workqueue(priv->wq); - ipoib_dev_cleanup(priv->dev); - -device_init_failed: - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - -alloc_mem_failed: - return ERR_PTR(result); + ipoib_parent_unregister_pre(ndev); + unregister_netdev(ndev); + return ERR_PTR(-ENOMEM); } static void ipoib_add_one(struct ib_device *device) @@ -2362,7 +2452,7 @@ static void ipoib_add_one(struct ib_device *device) int p; int count = 0; - dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); + dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL); if (!dev_list) return; @@ -2396,39 +2486,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) return; list_for_each_entry_safe(priv, tmp, dev_list, list) { - struct rdma_netdev *parent_rn = netdev_priv(priv->dev); - - ib_unregister_event_handler(&priv->event_handler); - flush_workqueue(ipoib_workqueue); - - /* mark interface in the middle of destruction */ - set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags); + LIST_HEAD(head); + ipoib_parent_unregister_pre(priv->dev); rtnl_lock(); - dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); - rtnl_unlock(); - - /* Stop GC */ - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - cancel_delayed_work(&priv->neigh_reap_task); - flush_workqueue(priv->wq); - - /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */ - mutex_lock(&priv->sysfs_mutex); - unregister_netdev(priv->dev); - mutex_unlock(&priv->sysfs_mutex); - - parent_rn->free_rdma_netdev(priv->dev); - list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { - struct rdma_netdev *child_rn; + list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, + list) + unregister_netdevice_queue(cpriv->dev, &head); + unregister_netdevice_queue(priv->dev, &head); + unregister_netdevice_many(&head); - child_rn = netdev_priv(cpriv->dev); - child_rn->free_rdma_netdev(cpriv->dev); - kfree(cpriv); - } - - kfree(priv); + rtnl_unlock(); } kfree(dev_list); @@ -2476,8 +2545,7 @@ static int __init ipoib_init_module(void) * its private workqueue, and we only queue up flush events * on our global flush workqueue. This avoids the deadlocks. */ - ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", - WQ_MEM_RECLAIM); + ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 6709328d90f8..b9e9562f5034 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -140,7 +140,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, { struct ipoib_mcast *mcast; - mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); + mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC); if (!mcast) return NULL; @@ -822,6 +822,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) if (neigh && list_empty(&neigh->list)) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; + neigh->ah->valid = 1; list_add_tail(&neigh->list, &mcast->neigh_list); } } @@ -917,7 +918,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast)) continue; - memcpy(mgid.raw, ha->addr + 4, sizeof mgid); + memcpy(mgid.raw, ha->addr + 4, sizeof(mgid)); mcast = __ipoib_mcast_find(dev, &mgid); if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -997,7 +998,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) { struct ipoib_mcast_iter *iter; - iter = kmalloc(sizeof *iter, GFP_KERNEL); + iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 3e44087935ae..d4d553a51fa9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -122,15 +122,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, } else child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); - if (child_pkey == 0 || child_pkey == 0x8000) - return -EINVAL; - - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. - */ - child_pkey |= 0x8000; - err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), child_pkey, IPOIB_RTNL_CHILD); @@ -139,19 +130,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, return err; } -static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head) -{ - struct ipoib_dev_priv *priv, *ppriv; - - priv = ipoib_priv(dev); - ppriv = ipoib_priv(priv->parent); - - down_write(&ppriv->vlan_rwsem); - unregister_netdevice_queue(dev, head); - list_del(&priv->list); - up_write(&ppriv->vlan_rwsem); -} - static size_t ipoib_get_size(const struct net_device *dev) { return nla_total_size(2) + /* IFLA_IPOIB_PKEY */ @@ -167,7 +145,6 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .setup = ipoib_setup_common, .newlink = ipoib_new_child_link, .changelink = ipoib_changelink, - .dellink = ipoib_unregister_child_dev, .get_size = ipoib_get_size, .fill_info = ipoib_fill_info, }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 984a88096f39..9f36ca786df8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -52,7 +52,7 @@ int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca, if (set_qkey) { ret = -ENOMEM; - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); if (!qp_attr) goto out; @@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .cap = { .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, - .max_send_sge = min_t(u32, priv->ca->attrs.max_sge, + .max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, MAX_SKB_FRAGS + 1), .max_recv_sge = IPOIB_UD_RX_SG }, @@ -168,8 +168,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) else size += ipoib_recvq_size * ipoib_max_conn_qp; } else - if (ret != -ENOSYS) - return -ENODEV; + if (ret != -EOPNOTSUPP) + return ret; req_vec = (priv->port - 1) * 2; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 55a9b71ed05a..341753fbda54 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -50,68 +50,112 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +static bool is_child_unique(struct ipoib_dev_priv *ppriv, + struct ipoib_dev_priv *priv) +{ + struct ipoib_dev_priv *tpriv; + + ASSERT_RTNL(); + + /* + * Since the legacy sysfs interface uses pkey for deletion it cannot + * support more than one interface with the same pkey, it creates + * ambiguity. The RTNL interface deletes using the netdev so it does + * not have a problem to support duplicated pkeys. + */ + if (priv->child_type != IPOIB_LEGACY_CHILD) + return true; + + /* + * First ensure this isn't a duplicate. We check the parent device and + * then all of the legacy child interfaces to make sure the Pkey + * doesn't match. + */ + if (ppriv->pkey == priv->pkey) + return false; + + list_for_each_entry(tpriv, &ppriv->child_intfs, list) { + if (tpriv->pkey == priv->pkey && + tpriv->child_type == IPOIB_LEGACY_CHILD) + return false; + } + + return true; +} + +/* + * NOTE: If this function fails then the priv->dev will remain valid, however + * priv can have been freed and must not be touched by caller in the error + * case. + * + * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to + * free the net_device (just as rtnl_newlink does) otherwise the net_device + * will be freed when the rtnl is unlocked. + */ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, u16 pkey, int type) { + struct net_device *ndev = priv->dev; int result; - priv->max_ib_mtu = ppriv->max_ib_mtu; - /* MTU will be reset when mcast join happens */ - priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); - priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; - priv->parent = ppriv->dev; - set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); + ASSERT_RTNL(); + + /* + * Racing with unregister of the parent must be prevented by the + * caller. + */ + WARN_ON(ppriv->dev->reg_state != NETREG_REGISTERED); - ipoib_set_dev_features(priv, ppriv->ca); + if (pkey == 0 || pkey == 0x8000) { + result = -EINVAL; + goto out_early; + } + priv->parent = ppriv->dev; priv->pkey = pkey; + priv->child_type = type; - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); - memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); - set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - priv->dev->broadcast[8] = pkey >> 8; - priv->dev->broadcast[9] = pkey & 0xff; - - result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port); - if (result < 0) { - ipoib_warn(ppriv, "failed to initialize subinterface: " - "device %s, port %d", - ppriv->ca->name, ppriv->port); - goto err; + if (!is_child_unique(ppriv, priv)) { + result = -ENOTUNIQ; + goto out_early; } - result = register_netdevice(priv->dev); + /* We do not need to touch priv if register_netdevice fails */ + ndev->priv_destructor = ipoib_intf_free; + + result = register_netdevice(ndev); if (result) { ipoib_warn(priv, "failed to initialize; error %i", result); - goto register_failed; + + /* + * register_netdevice sometimes calls priv_destructor, + * sometimes not. Make sure it was done. + */ + goto out_early; } /* RTNL childs don't need proprietary sysfs entries */ if (type == IPOIB_LEGACY_CHILD) { - if (ipoib_cm_add_mode_attr(priv->dev)) + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_parent)) + if (device_create_file(&ndev->dev, &dev_attr_parent)) goto sysfs_failed; } - priv->child_type = type; - list_add_tail(&priv->list, &ppriv->child_intfs); - return 0; sysfs_failed: - result = -ENOMEM; unregister_netdevice(priv->dev); + return -ENOMEM; -register_failed: - ipoib_dev_cleanup(priv->dev); - -err: +out_early: + if (ndev->priv_destructor) + ndev->priv_destructor(ndev); return result; } @@ -119,129 +163,124 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv; char intf_name[IFNAMSIZ]; - struct ipoib_dev_priv *tpriv; + struct net_device *ndev; int result; if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = ipoib_priv(pdev); - - if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) - return -EPERM; - - snprintf(intf_name, sizeof intf_name, "%s.%04x", - ppriv->dev->name, pkey); - - if (!mutex_trylock(&ppriv->sysfs_mutex)) + if (!rtnl_trylock()) return restart_syscall(); - if (!rtnl_trylock()) { - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } - - if (!down_write_trylock(&ppriv->vlan_rwsem)) { + if (pdev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); + return -EPERM; } + ppriv = ipoib_priv(pdev); + + snprintf(intf_name, sizeof(intf_name), "%s.%04x", + ppriv->dev->name, pkey); + priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); if (!priv) { result = -ENOMEM; goto out; } - - /* - * First ensure this isn't a duplicate. We check the parent device and - * then all of the legacy child interfaces to make sure the Pkey - * doesn't match. - */ - if (ppriv->pkey == pkey) { - result = -ENOTUNIQ; - goto out; - } - - list_for_each_entry(tpriv, &ppriv->child_intfs, list) { - if (tpriv->pkey == pkey && - tpriv->child_type == IPOIB_LEGACY_CHILD) { - result = -ENOTUNIQ; - goto out; - } - } + ndev = priv->dev; result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); + if (result && ndev->reg_state == NETREG_UNINITIALIZED) + free_netdev(ndev); + out: - up_write(&ppriv->vlan_rwsem); rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - if (result && priv) { - struct rdma_netdev *rn; + return result; +} + +struct ipoib_vlan_delete_work { + struct work_struct work; + struct net_device *dev; +}; + +/* + * sysfs callbacks of a netdevice cannot obtain the rtnl lock as + * unregister_netdev ultimately deletes the sysfs files while holding the rtnl + * lock. This deadlocks the system. + * + * A callback can use rtnl_trylock to avoid the deadlock but it cannot call + * unregister_netdev as that internally takes and releases the rtnl_lock. So + * instead we find the netdev to unregister and then do the actual unregister + * from the global work queue where we can obtain the rtnl_lock safely. + */ +static void ipoib_vlan_delete_task(struct work_struct *work) +{ + struct ipoib_vlan_delete_work *pwork = + container_of(work, struct ipoib_vlan_delete_work, work); + struct net_device *dev = pwork->dev; + + rtnl_lock(); + + /* Unregistering tasks can race with another task or parent removal */ + if (dev->reg_state == NETREG_REGISTERED) { + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); + ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); + unregister_netdevice(dev); } - return result; + rtnl_unlock(); + + kfree(pwork); } int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv, *tpriv; - struct net_device *dev = NULL; + int rc; if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = ipoib_priv(pdev); - - if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) - return -EPERM; - - if (!mutex_trylock(&ppriv->sysfs_mutex)) + if (!rtnl_trylock()) return restart_syscall(); - if (!rtnl_trylock()) { - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } - - if (!down_write_trylock(&ppriv->vlan_rwsem)) { + if (pdev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); + return -EPERM; } + ppriv = ipoib_priv(pdev); + + rc = -ENODEV; list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { if (priv->pkey == pkey && priv->child_type == IPOIB_LEGACY_CHILD) { - list_del(&priv->list); - dev = priv->dev; + struct ipoib_vlan_delete_work *work; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + rc = -ENOMEM; + goto out; + } + + down_write(&ppriv->vlan_rwsem); + list_del_init(&priv->list); + up_write(&ppriv->vlan_rwsem); + work->dev = priv->dev; + INIT_WORK(&work->work, ipoib_vlan_delete_task); + queue_work(ipoib_workqueue, &work->work); + + rc = 0; break; } } - up_write(&ppriv->vlan_rwsem); - - if (dev) { - ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); - unregister_netdevice(dev); - } +out: rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - - if (dev) { - struct rdma_netdev *rn; - - rn = netdev_priv(dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - return 0; - } - return -ENODEV; + return rc; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9a6434c31db2..3fecd87c9f2b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -610,12 +610,10 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, uint32_t initial_cmdsn) { struct iscsi_cls_session *cls_session; - struct iscsi_session *session; struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; struct ib_conn *ib_conn; u32 max_fr_sectors; - u16 max_cmds; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) @@ -633,8 +631,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, */ if (ep) { iser_conn = ep->dd_data; - max_cmds = iser_conn->max_cmds; shost->sg_tablesize = iser_conn->scsi_sg_tablesize; + shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds); mutex_lock(&iser_conn->state_mutex); if (iser_conn->state != ISER_CONN_UP) { @@ -660,7 +658,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } mutex_unlock(&iser_conn->state_mutex); } else { - max_cmds = ISER_DEF_XMIT_CMDS_MAX; + shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX); if (iscsi_host_add(shost, NULL)) goto free_host; } @@ -676,21 +674,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, iser_warn("max_sectors was reduced from %u to %u\n", iser_max_sectors, shost->max_sectors); - if (cmds_max > max_cmds) { - iser_info("cmds_max changed from %u to %u\n", - cmds_max, max_cmds); - cmds_max = max_cmds; - } - cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, - cmds_max, 0, + shost->can_queue, 0, sizeof(struct iscsi_iser_task), initial_cmdsn, 0); if (!cls_session) goto remove_host; - session = cls_session->dd_data; - shost->can_queue = session->scsi_cmds_max; return cls_session; remove_host: diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 130bf163f066..009be8889d71 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -405,7 +405,8 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); + wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr, + wr); wr->wr.opcode = IB_WR_REG_SIG_MR; wr->wr.wr_cqe = cqe; wr->wr.sg_list = &data_reg->sge; @@ -457,7 +458,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return n < 0 ? n : -EINVAL; } - wr = reg_wr(iser_tx_next_wr(tx_desc)); + wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr); wr->wr.opcode = IB_WR_REG_MR; wr->wr.wr_cqe = cqe; wr->wr.send_flags = 0; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 616d978cbf2b..b686a4aaffe8 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1022,7 +1022,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) { struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_login_desc *desc = &iser_conn->login_desc; - struct ib_recv_wr wr, *wr_failed; + struct ib_recv_wr wr; int ib_ret; desc->sge.addr = desc->rsp_dma; @@ -1036,7 +1036,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) wr.next = NULL; ib_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, &wr, NULL); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count--; @@ -1050,7 +1050,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) struct ib_conn *ib_conn = &iser_conn->ib_conn; unsigned int my_rx_head = iser_conn->rx_desc_head; struct iser_rx_desc *rx_desc; - struct ib_recv_wr *wr, *wr_failed; + struct ib_recv_wr *wr; int i, ib_ret; for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { @@ -1067,7 +1067,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) wr->next = NULL; /* mark end of work requests list */ ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; @@ -1086,7 +1086,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal) { - struct ib_send_wr *bad_wr, *wr = iser_tx_next_wr(tx_desc); + struct ib_send_wr *wr = iser_tx_next_wr(tx_desc); int ib_ret; ib_dma_sync_single_for_device(ib_conn->device->ib_device, @@ -1100,10 +1100,10 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, wr->opcode = IB_WR_SEND; wr->send_flags = signal ? IB_SEND_SIGNALED : 0; - ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr); + ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, NULL); if (ib_ret) iser_err("ib_post_send failed, ret:%d opcode:%d\n", - ib_ret, bad_wr->opcode); + ib_ret, wr->opcode); return ib_ret; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index cccbcf0eb035..f39670c5c25c 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -136,7 +136,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; - attr.cap.max_send_sge = device->ib_device->attrs.max_sge; + attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; @@ -299,7 +299,8 @@ isert_create_device_ib_res(struct isert_device *device) struct ib_device *ib_dev = device->ib_device; int ret; - isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge); + isert_dbg("devattr->max_send_sge: %d devattr->max_recv_sge %d\n", + ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge); isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); ret = isert_alloc_comps(device); @@ -809,7 +810,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) static int isert_post_recvm(struct isert_conn *isert_conn, u32 count) { - struct ib_recv_wr *rx_wr, *rx_wr_failed; + struct ib_recv_wr *rx_wr; int i, ret; struct iser_rx_desc *rx_desc; @@ -825,8 +826,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ - ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, - &rx_wr_failed); + ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed with ret: %d\n", ret); @@ -836,7 +836,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) static int isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) { - struct ib_recv_wr *rx_wr_failed, rx_wr; + struct ib_recv_wr rx_wr; int ret; if (!rx_desc->in_use) { @@ -853,7 +853,7 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) rx_wr.num_sge = 1; rx_wr.next = NULL; - ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed); + ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed with ret: %d\n", ret); @@ -864,7 +864,7 @@ static int isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc) { struct ib_device *ib_dev = isert_conn->cm_id->device; - struct ib_send_wr send_wr, *send_wr_failed; + struct ib_send_wr send_wr; int ret; ib_dma_sync_single_for_device(ib_dev, tx_desc->dma_addr, @@ -879,7 +879,7 @@ isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_des send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - ret = ib_post_send(isert_conn->qp, &send_wr, &send_wr_failed); + ret = ib_post_send(isert_conn->qp, &send_wr, NULL); if (ret) isert_err("ib_post_send() failed, ret: %d\n", ret); @@ -967,7 +967,7 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, static int isert_login_post_recv(struct isert_conn *isert_conn) { - struct ib_recv_wr rx_wr, *rx_wr_fail; + struct ib_recv_wr rx_wr; struct ib_sge sge; int ret; @@ -986,7 +986,7 @@ isert_login_post_recv(struct isert_conn *isert_conn) rx_wr.sg_list = &sge; rx_wr.num_sge = 1; - ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail); + ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed: %d\n", ret); @@ -1829,7 +1829,6 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc) static int isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) { - struct ib_send_wr *wr_failed; int ret; ret = isert_post_recv(isert_conn, isert_cmd->rx_desc); @@ -1838,8 +1837,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) return ret; } - ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, - &wr_failed); + ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL); if (ret) { isert_err("ib_post_send failed with %d\n", ret); return ret; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9786b24b956f..444d16520506 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -57,13 +57,10 @@ #define DRV_NAME "ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "2.0" -#define DRV_RELDATE "July 26, 2015" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_INFO(release_date, DRV_RELDATE); #if !defined(CONFIG_DYNAMIC_DEBUG) #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) @@ -145,7 +142,8 @@ static void srp_remove_one(struct ib_device *device, void *client_data); static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, const char *opname); -static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); +static int srp_ib_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event); static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); @@ -1211,7 +1209,6 @@ static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, u32 rkey) { - struct ib_send_wr *bad_wr; struct ib_send_wr wr = { .opcode = IB_WR_LOCAL_INV, .next = NULL, @@ -1222,7 +1219,7 @@ static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, wr.wr_cqe = &req->reg_cqe; req->reg_cqe.done = srp_inv_rkey_err_done; - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr, NULL); } static void srp_unmap_data(struct scsi_cmnd *scmnd, @@ -1503,7 +1500,6 @@ static int srp_map_finish_fr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_send_wr *bad_wr; struct ib_reg_wr wr; struct srp_fr_desc *desc; u32 rkey; @@ -1567,7 +1563,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, srp_map_desc(state, desc->mr->iova, desc->mr->length, desc->mr->rkey); - err = ib_post_send(ch->qp, &wr.wr, &bad_wr); + err = ib_post_send(ch->qp, &wr.wr, NULL); if (unlikely(err)) { WARN_ON_ONCE(err == -ENOMEM); return err; @@ -2018,7 +2014,7 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) { struct srp_target_port *target = ch->target; struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_send_wr wr; list.addr = iu->dma; list.length = len; @@ -2033,13 +2029,13 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr, NULL); } static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) { struct srp_target_port *target = ch->target; - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; struct ib_sge list; list.addr = iu->dma; @@ -2053,7 +2049,7 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) wr.sg_list = &list; wr.num_sge = 1; - return ib_post_recv(ch->qp, &wr, &bad_wr); + return ib_post_recv(ch->qp, &wr, NULL); } static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) @@ -2558,7 +2554,7 @@ error: } static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event, + const struct ib_cm_event *event, struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; @@ -2643,7 +2639,8 @@ static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, } } -static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int srp_ib_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct srp_rdma_ch *ch = cm_id->context; struct srp_target_port *target = ch->target; @@ -3843,7 +3840,7 @@ static ssize_t srp_create_target(struct device *dev, INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); spin_lock_init(&target->lock); - ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); + ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) goto out; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 1ae638b58b63..f37cbad022a2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -575,8 +575,7 @@ static int srpt_refresh_port(struct srpt_port *sport) sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; - ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid, - NULL); + ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); if (ret) goto err_query_port; @@ -720,7 +719,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) && ioctx_size != sizeof(struct srpt_send_ioctx)); - ring = kmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); + ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); if (!ring) goto out; for (i = 0; i < ring_size; ++i) { @@ -734,7 +733,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, err: while (--i >= 0) srpt_free_ioctx(sdev, ring[i], dma_size, dir); - kfree(ring); + kvfree(ring); ring = NULL; out: return ring; @@ -759,7 +758,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, for (i = 0; i < ring_size; ++i) srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); - kfree(ioctx_ring); + kvfree(ioctx_ring); } /** @@ -817,7 +816,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *ioctx) { struct ib_sge list; - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; BUG_ON(!sdev); list.addr = ioctx->ioctx.dma; @@ -831,9 +830,9 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, wr.num_sge = 1; if (sdev->use_srq) - return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + return ib_post_srq_recv(sdev->srq, &wr, NULL); else - return ib_post_recv(ch->qp, &wr, &bad_wr); + return ib_post_recv(ch->qp, &wr, NULL); } /** @@ -847,7 +846,6 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, */ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) { - struct ib_send_wr *bad_wr; struct ib_rdma_wr wr = { .wr = { .next = NULL, @@ -860,7 +858,7 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, ch->qp->qp_num); - return ib_post_send(ch->qp, &wr.wr, &bad_wr); + return ib_post_send(ch->qp, &wr.wr, NULL); } static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) @@ -1754,13 +1752,15 @@ retry: */ qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr); qp_init->cap.max_rdma_ctxs = sq_size / 2; - qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); + qp_init->cap.max_send_sge = min(attrs->max_send_sge, + SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; if (sdev->use_srq) { qp_init->srq = sdev->srq; } else { qp_init->cap.max_recv_wr = ch->rq_size; - qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; + qp_init->cap.max_recv_sge = min(attrs->max_recv_sge, + SRPT_MAX_SG_PER_WQE); } if (ch->using_rdma_cm) { @@ -1833,8 +1833,7 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch) int ret; if (!srpt_set_ch_state(ch, CH_DRAINING)) { - pr_debug("%s-%d: already closed\n", ch->sess_name, - ch->qp->qp_num); + pr_debug("%s: already closed\n", ch->sess_name); return false; } @@ -1940,8 +1939,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport) list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s-%d because target %s_%d has been disabled\n", - ch->sess_name, ch->qp->qp_num, + pr_info("Closing channel %s because target %s_%d has been disabled\n", + ch->sess_name, sport->sdev->device->name, sport->port); srpt_close_ch(ch); } @@ -2086,7 +2085,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, struct rdma_conn_param rdma_cm; struct ib_cm_rep_param ib_cm; } *rep_param = NULL; - struct srpt_rdma_ch *ch; + struct srpt_rdma_ch *ch = NULL; char i_port_id[36]; u32 it_iu_len; int i, ret; @@ -2233,13 +2232,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); if (IS_ERR_OR_NULL(ch->sess)) { + WARN_ON_ONCE(ch->sess == NULL); ret = PTR_ERR(ch->sess); + ch->sess = NULL; pr_info("Rejected login for initiator %s: ret = %d.\n", ch->sess_name, ret); rej->reason = cpu_to_be32(ret == -ENOMEM ? SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); - goto reject; + goto destroy_ib; } mutex_lock(&sport->mutex); @@ -2278,7 +2279,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n", ret); - goto destroy_ib; + goto reject; } pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess, @@ -2357,8 +2358,11 @@ free_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, ch->max_rsp_size, DMA_TO_DEVICE); + free_ch: - if (ib_cm_id) + if (rdma_cm_id) + rdma_cm_id->context = NULL; + else ib_cm_id->context = NULL; kfree(ch); ch = NULL; @@ -2378,6 +2382,15 @@ reject: ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, rej, sizeof(*rej)); + if (ch && ch->sess) { + srpt_close_ch(ch); + /* + * Tell the caller not to free cm_id since + * srpt_release_channel_work() will do that. + */ + ret = 0; + } + out: kfree(rep_param); kfree(rsp); @@ -2387,7 +2400,7 @@ out: } static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id, - struct ib_cm_req_event_param *param, + const struct ib_cm_req_event_param *param, void *private_data) { char sguid[40]; @@ -2499,7 +2512,8 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) * a non-zero value in any other case will trigger a race with the * ib_destroy_cm_id() call in srpt_release_channel(). */ -static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int srpt_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct srpt_rdma_ch *ch = cm_id->context; int ret; @@ -2609,7 +2623,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) struct srpt_send_ioctx *ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; - struct ib_send_wr *first_wr = NULL, *bad_wr; + struct ib_send_wr *first_wr = NULL; struct ib_cqe *cqe = &ioctx->rdma_cqe; enum srpt_command_state new_state; int ret, i; @@ -2633,7 +2647,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) cqe = NULL; } - ret = ib_post_send(ch->qp, first_wr, &bad_wr); + ret = ib_post_send(ch->qp, first_wr, NULL); if (ret) { pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n", __func__, ret, ioctx->n_rdma, @@ -2671,7 +2685,7 @@ static void srpt_queue_response(struct se_cmd *cmd) container_of(cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; struct srpt_device *sdev = ch->sport->sdev; - struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr; + struct ib_send_wr send_wr, *first_wr = &send_wr; struct ib_sge sge; enum srpt_command_state state; int resp_len, ret, i; @@ -2744,7 +2758,7 @@ static void srpt_queue_response(struct se_cmd *cmd) send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - ret = ib_post_send(ch->qp, first_wr, &bad_wr); + ret = ib_post_send(ch->qp, first_wr, NULL); if (ret < 0) { pr_err("%s: sending cmd response failed for tag %llu (%d)\n", __func__, ioctx->cmd.tag, ret); @@ -2968,7 +2982,8 @@ static void srpt_add_one(struct ib_device *device) pr_debug("device = %p\n", device); - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt), + GFP_KERNEL); if (!sdev) goto err; @@ -3022,8 +3037,6 @@ static void srpt_add_one(struct ib_device *device) srpt_event_handler); ib_register_event_handler(&sdev->event_handler); - WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); - for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; INIT_LIST_HEAD(&sport->nexus_list); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 2361483476a0..444dfd7281b5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -396,9 +396,9 @@ struct srpt_port { * @sdev_mutex: Serializes use_srq changes. * @use_srq: Whether or not to use SRQ. * @ioctx_ring: Per-HCA SRQ. - * @port: Information about the ports owned by this HCA. * @event_handler: Per-HCA asynchronous IB event handler. * @list: Node in srpt_dev_list. + * @port: Information about the ports owned by this HCA. */ struct srpt_device { struct ib_device *device; @@ -410,9 +410,9 @@ struct srpt_device { struct mutex sdev_mutex; bool use_srq; struct srpt_recv_ioctx **ioctx_ring; - struct srpt_port port[2]; struct ib_event_handler event_handler; struct list_head list; + struct srpt_port port[]; }; #endif /* IB_SRPT_H */ diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 97a420c11eed..47c350cdfb12 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -39,17 +39,6 @@ config ADB_IOP <http://www.angelfire.com/ca2/dev68k/iopdesc.html> to enable direct support for it, say 'Y' here. -config ADB_PMU68K - bool "Include PMU (Powerbook) ADB driver" - depends on ADB && MAC - help - Say Y here if want your kernel to support the m68k based Powerbooks. - This includes the PowerBook 140, PowerBook 145, PowerBook 150, - PowerBook 160, PowerBook 165, PowerBook 165c, PowerBook 170, - PowerBook 180, PowerBook, 180c, PowerBook 190cs, PowerBook 520, - PowerBook Duo 210, PowerBook Duo 230, PowerBook Duo 250, - PowerBook Duo 270c, PowerBook Duo 280 and PowerBook Duo 280c. - # we want to change this to something like CONFIG_SYSCTRL_CUDA/PMU config ADB_CUDA bool "Support for Cuda/Egret based Macs and PowerMacs" @@ -65,8 +54,8 @@ config ADB_CUDA If unsure say Y. config ADB_PMU - bool "Support for PMU based PowerMacs" - depends on PPC_PMAC + bool "Support for PMU based PowerMacs and PowerBooks" + depends on PPC_PMAC || MAC help On PowerBooks, iBooks, and recent iMacs and Power Macintoshes, the PMU is an embedded microprocessor whose primary function is to @@ -79,7 +68,7 @@ config ADB_PMU config ADB_PMU_LED bool "Support for the Power/iBook front LED" - depends on ADB_PMU + depends on PPC_PMAC && ADB_PMU select NEW_LEDS select LEDS_CLASS help @@ -122,7 +111,7 @@ config PMAC_MEDIABAY config PMAC_BACKLIGHT bool "Backlight control for LCD screens" - depends on ADB_PMU && FB = y && (BROKEN || !PPC64) + depends on PPC_PMAC && ADB_PMU && FB = y && (BROKEN || !PPC64) select FB_BACKLIGHT help Say Y here to enable Macintosh specific extensions of the generic diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile index ee803638e595..49819b1b6f20 100644 --- a/drivers/macintosh/Makefile +++ b/drivers/macintosh/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_PMAC_SMU) += smu.o obj-$(CONFIG_ADB) += adb.o obj-$(CONFIG_ADB_MACII) += via-macii.o obj-$(CONFIG_ADB_IOP) += adb-iop.o -obj-$(CONFIG_ADB_PMU68K) += via-pmu68k.o obj-$(CONFIG_ADB_MACIO) += macio-adb.o obj-$(CONFIG_THERM_WINDTUNNEL) += therm_windtunnel.o diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 4c8097e0e6fe..76e98f0f7a3e 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -65,7 +65,7 @@ static struct adb_driver *adb_driver_list[] = { #ifdef CONFIG_ADB_IOP &adb_iop_driver, #endif -#if defined(CONFIG_ADB_PMU) || defined(CONFIG_ADB_PMU68K) +#ifdef CONFIG_ADB_PMU &via_pmu_driver, #endif #ifdef CONFIG_ADB_MACIO diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 25c1ce811053..d72c450aebe5 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device driver for the via-pmu on Apple Powermacs. + * Device driver for the PMU in Apple PowerBooks and PowerMacs. * * The VIA (versatile interface adapter) interfaces to the PMU, * a 6805 microprocessor core whose primary function is to control @@ -49,20 +49,26 @@ #include <linux/compat.h> #include <linux/of_address.h> #include <linux/of_irq.h> -#include <asm/prom.h> +#include <linux/uaccess.h> #include <asm/machdep.h> #include <asm/io.h> #include <asm/pgtable.h> #include <asm/sections.h> #include <asm/irq.h> +#ifdef CONFIG_PPC_PMAC #include <asm/pmac_feature.h> #include <asm/pmac_pfunc.h> #include <asm/pmac_low_i2c.h> -#include <linux/uaccess.h> +#include <asm/prom.h> #include <asm/mmu_context.h> #include <asm/cputable.h> #include <asm/time.h> #include <asm/backlight.h> +#else +#include <asm/macintosh.h> +#include <asm/macints.h> +#include <asm/mac_via.h> +#endif #include "via-pmu-event.h" @@ -76,7 +82,6 @@ #define BATTERY_POLLING_COUNT 2 static DEFINE_MUTEX(pmu_info_proc_mutex); -static volatile unsigned char __iomem *via; /* VIA registers - spaced 0x200 bytes apart */ #define RS 0x200 /* skip between registers */ @@ -98,8 +103,13 @@ static volatile unsigned char __iomem *via; #define ANH (15*RS) /* A-side data, no handshake */ /* Bits in B data register: both active low */ +#ifdef CONFIG_PPC_PMAC #define TACK 0x08 /* Transfer acknowledge (input) */ #define TREQ 0x10 /* Transfer request (output) */ +#else +#define TACK 0x02 +#define TREQ 0x04 +#endif /* Bits in ACR */ #define SR_CTRL 0x1c /* Shift register control bits */ @@ -114,6 +124,7 @@ static volatile unsigned char __iomem *via; #define CB1_INT 0x10 /* transition on CB1 input */ static volatile enum pmu_state { + uninitialized = 0, idle, sending, intack, @@ -140,11 +151,15 @@ static int data_index; static int data_len; static volatile int adb_int_pending; static volatile int disable_poll; -static struct device_node *vias; static int pmu_kind = PMU_UNKNOWN; static int pmu_fully_inited; static int pmu_has_adb; +#ifdef CONFIG_PPC_PMAC +static volatile unsigned char __iomem *via1; +static volatile unsigned char __iomem *via2; +static struct device_node *vias; static struct device_node *gpio_node; +#endif static unsigned char __iomem *gpio_reg; static int gpio_irq = 0; static int gpio_irq_enabled = -1; @@ -157,7 +172,9 @@ static int drop_interrupts; static int option_lid_wakeup = 1; #endif /* CONFIG_SUSPEND && CONFIG_PPC32 */ static unsigned long async_req_locks; -static unsigned int pmu_irq_stats[11]; + +#define NUM_IRQ_STATS 13 +static unsigned int pmu_irq_stats[NUM_IRQ_STATS]; static struct proc_dir_entry *proc_pmu_root; static struct proc_dir_entry *proc_pmu_info; @@ -271,10 +288,11 @@ static char *pbook_type[] = { int __init find_via_pmu(void) { +#ifdef CONFIG_PPC_PMAC u64 taddr; const u32 *reg; - if (via) + if (pmu_state != uninitialized) return 1; vias = of_find_node_by_name(NULL, "via-pmu"); if (vias == NULL) @@ -339,50 +357,70 @@ int __init find_via_pmu(void) } else pmu_kind = PMU_UNKNOWN; - via = ioremap(taddr, 0x2000); - if (via == NULL) { + via1 = via2 = ioremap(taddr, 0x2000); + if (via1 == NULL) { printk(KERN_ERR "via-pmu: Can't map address !\n"); goto fail_via_remap; } - out_8(&via[IER], IER_CLR | 0x7f); /* disable all intrs */ - out_8(&via[IFR], 0x7f); /* clear IFR */ + out_8(&via1[IER], IER_CLR | 0x7f); /* disable all intrs */ + out_8(&via1[IFR], 0x7f); /* clear IFR */ pmu_state = idle; if (!init_pmu()) goto fail_init; - printk(KERN_INFO "PMU driver v%d initialized for %s, firmware: %02x\n", - PMU_DRIVER_VERSION, pbook_type[pmu_kind], pmu_version); - sys_ctrler = SYS_CTRLER_PMU; return 1; fail_init: - iounmap(via); - via = NULL; + iounmap(via1); + via1 = via2 = NULL; fail_via_remap: iounmap(gpio_reg); gpio_reg = NULL; fail: of_node_put(vias); vias = NULL; + pmu_state = uninitialized; return 0; +#else + if (macintosh_config->adb_type != MAC_ADB_PB2) + return 0; + + pmu_kind = PMU_UNKNOWN; + + spin_lock_init(&pmu_lock); + + pmu_has_adb = 1; + + pmu_intr_mask = PMU_INT_PCEJECT | + PMU_INT_SNDBRT | + PMU_INT_ADB | + PMU_INT_TICK; + + pmu_state = idle; + + if (!init_pmu()) { + pmu_state = uninitialized; + return 0; + } + + return 1; +#endif /* !CONFIG_PPC_PMAC */ } #ifdef CONFIG_ADB static int pmu_probe(void) { - return vias == NULL? -ENODEV: 0; + return pmu_state == uninitialized ? -ENODEV : 0; } -static int __init pmu_init(void) +static int pmu_init(void) { - if (vias == NULL) - return -ENODEV; - return 0; + return pmu_state == uninitialized ? -ENODEV : 0; } #endif /* CONFIG_ADB */ @@ -395,13 +433,14 @@ static int __init pmu_init(void) */ static int __init via_pmu_start(void) { - unsigned int irq; + unsigned int __maybe_unused irq; - if (vias == NULL) + if (pmu_state == uninitialized) return -ENODEV; batt_req.complete = 1; +#ifdef CONFIG_PPC_PMAC irq = irq_of_parse_and_map(vias, 0); if (!irq) { printk(KERN_ERR "via-pmu: can't map interrupt\n"); @@ -437,7 +476,20 @@ static int __init via_pmu_start(void) } /* Enable interrupts */ - out_8(&via[IER], IER_SET | SR_INT | CB1_INT); + out_8(&via1[IER], IER_SET | SR_INT | CB1_INT); +#else + if (request_irq(IRQ_MAC_ADB_SR, via_pmu_interrupt, IRQF_NO_SUSPEND, + "VIA-PMU-SR", NULL)) { + pr_err("%s: couldn't get SR irq\n", __func__); + return -ENODEV; + } + if (request_irq(IRQ_MAC_ADB_CL, via_pmu_interrupt, IRQF_NO_SUSPEND, + "VIA-PMU-CL", NULL)) { + pr_err("%s: couldn't get CL irq\n", __func__); + free_irq(IRQ_MAC_ADB_SR, NULL); + return -ENODEV; + } +#endif /* !CONFIG_PPC_PMAC */ pmu_fully_inited = 1; @@ -463,7 +515,7 @@ arch_initcall(via_pmu_start); */ static int __init via_pmu_dev_init(void) { - if (vias == NULL) + if (pmu_state == uninitialized) return -ENODEV; #ifdef CONFIG_PMAC_BACKLIGHT @@ -534,8 +586,9 @@ init_pmu(void) int timeout; struct adb_request req; - out_8(&via[B], via[B] | TREQ); /* negate TREQ */ - out_8(&via[DIRB], (via[DIRB] | TREQ) & ~TACK); /* TACK in, TREQ out */ + /* Negate TREQ. Set TACK to input and TREQ to output. */ + out_8(&via2[B], in_8(&via2[B]) | TREQ); + out_8(&via2[DIRB], (in_8(&via2[DIRB]) | TREQ) & ~TACK); pmu_request(&req, NULL, 2, PMU_SET_INTR_MASK, pmu_intr_mask); timeout = 100000; @@ -587,6 +640,10 @@ init_pmu(void) option_server_mode ? "enabled" : "disabled"); } } + + printk(KERN_INFO "PMU driver v%d initialized for %s, firmware: %02x\n", + PMU_DRIVER_VERSION, pbook_type[pmu_kind], pmu_version); + return 1; } @@ -625,6 +682,7 @@ static void pmu_set_server_mode(int server_mode) static void done_battery_state_ohare(struct adb_request* req) { +#ifdef CONFIG_PPC_PMAC /* format: * [0] : flags * 0x01 : AC indicator @@ -706,6 +764,7 @@ done_battery_state_ohare(struct adb_request* req) pmu_batteries[pmu_cur_battery].amperage = amperage; pmu_batteries[pmu_cur_battery].voltage = voltage; pmu_batteries[pmu_cur_battery].time_remaining = time; +#endif /* CONFIG_PPC_PMAC */ clear_bit(0, &async_req_locks); } @@ -816,9 +875,9 @@ static int pmu_info_proc_show(struct seq_file *m, void *v) static int pmu_irqstats_proc_show(struct seq_file *m, void *v) { int i; - static const char *irq_names[] = { - "Total CB1 triggered events", - "Total GPIO1 triggered events", + static const char *irq_names[NUM_IRQ_STATS] = { + "Unknown interrupt (type 0)", + "Unknown interrupt (type 1)", "PC-Card eject button", "Sound/Brightness button", "ADB message", @@ -827,10 +886,12 @@ static int pmu_irqstats_proc_show(struct seq_file *m, void *v) "Tick timer", "Ghost interrupt (zero len)", "Empty interrupt (empty mask)", - "Max irqs in a row" + "Max irqs in a row", + "Total CB1 triggered events", + "Total GPIO1 triggered events", }; - for (i=0; i<11; i++) { + for (i = 0; i < NUM_IRQ_STATS; i++) { seq_printf(m, " %2u: %10u (%s)\n", i, pmu_irq_stats[i], irq_names[i]); } @@ -928,7 +989,7 @@ static int pmu_send_request(struct adb_request *req, int sync) { int i, ret; - if ((vias == NULL) || (!pmu_fully_inited)) { + if (pmu_state == uninitialized || !pmu_fully_inited) { req->complete = 1; return -ENXIO; } @@ -1022,7 +1083,7 @@ static int __pmu_adb_autopoll(int devs) static int pmu_adb_autopoll(int devs) { - if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb) + if (pmu_state == uninitialized || !pmu_fully_inited || !pmu_has_adb) return -ENXIO; adb_dev_map = devs; @@ -1035,7 +1096,7 @@ static int pmu_adb_reset_bus(void) struct adb_request req; int save_autopoll = adb_dev_map; - if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb) + if (pmu_state == uninitialized || !pmu_fully_inited || !pmu_has_adb) return -ENXIO; /* anyone got a better idea?? */ @@ -1071,7 +1132,7 @@ pmu_request(struct adb_request *req, void (*done)(struct adb_request *), va_list list; int i; - if (vias == NULL) + if (pmu_state == uninitialized) return -ENXIO; if (nbytes < 0 || nbytes > 32) { @@ -1096,7 +1157,7 @@ pmu_queue_request(struct adb_request *req) unsigned long flags; int nsend; - if (via == NULL) { + if (pmu_state == uninitialized) { req->complete = 1; return -ENXIO; } @@ -1136,7 +1197,7 @@ wait_for_ack(void) * reported */ int timeout = 4000; - while ((in_8(&via[B]) & TACK) == 0) { + while ((in_8(&via2[B]) & TACK) == 0) { if (--timeout < 0) { printk(KERN_ERR "PMU not responding (!ack)\n"); return; @@ -1150,23 +1211,19 @@ wait_for_ack(void) static inline void send_byte(int x) { - volatile unsigned char __iomem *v = via; - - out_8(&v[ACR], in_8(&v[ACR]) | SR_OUT | SR_EXT); - out_8(&v[SR], x); - out_8(&v[B], in_8(&v[B]) & ~TREQ); /* assert TREQ */ - (void)in_8(&v[B]); + out_8(&via1[ACR], in_8(&via1[ACR]) | SR_OUT | SR_EXT); + out_8(&via1[SR], x); + out_8(&via2[B], in_8(&via2[B]) & ~TREQ); /* assert TREQ */ + (void)in_8(&via2[B]); } static inline void recv_byte(void) { - volatile unsigned char __iomem *v = via; - - out_8(&v[ACR], (in_8(&v[ACR]) & ~SR_OUT) | SR_EXT); - in_8(&v[SR]); /* resets SR */ - out_8(&v[B], in_8(&v[B]) & ~TREQ); - (void)in_8(&v[B]); + out_8(&via1[ACR], (in_8(&via1[ACR]) & ~SR_OUT) | SR_EXT); + in_8(&via1[SR]); /* resets SR */ + out_8(&via2[B], in_8(&via2[B]) & ~TREQ); + (void)in_8(&via2[B]); } static inline void @@ -1209,7 +1266,7 @@ pmu_start(void) void pmu_poll(void) { - if (!via) + if (pmu_state == uninitialized) return; if (disable_poll) return; @@ -1219,7 +1276,7 @@ pmu_poll(void) void pmu_poll_adb(void) { - if (!via) + if (pmu_state == uninitialized) return; if (disable_poll) return; @@ -1234,7 +1291,7 @@ pmu_poll_adb(void) void pmu_wait_complete(struct adb_request *req) { - if (!via) + if (pmu_state == uninitialized) return; while((pmu_state != idle && pmu_state != locked) || !req->complete) via_pmu_interrupt(0, NULL); @@ -1250,7 +1307,7 @@ pmu_suspend(void) { unsigned long flags; - if (!via) + if (pmu_state == uninitialized) return; spin_lock_irqsave(&pmu_lock, flags); @@ -1269,7 +1326,7 @@ pmu_suspend(void) if (!adb_int_pending && pmu_state == idle && !req_awaiting_reply) { if (gpio_irq >= 0) disable_irq_nosync(gpio_irq); - out_8(&via[IER], CB1_INT | IER_CLR); + out_8(&via1[IER], CB1_INT | IER_CLR); spin_unlock_irqrestore(&pmu_lock, flags); break; } @@ -1281,7 +1338,7 @@ pmu_resume(void) { unsigned long flags; - if (!via || (pmu_suspended < 1)) + if (pmu_state == uninitialized || pmu_suspended < 1) return; spin_lock_irqsave(&pmu_lock, flags); @@ -1293,7 +1350,7 @@ pmu_resume(void) adb_int_pending = 1; if (gpio_irq >= 0) enable_irq(gpio_irq); - out_8(&via[IER], CB1_INT | IER_SET); + out_8(&via1[IER], CB1_INT | IER_SET); spin_unlock_irqrestore(&pmu_lock, flags); pmu_poll(); } @@ -1302,7 +1359,8 @@ pmu_resume(void) static void pmu_handle_data(unsigned char *data, int len) { - unsigned char ints, pirq; + unsigned char ints; + int idx; int i = 0; asleep = 0; @@ -1324,25 +1382,24 @@ pmu_handle_data(unsigned char *data, int len) ints &= ~(PMU_INT_ADB_AUTO | PMU_INT_AUTO_SRQ_POLL); next: - if (ints == 0) { if (i > pmu_irq_stats[10]) pmu_irq_stats[10] = i; return; } - - for (pirq = 0; pirq < 8; pirq++) - if (ints & (1 << pirq)) - break; - pmu_irq_stats[pirq]++; i++; - ints &= ~(1 << pirq); + + idx = ffs(ints) - 1; + ints &= ~BIT(idx); + + pmu_irq_stats[idx]++; /* Note: for some reason, we get an interrupt with len=1, * data[0]==0 after each normal ADB interrupt, at least * on the Pismo. Still investigating... --BenH */ - if ((1 << pirq) & PMU_INT_ADB) { + switch (BIT(idx)) { + case PMU_INT_ADB: if ((data[0] & PMU_INT_ADB_AUTO) == 0) { struct adb_request *req = req_awaiting_reply; if (!req) { @@ -1358,6 +1415,7 @@ next: } pmu_done(req); } else { +#ifdef CONFIG_XMON if (len == 4 && data[1] == 0x2c) { extern int xmon_wants_key, xmon_adb_keycode; if (xmon_wants_key) { @@ -1365,6 +1423,7 @@ next: return; } } +#endif /* CONFIG_XMON */ #ifdef CONFIG_ADB /* * XXX On the [23]400 the PMU gives us an up @@ -1378,25 +1437,28 @@ next: adb_input(data+1, len-1, 1); #endif /* CONFIG_ADB */ } - } + break; + /* Sound/brightness button pressed */ - else if ((1 << pirq) & PMU_INT_SNDBRT) { + case PMU_INT_SNDBRT: #ifdef CONFIG_PMAC_BACKLIGHT if (len == 3) pmac_backlight_set_legacy_brightness_pmu(data[1] >> 4); #endif - } + break; + /* Tick interrupt */ - else if ((1 << pirq) & PMU_INT_TICK) { - /* Environement or tick interrupt, query batteries */ + case PMU_INT_TICK: + /* Environment or tick interrupt, query batteries */ if (pmu_battery_count) { if ((--query_batt_timer) == 0) { query_battery_state(); query_batt_timer = BATTERY_POLLING_COUNT; } } - } - else if ((1 << pirq) & PMU_INT_ENVIRONMENT) { + break; + + case PMU_INT_ENVIRONMENT: if (pmu_battery_count) query_battery_state(); pmu_pass_intr(data, len); @@ -1406,7 +1468,9 @@ next: via_pmu_event(PMU_EVT_POWER, !!(data[1]&8)); via_pmu_event(PMU_EVT_LID, data[1]&1); } - } else { + break; + + default: pmu_pass_intr(data, len); } goto next; @@ -1418,21 +1482,20 @@ pmu_sr_intr(void) struct adb_request *req; int bite = 0; - if (via[B] & TREQ) { - printk(KERN_ERR "PMU: spurious SR intr (%x)\n", via[B]); - out_8(&via[IFR], SR_INT); + if (in_8(&via2[B]) & TREQ) { + printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via2[B])); return NULL; } /* The ack may not yet be low when we get the interrupt */ - while ((in_8(&via[B]) & TACK) != 0) + while ((in_8(&via2[B]) & TACK) != 0) ; /* if reading grab the byte, and reset the interrupt */ if (pmu_state == reading || pmu_state == reading_intr) - bite = in_8(&via[SR]); + bite = in_8(&via1[SR]); /* reset TREQ and wait for TACK to go high */ - out_8(&via[B], in_8(&via[B]) | TREQ); + out_8(&via2[B], in_8(&via2[B]) | TREQ); wait_for_ack(); switch (pmu_state) { @@ -1533,26 +1596,46 @@ via_pmu_interrupt(int irq, void *arg) ++disable_poll; for (;;) { - intr = in_8(&via[IFR]) & (SR_INT | CB1_INT); + /* On 68k Macs, VIA interrupts are dispatched individually. + * Unless we are polling, the relevant IRQ flag has already + * been cleared. + */ + intr = 0; + if (IS_ENABLED(CONFIG_PPC_PMAC) || !irq) { + intr = in_8(&via1[IFR]) & (SR_INT | CB1_INT); + out_8(&via1[IFR], intr); + } +#ifndef CONFIG_PPC_PMAC + switch (irq) { + case IRQ_MAC_ADB_CL: + intr = CB1_INT; + break; + case IRQ_MAC_ADB_SR: + intr = SR_INT; + break; + } +#endif if (intr == 0) break; handled = 1; if (++nloop > 1000) { printk(KERN_DEBUG "PMU: stuck in intr loop, " "intr=%x, ier=%x pmu_state=%d\n", - intr, in_8(&via[IER]), pmu_state); + intr, in_8(&via1[IER]), pmu_state); break; } - out_8(&via[IFR], intr); if (intr & CB1_INT) { adb_int_pending = 1; - pmu_irq_stats[0]++; + pmu_irq_stats[11]++; } if (intr & SR_INT) { req = pmu_sr_intr(); if (req) break; } +#ifndef CONFIG_PPC_PMAC + break; +#endif } recheck: @@ -1619,7 +1702,7 @@ pmu_unlock(void) } -static irqreturn_t +static __maybe_unused irqreturn_t gpio1_interrupt(int irq, void *arg) { unsigned long flags; @@ -1630,7 +1713,7 @@ gpio1_interrupt(int irq, void *arg) disable_irq_nosync(gpio_irq); gpio_irq_enabled = 0; } - pmu_irq_stats[1]++; + pmu_irq_stats[12]++; adb_int_pending = 1; spin_unlock_irqrestore(&pmu_lock, flags); via_pmu_interrupt(0, NULL); @@ -1644,7 +1727,7 @@ pmu_enable_irled(int on) { struct adb_request req; - if (vias == NULL) + if (pmu_state == uninitialized) return ; if (pmu_kind == PMU_KEYLARGO_BASED) return ; @@ -1659,7 +1742,7 @@ pmu_restart(void) { struct adb_request req; - if (via == NULL) + if (pmu_state == uninitialized) return; local_irq_disable(); @@ -1684,7 +1767,7 @@ pmu_shutdown(void) { struct adb_request req; - if (via == NULL) + if (pmu_state == uninitialized) return; local_irq_disable(); @@ -1712,7 +1795,7 @@ pmu_shutdown(void) int pmu_present(void) { - return via != NULL; + return pmu_state != uninitialized; } #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32) @@ -1725,29 +1808,29 @@ static u32 save_via[8]; static void save_via_state(void) { - save_via[0] = in_8(&via[ANH]); - save_via[1] = in_8(&via[DIRA]); - save_via[2] = in_8(&via[B]); - save_via[3] = in_8(&via[DIRB]); - save_via[4] = in_8(&via[PCR]); - save_via[5] = in_8(&via[ACR]); - save_via[6] = in_8(&via[T1CL]); - save_via[7] = in_8(&via[T1CH]); + save_via[0] = in_8(&via1[ANH]); + save_via[1] = in_8(&via1[DIRA]); + save_via[2] = in_8(&via1[B]); + save_via[3] = in_8(&via1[DIRB]); + save_via[4] = in_8(&via1[PCR]); + save_via[5] = in_8(&via1[ACR]); + save_via[6] = in_8(&via1[T1CL]); + save_via[7] = in_8(&via1[T1CH]); } static void restore_via_state(void) { - out_8(&via[ANH], save_via[0]); - out_8(&via[DIRA], save_via[1]); - out_8(&via[B], save_via[2]); - out_8(&via[DIRB], save_via[3]); - out_8(&via[PCR], save_via[4]); - out_8(&via[ACR], save_via[5]); - out_8(&via[T1CL], save_via[6]); - out_8(&via[T1CH], save_via[7]); - out_8(&via[IER], IER_CLR | 0x7f); /* disable all intrs */ - out_8(&via[IFR], 0x7f); /* clear IFR */ - out_8(&via[IER], IER_SET | SR_INT | CB1_INT); + out_8(&via1[ANH], save_via[0]); + out_8(&via1[DIRA], save_via[1]); + out_8(&via1[B], save_via[2]); + out_8(&via1[DIRB], save_via[3]); + out_8(&via1[PCR], save_via[4]); + out_8(&via1[ACR], save_via[5]); + out_8(&via1[T1CL], save_via[6]); + out_8(&via1[T1CH], save_via[7]); + out_8(&via1[IER], IER_CLR | 0x7f); /* disable all intrs */ + out_8(&via1[IFR], 0x7f); /* clear IFR */ + out_8(&via1[IER], IER_SET | SR_INT | CB1_INT); } #define GRACKLE_PM (1<<7) @@ -2253,6 +2336,7 @@ static int pmu_ioctl(struct file *filp, int error = -EINVAL; switch (cmd) { +#ifdef CONFIG_PPC_PMAC case PMU_IOC_SLEEP: if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -2262,6 +2346,7 @@ static int pmu_ioctl(struct file *filp, return put_user(0, argp); else return put_user(1, argp); +#endif #ifdef CONFIG_PMAC_BACKLIGHT_LEGACY /* Compatibility ioctl's for backlight */ @@ -2378,7 +2463,7 @@ static struct miscdevice pmu_device = { static int pmu_device_init(void) { - if (!via) + if (pmu_state == uninitialized) return 0; if (misc_register(&pmu_device) < 0) printk(KERN_ERR "via-pmu: cannot register misc device.\n"); @@ -2389,33 +2474,33 @@ device_initcall(pmu_device_init); #ifdef DEBUG_SLEEP static inline void -polled_handshake(volatile unsigned char __iomem *via) +polled_handshake(void) { - via[B] &= ~TREQ; eieio(); - while ((via[B] & TACK) != 0) + via2[B] &= ~TREQ; eieio(); + while ((via2[B] & TACK) != 0) ; - via[B] |= TREQ; eieio(); - while ((via[B] & TACK) == 0) + via2[B] |= TREQ; eieio(); + while ((via2[B] & TACK) == 0) ; } static inline void -polled_send_byte(volatile unsigned char __iomem *via, int x) +polled_send_byte(int x) { - via[ACR] |= SR_OUT | SR_EXT; eieio(); - via[SR] = x; eieio(); - polled_handshake(via); + via1[ACR] |= SR_OUT | SR_EXT; eieio(); + via1[SR] = x; eieio(); + polled_handshake(); } static inline int -polled_recv_byte(volatile unsigned char __iomem *via) +polled_recv_byte(void) { int x; - via[ACR] = (via[ACR] & ~SR_OUT) | SR_EXT; eieio(); - x = via[SR]; eieio(); - polled_handshake(via); - x = via[SR]; eieio(); + via1[ACR] = (via1[ACR] & ~SR_OUT) | SR_EXT; eieio(); + x = via1[SR]; eieio(); + polled_handshake(); + x = via1[SR]; eieio(); return x; } @@ -2424,7 +2509,6 @@ pmu_polled_request(struct adb_request *req) { unsigned long flags; int i, l, c; - volatile unsigned char __iomem *v = via; req->complete = 1; c = req->data[0]; @@ -2436,21 +2520,21 @@ pmu_polled_request(struct adb_request *req) while (pmu_state != idle) pmu_poll(); - while ((via[B] & TACK) == 0) + while ((via2[B] & TACK) == 0) ; - polled_send_byte(v, c); + polled_send_byte(c); if (l < 0) { l = req->nbytes - 1; - polled_send_byte(v, l); + polled_send_byte(l); } for (i = 1; i <= l; ++i) - polled_send_byte(v, req->data[i]); + polled_send_byte(req->data[i]); l = pmu_data_len[c][1]; if (l < 0) - l = polled_recv_byte(v); + l = polled_recv_byte(); for (i = 0; i < l; ++i) - req->reply[i + req->reply_len] = polled_recv_byte(v); + req->reply[i + req->reply_len] = polled_recv_byte(); if (req->done) (*req->done)(req); diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c deleted file mode 100644 index d545ed45e482..000000000000 --- a/drivers/macintosh/via-pmu68k.c +++ /dev/null @@ -1,850 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Device driver for the PMU on 68K-based Apple PowerBooks - * - * The VIA (versatile interface adapter) interfaces to the PMU, - * a 6805 microprocessor core whose primary function is to control - * battery charging and system power on the PowerBooks. - * The PMU also controls the ADB (Apple Desktop Bus) which connects - * to the keyboard and mouse, as well as the non-volatile RAM - * and the RTC (real time clock) chip. - * - * Adapted for 68K PMU by Joshua M. Thompson - * - * Based largely on the PowerMac PMU code by Paul Mackerras and - * Fabio Riccardi. - * - * Also based on the PMU driver from MkLinux by Apple Computer, Inc. - * and the Open Software Foundation, Inc. - */ - -#include <stdarg.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/miscdevice.h> -#include <linux/blkdev.h> -#include <linux/pci.h> -#include <linux/init.h> -#include <linux/interrupt.h> - -#include <linux/adb.h> -#include <linux/pmu.h> -#include <linux/cuda.h> - -#include <asm/macintosh.h> -#include <asm/macints.h> -#include <asm/mac_via.h> - -#include <asm/pgtable.h> -#include <asm/irq.h> -#include <linux/uaccess.h> - -/* Misc minor number allocated for /dev/pmu */ -#define PMU_MINOR 154 - -/* VIA registers - spaced 0x200 bytes apart */ -#define RS 0x200 /* skip between registers */ -#define B 0 /* B-side data */ -#define A RS /* A-side data */ -#define DIRB (2*RS) /* B-side direction (1=output) */ -#define DIRA (3*RS) /* A-side direction (1=output) */ -#define T1CL (4*RS) /* Timer 1 ctr/latch (low 8 bits) */ -#define T1CH (5*RS) /* Timer 1 counter (high 8 bits) */ -#define T1LL (6*RS) /* Timer 1 latch (low 8 bits) */ -#define T1LH (7*RS) /* Timer 1 latch (high 8 bits) */ -#define T2CL (8*RS) /* Timer 2 ctr/latch (low 8 bits) */ -#define T2CH (9*RS) /* Timer 2 counter (high 8 bits) */ -#define SR (10*RS) /* Shift register */ -#define ACR (11*RS) /* Auxiliary control register */ -#define PCR (12*RS) /* Peripheral control register */ -#define IFR (13*RS) /* Interrupt flag register */ -#define IER (14*RS) /* Interrupt enable register */ -#define ANH (15*RS) /* A-side data, no handshake */ - -/* Bits in B data register: both active low */ -#define TACK 0x02 /* Transfer acknowledge (input) */ -#define TREQ 0x04 /* Transfer request (output) */ - -/* Bits in ACR */ -#define SR_CTRL 0x1c /* Shift register control bits */ -#define SR_EXT 0x0c /* Shift on external clock */ -#define SR_OUT 0x10 /* Shift out if 1 */ - -/* Bits in IFR and IER */ -#define SR_INT 0x04 /* Shift register full/empty */ -#define CB1_INT 0x10 /* transition on CB1 input */ - -static enum pmu_state { - idle, - sending, - intack, - reading, - reading_intr, -} pmu_state; - -static struct adb_request *current_req; -static struct adb_request *last_req; -static struct adb_request *req_awaiting_reply; -static unsigned char interrupt_data[32]; -static unsigned char *reply_ptr; -static int data_index; -static int data_len; -static int adb_int_pending; -static int pmu_adb_flags; -static int adb_dev_map; -static struct adb_request bright_req_1, bright_req_2, bright_req_3; -static int pmu_kind = PMU_UNKNOWN; -static int pmu_fully_inited; - -int asleep; - -static int pmu_probe(void); -static int pmu_init(void); -static void pmu_start(void); -static irqreturn_t pmu_interrupt(int irq, void *arg); -static int pmu_send_request(struct adb_request *req, int sync); -static int pmu_autopoll(int devs); -void pmu_poll(void); -static int pmu_reset_bus(void); - -static int init_pmu(void); -static void pmu_start(void); -static void send_byte(int x); -static void recv_byte(void); -static void pmu_done(struct adb_request *req); -static void pmu_handle_data(unsigned char *data, int len); -static void set_volume(int level); -static void pmu_enable_backlight(int on); -static void pmu_set_brightness(int level); - -struct adb_driver via_pmu_driver = { - .name = "68K PMU", - .probe = pmu_probe, - .init = pmu_init, - .send_request = pmu_send_request, - .autopoll = pmu_autopoll, - .poll = pmu_poll, - .reset_bus = pmu_reset_bus, -}; - -/* - * This table indicates for each PMU opcode: - * - the number of data bytes to be sent with the command, or -1 - * if a length byte should be sent, - * - the number of response bytes which the PMU will return, or - * -1 if it will send a length byte. - */ -static s8 pmu_data_len[256][2] = { -/* 0 1 2 3 4 5 6 7 */ -/*00*/ {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*08*/ {-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*10*/ { 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*18*/ { 0, 1},{ 0, 1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{ 0, 0}, -/*20*/ {-1, 0},{ 0, 0},{ 2, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*28*/ { 0,-1},{ 0,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{ 0,-1}, -/*30*/ { 4, 0},{20, 0},{-1, 0},{ 3, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*38*/ { 0, 4},{ 0,20},{ 2,-1},{ 2, 1},{ 3,-1},{-1,-1},{-1,-1},{ 4, 0}, -/*40*/ { 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*48*/ { 0, 1},{ 0, 1},{-1,-1},{ 1, 0},{ 1, 0},{-1,-1},{-1,-1},{-1,-1}, -/*50*/ { 1, 0},{ 0, 0},{ 2, 0},{ 2, 0},{-1, 0},{ 1, 0},{ 3, 0},{ 1, 0}, -/*58*/ { 0, 1},{ 1, 0},{ 0, 2},{ 0, 2},{ 0,-1},{-1,-1},{-1,-1},{-1,-1}, -/*60*/ { 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*68*/ { 0, 3},{ 0, 3},{ 0, 2},{ 0, 8},{ 0,-1},{ 0,-1},{-1,-1},{-1,-1}, -/*70*/ { 1, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*78*/ { 0,-1},{ 0,-1},{-1,-1},{-1,-1},{-1,-1},{ 5, 1},{ 4, 1},{ 4, 1}, -/*80*/ { 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*88*/ { 0, 5},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*90*/ { 1, 0},{ 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*98*/ { 0, 1},{ 0, 1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*a0*/ { 2, 0},{ 2, 0},{ 2, 0},{ 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0}, -/*a8*/ { 1, 1},{ 1, 0},{ 3, 0},{ 2, 0},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*b0*/ {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*b8*/ {-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*c0*/ {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*c8*/ {-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*d0*/ { 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*d8*/ { 1, 1},{ 1, 1},{-1,-1},{-1,-1},{ 0, 1},{ 0,-1},{-1,-1},{-1,-1}, -/*e0*/ {-1, 0},{ 4, 0},{ 0, 1},{-1, 0},{-1, 0},{ 4, 0},{-1, 0},{-1, 0}, -/*e8*/ { 3,-1},{-1,-1},{ 0, 1},{-1,-1},{ 0,-1},{-1,-1},{-1,-1},{ 0, 0}, -/*f0*/ {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*f8*/ {-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -}; - -int __init find_via_pmu(void) -{ - switch (macintosh_config->adb_type) { - case MAC_ADB_PB1: - pmu_kind = PMU_68K_V1; - break; - case MAC_ADB_PB2: - pmu_kind = PMU_68K_V2; - break; - default: - pmu_kind = PMU_UNKNOWN; - return -ENODEV; - } - - pmu_state = idle; - - if (!init_pmu()) - goto fail_init; - - pr_info("adb: PMU 68K driver v0.5 for Unified ADB\n"); - - return 1; - -fail_init: - pmu_kind = PMU_UNKNOWN; - return 0; -} - -static int pmu_probe(void) -{ - if (pmu_kind == PMU_UNKNOWN) - return -ENODEV; - return 0; -} - -static int pmu_init(void) -{ - if (pmu_kind == PMU_UNKNOWN) - return -ENODEV; - return 0; -} - -static int __init via_pmu_start(void) -{ - if (pmu_kind == PMU_UNKNOWN) - return -ENODEV; - - if (request_irq(IRQ_MAC_ADB_SR, pmu_interrupt, 0, "PMU_SR", - pmu_interrupt)) { - pr_err("%s: can't get SR irq\n", __func__); - return -ENODEV; - } - if (request_irq(IRQ_MAC_ADB_CL, pmu_interrupt, 0, "PMU_CL", - pmu_interrupt)) { - pr_err("%s: can't get CL irq\n", __func__); - free_irq(IRQ_MAC_ADB_SR, pmu_interrupt); - return -ENODEV; - } - - pmu_fully_inited = 1; - - /* Enable backlight */ - pmu_enable_backlight(1); - - return 0; -} - -arch_initcall(via_pmu_start); - -static int __init init_pmu(void) -{ - int timeout; - volatile struct adb_request req; - - via2[B] |= TREQ; /* negate TREQ */ - via2[DIRB] = (via2[DIRB] | TREQ) & ~TACK; /* TACK in, TREQ out */ - - pmu_request((struct adb_request *) &req, NULL, 2, PMU_SET_INTR_MASK, PMU_INT_ADB); - timeout = 100000; - while (!req.complete) { - if (--timeout < 0) { - printk(KERN_ERR "pmu_init: no response from PMU\n"); - return -EAGAIN; - } - udelay(10); - pmu_poll(); - } - - /* ack all pending interrupts */ - timeout = 100000; - interrupt_data[0] = 1; - while (interrupt_data[0] || pmu_state != idle) { - if (--timeout < 0) { - printk(KERN_ERR "pmu_init: timed out acking intrs\n"); - return -EAGAIN; - } - if (pmu_state == idle) { - adb_int_pending = 1; - pmu_interrupt(0, NULL); - } - pmu_poll(); - udelay(10); - } - - pmu_request((struct adb_request *) &req, NULL, 2, PMU_SET_INTR_MASK, - PMU_INT_ADB_AUTO|PMU_INT_SNDBRT|PMU_INT_ADB); - timeout = 100000; - while (!req.complete) { - if (--timeout < 0) { - printk(KERN_ERR "pmu_init: no response from PMU\n"); - return -EAGAIN; - } - udelay(10); - pmu_poll(); - } - - bright_req_1.complete = 1; - bright_req_2.complete = 1; - bright_req_3.complete = 1; - - return 1; -} - -int -pmu_get_model(void) -{ - return pmu_kind; -} - -/* Send an ADB command */ -static int -pmu_send_request(struct adb_request *req, int sync) -{ - int i, ret; - - if (!pmu_fully_inited) - { - req->complete = 1; - return -ENXIO; - } - - ret = -EINVAL; - - switch (req->data[0]) { - case PMU_PACKET: - for (i = 0; i < req->nbytes - 1; ++i) - req->data[i] = req->data[i+1]; - --req->nbytes; - if (pmu_data_len[req->data[0]][1] != 0) { - req->reply[0] = ADB_RET_OK; - req->reply_len = 1; - } else - req->reply_len = 0; - ret = pmu_queue_request(req); - break; - case CUDA_PACKET: - switch (req->data[1]) { - case CUDA_GET_TIME: - if (req->nbytes != 2) - break; - req->data[0] = PMU_READ_RTC; - req->nbytes = 1; - req->reply_len = 3; - req->reply[0] = CUDA_PACKET; - req->reply[1] = 0; - req->reply[2] = CUDA_GET_TIME; - ret = pmu_queue_request(req); - break; - case CUDA_SET_TIME: - if (req->nbytes != 6) - break; - req->data[0] = PMU_SET_RTC; - req->nbytes = 5; - for (i = 1; i <= 4; ++i) - req->data[i] = req->data[i+1]; - req->reply_len = 3; - req->reply[0] = CUDA_PACKET; - req->reply[1] = 0; - req->reply[2] = CUDA_SET_TIME; - ret = pmu_queue_request(req); - break; - case CUDA_GET_PRAM: - if (req->nbytes != 4) - break; - req->data[0] = PMU_READ_NVRAM; - req->data[1] = req->data[2]; - req->data[2] = req->data[3]; - req->nbytes = 3; - req->reply_len = 3; - req->reply[0] = CUDA_PACKET; - req->reply[1] = 0; - req->reply[2] = CUDA_GET_PRAM; - ret = pmu_queue_request(req); - break; - case CUDA_SET_PRAM: - if (req->nbytes != 5) - break; - req->data[0] = PMU_WRITE_NVRAM; - req->data[1] = req->data[2]; - req->data[2] = req->data[3]; - req->data[3] = req->data[4]; - req->nbytes = 4; - req->reply_len = 3; - req->reply[0] = CUDA_PACKET; - req->reply[1] = 0; - req->reply[2] = CUDA_SET_PRAM; - ret = pmu_queue_request(req); - break; - } - break; - case ADB_PACKET: - for (i = req->nbytes - 1; i > 1; --i) - req->data[i+2] = req->data[i]; - req->data[3] = req->nbytes - 2; - req->data[2] = pmu_adb_flags; - /*req->data[1] = req->data[1];*/ - req->data[0] = PMU_ADB_CMD; - req->nbytes += 2; - req->reply_expected = 1; - req->reply_len = 0; - ret = pmu_queue_request(req); - break; - } - if (ret) - { - req->complete = 1; - return ret; - } - - if (sync) { - while (!req->complete) - pmu_poll(); - } - - return 0; -} - -/* Enable/disable autopolling */ -static int -pmu_autopoll(int devs) -{ - struct adb_request req; - - if (!pmu_fully_inited) return -ENXIO; - - if (devs) { - adb_dev_map = devs; - pmu_request(&req, NULL, 5, PMU_ADB_CMD, 0, 0x86, - adb_dev_map >> 8, adb_dev_map); - pmu_adb_flags = 2; - } else { - pmu_request(&req, NULL, 1, PMU_ADB_POLL_OFF); - pmu_adb_flags = 0; - } - while (!req.complete) - pmu_poll(); - return 0; -} - -/* Reset the ADB bus */ -static int -pmu_reset_bus(void) -{ - struct adb_request req; - long timeout; - int save_autopoll = adb_dev_map; - - if (!pmu_fully_inited) return -ENXIO; - - /* anyone got a better idea?? */ - pmu_autopoll(0); - - req.nbytes = 5; - req.done = NULL; - req.data[0] = PMU_ADB_CMD; - req.data[1] = 0; - req.data[2] = 3; /* ADB_BUSRESET ??? */ - req.data[3] = 0; - req.data[4] = 0; - req.reply_len = 0; - req.reply_expected = 1; - if (pmu_queue_request(&req) != 0) - { - printk(KERN_ERR "pmu_adb_reset_bus: pmu_queue_request failed\n"); - return -EIO; - } - while (!req.complete) - pmu_poll(); - timeout = 100000; - while (!req.complete) { - if (--timeout < 0) { - printk(KERN_ERR "pmu_adb_reset_bus (reset): no response from PMU\n"); - return -EIO; - } - udelay(10); - pmu_poll(); - } - - if (save_autopoll != 0) - pmu_autopoll(save_autopoll); - - return 0; -} - -/* Construct and send a pmu request */ -int -pmu_request(struct adb_request *req, void (*done)(struct adb_request *), - int nbytes, ...) -{ - va_list list; - int i; - - if (nbytes < 0 || nbytes > 32) { - printk(KERN_ERR "pmu_request: bad nbytes (%d)\n", nbytes); - req->complete = 1; - return -EINVAL; - } - req->nbytes = nbytes; - req->done = done; - va_start(list, nbytes); - for (i = 0; i < nbytes; ++i) - req->data[i] = va_arg(list, int); - va_end(list); - if (pmu_data_len[req->data[0]][1] != 0) { - req->reply[0] = ADB_RET_OK; - req->reply_len = 1; - } else - req->reply_len = 0; - req->reply_expected = 0; - return pmu_queue_request(req); -} - -int -pmu_queue_request(struct adb_request *req) -{ - unsigned long flags; - int nsend; - - if (req->nbytes <= 0) { - req->complete = 1; - return 0; - } - nsend = pmu_data_len[req->data[0]][0]; - if (nsend >= 0 && req->nbytes != nsend + 1) { - req->complete = 1; - return -EINVAL; - } - - req->next = NULL; - req->sent = 0; - req->complete = 0; - local_irq_save(flags); - - if (current_req != 0) { - last_req->next = req; - last_req = req; - } else { - current_req = req; - last_req = req; - if (pmu_state == idle) - pmu_start(); - } - - local_irq_restore(flags); - return 0; -} - -static void -send_byte(int x) -{ - via1[ACR] |= SR_CTRL; - via1[SR] = x; - via2[B] &= ~TREQ; /* assert TREQ */ -} - -static void -recv_byte(void) -{ - char c; - - via1[ACR] = (via1[ACR] | SR_EXT) & ~SR_OUT; - c = via1[SR]; /* resets SR */ - via2[B] &= ~TREQ; -} - -static void -pmu_start(void) -{ - unsigned long flags; - struct adb_request *req; - - /* assert pmu_state == idle */ - /* get the packet to send */ - local_irq_save(flags); - req = current_req; - if (req == 0 || pmu_state != idle - || (req->reply_expected && req_awaiting_reply)) - goto out; - - pmu_state = sending; - data_index = 1; - data_len = pmu_data_len[req->data[0]][0]; - - /* set the shift register to shift out and send a byte */ - send_byte(req->data[0]); - -out: - local_irq_restore(flags); -} - -void -pmu_poll(void) -{ - unsigned long flags; - - local_irq_save(flags); - if (via1[IFR] & SR_INT) { - via1[IFR] = SR_INT; - pmu_interrupt(IRQ_MAC_ADB_SR, NULL); - } - if (via1[IFR] & CB1_INT) { - via1[IFR] = CB1_INT; - pmu_interrupt(IRQ_MAC_ADB_CL, NULL); - } - local_irq_restore(flags); -} - -static irqreturn_t -pmu_interrupt(int irq, void *dev_id) -{ - struct adb_request *req; - int timeout, bite = 0; /* to prevent compiler warning */ - -#if 0 - printk("pmu_interrupt: irq %d state %d acr %02X, b %02X data_index %d/%d adb_int_pending %d\n", - irq, pmu_state, (uint) via1[ACR], (uint) via2[B], data_index, data_len, adb_int_pending); -#endif - - if (irq == IRQ_MAC_ADB_CL) { /* CB1 interrupt */ - adb_int_pending = 1; - } else if (irq == IRQ_MAC_ADB_SR) { /* SR interrupt */ - if (via2[B] & TACK) { - printk(KERN_DEBUG "PMU: SR_INT but ack still high! (%x)\n", via2[B]); - } - - /* if reading grab the byte */ - if ((via1[ACR] & SR_OUT) == 0) bite = via1[SR]; - - /* reset TREQ and wait for TACK to go high */ - via2[B] |= TREQ; - timeout = 3200; - while (!(via2[B] & TACK)) { - if (--timeout < 0) { - printk(KERN_ERR "PMU not responding (!ack)\n"); - goto finish; - } - udelay(10); - } - - switch (pmu_state) { - case sending: - req = current_req; - if (data_len < 0) { - data_len = req->nbytes - 1; - send_byte(data_len); - break; - } - if (data_index <= data_len) { - send_byte(req->data[data_index++]); - break; - } - req->sent = 1; - data_len = pmu_data_len[req->data[0]][1]; - if (data_len == 0) { - pmu_state = idle; - current_req = req->next; - if (req->reply_expected) - req_awaiting_reply = req; - else - pmu_done(req); - } else { - pmu_state = reading; - data_index = 0; - reply_ptr = req->reply + req->reply_len; - recv_byte(); - } - break; - - case intack: - data_index = 0; - data_len = -1; - pmu_state = reading_intr; - reply_ptr = interrupt_data; - recv_byte(); - break; - - case reading: - case reading_intr: - if (data_len == -1) { - data_len = bite; - if (bite > 32) - printk(KERN_ERR "PMU: bad reply len %d\n", - bite); - } else { - reply_ptr[data_index++] = bite; - } - if (data_index < data_len) { - recv_byte(); - break; - } - - if (pmu_state == reading_intr) { - pmu_handle_data(interrupt_data, data_index); - } else { - req = current_req; - current_req = req->next; - req->reply_len += data_index; - pmu_done(req); - } - pmu_state = idle; - - break; - - default: - printk(KERN_ERR "pmu_interrupt: unknown state %d?\n", - pmu_state); - } - } -finish: - if (pmu_state == idle) { - if (adb_int_pending) { - pmu_state = intack; - send_byte(PMU_INT_ACK); - adb_int_pending = 0; - } else if (current_req) { - pmu_start(); - } - } - -#if 0 - printk("pmu_interrupt: exit state %d acr %02X, b %02X data_index %d/%d adb_int_pending %d\n", - pmu_state, (uint) via1[ACR], (uint) via2[B], data_index, data_len, adb_int_pending); -#endif - return IRQ_HANDLED; -} - -static void -pmu_done(struct adb_request *req) -{ - req->complete = 1; - if (req->done) - (*req->done)(req); -} - -/* Interrupt data could be the result data from an ADB cmd */ -static void -pmu_handle_data(unsigned char *data, int len) -{ - static int show_pmu_ints = 1; - - asleep = 0; - if (len < 1) { - adb_int_pending = 0; - return; - } - if (data[0] & PMU_INT_ADB) { - if ((data[0] & PMU_INT_ADB_AUTO) == 0) { - struct adb_request *req = req_awaiting_reply; - if (req == 0) { - printk(KERN_ERR "PMU: extra ADB reply\n"); - return; - } - req_awaiting_reply = NULL; - if (len <= 2) - req->reply_len = 0; - else { - memcpy(req->reply, data + 1, len - 1); - req->reply_len = len - 1; - } - pmu_done(req); - } else { - adb_input(data+1, len-1, 1); - } - } else { - if (data[0] == 0x08 && len == 3) { - /* sound/brightness buttons pressed */ - pmu_set_brightness(data[1] >> 3); - set_volume(data[2]); - } else if (show_pmu_ints - && !(data[0] == PMU_INT_TICK && len == 1)) { - int i; - printk(KERN_DEBUG "pmu intr"); - for (i = 0; i < len; ++i) - printk(" %.2x", data[i]); - printk("\n"); - } - } -} - -static int backlight_level = -1; -static int backlight_enabled = 0; - -#define LEVEL_TO_BRIGHT(lev) ((lev) < 1? 0x7f: 0x4a - ((lev) << 1)) - -static void -pmu_enable_backlight(int on) -{ - struct adb_request req; - - if (on) { - /* first call: get current backlight value */ - if (backlight_level < 0) { - switch(pmu_kind) { - case PMU_68K_V1: - case PMU_68K_V2: - pmu_request(&req, NULL, 3, PMU_READ_NVRAM, 0x14, 0xe); - while (!req.complete) - pmu_poll(); - printk(KERN_DEBUG "pmu: nvram returned bright: %d\n", (int)req.reply[1]); - backlight_level = req.reply[1]; - break; - default: - backlight_enabled = 0; - return; - } - } - pmu_request(&req, NULL, 2, PMU_BACKLIGHT_BRIGHT, - LEVEL_TO_BRIGHT(backlight_level)); - while (!req.complete) - pmu_poll(); - } - pmu_request(&req, NULL, 2, PMU_POWER_CTRL, - PMU_POW_BACKLIGHT | (on ? PMU_POW_ON : PMU_POW_OFF)); - while (!req.complete) - pmu_poll(); - backlight_enabled = on; -} - -static void -pmu_set_brightness(int level) -{ - int bright; - - backlight_level = level; - bright = LEVEL_TO_BRIGHT(level); - if (!backlight_enabled) - return; - if (bright_req_1.complete) - pmu_request(&bright_req_1, NULL, 2, PMU_BACKLIGHT_BRIGHT, - bright); - if (bright_req_2.complete) - pmu_request(&bright_req_2, NULL, 2, PMU_POWER_CTRL, - PMU_POW_BACKLIGHT | (bright < 0x7f ? PMU_POW_ON : PMU_POW_OFF)); -} - -void -pmu_enable_irled(int on) -{ - struct adb_request req; - - pmu_request(&req, NULL, 2, PMU_POWER_CTRL, PMU_POW_IRLED | - (on ? PMU_POW_ON : PMU_POW_OFF)); - while (!req.complete) - pmu_poll(); -} - -static void -set_volume(int level) -{ -} - -int -pmu_present(void) -{ - return (pmu_kind != PMU_UNKNOWN); -} diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 0d7212410e21..69dddeab124c 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -363,7 +363,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->version = cpu_to_le32(cmd->version); memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); - disk_super->policy_hint_size = 0; + disk_super->policy_hint_size = cpu_to_le32(0); __copy_sm_root(cmd, disk_super); @@ -701,6 +701,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); + disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size); disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); @@ -1322,6 +1323,7 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd, dm_oblock_t oblock; unsigned flags; + bool dirty = true; dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); memcpy(&mapping, mapping_value_le, sizeof(mapping)); @@ -1332,8 +1334,10 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd, dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); memcpy(&hint, hint_value_le, sizeof(hint)); } + if (cmd->clean_when_opened) + dirty = flags & M_DIRTY; - r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY, + r = fn(context, oblock, to_cblock(cb), dirty, le32_to_cpu(hint), hints_valid); if (r) { DMERR("policy couldn't load cache block %llu", @@ -1361,7 +1365,7 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd, dm_oblock_t oblock; unsigned flags; - bool dirty; + bool dirty = true; dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); memcpy(&mapping, mapping_value_le, sizeof(mapping)); @@ -1372,8 +1376,9 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd, dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); memcpy(&hint, hint_value_le, sizeof(hint)); } + if (cmd->clean_when_opened) + dirty = dm_bitset_cursor_get_value(dirty_cursor); - dirty = dm_bitset_cursor_get_value(dirty_cursor); r = fn(context, oblock, to_cblock(cb), dirty, le32_to_cpu(hint), hints_valid); if (r) { diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index ce14a3d1f609..a53413371725 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1188,9 +1188,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) queue_continuation(mg->cache->wq, &mg->k); } -static int copy(struct dm_cache_migration *mg, bool promote) +static void copy(struct dm_cache_migration *mg, bool promote) { - int r; struct dm_io_region o_region, c_region; struct cache *cache = mg->cache; @@ -1203,11 +1202,9 @@ static int copy(struct dm_cache_migration *mg, bool promote) c_region.count = cache->sectors_per_block; if (promote) - r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k); + dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k); else - r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k); - - return r; + dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k); } static void bio_drop_shared_lock(struct cache *cache, struct bio *bio) @@ -1449,12 +1446,7 @@ static void mg_full_copy(struct work_struct *ws) } init_continuation(&mg->k, mg_upgrade_lock); - - if (copy(mg, is_policy_promote)) { - DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); - mg->k.input = BLK_STS_IOERR; - mg_complete(mg, false); - } + copy(mg, is_policy_promote); } static void mg_copy(struct work_struct *ws) @@ -2250,7 +2242,7 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, {0, 2, "Invalid number of cache feature arguments"}, }; - int r; + int r, mode_ctr = 0; unsigned argc; const char *arg; struct cache_features *cf = &ca->features; @@ -2264,14 +2256,20 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, while (argc--) { arg = dm_shift_arg(as); - if (!strcasecmp(arg, "writeback")) + if (!strcasecmp(arg, "writeback")) { cf->io_mode = CM_IO_WRITEBACK; + mode_ctr++; + } - else if (!strcasecmp(arg, "writethrough")) + else if (!strcasecmp(arg, "writethrough")) { cf->io_mode = CM_IO_WRITETHROUGH; + mode_ctr++; + } - else if (!strcasecmp(arg, "passthrough")) + else if (!strcasecmp(arg, "passthrough")) { cf->io_mode = CM_IO_PASSTHROUGH; + mode_ctr++; + } else if (!strcasecmp(arg, "metadata2")) cf->metadata_version = 2; @@ -2282,6 +2280,11 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, } } + if (mode_ctr > 1) { + *error = "Duplicate cache io_mode features requested"; + return -EINVAL; + } + return 0; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b61b069c33af..f266c81f396f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -99,7 +99,7 @@ struct crypt_iv_operations { }; struct iv_essiv_private { - struct crypto_ahash *hash_tfm; + struct crypto_shash *hash_tfm; u8 *salt; }; @@ -144,7 +144,7 @@ struct crypt_config { struct workqueue_struct *io_queue; struct workqueue_struct *crypt_queue; - wait_queue_head_t write_thread_wait; + spinlock_t write_thread_lock; struct task_struct *write_thread; struct rb_root write_tree; @@ -327,25 +327,22 @@ static int crypt_iv_plain64be_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_essiv_init(struct crypt_config *cc) { struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - AHASH_REQUEST_ON_STACK(req, essiv->hash_tfm); - struct scatterlist sg; + SHASH_DESC_ON_STACK(desc, essiv->hash_tfm); struct crypto_cipher *essiv_tfm; int err; - sg_init_one(&sg, cc->key, cc->key_size); - ahash_request_set_tfm(req, essiv->hash_tfm); - ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); - ahash_request_set_crypt(req, &sg, essiv->salt, cc->key_size); + desc->tfm = essiv->hash_tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - err = crypto_ahash_digest(req); - ahash_request_zero(req); + err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt); + shash_desc_zero(desc); if (err) return err; essiv_tfm = cc->iv_private; err = crypto_cipher_setkey(essiv_tfm, essiv->salt, - crypto_ahash_digestsize(essiv->hash_tfm)); + crypto_shash_digestsize(essiv->hash_tfm)); if (err) return err; @@ -356,7 +353,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) static int crypt_iv_essiv_wipe(struct crypt_config *cc) { struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - unsigned salt_size = crypto_ahash_digestsize(essiv->hash_tfm); + unsigned salt_size = crypto_shash_digestsize(essiv->hash_tfm); struct crypto_cipher *essiv_tfm; int r, err = 0; @@ -408,7 +405,7 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc) struct crypto_cipher *essiv_tfm; struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - crypto_free_ahash(essiv->hash_tfm); + crypto_free_shash(essiv->hash_tfm); essiv->hash_tfm = NULL; kzfree(essiv->salt); @@ -426,7 +423,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { struct crypto_cipher *essiv_tfm = NULL; - struct crypto_ahash *hash_tfm = NULL; + struct crypto_shash *hash_tfm = NULL; u8 *salt = NULL; int err; @@ -436,14 +433,14 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, } /* Allocate hash algorithm */ - hash_tfm = crypto_alloc_ahash(opts, 0, CRYPTO_ALG_ASYNC); + hash_tfm = crypto_alloc_shash(opts, 0, 0); if (IS_ERR(hash_tfm)) { ti->error = "Error initializing ESSIV hash"; err = PTR_ERR(hash_tfm); goto bad; } - salt = kzalloc(crypto_ahash_digestsize(hash_tfm), GFP_KERNEL); + salt = kzalloc(crypto_shash_digestsize(hash_tfm), GFP_KERNEL); if (!salt) { ti->error = "Error kmallocing salt storage in ESSIV"; err = -ENOMEM; @@ -454,7 +451,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, cc->iv_gen_private.essiv.hash_tfm = hash_tfm; essiv_tfm = alloc_essiv_cipher(cc, ti, salt, - crypto_ahash_digestsize(hash_tfm)); + crypto_shash_digestsize(hash_tfm)); if (IS_ERR(essiv_tfm)) { crypt_iv_essiv_dtr(cc); return PTR_ERR(essiv_tfm); @@ -465,7 +462,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, bad: if (hash_tfm && !IS_ERR(hash_tfm)) - crypto_free_ahash(hash_tfm); + crypto_free_shash(hash_tfm); kfree(salt); return err; } @@ -1620,36 +1617,31 @@ static int dmcrypt_write(void *data) struct rb_root write_tree; struct blk_plug plug; - DECLARE_WAITQUEUE(wait, current); - - spin_lock_irq(&cc->write_thread_wait.lock); + spin_lock_irq(&cc->write_thread_lock); continue_locked: if (!RB_EMPTY_ROOT(&cc->write_tree)) goto pop_from_list; set_current_state(TASK_INTERRUPTIBLE); - __add_wait_queue(&cc->write_thread_wait, &wait); - spin_unlock_irq(&cc->write_thread_wait.lock); + spin_unlock_irq(&cc->write_thread_lock); if (unlikely(kthread_should_stop())) { set_current_state(TASK_RUNNING); - remove_wait_queue(&cc->write_thread_wait, &wait); break; } schedule(); set_current_state(TASK_RUNNING); - spin_lock_irq(&cc->write_thread_wait.lock); - __remove_wait_queue(&cc->write_thread_wait, &wait); + spin_lock_irq(&cc->write_thread_lock); goto continue_locked; pop_from_list: write_tree = cc->write_tree; cc->write_tree = RB_ROOT; - spin_unlock_irq(&cc->write_thread_wait.lock); + spin_unlock_irq(&cc->write_thread_lock); BUG_ON(rb_parent(write_tree.rb_node)); @@ -1693,7 +1685,9 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) return; } - spin_lock_irqsave(&cc->write_thread_wait.lock, flags); + spin_lock_irqsave(&cc->write_thread_lock, flags); + if (RB_EMPTY_ROOT(&cc->write_tree)) + wake_up_process(cc->write_thread); rbp = &cc->write_tree.rb_node; parent = NULL; sector = io->sector; @@ -1706,9 +1700,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) } rb_link_node(&io->rb_node, parent, rbp); rb_insert_color(&io->rb_node, &cc->write_tree); - - wake_up_locked(&cc->write_thread_wait); - spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); + spin_unlock_irqrestore(&cc->write_thread_lock, flags); } static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) @@ -2831,7 +2823,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - init_waitqueue_head(&cc->write_thread_wait); + spin_lock_init(&cc->write_thread_lock); cc->write_tree = RB_ROOT; cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); @@ -3069,11 +3061,11 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) */ limits->max_segment_size = PAGE_SIZE; - if (cc->sector_size != (1 << SECTOR_SHIFT)) { - limits->logical_block_size = cc->sector_size; - limits->physical_block_size = cc->sector_size; - blk_limits_io_min(limits, cc->sector_size); - } + limits->logical_block_size = + max_t(unsigned short, limits->logical_block_size, cc->sector_size); + limits->physical_block_size = + max_t(unsigned, limits->physical_block_size, cc->sector_size); + limits->io_min = max_t(unsigned, limits->io_min, cc->sector_size); } static struct target_type crypt_target = { diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 1783d80c9cad..2fb7bb4304ad 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -17,6 +17,13 @@ #define DM_MSG_PREFIX "delay" +struct delay_class { + struct dm_dev *dev; + sector_t start; + unsigned delay; + unsigned ops; +}; + struct delay_c { struct timer_list delay_timer; struct mutex timer_lock; @@ -25,19 +32,16 @@ struct delay_c { struct list_head delayed_bios; atomic_t may_delay; - struct dm_dev *dev_read; - sector_t start_read; - unsigned read_delay; - unsigned reads; + struct delay_class read; + struct delay_class write; + struct delay_class flush; - struct dm_dev *dev_write; - sector_t start_write; - unsigned write_delay; - unsigned writes; + int argc; }; struct dm_delay_info { struct delay_c *context; + struct delay_class *class; struct list_head list; unsigned long expires; }; @@ -77,7 +81,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) { struct dm_delay_info *delayed, *next; unsigned long next_expires = 0; - int start_timer = 0; + unsigned long start_timer = 0; struct bio_list flush_bios = { }; mutex_lock(&delayed_bios_lock); @@ -87,10 +91,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) sizeof(struct dm_delay_info)); list_del(&delayed->list); bio_list_add(&flush_bios, bio); - if ((bio_data_dir(bio) == WRITE)) - delayed->context->writes--; - else - delayed->context->reads--; + delayed->class->ops--; continue; } @@ -100,7 +101,6 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) } else next_expires = min(next_expires, delayed->expires); } - mutex_unlock(&delayed_bios_lock); if (start_timer) @@ -117,6 +117,50 @@ static void flush_expired_bios(struct work_struct *work) flush_bios(flush_delayed_bios(dc, 0)); } +static void delay_dtr(struct dm_target *ti) +{ + struct delay_c *dc = ti->private; + + destroy_workqueue(dc->kdelayd_wq); + + if (dc->read.dev) + dm_put_device(ti, dc->read.dev); + if (dc->write.dev) + dm_put_device(ti, dc->write.dev); + if (dc->flush.dev) + dm_put_device(ti, dc->flush.dev); + + mutex_destroy(&dc->timer_lock); + + kfree(dc); +} + +static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv) +{ + int ret; + unsigned long long tmpll; + char dummy; + + if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { + ti->error = "Invalid device sector"; + return -EINVAL; + } + c->start = tmpll; + + if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) { + ti->error = "Invalid delay"; + return -EINVAL; + } + + ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev); + if (ret) { + ti->error = "Device lookup failed"; + return ret; + } + + return 0; +} + /* * Mapping parameters: * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] @@ -128,134 +172,89 @@ static void flush_expired_bios(struct work_struct *work) static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct delay_c *dc; - unsigned long long tmpll; - char dummy; int ret; - if (argc != 3 && argc != 6) { - ti->error = "Requires exactly 3 or 6 arguments"; + if (argc != 3 && argc != 6 && argc != 9) { + ti->error = "Requires exactly 3, 6 or 9 arguments"; return -EINVAL; } - dc = kmalloc(sizeof(*dc), GFP_KERNEL); + dc = kzalloc(sizeof(*dc), GFP_KERNEL); if (!dc) { ti->error = "Cannot allocate context"; return -ENOMEM; } - dc->reads = dc->writes = 0; + ti->private = dc; + timer_setup(&dc->delay_timer, handle_delayed_timer, 0); + INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); + INIT_LIST_HEAD(&dc->delayed_bios); + mutex_init(&dc->timer_lock); + atomic_set(&dc->may_delay, 1); + dc->argc = argc; - ret = -EINVAL; - if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid device sector"; + ret = delay_class_ctr(ti, &dc->read, argv); + if (ret) goto bad; - } - dc->start_read = tmpll; - if (sscanf(argv[2], "%u%c", &dc->read_delay, &dummy) != 1) { - ti->error = "Invalid delay"; - goto bad; + if (argc == 3) { + ret = delay_class_ctr(ti, &dc->write, argv); + if (ret) + goto bad; + ret = delay_class_ctr(ti, &dc->flush, argv); + if (ret) + goto bad; + goto out; } - ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), - &dc->dev_read); - if (ret) { - ti->error = "Device lookup failed"; + ret = delay_class_ctr(ti, &dc->write, argv + 3); + if (ret) goto bad; - } - - ret = -EINVAL; - dc->dev_write = NULL; - if (argc == 3) + if (argc == 6) { + ret = delay_class_ctr(ti, &dc->flush, argv + 3); + if (ret) + goto bad; goto out; - - if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid write device sector"; - goto bad_dev_read; } - dc->start_write = tmpll; - if (sscanf(argv[5], "%u%c", &dc->write_delay, &dummy) != 1) { - ti->error = "Invalid write delay"; - goto bad_dev_read; - } - - ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), - &dc->dev_write); - if (ret) { - ti->error = "Write device lookup failed"; - goto bad_dev_read; - } + ret = delay_class_ctr(ti, &dc->flush, argv + 6); + if (ret) + goto bad; out: - ret = -EINVAL; dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); if (!dc->kdelayd_wq) { + ret = -EINVAL; DMERR("Couldn't start kdelayd"); - goto bad_queue; + goto bad; } - timer_setup(&dc->delay_timer, handle_delayed_timer, 0); - - INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); - INIT_LIST_HEAD(&dc->delayed_bios); - mutex_init(&dc->timer_lock); - atomic_set(&dc->may_delay, 1); - ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->per_io_data_size = sizeof(struct dm_delay_info); - ti->private = dc; return 0; -bad_queue: - if (dc->dev_write) - dm_put_device(ti, dc->dev_write); -bad_dev_read: - dm_put_device(ti, dc->dev_read); bad: - kfree(dc); + delay_dtr(ti); return ret; } -static void delay_dtr(struct dm_target *ti) -{ - struct delay_c *dc = ti->private; - - destroy_workqueue(dc->kdelayd_wq); - - dm_put_device(ti, dc->dev_read); - - if (dc->dev_write) - dm_put_device(ti, dc->dev_write); - - mutex_destroy(&dc->timer_lock); - - kfree(dc); -} - -static int delay_bio(struct delay_c *dc, int delay, struct bio *bio) +static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) { struct dm_delay_info *delayed; unsigned long expires = 0; - if (!delay || !atomic_read(&dc->may_delay)) + if (!c->delay || !atomic_read(&dc->may_delay)) return DM_MAPIO_REMAPPED; delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); delayed->context = dc; - delayed->expires = expires = jiffies + msecs_to_jiffies(delay); + delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); mutex_lock(&delayed_bios_lock); - - if (bio_data_dir(bio) == WRITE) - dc->writes++; - else - dc->reads++; - + c->ops++; list_add_tail(&delayed->list, &dc->delayed_bios); - mutex_unlock(&delayed_bios_lock); queue_timeout(dc, expires); @@ -282,23 +281,28 @@ static void delay_resume(struct dm_target *ti) static int delay_map(struct dm_target *ti, struct bio *bio) { struct delay_c *dc = ti->private; - - if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { - bio_set_dev(bio, dc->dev_write->bdev); - if (bio_sectors(bio)) - bio->bi_iter.bi_sector = dc->start_write + - dm_target_offset(ti, bio->bi_iter.bi_sector); - - return delay_bio(dc, dc->write_delay, bio); + struct delay_class *c; + struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); + + if (bio_data_dir(bio) == WRITE) { + if (unlikely(bio->bi_opf & REQ_PREFLUSH)) + c = &dc->flush; + else + c = &dc->write; + } else { + c = &dc->read; } + delayed->class = c; + bio_set_dev(bio, c->dev->bdev); + if (bio_sectors(bio)) + bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector); - bio_set_dev(bio, dc->dev_read->bdev); - bio->bi_iter.bi_sector = dc->start_read + - dm_target_offset(ti, bio->bi_iter.bi_sector); - - return delay_bio(dc, dc->read_delay, bio); + return delay_bio(dc, c, bio); } +#define DMEMIT_DELAY_CLASS(c) \ + DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay) + static void delay_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { @@ -307,17 +311,19 @@ static void delay_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - DMEMIT("%u %u", dc->reads, dc->writes); + DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops); break; case STATUSTYPE_TABLE: - DMEMIT("%s %llu %u", dc->dev_read->name, - (unsigned long long) dc->start_read, - dc->read_delay); - if (dc->dev_write) - DMEMIT(" %s %llu %u", dc->dev_write->name, - (unsigned long long) dc->start_write, - dc->write_delay); + DMEMIT_DELAY_CLASS(&dc->read); + if (dc->argc >= 6) { + DMEMIT(" "); + DMEMIT_DELAY_CLASS(&dc->write); + } + if (dc->argc >= 9) { + DMEMIT(" "); + DMEMIT_DELAY_CLASS(&dc->flush); + } break; } } @@ -328,12 +334,15 @@ static int delay_iterate_devices(struct dm_target *ti, struct delay_c *dc = ti->private; int ret = 0; - ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data); + ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data); + if (ret) + goto out; + ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data); + if (ret) + goto out; + ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data); if (ret) goto out; - - if (dc->dev_write) - ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data); out: return ret; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 86438b2f10dd..378878599466 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -31,6 +31,8 @@ #define MIN_LOG2_INTERLEAVE_SECTORS 3 #define MAX_LOG2_INTERLEAVE_SECTORS 31 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 +#define RECALC_SECTORS 8192 +#define RECALC_WRITE_SUPER 16 /* * Warning - DEBUG_PRINT prints security-sensitive data to the log, @@ -44,7 +46,8 @@ */ #define SB_MAGIC "integrt" -#define SB_VERSION 1 +#define SB_VERSION_1 1 +#define SB_VERSION_2 2 #define SB_SECTORS 8 #define MAX_SECTORS_PER_BLOCK 8 @@ -57,9 +60,12 @@ struct superblock { __u64 provided_data_sectors; /* userspace uses this value */ __u32 flags; __u8 log2_sectors_per_block; + __u8 pad[3]; + __u64 recalc_sector; }; #define SB_FLAG_HAVE_JOURNAL_MAC 0x1 +#define SB_FLAG_RECALCULATING 0x2 #define JOURNAL_ENTRY_ROUNDUP 8 @@ -139,6 +145,7 @@ struct alg_spec { struct dm_integrity_c { struct dm_dev *dev; + struct dm_dev *meta_dev; unsigned tag_size; __s8 log2_tag_size; sector_t start; @@ -170,7 +177,8 @@ struct dm_integrity_c { unsigned short journal_section_sectors; unsigned journal_sections; unsigned journal_entries; - sector_t device_sectors; + sector_t data_device_sectors; + sector_t meta_device_sectors; unsigned initial_sectors; unsigned metadata_run; __s8 log2_metadata_run; @@ -178,7 +186,7 @@ struct dm_integrity_c { __u8 sectors_per_block; unsigned char mode; - bool suspending; + int suspending; int failed; @@ -186,6 +194,7 @@ struct dm_integrity_c { /* these variables are locked with endio_wait.lock */ struct rb_root in_progress; + struct list_head wait_list; wait_queue_head_t endio_wait; struct workqueue_struct *wait_wq; @@ -210,6 +219,11 @@ struct dm_integrity_c { struct workqueue_struct *writer_wq; struct work_struct writer_work; + struct workqueue_struct *recalc_wq; + struct work_struct recalc_work; + u8 *recalc_buffer; + u8 *recalc_tags; + struct bio_list flush_bio_list; unsigned long autocommit_jiffies; @@ -233,7 +247,14 @@ struct dm_integrity_c { struct dm_integrity_range { sector_t logical_sector; unsigned n_sectors; - struct rb_node node; + bool waiting; + union { + struct rb_node node; + struct { + struct task_struct *task; + struct list_head wait_entry; + }; + }; }; struct dm_integrity_io { @@ -337,10 +358,14 @@ static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i, static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, sector_t *area, sector_t *offset) { - __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; - - *area = data_sector >> log2_interleave_sectors; - *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); + if (!ic->meta_dev) { + __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; + *area = data_sector >> log2_interleave_sectors; + *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); + } else { + *area = 0; + *offset = data_sector; + } } #define sector_to_block(ic, n) \ @@ -379,6 +404,9 @@ static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector { sector_t result; + if (ic->meta_dev) + return offset; + result = area << ic->sb->log2_interleave_sectors; if (likely(ic->log2_metadata_run >= 0)) result += (area + 1) << ic->log2_metadata_run; @@ -386,6 +414,8 @@ static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector result += (area + 1) * ic->metadata_run; result += (sector_t)ic->initial_sectors + offset; + result += ic->start; + return result; } @@ -395,6 +425,14 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr) *sec_ptr -= ic->journal_sections; } +static void sb_set_version(struct dm_integrity_c *ic) +{ + if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) + ic->sb->version = SB_VERSION_2; + else + ic->sb->version = SB_VERSION_1; +} + static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) { struct dm_io_request io_req; @@ -406,7 +444,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) io_req.mem.ptr.addr = ic->sb; io_req.notify.fn = NULL; io_req.client = ic->io; - io_loc.bdev = ic->dev->bdev; + io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev; io_loc.sector = ic->start; io_loc.count = SB_SECTORS; @@ -753,7 +791,7 @@ static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned io_req.notify.fn = NULL; } io_req.client = ic->io; - io_loc.bdev = ic->dev->bdev; + io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev; io_loc.sector = ic->start + SB_SECTORS + sector; io_loc.count = n_sectors; @@ -857,7 +895,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig io_req.notify.context = data; io_req.client = ic->io; io_loc.bdev = ic->dev->bdev; - io_loc.sector = ic->start + target; + io_loc.sector = target; io_loc.count = n_sectors; r = dm_io(&io_req, 1, &io_loc, NULL); @@ -867,13 +905,27 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig } } -static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) +static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) +{ + return range1->logical_sector < range2->logical_sector + range2->n_sectors && + range2->logical_sector + range2->n_sectors > range2->logical_sector; +} + +static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) { struct rb_node **n = &ic->in_progress.rb_node; struct rb_node *parent; BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1)); + if (likely(check_waiting)) { + struct dm_integrity_range *range; + list_for_each_entry(range, &ic->wait_list, wait_entry) { + if (unlikely(ranges_overlap(range, new_range))) + return false; + } + } + parent = NULL; while (*n) { @@ -898,7 +950,22 @@ static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range * static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range) { rb_erase(&range->node, &ic->in_progress); - wake_up_locked(&ic->endio_wait); + while (unlikely(!list_empty(&ic->wait_list))) { + struct dm_integrity_range *last_range = + list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); + struct task_struct *last_range_task; + if (!ranges_overlap(range, last_range)) + break; + last_range_task = last_range->task; + list_del(&last_range->wait_entry); + if (!add_new_range(ic, last_range, false)) { + last_range->task = last_range_task; + list_add(&last_range->wait_entry, &ic->wait_list); + break; + } + last_range->waiting = false; + wake_up_process(last_range_task); + } } static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range) @@ -910,6 +977,19 @@ static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *r spin_unlock_irqrestore(&ic->endio_wait.lock, flags); } +static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) +{ + new_range->waiting = true; + list_add_tail(&new_range->wait_entry, &ic->wait_list); + new_range->task = current; + do { + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&ic->endio_wait.lock); + io_schedule(); + spin_lock_irq(&ic->endio_wait.lock); + } while (unlikely(new_range->waiting)); +} + static void init_journal_node(struct journal_node *node) { RB_CLEAR_NODE(&node->node); @@ -1599,8 +1679,12 @@ retry: dio->range.n_sectors = min(dio->range.n_sectors, ic->free_sectors << ic->sb->log2_sectors_per_block); - if (unlikely(!dio->range.n_sectors)) - goto sleep; + if (unlikely(!dio->range.n_sectors)) { + if (from_map) + goto offload_to_thread; + sleep_on_endio_wait(ic); + goto retry; + } range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; ic->free_sectors -= range_sectors; journal_section = ic->free_section; @@ -1654,22 +1738,20 @@ retry: } } } - if (unlikely(!add_new_range(ic, &dio->range))) { + if (unlikely(!add_new_range(ic, &dio->range, true))) { /* * We must not sleep in the request routine because it could * stall bios on current->bio_list. * So, we offload the bio to a workqueue if we have to sleep. */ -sleep: if (from_map) { +offload_to_thread: spin_unlock_irq(&ic->endio_wait.lock); INIT_WORK(&dio->work, integrity_bio_wait); queue_work(ic->wait_wq, &dio->work); return; - } else { - sleep_on_endio_wait(ic); - goto retry; } + wait_and_add_new_range(ic, &dio->range); } spin_unlock_irq(&ic->endio_wait.lock); @@ -1701,14 +1783,18 @@ sleep: bio->bi_end_io = integrity_end_io; bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; - bio->bi_iter.bi_sector += ic->start; generic_make_request(bio); if (need_sync_io) { wait_for_completion_io(&read_comp); + if (unlikely(ic->recalc_wq != NULL) && + ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && + dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector)) + goto skip_check; if (likely(!bio->bi_status)) integrity_metadata(&dio->work); else +skip_check: dec_in_flight(dio); } else { @@ -1892,8 +1978,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; spin_lock_irq(&ic->endio_wait.lock); - while (unlikely(!add_new_range(ic, &io->range))) - sleep_on_endio_wait(ic); + if (unlikely(!add_new_range(ic, &io->range, true))) + wait_and_add_new_range(ic, &io->range); if (likely(!from_replay)) { struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries]; @@ -1981,7 +2067,7 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; /* the following test is not needed, but it tests the replay code */ - if (READ_ONCE(ic->suspending)) + if (READ_ONCE(ic->suspending) && !ic->meta_dev) return; spin_lock_irq(&ic->endio_wait.lock); @@ -2008,6 +2094,108 @@ static void integrity_writer(struct work_struct *w) spin_unlock_irq(&ic->endio_wait.lock); } +static void recalc_write_super(struct dm_integrity_c *ic) +{ + int r; + + dm_integrity_flush_buffers(ic); + if (dm_integrity_failed(ic)) + return; + + sb_set_version(ic); + r = sync_rw_sb(ic, REQ_OP_WRITE, 0); + if (unlikely(r)) + dm_integrity_io_error(ic, "writing superblock", r); +} + +static void integrity_recalc(struct work_struct *w) +{ + struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); + struct dm_integrity_range range; + struct dm_io_request io_req; + struct dm_io_region io_loc; + sector_t area, offset; + sector_t metadata_block; + unsigned metadata_offset; + __u8 *t; + unsigned i; + int r; + unsigned super_counter = 0; + + spin_lock_irq(&ic->endio_wait.lock); + +next_chunk: + + if (unlikely(READ_ONCE(ic->suspending))) + goto unlock_ret; + + range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); + if (unlikely(range.logical_sector >= ic->provided_data_sectors)) + goto unlock_ret; + + get_area_and_offset(ic, range.logical_sector, &area, &offset); + range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector); + if (!ic->meta_dev) + range.n_sectors = min(range.n_sectors, (1U << ic->sb->log2_interleave_sectors) - (unsigned)offset); + + if (unlikely(!add_new_range(ic, &range, true))) + wait_and_add_new_range(ic, &range); + + spin_unlock_irq(&ic->endio_wait.lock); + + if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { + recalc_write_super(ic); + super_counter = 0; + } + + if (unlikely(dm_integrity_failed(ic))) + goto err; + + io_req.bi_op = REQ_OP_READ; + io_req.bi_op_flags = 0; + io_req.mem.type = DM_IO_VMA; + io_req.mem.ptr.addr = ic->recalc_buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = get_data_sector(ic, area, offset); + io_loc.count = range.n_sectors; + + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dm_integrity_io_error(ic, "reading data", r); + goto err; + } + + t = ic->recalc_tags; + for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) { + integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); + t += ic->tag_size; + } + + metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); + + r = dm_integrity_rw_tag(ic, ic->recalc_tags, &metadata_block, &metadata_offset, t - ic->recalc_tags, TAG_WRITE); + if (unlikely(r)) { + dm_integrity_io_error(ic, "writing tags", r); + goto err; + } + + spin_lock_irq(&ic->endio_wait.lock); + remove_range_unlocked(ic, &range); + ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); + goto next_chunk; + +err: + remove_range(ic, &range); + return; + +unlock_ret: + spin_unlock_irq(&ic->endio_wait.lock); + + recalc_write_super(ic); +} + static void init_journal(struct dm_integrity_c *ic, unsigned start_section, unsigned n_sections, unsigned char commit_seq) { @@ -2210,17 +2398,22 @@ static void dm_integrity_postsuspend(struct dm_target *ti) del_timer_sync(&ic->autocommit_timer); - ic->suspending = true; + WRITE_ONCE(ic->suspending, 1); + + if (ic->recalc_wq) + drain_workqueue(ic->recalc_wq); queue_work(ic->commit_wq, &ic->commit_work); drain_workqueue(ic->commit_wq); if (ic->mode == 'J') { + if (ic->meta_dev) + queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); dm_integrity_flush_buffers(ic); } - ic->suspending = false; + WRITE_ONCE(ic->suspending, 0); BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); @@ -2232,6 +2425,16 @@ static void dm_integrity_resume(struct dm_target *ti) struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; replay_journal(ic); + + if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { + __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector); + if (recalc_pos < ic->provided_data_sectors) { + queue_work(ic->recalc_wq, &ic->recalc_work); + } else if (recalc_pos > ic->provided_data_sectors) { + ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors); + recalc_write_super(ic); + } + } } static void dm_integrity_status(struct dm_target *ti, status_type_t type, @@ -2243,7 +2446,13 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - DMEMIT("%llu", (unsigned long long)atomic64_read(&ic->number_of_mismatches)); + DMEMIT("%llu %llu", + (unsigned long long)atomic64_read(&ic->number_of_mismatches), + (unsigned long long)ic->provided_data_sectors); + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) + DMEMIT(" %llu", (unsigned long long)le64_to_cpu(ic->sb->recalc_sector)); + else + DMEMIT(" -"); break; case STATUSTYPE_TABLE: { @@ -2251,19 +2460,25 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, watermark_percentage += ic->journal_entries / 2; do_div(watermark_percentage, ic->journal_entries); arg_count = 5; + arg_count += !!ic->meta_dev; arg_count += ic->sectors_per_block != 1; + arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); arg_count += !!ic->internal_hash_alg.alg_string; arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string; DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start, ic->tag_size, ic->mode, arg_count); + if (ic->meta_dev) + DMEMIT(" meta_device:%s", ic->meta_dev->name); + if (ic->sectors_per_block != 1) + DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) + DMEMIT(" recalculate"); DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); DMEMIT(" commit_time:%u", ic->autocommit_msec); - if (ic->sectors_per_block != 1) - DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); #define EMIT_ALG(a, n) \ do { \ @@ -2286,7 +2501,10 @@ static int dm_integrity_iterate_devices(struct dm_target *ti, { struct dm_integrity_c *ic = ti->private; - return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); + if (!ic->meta_dev) + return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); + else + return fn(ti, ic->dev, 0, ti->len, data); } static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2319,26 +2537,38 @@ static void calculate_journal_section_size(struct dm_integrity_c *ic) static int calculate_device_limits(struct dm_integrity_c *ic) { __u64 initial_sectors; - sector_t last_sector, last_area, last_offset; calculate_journal_section_size(ic); initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections; - if (initial_sectors + METADATA_PADDING_SECTORS >= ic->device_sectors || initial_sectors > UINT_MAX) + if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX) return -EINVAL; ic->initial_sectors = initial_sectors; - ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), - (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; - if (!(ic->metadata_run & (ic->metadata_run - 1))) - ic->log2_metadata_run = __ffs(ic->metadata_run); - else - ic->log2_metadata_run = -1; + if (!ic->meta_dev) { + sector_t last_sector, last_area, last_offset; - get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); - last_sector = get_data_sector(ic, last_area, last_offset); + ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), + (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; + if (!(ic->metadata_run & (ic->metadata_run - 1))) + ic->log2_metadata_run = __ffs(ic->metadata_run); + else + ic->log2_metadata_run = -1; - if (ic->start + last_sector < last_sector || ic->start + last_sector >= ic->device_sectors) - return -EINVAL; + get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); + last_sector = get_data_sector(ic, last_area, last_offset); + if (last_sector < ic->start || last_sector >= ic->meta_device_sectors) + return -EINVAL; + } else { + __u64 meta_size = ic->provided_data_sectors * ic->tag_size; + meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1)) + >> (ic->log2_buffer_sectors + SECTOR_SHIFT); + meta_size <<= ic->log2_buffer_sectors; + if (ic->initial_sectors + meta_size < ic->initial_sectors || + ic->initial_sectors + meta_size > ic->meta_device_sectors) + return -EINVAL; + ic->metadata_run = 1; + ic->log2_metadata_run = 0; + } return 0; } @@ -2350,7 +2580,6 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); memcpy(ic->sb->magic, SB_MAGIC, 8); - ic->sb->version = SB_VERSION; ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); if (ic->journal_mac_alg.alg_string) @@ -2360,28 +2589,55 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec journal_sections = journal_sectors / ic->journal_section_sectors; if (!journal_sections) journal_sections = 1; - ic->sb->journal_sections = cpu_to_le32(journal_sections); - if (!interleave_sectors) - interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; - ic->sb->log2_interleave_sectors = __fls(interleave_sectors); - ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); - ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); - - ic->provided_data_sectors = 0; - for (test_bit = fls64(ic->device_sectors) - 1; test_bit >= 3; test_bit--) { - __u64 prev_data_sectors = ic->provided_data_sectors; + if (!ic->meta_dev) { + ic->sb->journal_sections = cpu_to_le32(journal_sections); + if (!interleave_sectors) + interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; + ic->sb->log2_interleave_sectors = __fls(interleave_sectors); + ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); + ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); + + ic->provided_data_sectors = 0; + for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) { + __u64 prev_data_sectors = ic->provided_data_sectors; + + ic->provided_data_sectors |= (sector_t)1 << test_bit; + if (calculate_device_limits(ic)) + ic->provided_data_sectors = prev_data_sectors; + } + if (!ic->provided_data_sectors) + return -EINVAL; + } else { + ic->sb->log2_interleave_sectors = 0; + ic->provided_data_sectors = ic->data_device_sectors; + ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); + +try_smaller_buffer: + ic->sb->journal_sections = cpu_to_le32(0); + for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) { + __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections); + __u32 test_journal_sections = prev_journal_sections | (1U << test_bit); + if (test_journal_sections > journal_sections) + continue; + ic->sb->journal_sections = cpu_to_le32(test_journal_sections); + if (calculate_device_limits(ic)) + ic->sb->journal_sections = cpu_to_le32(prev_journal_sections); - ic->provided_data_sectors |= (sector_t)1 << test_bit; - if (calculate_device_limits(ic)) - ic->provided_data_sectors = prev_data_sectors; + } + if (!le32_to_cpu(ic->sb->journal_sections)) { + if (ic->log2_buffer_sectors > 3) { + ic->log2_buffer_sectors--; + goto try_smaller_buffer; + } + return -EINVAL; + } } - if (!ic->provided_data_sectors) - return -EINVAL; - ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); + sb_set_version(ic); + return 0; } @@ -2828,6 +3084,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) {0, 9, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; + bool recalculate; bool should_write_sb; __u64 threshold; unsigned long long start; @@ -2848,6 +3105,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->per_io_data_size = sizeof(struct dm_integrity_io); ic->in_progress = RB_ROOT; + INIT_LIST_HEAD(&ic->wait_list); init_waitqueue_head(&ic->endio_wait); bio_list_init(&ic->flush_bio_list); init_waitqueue_head(&ic->copy_to_journal_wait); @@ -2883,13 +3141,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - ic->device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; - journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, - ic->device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); + journal_sectors = 0; interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; buffer_sectors = DEFAULT_BUFFER_SECTORS; journal_watermark = DEFAULT_JOURNAL_WATERMARK; sync_msec = DEFAULT_SYNC_MSEC; + recalculate = false; ic->sectors_per_block = 1; as.argc = argc - DIRECT_ARGUMENTS; @@ -2908,7 +3165,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) - journal_sectors = val; + journal_sectors = val ? val : 1; else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) interleave_sectors = val; else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) @@ -2917,7 +3174,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) journal_watermark = val; else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) sync_msec = val; - else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { + else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) { + if (ic->meta_dev) { + dm_put_device(ti, ic->meta_dev); + ic->meta_dev = NULL; + } + r = dm_get_device(ti, strchr(opt_string, ':') + 1, dm_table_get_mode(ti->table), &ic->meta_dev); + if (r) { + ti->error = "Device lookup failed"; + goto bad; + } + } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { if (val < 1 << SECTOR_SHIFT || val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || (val & (val -1))) { @@ -2941,6 +3208,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) "Invalid journal_mac argument"); if (r) goto bad; + } else if (!strcmp(opt_string, "recalculate")) { + recalculate = true; } else { r = -EINVAL; ti->error = "Invalid argument"; @@ -2948,6 +3217,21 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } } + ic->data_device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; + if (!ic->meta_dev) + ic->meta_device_sectors = ic->data_device_sectors; + else + ic->meta_device_sectors = i_size_read(ic->meta_dev->bdev->bd_inode) >> SECTOR_SHIFT; + + if (!journal_sectors) { + journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, + ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); + } + + if (!buffer_sectors) + buffer_sectors = 1; + ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT); + r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error, "Invalid internal hash", "Error setting internal hash key"); if (r) @@ -3062,7 +3346,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) should_write_sb = true; } - if (ic->sb->version != SB_VERSION) { + if (!ic->sb->version || ic->sb->version > SB_VERSION_2) { r = -EINVAL; ti->error = "Unknown version"; goto bad; @@ -3083,11 +3367,19 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } /* make sure that ti->max_io_len doesn't overflow */ - if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || - ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { - r = -EINVAL; - ti->error = "Invalid interleave_sectors in the superblock"; - goto bad; + if (!ic->meta_dev) { + if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || + ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { + r = -EINVAL; + ti->error = "Invalid interleave_sectors in the superblock"; + goto bad; + } + } else { + if (ic->sb->log2_interleave_sectors) { + r = -EINVAL; + ti->error = "Invalid interleave_sectors in the superblock"; + goto bad; + } } ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) { @@ -3101,20 +3393,28 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "Journal mac mismatch"; goto bad; } + +try_smaller_buffer: r = calculate_device_limits(ic); if (r) { + if (ic->meta_dev) { + if (ic->log2_buffer_sectors > 3) { + ic->log2_buffer_sectors--; + goto try_smaller_buffer; + } + } ti->error = "The device is too small"; goto bad; } + if (!ic->meta_dev) + ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run)); + if (ti->len > ic->provided_data_sectors) { r = -EINVAL; ti->error = "Not enough provided sectors for requested mapping size"; goto bad; } - if (!buffer_sectors) - buffer_sectors = 1; - ic->log2_buffer_sectors = min3((int)__fls(buffer_sectors), (int)__ffs(ic->metadata_run), 31 - SECTOR_SHIFT); threshold = (__u64)ic->journal_entries * (100 - journal_watermark); threshold += 50; @@ -3138,8 +3438,40 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) (unsigned long long)ic->provided_data_sectors); DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); - ic->bufio = dm_bufio_client_create(ic->dev->bdev, 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), - 1, 0, NULL, NULL); + if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) { + ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); + ic->sb->recalc_sector = cpu_to_le64(0); + } + + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { + if (!ic->internal_hash) { + r = -EINVAL; + ti->error = "Recalculate is only valid with internal hash"; + goto bad; + } + ic->recalc_wq = alloc_workqueue("dm-intergrity-recalc", WQ_MEM_RECLAIM, 1); + if (!ic->recalc_wq ) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + INIT_WORK(&ic->recalc_work, integrity_recalc); + ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT); + if (!ic->recalc_buffer) { + ti->error = "Cannot allocate buffer for recalculating"; + r = -ENOMEM; + goto bad; + } + ic->recalc_tags = kvmalloc((RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size, GFP_KERNEL); + if (!ic->recalc_tags) { + ti->error = "Cannot allocate tags for recalculating"; + r = -ENOMEM; + goto bad; + } + } + + ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, + 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL); if (IS_ERR(ic->bufio)) { r = PTR_ERR(ic->bufio); ti->error = "Cannot initialize dm-bufio"; @@ -3171,9 +3503,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ic->just_formatted = true; } - r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); - if (r) - goto bad; + if (!ic->meta_dev) { + r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); + if (r) + goto bad; + } if (!ic->internal_hash) dm_integrity_set(ti, ic); @@ -3192,6 +3526,7 @@ static void dm_integrity_dtr(struct dm_target *ti) struct dm_integrity_c *ic = ti->private; BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); + BUG_ON(!list_empty(&ic->wait_list)); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); @@ -3201,6 +3536,12 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->commit_wq); if (ic->writer_wq) destroy_workqueue(ic->writer_wq); + if (ic->recalc_wq) + destroy_workqueue(ic->recalc_wq); + if (ic->recalc_buffer) + vfree(ic->recalc_buffer); + if (ic->recalc_tags) + kvfree(ic->recalc_tags); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); mempool_exit(&ic->journal_io_mempool); @@ -3208,6 +3549,8 @@ static void dm_integrity_dtr(struct dm_target *ti) dm_io_client_destroy(ic->io); if (ic->dev) dm_put_device(ti, ic->dev); + if (ic->meta_dev) + dm_put_device(ti, ic->meta_dev); dm_integrity_free_page_list(ic, ic->journal); dm_integrity_free_page_list(ic, ic->journal_io); dm_integrity_free_page_list(ic, ic->journal_xor); @@ -3248,7 +3591,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 3c7547a3c371..2fc4213e02b5 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -487,6 +487,8 @@ static int run_complete_job(struct kcopyd_job *job) if (atomic_dec_and_test(&kc->nr_jobs)) wake_up(&kc->destroyq); + cond_resched(); + return 0; } @@ -741,9 +743,9 @@ static void split_job(struct kcopyd_job *master_job) } } -int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, - unsigned int num_dests, struct dm_io_region *dests, - unsigned int flags, dm_kcopyd_notify_fn fn, void *context) +void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, + unsigned int num_dests, struct dm_io_region *dests, + unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; int i; @@ -818,16 +820,14 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->progress = 0; split_job(job); } - - return 0; } EXPORT_SYMBOL(dm_kcopyd_copy); -int dm_kcopyd_zero(struct dm_kcopyd_client *kc, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context) +void dm_kcopyd_zero(struct dm_kcopyd_client *kc, + unsigned num_dests, struct dm_io_region *dests, + unsigned flags, dm_kcopyd_notify_fn fn, void *context) { - return dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); + dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); } EXPORT_SYMBOL(dm_kcopyd_zero); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 5903e492bb34..79eab1071ec2 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -326,9 +326,8 @@ static void recovery_complete(int read_err, unsigned long write_err, dm_rh_recovery_end(reg, !(read_err || write_err)); } -static int recover(struct mirror_set *ms, struct dm_region *reg) +static void recover(struct mirror_set *ms, struct dm_region *reg) { - int r; unsigned i; struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; struct mirror *m; @@ -367,10 +366,8 @@ static int recover(struct mirror_set *ms, struct dm_region *reg) if (!errors_handled(ms)) set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); - r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, - flags, recovery_complete, reg); - - return r; + dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, + flags, recovery_complete, reg); } static void reset_ms_flags(struct mirror_set *ms) @@ -388,7 +385,6 @@ static void do_recovery(struct mirror_set *ms) { struct dm_region *reg; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - int r; /* * Start quiescing some regions. @@ -398,11 +394,8 @@ static void do_recovery(struct mirror_set *ms) /* * Copy any already quiesced regions. */ - while ((reg = dm_rh_recovery_start(ms->rh))) { - r = recover(ms, reg); - if (r) - dm_rh_recovery_end(reg, 0); - } + while ((reg = dm_rh_recovery_start(ms->rh))) + recover(ms, reg); /* * Update the in sync flag. diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 97de7a7334d4..ae4b33d10924 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -85,7 +85,7 @@ struct dm_snapshot { * A list of pending exceptions that completed out of order. * Protected by kcopyd single-threaded callback. */ - struct list_head out_of_order_list; + struct rb_root out_of_order_tree; mempool_t pending_pool; @@ -200,7 +200,7 @@ struct dm_snap_pending_exception { /* A sequence number, it is used for in-order completion. */ sector_t exception_sequence; - struct list_head out_of_order_entry; + struct rb_node out_of_order_node; /* * For writing a complete chunk, bypassing the copy. @@ -1173,7 +1173,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) atomic_set(&s->pending_exceptions_count, 0); s->exception_start_sequence = 0; s->exception_complete_sequence = 0; - INIT_LIST_HEAD(&s->out_of_order_list); + s->out_of_order_tree = RB_ROOT; mutex_init(&s->lock); INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); @@ -1539,28 +1539,41 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) pe->copy_error = read_err || write_err; if (pe->exception_sequence == s->exception_complete_sequence) { + struct rb_node *next; + s->exception_complete_sequence++; complete_exception(pe); - while (!list_empty(&s->out_of_order_list)) { - pe = list_entry(s->out_of_order_list.next, - struct dm_snap_pending_exception, out_of_order_entry); + next = rb_first(&s->out_of_order_tree); + while (next) { + pe = rb_entry(next, struct dm_snap_pending_exception, + out_of_order_node); if (pe->exception_sequence != s->exception_complete_sequence) break; + next = rb_next(next); s->exception_complete_sequence++; - list_del(&pe->out_of_order_entry); + rb_erase(&pe->out_of_order_node, &s->out_of_order_tree); complete_exception(pe); + cond_resched(); } } else { - struct list_head *lh; + struct rb_node *parent = NULL; + struct rb_node **p = &s->out_of_order_tree.rb_node; struct dm_snap_pending_exception *pe2; - list_for_each_prev(lh, &s->out_of_order_list) { - pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry); - if (pe2->exception_sequence < pe->exception_sequence) - break; + while (*p) { + pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node); + parent = *p; + + BUG_ON(pe->exception_sequence == pe2->exception_sequence); + if (pe->exception_sequence < pe2->exception_sequence) + p = &((*p)->rb_left); + else + p = &((*p)->rb_right); } - list_add(&pe->out_of_order_entry, lh); + + rb_link_node(&pe->out_of_order_node, parent, p); + rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); } } @@ -1694,8 +1707,6 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!s->valid) return DM_MAPIO_KILL; - /* FIXME: should only take write lock if we need - * to copy an exception */ mutex_lock(&s->lock); if (!s->valid || (unlikely(s->snapshot_overflowed) && diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index b900723bbd0f..7bd60a150f8f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1220,18 +1220,13 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m, sector_t begin, sector_t end) { - int r; struct dm_io_region to; to.bdev = tc->pool_dev->bdev; to.sector = begin; to.count = end - begin; - r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m); - if (r < 0) { - DMERR_LIMIT("dm_kcopyd_zero() failed"); - copy_complete(1, 1, m); - } + dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m); } static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio, @@ -1257,7 +1252,6 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, struct dm_bio_prison_cell *cell, struct bio *bio, sector_t len) { - int r; struct pool *pool = tc->pool; struct dm_thin_new_mapping *m = get_next_mapping(pool); @@ -1296,19 +1290,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, to.sector = data_dest * pool->sectors_per_block; to.count = len; - r = dm_kcopyd_copy(pool->copier, &from, 1, &to, - 0, copy_complete, m); - if (r < 0) { - DMERR_LIMIT("dm_kcopyd_copy() failed"); - copy_complete(1, 1, m); - - /* - * We allow the zero to be issued, to simplify the - * error path. Otherwise we'd need to start - * worrying about decrementing the prepare_actions - * counter. - */ - } + dm_kcopyd_copy(pool->copier, &from, 1, &to, + 0, copy_complete, m); /* * Do we need to zero a tail region? @@ -2520,6 +2503,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) case PM_WRITE: if (old_mode != new_mode) notify_of_pool_mode_change(pool, "write"); + if (old_mode == PM_OUT_OF_DATA_SPACE) + cancel_delayed_work_sync(&pool->no_space_timeout); pool->out_of_data_space = false; pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space; dm_pool_metadata_read_write(pool->pmd); @@ -3890,6 +3875,8 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("- "); + DMEMIT("%llu ", (unsigned long long)calc_metadata_threshold(pt)); + break; case STATUSTYPE_TABLE: @@ -3979,7 +3966,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 19, 0}, + .version = {1, 20, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -4353,7 +4340,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 19, 0}, + .version = {1, 20, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 87107c995cb5..3a28a68f184c 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -457,7 +457,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc) COMPLETION_INITIALIZER_ONSTACK(endio.c), ATOMIC_INIT(1), }; - unsigned bitmap_bits = wc->dirty_bitmap_size * BITS_PER_LONG; + unsigned bitmap_bits = wc->dirty_bitmap_size * 8; unsigned i = 0; while (1) { @@ -2240,6 +2240,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type, DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's', wc->dev->name, wc->ssd_dev->name, wc->block_size); extra_args = 0; + if (wc->start_sector) + extra_args += 2; if (wc->high_wm_percent_set) extra_args += 2; if (wc->low_wm_percent_set) @@ -2254,6 +2256,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type, extra_args++; DMEMIT("%u", extra_args); + if (wc->start_sector) + DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); if (wc->high_wm_percent_set) { x = (uint64_t)wc->freelist_high_watermark * 100; x += wc->n_blocks / 2; @@ -2280,7 +2284,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type, static struct target_type writecache_target = { .name = "writecache", - .version = {1, 1, 0}, + .version = {1, 1, 1}, .module = THIS_MODULE, .ctr = writecache_ctr, .dtr = writecache_dtr, diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index 44a119e12f1a..edf4b95eb075 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -161,10 +161,8 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, /* Copy the valid region */ set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags); - ret = dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags, - dmz_reclaim_kcopy_end, zrc); - if (ret) - return ret; + dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags, + dmz_reclaim_kcopy_end, zrc); /* Wait for copy to complete */ wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY, diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 93397cb05b15..3ce933707828 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -33,11 +33,3 @@ config CXL CAPI adapters are found in POWER8 based systems. If unsure, say N. - -config CXL_BIMODAL - bool "Support for bi-modal CAPI cards" - depends on HOTPLUG_PCI_POWERNV = y && CXL || HOTPLUG_PCI_POWERNV = m && CXL = m - default y - help - Select this option to enable support for bi-modal CAPI cards, such as - the Mellanox CX-4. diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 502d41fc9ea5..5eea61b9584f 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -4,7 +4,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o pci.o trace.o -cxl-y += vphb.o phb.o api.o cxllib.o +cxl-y += vphb.o api.o cxllib.o cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o cxl-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_CXL) += cxl.o diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 8fd5ec4d6042..750470ef2049 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -11,7 +11,6 @@ #include <linux/slab.h> #include <linux/file.h> #include <misc/cxl.h> -#include <linux/msi.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/sched/mm.h> @@ -168,21 +167,6 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } -int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) -{ - if (*ctx == NULL || *afu_irq == 0) { - *afu_irq = 1; - *ctx = cxl_get_context(pdev); - } else { - (*afu_irq)++; - if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) { - *ctx = list_next_entry(*ctx, extra_irq_contexts); - *afu_irq = 1; - } - } - return cxl_find_afu_irq(*ctx, *afu_irq); -} -/* Exported via cxl_base */ int cxl_set_priv(struct cxl_context *ctx, void *priv) { @@ -310,7 +294,6 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, if (task) { ctx->pid = get_task_pid(task, PIDTYPE_PID); kernel = false; - ctx->real_mode = false; /* acquire a reference to the task's mm */ ctx->mm = get_task_mm(current); @@ -374,24 +357,6 @@ void cxl_set_master(struct cxl_context *ctx) } EXPORT_SYMBOL_GPL(cxl_set_master); -int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode) -{ - if (ctx->status == STARTED) { - /* - * We could potentially update the PE and issue an update LLCMD - * to support this, but it doesn't seem to have a good use case - * since it's trivial to just create a second kernel context - * with different translation modes, so until someone convinces - * me otherwise: - */ - return -EBUSY; - } - - ctx->real_mode = real_mode; - return 0; -} -EXPORT_SYMBOL_GPL(cxl_set_translation_mode); - /* wrappers around afu_* file ops which are EXPORTED */ int cxl_fd_open(struct inode *inode, struct file *file) { @@ -573,100 +538,3 @@ ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); } EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); - -int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs) -{ - struct cxl_afu *afu = cxl_pci_to_afu(dev); - if (IS_ERR(afu)) - return -ENODEV; - - if (irqs > afu->adapter->user_irqs) - return -EINVAL; - - /* Limit user_irqs to prevent the user increasing this via sysfs */ - afu->adapter->user_irqs = irqs; - afu->irqs_max = irqs; - - return 0; -} -EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process); - -int cxl_get_max_irqs_per_process(struct pci_dev *dev) -{ - struct cxl_afu *afu = cxl_pci_to_afu(dev); - if (IS_ERR(afu)) - return -ENODEV; - - return afu->irqs_max; -} -EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); - -/* - * This is a special interrupt allocation routine called from the PHB's MSI - * setup function. When capi interrupts are allocated in this manner they must - * still be associated with a running context, but since the MSI APIs have no - * way to specify this we use the default context associated with the device. - * - * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU - * interrupt number, so in order to overcome this their driver informs us of - * the restriction by setting the maximum interrupts per context, and we - * allocate additional contexts as necessary so that we can keep the AFU - * interrupt number within the supported range. - */ -int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - struct cxl_context *ctx, *new_ctx, *default_ctx; - int remaining; - int rc; - - ctx = default_ctx = cxl_get_context(pdev); - if (WARN_ON(!default_ctx)) - return -ENODEV; - - remaining = nvec; - while (remaining > 0) { - rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max)); - if (rc) { - pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev)); - return rc; - } - remaining -= ctx->afu->irqs_max; - - if (ctx != default_ctx && default_ctx->status == STARTED) { - WARN_ON(cxl_start_context(ctx, - be64_to_cpu(default_ctx->elem->common.wed), - NULL)); - } - - if (remaining > 0) { - new_ctx = cxl_dev_context_init(pdev); - if (IS_ERR(new_ctx)) { - pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev)); - return -ENOSPC; - } - list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts); - ctx = new_ctx; - } - } - - return 0; -} -/* Exported via cxl_base */ - -void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct cxl_context *ctx, *pos, *tmp; - - ctx = cxl_get_context(pdev); - if (WARN_ON(!ctx)) - return; - - cxl_free_afu_irqs(ctx); - list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) { - cxl_stop_context(pos); - cxl_free_afu_irqs(pos); - list_del(&pos->extra_irq_contexts); - cxl_release_context(pos); - } -} -/* Exported via cxl_base */ diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index cd54ce6f6230..7557835cdfcd 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -106,89 +106,6 @@ int cxl_update_properties(struct device_node *dn, } EXPORT_SYMBOL_GPL(cxl_update_properties); -/* - * API calls into the driver that may be called from the PHB code and must be - * built in. - */ -bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) -{ - bool ret; - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return false; - - ret = calls->cxl_pci_associate_default_context(dev, afu); - - cxl_calls_put(calls); - - return ret; -} -EXPORT_SYMBOL_GPL(cxl_pci_associate_default_context); - -void cxl_pci_disable_device(struct pci_dev *dev) -{ - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return; - - calls->cxl_pci_disable_device(dev); - - cxl_calls_put(calls); -} -EXPORT_SYMBOL_GPL(cxl_pci_disable_device); - -int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) -{ - int ret; - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return -EBUSY; - - ret = calls->cxl_next_msi_hwirq(pdev, ctx, afu_irq); - - cxl_calls_put(calls); - - return ret; -} -EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); - -int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - int ret; - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return false; - - ret = calls->cxl_cx4_setup_msi_irqs(pdev, nvec, type); - - cxl_calls_put(calls); - - return ret; -} -EXPORT_SYMBOL_GPL(cxl_cx4_setup_msi_irqs); - -void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return; - - calls->cxl_cx4_teardown_msi_irqs(pdev); - - cxl_calls_put(calls); -} -EXPORT_SYMBOL_GPL(cxl_cx4_teardown_msi_irqs); - static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index c6ec872800a2..5fe529b43ebe 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -74,7 +74,6 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) ctx->pending_afu_err = false; INIT_LIST_HEAD(&ctx->irq_names); - INIT_LIST_HEAD(&ctx->extra_irq_contexts); /* * When we have to destroy all contexts in cxl_context_detach_all() we @@ -96,7 +95,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) */ mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe, + i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, ctx->afu->num_procs, GFP_NOWAIT); idr_preload_end(); mutex_unlock(&afu->contexts_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 918d4fb742d1..d1d927ccb589 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -93,11 +93,6 @@ static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170}; -/* XSL registers (Mellanox CX4) */ -static const cxl_p1_reg_t CXL_XSL_Timebase = {0x0100}; -static const cxl_p1_reg_t CXL_XSL_TB_CTLSTAT = {0x0108}; -static const cxl_p1_reg_t CXL_XSL_FEC = {0x0158}; -static const cxl_p1_reg_t CXL_XSL_DSNCTL = {0x0168}; /* PSL registers - CAIA 2 */ static const cxl_p1_reg_t CXL_PSL9_CONTROL = {0x0020}; static const cxl_p1_reg_t CXL_XSL9_INV = {0x0110}; @@ -613,7 +608,6 @@ struct cxl_context { bool pe_inserted; bool master; bool kernel; - bool real_mode; bool pending_irq; bool pending_fault; bool pending_afu_err; @@ -624,14 +618,6 @@ struct cxl_context { struct rcu_head rcu; - /* - * Only used when more interrupts are allocated via - * pci_enable_msix_range than are supported in the default context, to - * use additional contexts to overcome the limitation. i.e. Mellanox - * CX4 only: - */ - struct list_head extra_irq_contexts; - struct mm_struct *mm; u16 tidr; @@ -704,7 +690,6 @@ struct cxl { struct bin_attribute cxl_attr; int adapter_num; int user_irqs; - int min_pe; u64 ps_size; u16 psl_rev; u16 base_image; @@ -865,32 +850,12 @@ static inline bool cxl_is_power9(void) return false; } -static inline bool cxl_is_power9_dd1(void) -{ - if ((pvr_version_is(PVR_POWER9)) && - cpu_has_feature(CPU_FTR_POWER9_DD1)) - return true; - return false; -} - ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count); -/* Internal functions wrapped in cxl_base to allow PHB to call them */ -bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); -void _cxl_pci_disable_device(struct pci_dev *dev); -int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); -int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); -void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); - bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); - void (*cxl_pci_disable_device)(struct pci_dev *dev); - int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); - int (*cxl_cx4_setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type); - void (*cxl_cx4_teardown_msi_irqs)(struct pci_dev *pdev); - struct module *owner; }; int register_cxl_calls(struct cxl_calls *calls); @@ -955,7 +920,6 @@ int cxl_debugfs_afu_add(struct cxl_afu *afu); void cxl_debugfs_afu_remove(struct cxl_afu *afu); void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir); void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir); -void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, struct dentry *dir); void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir); void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir); @@ -998,11 +962,6 @@ static inline void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, { } -static inline void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, - struct dentry *dir) -{ -} - static inline void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir) { } diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index 0bc7c31cf739..5a3f91255258 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -102,10 +102,6 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl); if (rc) return rc; - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - /* workaround for DD1 - nbwind = capiind */ - cfg->dsnctl |= ((u64)0x02 << (63-47)); - } cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c index 1643850d2302..a1921d81593a 100644 --- a/drivers/misc/cxl/debugfs.c +++ b/drivers/misc/cxl/debugfs.c @@ -58,11 +58,6 @@ void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir) debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); } -void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, struct dentry *dir) -{ - debugfs_create_io_x64("fec", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_XSL_FEC)); -} - int cxl_debugfs_adapter_add(struct cxl *adapter) { struct dentry *dir; diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 93ecc67a0f3b..dc7b34174f85 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -33,7 +33,7 @@ static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb) * This finds a free SSTE for the given SLB, or returns NULL if it's already in * the segment table. */ -static struct cxl_sste* find_free_sste(struct cxl_context *ctx, +static struct cxl_sste *find_free_sste(struct cxl_context *ctx, struct copro_slb *slb) { struct cxl_sste *primary, *sste, *ret = NULL; diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 4644f16606a3..3bc0c15d4d85 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -623,9 +623,6 @@ static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u { pr_devel("in %s\n", __func__); - if (ctx->real_mode) - return -EPERM; - ctx->kernel = kernel; if (ctx->afu->current_mode == CXL_MODE_DIRECTED) return attach_afu_directed(ctx, wed, amr); @@ -916,11 +913,6 @@ static int afu_properties_look_ok(struct cxl_afu *afu) return -EINVAL; } - if (afu->crs_len < 0) { - dev_err(&afu->dev, "Unexpected configuration record size value\n"); - return -EINVAL; - } - return 0; } diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index c1ba0d42cbc8..f35406be465a 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -104,11 +104,6 @@ static inline void cxl_slbia_core(struct mm_struct *mm) static struct cxl_calls cxl_calls = { .cxl_slbia = cxl_slbia_core, - .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, - .cxl_pci_disable_device = _cxl_pci_disable_device, - .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, - .cxl_cx4_setup_msi_irqs = _cxl_cx4_setup_msi_irqs, - .cxl_cx4_teardown_msi_irqs = _cxl_cx4_teardown_msi_irqs, .owner = THIS_MODULE, }; @@ -287,7 +282,7 @@ int cxl_adapter_context_get(struct cxl *adapter) int rc; rc = atomic_inc_unless_negative(&adapter->contexts_num); - return rc >= 0 ? 0 : -EBUSY; + return rc ? 0 : -EBUSY; } void cxl_adapter_context_put(struct cxl *adapter) diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 98f867fcef24..c9d5d82dce8e 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -605,6 +605,7 @@ u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) sr |= CXL_PSL_SR_An_MP; if (mfspr(SPRN_LPCR) & LPCR_TC) sr |= CXL_PSL_SR_An_TC; + if (kernel) { if (!real_mode) sr |= CXL_PSL_SR_An_R; @@ -629,7 +630,7 @@ u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) static u64 calculate_sr(struct cxl_context *ctx) { - return cxl_calculate_sr(ctx->master, ctx->kernel, ctx->real_mode, + return cxl_calculate_sr(ctx->master, ctx->kernel, false, cxl_is_power9()); } diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 429d6de1dde7..b66d832d3233 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -55,8 +55,6 @@ pci_read_config_byte(dev, vsec + 0xa, dest) #define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \ pci_write_config_byte(dev, vsec + 0xa, val) -#define CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, vsec, val) \ - pci_bus_write_config_byte(bus, devfn, vsec + 0xa, val) #define CXL_VSEC_PROTOCOL_MASK 0xe0 #define CXL_VSEC_PROTOCOL_1024TB 0x80 #define CXL_VSEC_PROTOCOL_512TB 0x40 @@ -465,23 +463,21 @@ int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg) /* nMMU_ID Defaults to: b’000001001’*/ xsl_dsnctl |= ((u64)0x09 << (63-28)); - if (!(cxl_is_power9_dd1())) { - /* - * Used to identify CAPI packets which should be sorted into - * the Non-Blocking queues by the PHB. This field should match - * the PHB PBL_NBW_CMPM register - * nbwind=0x03, bits [57:58], must include capi indicator. - * Not supported on P9 DD1. - */ - xsl_dsnctl |= (nbwind << (63-55)); + /* + * Used to identify CAPI packets which should be sorted into + * the Non-Blocking queues by the PHB. This field should match + * the PHB PBL_NBW_CMPM register + * nbwind=0x03, bits [57:58], must include capi indicator. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= (nbwind << (63-55)); - /* - * Upper 16b address bits of ASB_Notify messages sent to the - * system. Need to match the PHB’s ASN Compare/Mask Register. - * Not supported on P9 DD1. - */ - xsl_dsnctl |= asnind; - } + /* + * Upper 16b address bits of ASB_Notify messages sent to the + * system. Need to match the PHB’s ASN Compare/Mask Register. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= asnind; *reg = xsl_dsnctl; return 0; @@ -539,15 +535,8 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, /* Snoop machines */ cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL); - if (cxl_is_power9_dd1()) { - /* Disabling deadlock counter CAR */ - cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL); - /* Enable NORST */ - cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL); - } else { - /* Enable NORST and DD2 features */ - cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); - } + /* Enable NORST and DD2 features */ + cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); /* * Check if PSL has data-cache. We need to flush adapter datacache @@ -595,27 +584,7 @@ static int init_implementation_adapter_regs_psl8(struct cxl *adapter, struct pci return 0; } -static int init_implementation_adapter_regs_xsl(struct cxl *adapter, struct pci_dev *dev) -{ - u64 xsl_dsnctl; - u64 chipid; - u32 phb_index; - u64 capp_unit_id; - int rc; - - rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); - if (rc) - return rc; - - /* Tell XSL where to route data to */ - xsl_dsnctl = 0x0000600000000000ULL | (chipid << (63-5)); - xsl_dsnctl |= (capp_unit_id << (63-13)); - cxl_p1_write(adapter, CXL_XSL_DSNCTL, xsl_dsnctl); - - return 0; -} - -/* PSL & XSL */ +/* PSL */ #define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3)) #define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) /* For the PSL this is a multiple for 0 < n <= 7: */ @@ -627,21 +596,6 @@ static void write_timebase_ctrl_psl8(struct cxl *adapter) TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); } -/* XSL */ -#define TBSYNC_ENA (1ULL << 63) -/* For the XSL this is 2**n * 2000 clocks for 0 < n <= 6: */ -#define XSL_2000_CLOCKS 1 -#define XSL_4000_CLOCKS 2 -#define XSL_8000_CLOCKS 3 - -static void write_timebase_ctrl_xsl(struct cxl *adapter) -{ - cxl_p1_write(adapter, CXL_XSL_TB_CTLSTAT, - TBSYNC_ENA | - TBSYNC_CAL(3) | - TBSYNC_CNT(XSL_4000_CLOCKS)); -} - static u64 timebase_read_psl9(struct cxl *adapter) { return cxl_p1_read(adapter, CXL_PSL9_Timebase); @@ -652,11 +606,6 @@ static u64 timebase_read_psl8(struct cxl *adapter) return cxl_p1_read(adapter, CXL_PSL_Timebase); } -static u64 timebase_read_xsl(struct cxl *adapter) -{ - return cxl_p1_read(adapter, CXL_XSL_Timebase); -} - static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) { struct device_node *np; @@ -800,234 +749,36 @@ static int setup_cxl_bars(struct pci_dev *dev) return 0; } -#ifdef CONFIG_CXL_BIMODAL - -struct cxl_switch_work { - struct pci_dev *dev; - struct work_struct work; - int vsec; - int mode; -}; - -static void switch_card_to_cxl(struct work_struct *work) +/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */ +static int switch_card_to_cxl(struct pci_dev *dev) { - struct cxl_switch_work *switch_work = - container_of(work, struct cxl_switch_work, work); - struct pci_dev *dev = switch_work->dev; - struct pci_bus *bus = dev->bus; - struct pci_controller *hose = pci_bus_to_host(bus); - struct pci_dev *bridge; - struct pnv_php_slot *php_slot; - unsigned int devfn; + int vsec; u8 val; int rc; - dev_info(&bus->dev, "cxl: Preparing for mode switch...\n"); - bridge = list_first_entry_or_null(&hose->bus->devices, struct pci_dev, - bus_list); - if (!bridge) { - dev_WARN(&bus->dev, "cxl: Couldn't find root port!\n"); - goto err_dev_put; - } - - php_slot = pnv_php_find_slot(pci_device_to_OF_node(bridge)); - if (!php_slot) { - dev_err(&bus->dev, "cxl: Failed to find slot hotplug " - "information. You may need to upgrade " - "skiboot. Aborting.\n"); - goto err_dev_put; - } - - rc = CXL_READ_VSEC_MODE_CONTROL(dev, switch_work->vsec, &val); - if (rc) { - dev_err(&bus->dev, "cxl: Failed to read CAPI mode control: %i\n", rc); - goto err_dev_put; - } - devfn = dev->devfn; - - /* Release the reference obtained in cxl_check_and_switch_mode() */ - pci_dev_put(dev); - - dev_dbg(&bus->dev, "cxl: Removing PCI devices from kernel\n"); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bridge->subordinate); - pci_unlock_rescan_remove(); - - /* Switch the CXL protocol on the card */ - if (switch_work->mode == CXL_BIMODE_CXL) { - dev_info(&bus->dev, "cxl: Switching card to CXL mode\n"); - val &= ~CXL_VSEC_PROTOCOL_MASK; - val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; - rc = pnv_cxl_enable_phb_kernel_api(hose, true); - if (rc) { - dev_err(&bus->dev, "cxl: Failed to enable kernel API" - " on real PHB, aborting\n"); - goto err_free_work; - } - } else { - dev_WARN(&bus->dev, "cxl: Switching card to PCI mode not supported!\n"); - goto err_free_work; - } - - rc = CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, switch_work->vsec, val); - if (rc) { - dev_err(&bus->dev, "cxl: Failed to configure CXL protocol: %i\n", rc); - goto err_free_work; - } + dev_info(&dev->dev, "switch card to CXL\n"); - /* - * The CAIA spec (v1.1, Section 10.6 Bi-modal Device Support) states - * we must wait 100ms after this mode switch before touching PCIe config - * space. - */ - msleep(100); - - /* - * Hot reset to cause the card to come back in cxl mode. A - * OPAL_RESET_PCI_LINK would be sufficient, but currently lacks support - * in skiboot, so we use a hot reset instead. - * - * We call pci_set_pcie_reset_state() on the bridge, as a CAPI card is - * guaranteed to sit directly under the root port, and setting the reset - * state on a device directly under the root port is equivalent to doing - * it on the root port iself. - */ - dev_info(&bus->dev, "cxl: Configuration write complete, resetting card\n"); - pci_set_pcie_reset_state(bridge, pcie_hot_reset); - pci_set_pcie_reset_state(bridge, pcie_deassert_reset); - - dev_dbg(&bus->dev, "cxl: Offlining slot\n"); - rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_OFFLINE); - if (rc) { - dev_err(&bus->dev, "cxl: OPAL offlining call failed: %i\n", rc); - goto err_free_work; - } - - dev_dbg(&bus->dev, "cxl: Onlining and probing slot\n"); - rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_ONLINE); - if (rc) { - dev_err(&bus->dev, "cxl: OPAL onlining call failed: %i\n", rc); - goto err_free_work; - } - - pci_lock_rescan_remove(); - pci_hp_add_devices(bridge->subordinate); - pci_unlock_rescan_remove(); - - dev_info(&bus->dev, "cxl: CAPI mode switch completed\n"); - kfree(switch_work); - return; - -err_dev_put: - /* Release the reference obtained in cxl_check_and_switch_mode() */ - pci_dev_put(dev); -err_free_work: - kfree(switch_work); -} - -int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec) -{ - struct cxl_switch_work *work; - u8 val; - int rc; - - if (!cpu_has_feature(CPU_FTR_HVMODE)) + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); return -ENODEV; - - if (!vsec) { - vsec = find_cxl_vsec(dev); - if (!vsec) { - dev_info(&dev->dev, "CXL VSEC not found\n"); - return -ENODEV; - } } - rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val); - if (rc) { - dev_err(&dev->dev, "Failed to read current mode control: %i", rc); + if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) { + dev_err(&dev->dev, "failed to read current mode control: %i", rc); return rc; } - - if (mode == CXL_BIMODE_PCI) { - if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { - dev_info(&dev->dev, "Card is already in PCI mode\n"); - return 0; - } - /* - * TODO: Before it's safe to switch the card back to PCI mode - * we need to disable the CAPP and make sure any cachelines the - * card holds have been flushed out. Needs skiboot support. - */ - dev_WARN(&dev->dev, "CXL mode switch to PCI unsupported!\n"); - return -EIO; - } - - if (val & CXL_VSEC_PROTOCOL_ENABLE) { - dev_info(&dev->dev, "Card is already in CXL mode\n"); - return 0; + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; + if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) { + dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc); + return rc; } - - dev_info(&dev->dev, "Card is in PCI mode, scheduling kernel thread " - "to switch to CXL mode\n"); - - work = kmalloc(sizeof(struct cxl_switch_work), GFP_KERNEL); - if (!work) - return -ENOMEM; - - pci_dev_get(dev); - work->dev = dev; - work->vsec = vsec; - work->mode = mode; - INIT_WORK(&work->work, switch_card_to_cxl); - - schedule_work(&work->work); - /* - * We return a failure now to abort the driver init. Once the - * link has been cycled and the card is in cxl mode we will - * come back (possibly using the generic cxl driver), but - * return success as the card should then be in cxl mode. - * - * TODO: What if the card comes back in PCI mode even after - * the switch? Don't want to spin endlessly. + * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states + * we must wait 100ms after this mode switch before touching + * PCIe config space. */ - return -EBUSY; -} -EXPORT_SYMBOL_GPL(cxl_check_and_switch_mode); - -#endif /* CONFIG_CXL_BIMODAL */ - -static int setup_cxl_protocol_area(struct pci_dev *dev) -{ - u8 val; - int rc; - int vsec = find_cxl_vsec(dev); - - if (!vsec) { - dev_info(&dev->dev, "CXL VSEC not found\n"); - return -ENODEV; - } - - rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val); - if (rc) { - dev_err(&dev->dev, "Failed to read current mode control: %i\n", rc); - return rc; - } - - if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { - dev_err(&dev->dev, "Card not in CAPI mode!\n"); - return -EIO; - } - - if ((val & CXL_VSEC_PROTOCOL_MASK) != CXL_VSEC_PROTOCOL_256TB) { - val &= ~CXL_VSEC_PROTOCOL_MASK; - val |= CXL_VSEC_PROTOCOL_256TB; - rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val); - if (rc) { - dev_err(&dev->dev, "Failed to set CXL protocol area: %i\n", rc); - return rc; - } - } + msleep(100); return 0; } @@ -1724,7 +1475,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = setup_cxl_bars(dev))) return rc; - if ((rc = setup_cxl_protocol_area(dev))) + if ((rc = switch_card_to_cxl(dev))) return rc; if ((rc = cxl_update_image_control(adapter))) @@ -1871,37 +1622,14 @@ static const struct cxl_service_layer_ops psl8_ops = { .needs_reset_before_disable = true, }; -static const struct cxl_service_layer_ops xsl_ops = { - .adapter_regs_init = init_implementation_adapter_regs_xsl, - .invalidate_all = cxl_invalidate_all_psl8, - .sanitise_afu_regs = sanitise_afu_regs_psl8, - .handle_interrupt = cxl_irq_psl8, - .fail_irq = cxl_fail_irq_psl, - .activate_dedicated_process = cxl_activate_dedicated_process_psl8, - .attach_afu_directed = cxl_attach_afu_directed_psl8, - .attach_dedicated_process = cxl_attach_dedicated_process_psl8, - .update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl8, - .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_xsl, - .write_timebase_ctrl = write_timebase_ctrl_xsl, - .timebase_read = timebase_read_xsl, - .capi_mode = OPAL_PHB_CAPI_MODE_DMA, -}; - static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) { - if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) { - /* Mellanox CX-4 */ - dev_info(&dev->dev, "Device uses an XSL\n"); - adapter->native->sl_ops = &xsl_ops; - adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */ + if (cxl_is_power8()) { + dev_info(&dev->dev, "Device uses a PSL8\n"); + adapter->native->sl_ops = &psl8_ops; } else { - if (cxl_is_power8()) { - dev_info(&dev->dev, "Device uses a PSL8\n"); - adapter->native->sl_ops = &psl8_ops; - } else { - dev_info(&dev->dev, "Device uses a PSL9\n"); - adapter->native->sl_ops = &psl9_ops; - } + dev_info(&dev->dev, "Device uses a PSL9\n"); + adapter->native->sl_ops = &psl9_ops; } } @@ -2008,43 +1736,6 @@ int cxl_slot_is_switched(struct pci_dev *dev) return (depth > CXL_MAX_PCIEX_PARENT); } -bool cxl_slot_is_supported(struct pci_dev *dev, int flags) -{ - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return false; - - if ((flags & CXL_SLOT_FLAG_DMA) && (!pvr_version_is(PVR_POWER8NVL))) { - /* - * CAPP DMA mode is technically supported on regular P8, but - * will EEH if the card attempts to access memory < 4GB, which - * we cannot realistically avoid. We might be able to work - * around the issue, but until then return unsupported: - */ - return false; - } - - if (cxl_slot_is_switched(dev)) - return false; - - /* - * XXX: This gets a little tricky on regular P8 (not POWER8NVL) since - * the CAPP can be connected to PHB 0, 1 or 2 on a first come first - * served basis, which is racy to check from here. If we need to - * support this in future we might need to consider having this - * function effectively reserve it ahead of time. - * - * Currently, the only user of this API is the Mellanox CX4, which is - * only supported on P8NVL due to the above mentioned limitation of - * CAPP DMA mode and therefore does not need to worry about this. If the - * issue with CAPP DMA mode is later worked around on P8 we might need - * to revisit this. - */ - - return true; -} -EXPORT_SYMBOL_GPL(cxl_slot_is_supported); - - static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct cxl *adapter; @@ -2086,9 +1777,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); } - if (pnv_pci_on_cxl_phb(dev) && adapter->slices >= 1) - pnv_cxl_phb_set_peer_afu(dev, adapter->afu[0]); - return 0; } diff --git a/drivers/misc/cxl/phb.c b/drivers/misc/cxl/phb.c deleted file mode 100644 index 6ec69ada19f4..000000000000 --- a/drivers/misc/cxl/phb.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2014-2016 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/pci.h> -#include "cxl.h" - -bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) -{ - struct cxl_context *ctx; - - /* - * Allocate a context to do cxl things to. This is used for interrupts - * in the peer model using a real phb, and if we eventually do DMA ops - * in the virtual phb, we'll need a default context to attach them to. - */ - ctx = cxl_dev_context_init(dev); - if (IS_ERR(ctx)) - return false; - dev->dev.archdata.cxl_ctx = ctx; - - return (cxl_ops->afu_check_and_enable(afu) == 0); -} -/* exported via cxl_base */ - -void _cxl_pci_disable_device(struct pci_dev *dev) -{ - struct cxl_context *ctx = cxl_get_context(dev); - - if (ctx) { - if (ctx->status == STARTED) { - dev_err(&dev->dev, "Default context started\n"); - return; - } - dev->dev.archdata.cxl_ctx = NULL; - cxl_release_context(ctx); - } -} -/* exported via cxl_base */ diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 7fd0bdc1436a..7908633d9204 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -9,7 +9,6 @@ #include <linux/pci.h> #include <misc/cxl.h> -#include <asm/pnv-pci.h> #include "cxl.h" static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) @@ -45,6 +44,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *phb; struct cxl_afu *afu; + struct cxl_context *ctx; phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; @@ -57,7 +57,30 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) set_dma_ops(&dev->dev, &dma_nommu_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); - return _cxl_pci_associate_default_context(dev, afu); + /* + * Allocate a context to do cxl things too. If we eventually do real + * DMA ops, we'll need a default context to attach them to + */ + ctx = cxl_dev_context_init(dev); + if (IS_ERR(ctx)) + return false; + dev->dev.archdata.cxl_ctx = ctx; + + return (cxl_ops->afu_check_and_enable(afu) == 0); +} + +static void cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_context *ctx = cxl_get_context(dev); + + if (ctx) { + if (ctx->status == STARTED) { + dev_err(&dev->dev, "Default context started\n"); + return; + } + dev->dev.archdata.cxl_ctx = NULL; + cxl_release_context(ctx); + } } static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, @@ -191,8 +214,8 @@ static struct pci_controller_ops cxl_pci_controller_ops = { .probe_mode = cxl_pci_probe_mode, .enable_device_hook = cxl_pci_enable_device_hook, - .disable_device = _cxl_pci_disable_device, - .release_device = _cxl_pci_disable_device, + .disable_device = cxl_pci_disable_device, + .release_device = cxl_pci_disable_device, .window_alignment = cxl_pci_window_alignment, .reset_secondary_bus = cxl_pci_reset_secondary_bus, .setup_msi_irqs = cxl_setup_msi_irqs, @@ -284,18 +307,13 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu) */ } -static bool _cxl_pci_is_vphb_device(struct pci_controller *phb) -{ - return (phb->ops == &cxl_pcie_pci_ops); -} - bool cxl_pci_is_vphb_device(struct pci_dev *dev) { struct pci_controller *phb; phb = pci_bus_to_host(dev->bus); - return _cxl_pci_is_vphb_device(phb); + return (phb->ops == &cxl_pcie_pci_ops); } struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) @@ -304,13 +322,7 @@ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); - if (_cxl_pci_is_vphb_device(phb)) - return (struct cxl_afu *)phb->private_data; - - if (pnv_pci_on_cxl_phb(dev)) - return pnv_cxl_phb_to_afu(phb); - - return ERR_PTR(-ENODEV); + return (struct cxl_afu *)phb->private_data; } EXPORT_SYMBOL_GPL(cxl_pci_to_afu); diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index 95f74623113e..c10a940e3b38 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -86,7 +86,7 @@ out: return rc; } -static int map_afu_irq(struct vm_area_struct *vma, unsigned long address, +static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address, u64 offset, struct ocxl_context *ctx) { u64 trigger_addr; @@ -95,15 +95,15 @@ static int map_afu_irq(struct vm_area_struct *vma, unsigned long address, if (!trigger_addr) return VM_FAULT_SIGBUS; - vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); } -static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address, +static vm_fault_t map_pp_mmio(struct vm_area_struct *vma, unsigned long address, u64 offset, struct ocxl_context *ctx) { u64 pp_mmio_addr; int pasid_off; + vm_fault_t ret; if (offset >= ctx->afu->config.pp_mmio_stride) return VM_FAULT_SIGBUS; @@ -121,27 +121,27 @@ static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address, pasid_off * ctx->afu->config.pp_mmio_stride + offset; - vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT); + ret = vmf_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT); mutex_unlock(&ctx->status_mutex); - return VM_FAULT_NOPAGE; + return ret; } -static int ocxl_mmap_fault(struct vm_fault *vmf) +static vm_fault_t ocxl_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct ocxl_context *ctx = vma->vm_file->private_data; u64 offset; - int rc; + vm_fault_t ret; offset = vmf->pgoff << PAGE_SHIFT; pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__, ctx->pasid, vmf->address, offset); if (offset < ctx->afu->irq_base_offset) - rc = map_pp_mmio(vma, vmf->address, offset, ctx); + ret = map_pp_mmio(vma, vmf->address, offset, ctx); else - rc = map_afu_irq(vma, vmf->address, offset, ctx); - return rc; + ret = map_afu_irq(vma, vmf->address, offset, ctx); + return ret; } static const struct vm_operations_struct ocxl_vmops = { diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index ffc731b0731a..31695a078485 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -137,7 +137,7 @@ static void xsl_fault_handler_bh(struct work_struct *fault_work) int rc; /* - * We need to release a reference on the mm whenever exiting this + * We must release a reference on mm_users whenever exiting this * function (taken in the memory fault interrupt handler) */ rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr, @@ -173,7 +173,7 @@ static void xsl_fault_handler_bh(struct work_struct *fault_work) } r = RESTART; ack: - mmdrop(fault->pe_data.mm); + mmput(fault->pe_data.mm); ack_irq(spa, r); } @@ -185,6 +185,7 @@ static irqreturn_t xsl_fault_handler(int irq, void *data) struct pe_data *pe_data; struct ocxl_process_element *pe; int lpid, pid, tid; + bool schedule = false; read_irq(spa, &dsisr, &dar, &pe_handle); trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1); @@ -227,14 +228,19 @@ static irqreturn_t xsl_fault_handler(int irq, void *data) } WARN_ON(pe_data->mm->context.id != pid); - spa->xsl_fault.pe = pe_handle; - spa->xsl_fault.dar = dar; - spa->xsl_fault.dsisr = dsisr; - spa->xsl_fault.pe_data = *pe_data; - mmgrab(pe_data->mm); /* mm count is released by bottom half */ - + if (mmget_not_zero(pe_data->mm)) { + spa->xsl_fault.pe = pe_handle; + spa->xsl_fault.dar = dar; + spa->xsl_fault.dsisr = dsisr; + spa->xsl_fault.pe_data = *pe_data; + schedule = true; + /* mm_users count released by bottom half */ + } rcu_read_unlock(); - schedule_work(&spa->xsl_fault.fault_work); + if (schedule) + schedule_work(&spa->xsl_fault.fault_work); + else + ack_irq(spa, ADDRESS_ERROR); return IRQ_HANDLED; } diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c index d9753a1db14b..0ab1fd1b2682 100644 --- a/drivers/misc/ocxl/sysfs.c +++ b/drivers/misc/ocxl/sysfs.c @@ -64,7 +64,7 @@ static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj, return count; } -static int global_mmio_fault(struct vm_fault *vmf) +static vm_fault_t global_mmio_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct ocxl_afu *afu = vma->vm_private_data; @@ -75,8 +75,7 @@ static int global_mmio_fault(struct vm_fault *vmf) offset = vmf->pgoff; offset += (afu->global_mmio_start >> PAGE_SHIFT); - vm_insert_pfn(vma, vmf->address, offset); - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, vmf->address, offset); } static const struct vm_operations_struct global_mmio_vmops = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 09e38f0733bd..b8f75a22fb6c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -755,7 +755,7 @@ struct cpl_abort_req_rss { struct cpl_abort_req_rss6 { WR_HDR; union opcode_tid ot; - __u32 srqidx_status; + __be32 srqidx_status; }; #define ABORT_RSS_STATUS_S 0 @@ -785,7 +785,7 @@ struct cpl_abort_rpl_rss { struct cpl_abort_rpl_rss6 { union opcode_tid ot; - __u32 srqidx_status; + __be32 srqidx_status; }; struct cpl_abort_rpl { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index fe4ac40dbade..3ce14d42ddc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -433,6 +433,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_FPGA_QUERY_QP: case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -612,6 +614,9 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ARM_XRQ); MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index b3820a34e773..0f11fff32a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -240,6 +240,9 @@ const char *parse_fs_dst(struct trace_seq *p, case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: trace_seq_printf(p, "ft=%p\n", dst->ft); break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + trace_seq_printf(p, "ft_num=%u\n", dst->ft_num); + break; case MLX5_FLOW_DESTINATION_TYPE_TIR: trace_seq_printf(p, "tir=%u\n", dst->tir_num); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 6a62b84e57f4..8e01f818021b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -368,18 +368,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int list_size = 0; list_for_each_entry(dst, &fte->node.children, node.list) { - unsigned int id; + unsigned int id, type = dst->dest_attr.type; - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; - MLX5_SET(dest_format_struct, in_dests, destination_type, - dst->dest_attr.type); - if (dst->dest_attr.type == - MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = dst->dest_attr.ft_num; + type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: id = dst->dest_attr.ft->id; - } else if (dst->dest_attr.type == - MLX5_FLOW_DESTINATION_TYPE_VPORT) { + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: id = dst->dest_attr.vport.num; MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id_valid, @@ -387,9 +389,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); - } else { + break; + default: id = dst->dest_attr.tir_num; } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + type); MLX5_SET(dest_format_struct, in_dests, destination_id, id); in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 261cb6aacf12..f418541af7cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -310,89 +310,17 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static bool check_last_reserved(const u32 *match_criteria) -{ - char *match_criteria_reserved = - MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED); - - return !match_criteria_reserved[0] && - !memcmp(match_criteria_reserved, match_criteria_reserved + 1, - MLX5_FLD_SZ_BYTES(fte_match_param, - MLX5_FTE_MATCH_PARAM_RESERVED) - 1); -} - -static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria) -{ - if (match_criteria_enable & ~( - (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) | - (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) | - (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) | - (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2))) - return false; - - if (!(match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) { - char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, - match_criteria, outer_headers); - - if (fg_type_mask[0] || - memcmp(fg_type_mask, fg_type_mask + 1, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) - return false; - } - - if (!(match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) { - char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, - match_criteria, misc_parameters); - - if (fg_type_mask[0] || - memcmp(fg_type_mask, fg_type_mask + 1, - MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) - return false; - } - - if (!(match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) { - char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, - match_criteria, inner_headers); - - if (fg_type_mask[0] || - memcmp(fg_type_mask, fg_type_mask + 1, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) - return false; - } - - if (!(match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)) { - char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, - match_criteria, misc_parameters_2); - - if (fg_type_mask[0] || - memcmp(fg_type_mask, fg_type_mask + 1, - MLX5_ST_SZ_BYTES(fte_match_set_misc2) - 1)) - return false; - } - - return check_last_reserved(match_criteria); -} - static bool check_valid_spec(const struct mlx5_flow_spec *spec) { int i; - if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) { - pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n"); - return false; - } - for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) if (spec->match_value[i] & ~spec->match_criteria[i]) { pr_warn("mlx5_core: match_value differs from match_criteria\n"); return false; } - return check_last_reserved(spec->match_value); + return true; } static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) @@ -1159,9 +1087,6 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, struct mlx5_flow_group *fg; int err; - if (!check_valid_mask(match_criteria_enable, match_criteria)) - return ERR_PTR(-EINVAL); - if (ft->autogroup.active) return ERR_PTR(-EPERM); @@ -1432,7 +1357,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && - d1->tir_num == d2->tir_num)) + d1->tir_num == d2->tir_num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM && + d1->ft_num == d2->ft_num)) return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index b7c21eb21a21..e3797a44e074 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -584,6 +584,22 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) return 0; } +static void mlx5_rdma_netdev_free(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + const struct mlx5e_profile *profile = priv->profile; + + mlx5e_detach_netdev(priv); + profile->cleanup(priv); + destroy_workqueue(priv->wq); + + if (!ipriv->sub_interface) { + mlx5i_pkey_qpn_ht_cleanup(netdev); + mlx5e_destroy_mdev_resources(priv->mdev); + } +} + struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, struct ib_device *ibdev, const char *name, @@ -657,6 +673,9 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, rn->detach_mcast = mlx5i_detach_mcast; rn->set_id = mlx5i_set_pkey_index; + netdev->priv_destructor = mlx5_rdma_netdev_free; + netdev->needs_free_netdev = 1; + return netdev; destroy_ht: @@ -669,21 +688,3 @@ err_free_netdev: return NULL; } EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); - -void mlx5_rdma_netdev_free(struct net_device *netdev) -{ - struct mlx5e_priv *priv = mlx5i_epriv(netdev); - struct mlx5i_priv *ipriv = priv->ppriv; - const struct mlx5e_profile *profile = priv->profile; - - mlx5e_detach_netdev(priv); - profile->cleanup(priv); - destroy_workqueue(priv->wq); - - if (!ipriv->sub_interface) { - mlx5i_pkey_qpn_ht_cleanup(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); - } - free_netdev(netdev); -} -EXPORT_SYMBOL(mlx5_rdma_netdev_free); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 0805fa6215ee..dc042017c293 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -378,7 +378,7 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) } ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS, - ndev->dev->attrs.max_sge - 1); + ndev->dev->attrs.max_send_sge - 1); list_add(&ndev->entry, &device_list); out_unlock: mutex_unlock(&device_list_mutex); @@ -1093,7 +1093,6 @@ static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, struct nvme_rdma_request *req) { - struct ib_send_wr *bad_wr; struct ib_send_wr wr = { .opcode = IB_WR_LOCAL_INV, .next = NULL, @@ -1105,7 +1104,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, req->reg_cqe.done = nvme_rdma_inv_rkey_done; wr.wr_cqe = &req->reg_cqe; - return ib_post_send(queue->qp, &wr, &bad_wr); + return ib_post_send(queue->qp, &wr, NULL); } static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, @@ -1308,7 +1307,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, struct ib_send_wr *first) { - struct ib_send_wr wr, *bad_wr; + struct ib_send_wr wr; int ret; sge->addr = qe->dma; @@ -1327,7 +1326,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, else first = ≀ - ret = ib_post_send(queue->qp, first, &bad_wr); + ret = ib_post_send(queue->qp, first, NULL); if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); @@ -1338,7 +1337,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe) { - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; struct ib_sge list; int ret; @@ -1353,7 +1352,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, wr.sg_list = &list; wr.num_sge = 1; - ret = ib_post_recv(queue->qp, &wr, &bad_wr); + ret = ib_post_recv(queue->qp, &wr, NULL); if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index e7f43d1e1779..3533e918ea37 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -435,7 +435,6 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue) static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, struct nvmet_rdma_cmd *cmd) { - struct ib_recv_wr *bad_wr; int ret; ib_dma_sync_single_for_device(ndev->device, @@ -443,9 +442,9 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, DMA_FROM_DEVICE); if (ndev->srq) - ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr); + ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); else - ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr); + ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL); if (unlikely(ret)) pr_err("post_recv cmd failed\n"); @@ -532,7 +531,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) struct nvmet_rdma_rsp *rsp = container_of(req, struct nvmet_rdma_rsp, req); struct rdma_cm_id *cm_id = rsp->queue->cm_id; - struct ib_send_wr *first_wr, *bad_wr; + struct ib_send_wr *first_wr; if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) { rsp->send_wr.opcode = IB_WR_SEND_WITH_INV; @@ -553,7 +552,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) rsp->send_sge.addr, rsp->send_sge.length, DMA_TO_DEVICE); - if (unlikely(ib_post_send(cm_id->qp, first_wr, &bad_wr))) { + if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) { pr_err("sending cmd response failed\n"); nvmet_rdma_release_rsp(rsp); } @@ -892,7 +891,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) inline_page_count = num_pages(port->inline_data_size); inline_sge_count = max(cm_id->device->attrs.max_sge_rd, - cm_id->device->attrs.max_sge) - 1; + cm_id->device->attrs.max_recv_sge) - 1; if (inline_page_count > inline_sge_count) { pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n", port->inline_data_size, cm_id->device->name, @@ -969,7 +968,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_send_wr = queue->send_queue_size + 1; qp_attr.cap.max_rdma_ctxs = queue->send_queue_size; qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, - ndev->device->attrs.max_sge); + ndev->device->attrs.max_send_sge); if (ndev->srq) { qp_attr.srq = ndev->srq; diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 7709fcc707f4..5414c4a87bea 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -73,7 +73,7 @@ static LIST_HEAD(hvc_structs); * Protect the list of hvc_struct instances from inserts and removals during * list traversal. */ -static DEFINE_SPINLOCK(hvc_structs_lock); +static DEFINE_MUTEX(hvc_structs_mutex); /* * This value is used to assign a tty->index value to a hvc_struct based @@ -83,7 +83,7 @@ static DEFINE_SPINLOCK(hvc_structs_lock); static int last_hvc = -1; /* - * Do not call this function with either the hvc_structs_lock or the hvc_struct + * Do not call this function with either the hvc_structs_mutex or the hvc_struct * lock held. If successful, this function increments the kref reference * count against the target hvc_struct so it should be released when finished. */ @@ -92,24 +92,46 @@ static struct hvc_struct *hvc_get_by_index(int index) struct hvc_struct *hp; unsigned long flags; - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); list_for_each_entry(hp, &hvc_structs, next) { spin_lock_irqsave(&hp->lock, flags); if (hp->index == index) { tty_port_get(&hp->port); spin_unlock_irqrestore(&hp->lock, flags); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); return hp; } spin_unlock_irqrestore(&hp->lock, flags); } hp = NULL; + mutex_unlock(&hvc_structs_mutex); - spin_unlock(&hvc_structs_lock); return hp; } +static int __hvc_flush(const struct hv_ops *ops, uint32_t vtermno, bool wait) +{ + if (wait) + might_sleep(); + + if (ops->flush) + return ops->flush(vtermno, wait); + return 0; +} + +static int hvc_console_flush(const struct hv_ops *ops, uint32_t vtermno) +{ + return __hvc_flush(ops, vtermno, false); +} + +/* + * Wait for the console to flush before writing more to it. This sleeps. + */ +static int hvc_flush(struct hvc_struct *hp) +{ + return __hvc_flush(hp->ops, hp->vtermno, true); +} /* * Initial console vtermnos for console API usage prior to full console @@ -156,8 +178,12 @@ static void hvc_console_print(struct console *co, const char *b, if (r <= 0) { /* throw away characters on error * but spin in case of -EAGAIN */ - if (r != -EAGAIN) + if (r != -EAGAIN) { i = 0; + } else { + hvc_console_flush(cons_ops[index], + vtermnos[index]); + } } else if (r > 0) { i -= r; if (i > 0) @@ -165,6 +191,7 @@ static void hvc_console_print(struct console *co, const char *b, } } } + hvc_console_flush(cons_ops[index], vtermnos[index]); } static struct tty_driver *hvc_console_device(struct console *c, int *index) @@ -224,13 +251,13 @@ static void hvc_port_destruct(struct tty_port *port) struct hvc_struct *hp = container_of(port, struct hvc_struct, port); unsigned long flags; - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); spin_lock_irqsave(&hp->lock, flags); list_del(&(hp->next)); spin_unlock_irqrestore(&hp->lock, flags); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); kfree(hp); } @@ -494,23 +521,32 @@ static int hvc_write(struct tty_struct *tty, const unsigned char *buf, int count if (hp->port.count <= 0) return -EIO; - spin_lock_irqsave(&hp->lock, flags); + while (count > 0) { + spin_lock_irqsave(&hp->lock, flags); - /* Push pending writes */ - if (hp->n_outbuf > 0) - hvc_push(hp); - - while (count > 0 && (rsize = hp->outbuf_size - hp->n_outbuf) > 0) { - if (rsize > count) - rsize = count; - memcpy(hp->outbuf + hp->n_outbuf, buf, rsize); - count -= rsize; - buf += rsize; - hp->n_outbuf += rsize; - written += rsize; - hvc_push(hp); + rsize = hp->outbuf_size - hp->n_outbuf; + + if (rsize) { + if (rsize > count) + rsize = count; + memcpy(hp->outbuf + hp->n_outbuf, buf, rsize); + count -= rsize; + buf += rsize; + hp->n_outbuf += rsize; + written += rsize; + } + + if (hp->n_outbuf > 0) + hvc_push(hp); + + spin_unlock_irqrestore(&hp->lock, flags); + + if (count) { + if (hp->n_outbuf > 0) + hvc_flush(hp); + cond_resched(); + } } - spin_unlock_irqrestore(&hp->lock, flags); /* * Racy, but harmless, kick thread if there is still pending data. @@ -590,10 +626,10 @@ static u32 timeout = MIN_TIMEOUT; #define HVC_POLL_READ 0x00000001 #define HVC_POLL_WRITE 0x00000002 -int hvc_poll(struct hvc_struct *hp) +static int __hvc_poll(struct hvc_struct *hp, bool may_sleep) { struct tty_struct *tty; - int i, n, poll_mask = 0; + int i, n, count, poll_mask = 0; char buf[N_INBUF] __ALIGNED__; unsigned long flags; int read_total = 0; @@ -612,6 +648,12 @@ int hvc_poll(struct hvc_struct *hp) timeout = (written_total) ? 0 : MIN_TIMEOUT; } + if (may_sleep) { + spin_unlock_irqrestore(&hp->lock, flags); + cond_resched(); + spin_lock_irqsave(&hp->lock, flags); + } + /* No tty attached, just skip */ tty = tty_port_tty_get(&hp->port); if (tty == NULL) @@ -619,7 +661,7 @@ int hvc_poll(struct hvc_struct *hp) /* Now check if we can get data (are we throttled ?) */ if (tty_throttled(tty)) - goto throttled; + goto out; /* If we aren't notifier driven and aren't throttled, we always * request a reschedule @@ -628,56 +670,58 @@ int hvc_poll(struct hvc_struct *hp) poll_mask |= HVC_POLL_READ; /* Read data if any */ - for (;;) { - int count = tty_buffer_request_room(&hp->port, N_INBUF); - /* If flip is full, just reschedule a later read */ - if (count == 0) { + count = tty_buffer_request_room(&hp->port, N_INBUF); + + /* If flip is full, just reschedule a later read */ + if (count == 0) { + poll_mask |= HVC_POLL_READ; + goto out; + } + + n = hp->ops->get_chars(hp->vtermno, buf, count); + if (n <= 0) { + /* Hangup the tty when disconnected from host */ + if (n == -EPIPE) { + spin_unlock_irqrestore(&hp->lock, flags); + tty_hangup(tty); + spin_lock_irqsave(&hp->lock, flags); + } else if ( n == -EAGAIN ) { + /* + * Some back-ends can only ensure a certain min + * num of bytes read, which may be > 'count'. + * Let the tty clear the flip buff to make room. + */ poll_mask |= HVC_POLL_READ; - break; } + goto out; + } - n = hp->ops->get_chars(hp->vtermno, buf, count); - if (n <= 0) { - /* Hangup the tty when disconnected from host */ - if (n == -EPIPE) { - spin_unlock_irqrestore(&hp->lock, flags); - tty_hangup(tty); - spin_lock_irqsave(&hp->lock, flags); - } else if ( n == -EAGAIN ) { - /* - * Some back-ends can only ensure a certain min - * num of bytes read, which may be > 'count'. - * Let the tty clear the flip buff to make room. - */ - poll_mask |= HVC_POLL_READ; - } - break; - } - for (i = 0; i < n; ++i) { + for (i = 0; i < n; ++i) { #ifdef CONFIG_MAGIC_SYSRQ - if (hp->index == hvc_console.index) { - /* Handle the SysRq Hack */ - /* XXX should support a sequence */ - if (buf[i] == '\x0f') { /* ^O */ - /* if ^O is pressed again, reset - * sysrq_pressed and flip ^O char */ - sysrq_pressed = !sysrq_pressed; - if (sysrq_pressed) - continue; - } else if (sysrq_pressed) { - handle_sysrq(buf[i]); - sysrq_pressed = 0; + if (hp->index == hvc_console.index) { + /* Handle the SysRq Hack */ + /* XXX should support a sequence */ + if (buf[i] == '\x0f') { /* ^O */ + /* if ^O is pressed again, reset + * sysrq_pressed and flip ^O char */ + sysrq_pressed = !sysrq_pressed; + if (sysrq_pressed) continue; - } + } else if (sysrq_pressed) { + handle_sysrq(buf[i]); + sysrq_pressed = 0; + continue; } -#endif /* CONFIG_MAGIC_SYSRQ */ - tty_insert_flip_char(&hp->port, buf[i], 0); } - - read_total += n; +#endif /* CONFIG_MAGIC_SYSRQ */ + tty_insert_flip_char(&hp->port, buf[i], 0); } - throttled: + if (n == count) + poll_mask |= HVC_POLL_READ; + read_total = n; + + out: /* Wakeup write queue if necessary */ if (hp->do_wakeup) { hp->do_wakeup = 0; @@ -697,6 +741,11 @@ int hvc_poll(struct hvc_struct *hp) return poll_mask; } + +int hvc_poll(struct hvc_struct *hp) +{ + return __hvc_poll(hp, false); +} EXPORT_SYMBOL_GPL(hvc_poll); /** @@ -733,11 +782,12 @@ static int khvcd(void *unused) try_to_freeze(); wmb(); if (!cpus_are_in_xmon()) { - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); list_for_each_entry(hp, &hvc_structs, next) { - poll_mask |= hvc_poll(hp); + poll_mask |= __hvc_poll(hp, true); + cond_resched(); } - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); } else poll_mask |= HVC_POLL_READ; if (hvc_kicked) @@ -871,7 +921,7 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, INIT_WORK(&hp->tty_resize, hvc_set_winsz); spin_lock_init(&hp->lock); - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); /* * find index to use: @@ -891,7 +941,7 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, vtermnos[i] = vtermno; list_add_tail(&(hp->next), &hvc_structs); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); /* check if we need to re-register the kernel console */ hvc_check_console(i); diff --git a/drivers/tty/hvc/hvc_console.h b/drivers/tty/hvc/hvc_console.h index ea63090e013f..e9319954c832 100644 --- a/drivers/tty/hvc/hvc_console.h +++ b/drivers/tty/hvc/hvc_console.h @@ -54,6 +54,7 @@ struct hvc_struct { struct hv_ops { int (*get_chars)(uint32_t vtermno, char *buf, int count); int (*put_chars)(uint32_t vtermno, const char *buf, int count); + int (*flush)(uint32_t vtermno, bool wait); /* Callbacks for notification. Called in open, close and hangup */ int (*notifier_add)(struct hvc_struct *hp, int irq); diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 9645c0062a90..f631f8bee308 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -183,9 +183,15 @@ static int hvc_opal_probe(struct platform_device *dev) return -ENOMEM; pv->proto = proto; hvc_opal_privs[termno] = pv; - if (proto == HV_PROTOCOL_HVSI) - hvsilib_init(&pv->hvsi, opal_get_chars, opal_put_chars, + if (proto == HV_PROTOCOL_HVSI) { + /* + * We want put_chars to be atomic to avoid mangling of + * hvsi packets. + */ + hvsilib_init(&pv->hvsi, + opal_get_chars, opal_put_chars_atomic, termno, 0); + } /* Instanciate now to establish a mapping index==vtermno */ hvc_instantiate(termno, termno, ops); @@ -275,6 +281,11 @@ static void udbg_opal_putc(char c) count = hvc_opal_hvsi_put_chars(termno, &c, 1); break; } + + /* This is needed for the cosole to flush + * when there aren't any interrupts. + */ + opal_flush_console(termno); } while(count == 0 || count == -EAGAIN); } @@ -302,14 +313,8 @@ static int udbg_opal_getc(void) int ch; for (;;) { ch = udbg_opal_getc_poll(); - if (ch == -1) { - /* This shouldn't be needed...but... */ - volatile unsigned long delay; - for (delay=0; delay < 2000000; delay++) - ; - } else { + if (ch != -1) return ch; - } } } @@ -370,8 +375,9 @@ void __init hvc_opal_init_early(void) else if (of_device_is_compatible(stdout_node,"ibm,opal-console-hvsi")) { hvc_opal_boot_priv.proto = HV_PROTOCOL_HVSI; ops = &hvc_opal_hvsi_ops; - hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars, - opal_put_chars, index, 1); + hvsilib_init(&hvc_opal_boot_priv.hvsi, + opal_get_chars, opal_put_chars_atomic, + index, 1); /* HVSI, perform the handshake now */ hvsilib_establish(&hvc_opal_boot_priv.hvsi); pr_devel("hvc_opal: Found HVSI console\n"); @@ -403,7 +409,8 @@ void __init udbg_init_debug_opal_hvsi(void) hvc_opal_privs[index] = &hvc_opal_boot_priv; hvc_opal_boot_termno = index; udbg_init_opal_common(); - hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars, opal_put_chars, + hvsilib_init(&hvc_opal_boot_priv.hvsi, + opal_get_chars, opal_put_chars_atomic, index, 1); hvsilib_establish(&hvc_opal_boot_priv.hvsi); } diff --git a/drivers/usb/host/ehci-ps3.c b/drivers/usb/host/ehci-ps3.c index 8c733492d8fe..454d8c624a3f 100644 --- a/drivers/usb/host/ehci-ps3.c +++ b/drivers/usb/host/ehci-ps3.c @@ -86,7 +86,7 @@ static int ps3_ehci_probe(struct ps3_system_bus_device *dev) int result; struct usb_hcd *hcd; unsigned int virq; - static u64 dummy_mask = DMA_BIT_MASK(32); + static u64 dummy_mask; if (usb_disabled()) { result = -ENODEV; @@ -131,7 +131,9 @@ static int ps3_ehci_probe(struct ps3_system_bus_device *dev) goto fail_irq; } - dev->core.dma_mask = &dummy_mask; /* FIXME: for improper usb code */ + dummy_mask = DMA_BIT_MASK(32); + dev->core.dma_mask = &dummy_mask; + dma_set_coherent_mask(&dev->core, dummy_mask); hcd = usb_create_hcd(&ps3_ehci_hc_driver, &dev->core, dev_name(&dev->core)); diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c index 20a23d795adf..395f9d3bc849 100644 --- a/drivers/usb/host/ohci-ps3.c +++ b/drivers/usb/host/ohci-ps3.c @@ -69,7 +69,7 @@ static int ps3_ohci_probe(struct ps3_system_bus_device *dev) int result; struct usb_hcd *hcd; unsigned int virq; - static u64 dummy_mask = DMA_BIT_MASK(32); + static u64 dummy_mask; if (usb_disabled()) { result = -ENODEV; @@ -115,7 +115,9 @@ static int ps3_ohci_probe(struct ps3_system_bus_device *dev) goto fail_irq; } - dev->core.dma_mask = &dummy_mask; /* FIXME: for improper usb code */ + dummy_mask = DMA_BIT_MASK(32); + dev->core.dma_mask = &dummy_mask; + dma_set_coherent_mask(&dev->core, dummy_mask); hcd = usb_create_hcd(&ps3_ohci_hc_driver, &dev->core, dev_name(&dev->core)); diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 7cd63b0c1a46..96721b154454 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -211,44 +211,6 @@ static long tce_iommu_register_pages(struct tce_container *container, return 0; } -static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl, - struct mm_struct *mm) -{ - unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * - tbl->it_size, PAGE_SIZE); - unsigned long *uas; - long ret; - - BUG_ON(tbl->it_userspace); - - ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT); - if (ret) - return ret; - - uas = vzalloc(cb); - if (!uas) { - decrement_locked_vm(mm, cb >> PAGE_SHIFT); - return -ENOMEM; - } - tbl->it_userspace = uas; - - return 0; -} - -static void tce_iommu_userspace_view_free(struct iommu_table *tbl, - struct mm_struct *mm) -{ - unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * - tbl->it_size, PAGE_SIZE); - - if (!tbl->it_userspace) - return; - - vfree(tbl->it_userspace); - tbl->it_userspace = NULL; - decrement_locked_vm(mm, cb >> PAGE_SHIFT); -} - static bool tce_page_is_contained(struct page *page, unsigned page_shift) { /* @@ -482,20 +444,20 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; int ret; unsigned long hpa = 0; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); if (!pua) return; - ret = tce_iommu_prereg_ua_to_hpa(container, *pua, tbl->it_page_shift, - &hpa, &mem); + ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua), + tbl->it_page_shift, &hpa, &mem); if (ret) - pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", - __func__, *pua, entry, ret); + pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n", + __func__, be64_to_cpu(*pua), entry, ret); if (mem) mm_iommu_mapped_dec(mem); - *pua = 0; + *pua = cpu_to_be64(0); } static int tce_iommu_clear(struct tce_container *container, @@ -599,16 +561,9 @@ static long tce_iommu_build_v2(struct tce_container *container, unsigned long hpa; enum dma_data_direction dirtmp; - if (!tbl->it_userspace) { - ret = tce_iommu_userspace_view_alloc(tbl, container->mm); - if (ret) - return ret; - } - for (i = 0; i < pages; ++i) { struct mm_iommu_table_group_mem_t *mem = NULL; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, - entry + i); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i); ret = tce_iommu_prereg_ua_to_hpa(container, tce, tbl->it_page_shift, &hpa, &mem); @@ -642,7 +597,7 @@ static long tce_iommu_build_v2(struct tce_container *container, if (dirtmp != DMA_NONE) tce_iommu_unuse_page_v2(container, tbl, entry + i); - *pua = tce; + *pua = cpu_to_be64(tce); tce += IOMMU_PAGE_SIZE(tbl); } @@ -676,7 +631,7 @@ static long tce_iommu_create_table(struct tce_container *container, page_shift, window_size, levels, ptbl); WARN_ON(!ret && !(*ptbl)->it_ops->free); - WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size)); return ret; } @@ -686,7 +641,6 @@ static void tce_iommu_free_table(struct tce_container *container, { unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; - tce_iommu_userspace_view_free(tbl, container->mm); iommu_tce_table_put(tbl); decrement_locked_vm(container->mm, pages); } @@ -1201,7 +1155,6 @@ static void tce_iommu_release_ownership(struct tce_container *container, continue; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); - tce_iommu_userspace_view_free(tbl, container->mm); if (tbl->it_map) iommu_release_ownership(tbl); |