Diffstat (limited to 'drivers/net/ethernet/netronome/nfp')
27 files changed, 1095 insertions, 276 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index cb87fccb9f6a..2572a4b91c7c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -43,8 +43,6 @@ #include "fw.h" #include "main.h" -#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg) - #define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4) static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf) @@ -441,7 +439,10 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) } if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) { - nfp_bpf_event_output(bpf, skb); + if (!nfp_bpf_event_output(bpf, skb->data, skb->len)) + dev_consume_skb_any(skb); + else + dev_kfree_skb_any(skb); return; } @@ -465,3 +466,21 @@ err_unlock: err_free: dev_kfree_skb_any(skb); } + +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + struct nfp_app_bpf *bpf = app->priv; + const struct cmsg_hdr *hdr = data; + + if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) { + cmsg_warn(bpf, "cmsg drop - too short %d!\n", len); + return; + } + + if (hdr->type == CMSG_TYPE_BPF_EVENT) + nfp_bpf_event_output(bpf, data, len); + else + cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n", + hdr->type); +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h index 4c7972e3db63..e4f9b7ec8528 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h @@ -51,6 +51,7 @@ enum bpf_cap_tlv_type { NFP_BPF_CAP_TYPE_MAPS = 3, NFP_BPF_CAP_TYPE_RANDOM = 4, NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5, + NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6, }; struct nfp_bpf_cap_tlv_func { diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 8a92088df0d7..eff57f7d056a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -34,10 +34,11 @@ #define pr_fmt(fmt) "NFP net bpf: " fmt #include <linux/bug.h> -#include <linux/kernel.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/kernel.h> #include <linux/pkt_cls.h> +#include <linux/reciprocal_div.h> #include <linux/unistd.h> #include "main.h" @@ -416,6 +417,60 @@ emit_alu(struct nfp_prog *nfp_prog, swreg dst, } static void +__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg, + enum mul_type type, enum mul_step step, u16 breg, bool swap, + bool wr_both, bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_MUL_BASE | + FIELD_PREP(OP_MUL_A_SRC, areg) | + FIELD_PREP(OP_MUL_B_SRC, breg) | + FIELD_PREP(OP_MUL_STEP, step) | + FIELD_PREP(OP_MUL_DST_AB, dst_ab) | + FIELD_PREP(OP_MUL_SW, swap) | + FIELD_PREP(OP_MUL_TYPE, type) | + FIELD_PREP(OP_MUL_WR_AB, wr_both) | + FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type, + enum mul_step step, swreg rreg) +{ + struct nfp_insn_ur_regs reg; + u16 areg; + int err; + + if (type == MUL_TYPE_START && step != MUL_STEP_NONE) { + nfp_prog->error = -EINVAL; + return; + } + + if (step == MUL_LAST || step == MUL_LAST_2) { + /* When type is step and step Number is LAST or LAST2, left + * source is used as destination. 
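+	 * (A reading of wrp_mul_u32() below: the multiply is pipelined, so a
+	 * full product takes a START plus steps 1..4 feeding lreg/rreg, and
+	 * only the trailing LAST / LAST_2 steps read the low / high 32 bits
+	 * back out - which is why they carry the destination operand.)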
+ */ + err = swreg_to_unrestricted(lreg, reg_none(), rreg, ®); + areg = reg.dst; + } else { + err = swreg_to_unrestricted(reg_none(), lreg, rreg, ®); + areg = reg.areg; + } + + if (err) { + nfp_prog->error = err; + return; + } + + __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap, + reg.wr_both, reg.dst_lmextn, reg.src_lmextn); +} + +static void __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, bool zero, bool swap, bool wr_both, @@ -670,7 +725,7 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) xfer_num = round_up(len, 4) / 4; if (src_40bit_addr) - addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base, + addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base, &off); /* Setup PREV_ALU fields to override memory read length. */ @@ -1380,6 +1435,133 @@ static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) SHF_SC_R_ROT, 16); } +static void +wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, + swreg rreg, bool gen_high_half) +{ + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg); + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none()); + if (gen_high_half) + emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2, + reg_none()); + else + wrp_immed(nfp_prog, dst_hi, 0); +} + +static void +wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, + swreg rreg) +{ + emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg); + emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg); + emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none()); +} + +static int +wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + bool gen_high_half, bool ropnd_from_reg) +{ + swreg multiplier, multiplicand, dst_hi, dst_lo; + const struct bpf_insn *insn = &meta->insn; + u32 lopnd_max, ropnd_max; + u8 dst_reg; + + dst_reg = insn->dst_reg; + multiplicand = reg_a(dst_reg * 2); + dst_hi = reg_both(dst_reg * 2 + 1); + dst_lo = reg_both(dst_reg * 2); + lopnd_max = meta->umax_dst; + if (ropnd_from_reg) { + multiplier = reg_b(insn->src_reg * 2); + ropnd_max = meta->umax_src; + } else { + u32 imm = insn->imm; + + multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + ropnd_max = imm; + } + if (lopnd_max > U16_MAX || ropnd_max > U16_MAX) + wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier, + gen_high_half); + else + wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier); + + return 0; +} + +static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm) +{ + swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst); + struct reciprocal_value_adv rvalue; + u8 pre_shift, exp; + swreg magic; + + if (imm > U32_MAX) { + wrp_immed(nfp_prog, dst_both, 0); + return 0; + } + + /* NOTE: because we are using "reciprocal_value_adv" which doesn't + * support "divisor > (1u << 31)", we need to JIT separate NFP sequence + * to handle such case which actually equals to the result of unsigned + * comparison "dst >= imm" which could be calculated using the following + * NFP sequence: + * + * alu[--, dst, -, imm] + 
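+	 *   (the subtract's result is dropped - it exists only to set carry,
+	 *    so after the full sequence dst == (dst >= imm))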
* immed[imm, 0] + * alu[dst, imm, +carry, 0] + * + */ + if (imm > 1U << 31) { + swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + + emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b); + wrp_immed(nfp_prog, imm_a(nfp_prog), 0); + emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C, + reg_imm(0)); + return 0; + } + + rvalue = reciprocal_value_adv(imm, 32); + exp = rvalue.exp; + if (rvalue.is_wide_m && !(imm & 1)) { + pre_shift = fls(imm & -imm) - 1; + rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift); + } else { + pre_shift = 0; + } + magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog)); + if (imm == 1U << exp) { + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, exp); + } else if (rvalue.is_wide_m) { + wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, + magic, true); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, + imm_b(nfp_prog)); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, 1); + emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, + imm_b(nfp_prog)); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, + SHF_SC_R_SHF, rvalue.sh - 1); + } else { + if (pre_shift) + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, + dst_b, SHF_SC_R_SHF, pre_shift); + wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true); + emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, + dst_b, SHF_SC_R_SHF, rvalue.sh); + } + + return 0; +} + static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); @@ -1460,6 +1642,51 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 ret_einval, end; + swreg plen, delta; + + BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN)); + + plen = imm_a(nfp_prog); + delta = reg_a(2 * 2); + + ret_einval = nfp_prog_current_offset(nfp_prog) + 9; + end = nfp_prog_current_offset(nfp_prog) + 11; + + /* Calculate resulting length */ + emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta); + /* delta == 0 is not allowed by the kernel, add must overflow to make + * length smaller. 
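+	 * Put differently: delta is negative in two's complement, so the add
+	 * carries out exactly when plen >= -delta; BR_BCC (branch on carry
+	 * clear) below then sends delta == 0 and any underflow to -EINVAL.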
+ */ + emit_br(nfp_prog, BR_BCC, ret_einval, 0); + + /* if (new_len < 14) then -EINVAL */ + emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN)); + emit_br(nfp_prog, BR_BMI, ret_einval, 0); + + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_ADD, delta); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_ADD, delta); + + emit_br(nfp_prog, BR_UNC, end, 2); + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) + return -EINVAL; + + wrp_immed(nfp_prog, reg_both(0), -22); + wrp_immed(nfp_prog, reg_both(1), ~0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, end)) + return -EINVAL; + + return 0; +} + static int map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { @@ -1684,6 +1911,31 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, true, true); +} + +static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, true, false); +} + +static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm); +} + +static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + /* NOTE: verifier hook has rejected cases for which verifier doesn't + * know whether the source operand is constant or not. + */ + return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src); +} + static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -1772,8 +2024,8 @@ static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u8 dst, src; dst = insn->dst_reg * 2; - umin = meta->umin; - umax = meta->umax; + umin = meta->umin_src; + umax = meta->umax_src; if (umin == umax) return __shl_imm64(nfp_prog, dst, umin); @@ -1881,8 +2133,8 @@ static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u8 dst, src; dst = insn->dst_reg * 2; - umin = meta->umin; - umax = meta->umax; + umin = meta->umin_src; + umax = meta->umax_src; if (umin == umax) return __shr_imm64(nfp_prog, dst, umin); @@ -1995,8 +2247,8 @@ static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u8 dst, src; dst = insn->dst_reg * 2; - umin = meta->umin; - umax = meta->umax; + umin = meta->umin_src; + umax = meta->umax_src; if (umin == umax) return __ashr_imm64(nfp_prog, dst, umin); @@ -2097,6 +2349,26 @@ static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); } +static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, false, true); +} + +static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_mul(nfp_prog, meta, false, false); +} + +static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return div_reg64(nfp_prog, meta); +} + +static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return div_imm64(nfp_prog, meta); +} + static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { u8 dst = meta->insn.dst_reg * 2; @@ -2814,6 +3086,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) switch 
(meta->insn.imm) { case BPF_FUNC_xdp_adjust_head: return adjust_head(nfp_prog, meta); + case BPF_FUNC_xdp_adjust_tail: + return adjust_tail(nfp_prog, meta); case BPF_FUNC_map_lookup_elem: case BPF_FUNC_map_update_elem: case BPF_FUNC_map_delete_elem: @@ -2848,6 +3122,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64, + [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64, + [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64, + [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64, [BPF_ALU64 | BPF_NEG] = neg_reg64, [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, @@ -2867,6 +3145,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU | BPF_ADD | BPF_K] = add_imm, [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_MUL | BPF_X] = mul_reg, + [BPF_ALU | BPF_MUL | BPF_K] = mul_imm, + [BPF_ALU | BPF_DIV | BPF_X] = div_reg, + [BPF_ALU | BPF_DIV | BPF_K] = div_imm, [BPF_ALU | BPF_NEG] = neg_reg, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, [BPF_ALU | BPF_END | BPF_X] = end_reg32, @@ -3299,7 +3581,8 @@ curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta, if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta)) return false; - if (ld_meta->ptr.type != PTR_TO_PACKET) + if (ld_meta->ptr.type != PTR_TO_PACKET && + ld_meta->ptr.type != PTR_TO_MAP_VALUE) return false; if (st_meta->ptr.type != PTR_TO_PACKET) @@ -3647,6 +3930,7 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) struct nfp_insn_meta *meta1, *meta2; struct nfp_bpf_map *nfp_map; struct bpf_map *map; + u32 id; nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { if (meta1->skip || meta2->skip) @@ -3658,11 +3942,14 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) map = (void *)(unsigned long)((u32)meta1->insn.imm | (u64)meta2->insn.imm << 32); - if (bpf_map_offload_neutral(map)) - continue; - nfp_map = map_to_offmap(map)->dev_priv; + if (bpf_map_offload_neutral(map)) { + id = map->id; + } else { + nfp_map = map_to_offmap(map)->dev_priv; + id = nfp_map->tid; + } - meta1->insn.imm = nfp_map->tid; + meta1->insn.imm = id; meta2->insn.imm = 0; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 40216d56dddc..970af07f4656 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -45,8 +45,8 @@ const struct rhashtable_params nfp_bpf_maps_neutral_params = { .nelem_hint = 4, - .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr), - .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr), + .key_len = FIELD_SIZEOF(struct bpf_map, id), + .key_offset = offsetof(struct nfp_bpf_neutral_map, map_id), .head_offset = offsetof(struct nfp_bpf_neutral_map, l), .automatic_shrinking = true, }; @@ -66,26 +66,19 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog, struct netlink_ext_ack *extack) { bool running, xdp_running; - int ret; if (!nfp_net_ebpf_capable(nn)) return -EINVAL; running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; - xdp_running = running && nn->dp.bpf_offload_xdp; + xdp_running = running && nn->xdp_hw.prog; if (!prog && !xdp_running) return 0; if (prog && running && !xdp_running) return -EBUSY; - ret = nfp_net_bpf_offload(nn, prog, running, extack); - /* Stop offload if replace not possible */ - if (ret) - return ret; - - nn->dp.bpf_offload_xdp = !!prog; - return 
ret; + return nfp_net_bpf_offload(nn, prog, running, extack); } static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) @@ -202,14 +195,11 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev, if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; - if (tcf_block_shared(f->block)) - return -EOPNOTSUPP; - switch (f->command) { case TC_BLOCK_BIND: return tcf_block_cb_register(f->block, nfp_bpf_setup_tc_block_cb, - nn, nn); + nn, nn, f->extack); case TC_BLOCK_UNBIND: tcf_block_cb_unregister(f->block, nfp_bpf_setup_tc_block_cb, @@ -344,6 +334,14 @@ nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) return 0; } +static int +nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->adjust_tail = true; + return 0; +} + static int nfp_bpf_parse_capabilities(struct nfp_app *app) { struct nfp_cpp *cpp = app->pf->cpp; @@ -390,6 +388,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app) if (nfp_bpf_parse_cap_qsel(app->priv, value, length)) goto err_release_free; break; + case NFP_BPF_CAP_TYPE_ADJUST_TAIL: + if (nfp_bpf_parse_cap_adjust_tail(app->priv, value, + length)) + goto err_release_free; + break; default: nfp_dbg(cpp, "unknown BPF capability: %d\n", type); break; @@ -411,6 +414,20 @@ err_release_free: return -EINVAL; } +static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_app_bpf *bpf = app->priv; + + return bpf_offload_dev_netdev_register(bpf->bpf_dev, netdev); +} + +static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev) +{ + struct nfp_app_bpf *bpf = app->priv; + + bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev); +} + static int nfp_bpf_init(struct nfp_app *app) { struct nfp_app_bpf *bpf; @@ -434,6 +451,11 @@ static int nfp_bpf_init(struct nfp_app *app) if (err) goto err_free_neutral_maps; + bpf->bpf_dev = bpf_offload_dev_create(); + err = PTR_ERR_OR_ZERO(bpf->bpf_dev); + if (err) + goto err_free_neutral_maps; + return 0; err_free_neutral_maps: @@ -452,6 +474,7 @@ static void nfp_bpf_clean(struct nfp_app *app) { struct nfp_app_bpf *bpf = app->priv; + bpf_offload_dev_destroy(bpf->bpf_dev); WARN_ON(!skb_queue_empty(&bpf->cmsg_replies)); WARN_ON(!list_empty(&bpf->map_list)); WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); @@ -473,10 +496,14 @@ const struct nfp_app_type app_bpf = { .extra_cap = nfp_bpf_extra_cap, + .ndo_init = nfp_bpf_ndo_init, + .ndo_uninit = nfp_bpf_ndo_uninit, + .vnic_alloc = nfp_bpf_vnic_alloc, .vnic_free = nfp_bpf_vnic_free, .ctrl_msg_rx = nfp_bpf_ctrl_msg_rx, + .ctrl_msg_rx_raw = nfp_bpf_ctrl_msg_rx_raw, .setup_tc = nfp_bpf_setup_tc, .bpf = nfp_ndo_bpf, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 654fe7823e5e..dbd00982fd2b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -47,6 +47,8 @@ #include "../nfp_asm.h" #include "fw.h" +#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg) + /* For relocation logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! 
*/ @@ -110,6 +112,8 @@ enum pkt_vec { * struct nfp_app_bpf - bpf app priv structure * @app: backpointer to the app * + * @bpf_dev: BPF offload device handle + * * @tag_allocator: bitmap of control message tags in use * @tag_alloc_next: next tag bit to allocate * @tag_alloc_last: next tag bit to be freed @@ -146,10 +150,13 @@ enum pkt_vec { * * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) * @queue_select: BPF can set the RX queue ID in packet vector + * @adjust_tail: BPF can simply trunc packet size for adjust tail */ struct nfp_app_bpf { struct nfp_app *app; + struct bpf_offload_dev *bpf_dev; + DECLARE_BITMAP(tag_allocator, U16_MAX + 1); u16 tag_alloc_next; u16 tag_alloc_last; @@ -189,6 +196,7 @@ struct nfp_app_bpf { bool pseudo_random; bool queue_select; + bool adjust_tail; }; enum nfp_bpf_map_use { @@ -217,6 +225,7 @@ struct nfp_bpf_map { struct nfp_bpf_neutral_map { struct rhash_head l; struct bpf_map *ptr; + u32 map_id; u32 count; }; @@ -263,8 +272,10 @@ struct nfp_bpf_reg_state { * @func_id: function id for call instructions * @arg1: arg1 for call instructions * @arg2: arg2 for call instructions - * @umin: copy of core verifier umin_value. - * @umax: copy of core verifier umax_value. + * @umin_src: copy of core verifier umin_value for src opearnd. + * @umax_src: copy of core verifier umax_value for src operand. + * @umin_dst: copy of core verifier umin_value for dst opearnd. + * @umax_dst: copy of core verifier umax_value for dst operand. * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @flags: eBPF instruction extra optimization flags @@ -300,12 +311,15 @@ struct nfp_insn_meta { struct bpf_reg_state arg1; struct nfp_bpf_reg_state arg2; }; - /* We are interested in range info for some operands, - * for example, the shift amount. + /* We are interested in range info for operands of ALU + * operations. For example, shift amount, multiplicand and + * multiplier etc. 
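+	 * Concretely (per the users in jit.c): shl/shr/ashr_reg64() consult
+	 * umin_src/umax_src to pick an immediate shift, wrp_mul() checks
+	 * umax_dst/umax_src to choose 16x16 vs 32x32 multiply steps, and
+	 * div_reg64() relies on umin_src == umax_src (a constant divisor).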
*/ struct { - u64 umin; - u64 umax; + u64 umin_src; + u64 umax_src; + u64 umin_dst; + u64 umax_dst; }; }; unsigned int off; @@ -339,6 +353,11 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) return BPF_MODE(meta->insn.code); } +static inline bool is_mbpf_alu(const struct nfp_insn_meta *meta) +{ + return mbpf_class(meta) == BPF_ALU64 || mbpf_class(meta) == BPF_ALU; +} + static inline bool is_mbpf_load(const struct nfp_insn_meta *meta) { return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM); @@ -384,23 +403,14 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); } -static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta) +static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta) { - u8 code = meta->insn.code; - bool is_alu, is_shift; - u8 opclass, opcode; - - opclass = BPF_CLASS(code); - is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU; - if (!is_alu) - return false; - - opcode = BPF_OP(code); - is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH; - if (!is_shift) - return false; + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL; +} - return BPF_SRC(code) == BPF_X; +static inline bool is_mbpf_div(const struct nfp_insn_meta *meta) +{ + return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_DIV; } /** @@ -496,7 +506,11 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, void *key, void *next_key); -int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb); +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len); void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, + unsigned int len); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 7eae4c0266f8..1ccd6371a15b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -67,7 +67,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, ASSERT_RTNL(); /* Reuse path - other offloaded program is already tracking this map. 
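	 * (The table is keyed by map->id rather than the host pointer now, so
	 * the FW perf-event path can resolve the 32-bit id it carries - see
	 * nfp_bpf_event_output().)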
*/ - record = rhashtable_lookup_fast(&bpf->maps_neutral, &map, + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id, nfp_bpf_maps_neutral_params); if (record) { nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; @@ -89,6 +89,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, } record->ptr = map; + record->map_id = map->id; record->count = 1; err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l, @@ -190,8 +191,10 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, meta->insn = prog[i]; meta->n = i; - if (is_mbpf_indir_shift(meta)) - meta->umin = U64_MAX; + if (is_mbpf_alu(meta)) { + meta->umin_src = U64_MAX; + meta->umin_dst = U64_MAX; + } list_add_tail(&meta->l, &nfp_prog->insns); } @@ -377,11 +380,23 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) bpf->maps.max_elems - bpf->map_elems_in_use); return -ENOMEM; } - if (offmap->map.key_size > bpf->maps.max_key_sz || - offmap->map.value_size > bpf->maps.max_val_sz || - round_up(offmap->map.key_size, 8) + + + if (round_up(offmap->map.key_size, 8) + round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) { - pr_info("elements don't fit in device constraints\n"); + pr_info("map elements too large: %u, FW max element size (key+value): %u\n", + round_up(offmap->map.key_size, 8) + + round_up(offmap->map.value_size, 8), + bpf->maps.max_elem_sz); + return -ENOMEM; + } + if (offmap->map.key_size > bpf->maps.max_key_sz) { + pr_info("map key size %u, FW max is %u\n", + offmap->map.key_size, bpf->maps.max_key_sz); + return -ENOMEM; + } + if (offmap->map.value_size > bpf->maps.max_val_sz) { + pr_info("map value size %u, FW max is %u\n", + offmap->map.value_size, bpf->maps.max_val_sz); return -ENOMEM; } @@ -451,43 +466,43 @@ nfp_bpf_perf_event_copy(void *dst, const void *src, return 0; } -int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb) +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len) { - struct cmsg_bpf_event *cbe = (void *)skb->data; - u32 pkt_size, data_size; - struct bpf_map *map; + struct cmsg_bpf_event *cbe = (void *)data; + struct nfp_bpf_neutral_map *record; + u32 pkt_size, data_size, map_id; + u64 map_id_full; - if (skb->len < sizeof(struct cmsg_bpf_event)) - goto err_drop; + if (len < sizeof(struct cmsg_bpf_event)) + return -EINVAL; pkt_size = be32_to_cpu(cbe->pkt_size); data_size = be32_to_cpu(cbe->data_size); - map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr); + map_id_full = be64_to_cpu(cbe->map_ptr); + map_id = map_id_full; - if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) - goto err_drop; + if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) + return -EINVAL; if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION) - goto err_drop; + return -EINVAL; rcu_read_lock(); - if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map, - nfp_bpf_maps_neutral_params)) { + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id, + nfp_bpf_maps_neutral_params); + if (!record || map_id_full > U32_MAX) { rcu_read_unlock(); - pr_warn("perf event: dest map pointer %px not recognized, dropping event\n", - map); - goto err_drop; + cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n", + map_id_full, map_id_full); + return -EINVAL; } - bpf_event_output(map, be32_to_cpu(cbe->cpu_id), + bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id), &cbe->data[round_up(pkt_size, 4)], data_size, cbe->data, pkt_size, nfp_bpf_perf_event_copy); 
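+	/* The id lookup and bpf_event_output() share one RCU read-side
+	 * section, presumably so record->ptr cannot go away between the
+	 * two calls.
+	 */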
rcu_read_unlock(); - dev_consume_skb_any(skb); return 0; -err_drop: - dev_kfree_skb_any(skb); - return -EINVAL; } static int @@ -564,14 +579,8 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, { int err; - if (prog) { - struct bpf_prog_offload *offload = prog->aux->offload; - - if (!offload) - return -EINVAL; - if (offload->netdev != nn->dp.netdev) - return -EINVAL; - } + if (prog && !bpf_offload_dev_match(prog, nn->dp.netdev)) + return -EINVAL; if (prog && old_prog) { u8 cap; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 4bfeba7b21b2..a6e9248669e1 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -178,6 +178,13 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); break; + case BPF_FUNC_xdp_adjust_tail: + if (!bpf->adjust_tail) { + pr_vlog(env, "adjust_tail not supported by FW\n"); + return -EOPNOTSUPP; + } + break; + case BPF_FUNC_map_lookup_elem: if (!nfp_bpf_map_call_ok("map_lookup", env, meta, bpf->helpers.map_lookup, reg1) || @@ -517,6 +524,82 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, } static int +nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *sreg = + cur_regs(env) + meta->insn.src_reg; + const struct bpf_reg_state *dreg = + cur_regs(env) + meta->insn.dst_reg; + + meta->umin_src = min(meta->umin_src, sreg->umin_value); + meta->umax_src = max(meta->umax_src, sreg->umax_value); + meta->umin_dst = min(meta->umin_dst, dreg->umin_value); + meta->umax_dst = max(meta->umax_dst, dreg->umax_value); + + /* NFP supports u16 and u32 multiplication. + * + * For ALU64, if either operand is beyond u32's value range, we reject + * it. One thing to note, if the source operand is BPF_K, then we need + * to check "imm" field directly, and we'd reject it if it is negative. + * Because for ALU64, "imm" (with s32 type) is expected to be sign + * extended to s64 which NFP mul doesn't support. + * + * For ALU32, it is fine for "imm" be negative though, because the + * result is 32-bits and there is no difference on the low halve of + * the result for signed/unsigned mul, so we will get correct result. + */ + if (is_mbpf_mul(meta)) { + if (meta->umax_dst > U32_MAX) { + pr_vlog(env, "multiplier is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_src(meta) == BPF_X && meta->umax_src > U32_MAX) { + pr_vlog(env, "multiplicand is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_class(meta) == BPF_ALU64 && + mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { + pr_vlog(env, "sign extended multiplicand won't be within u32 value range\n"); + return -EINVAL; + } + } + + /* NFP doesn't have divide instructions, we support divide by constant + * through reciprocal multiplication. Given NFP support multiplication + * no bigger than u32, we'd require divisor and dividend no bigger than + * that as well. + * + * Also eBPF doesn't support signed divide and has enforced this on C + * language level by failing compilation. However LLVM assembler hasn't + * enforced this, so it is possible for negative constant to leak in as + * a BPF_K operand through assembly code, we reject such cases as well. 
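	 * A hypothetical example of such a leak: BPF_ALU64_IMM(BPF_DIV, dst, -2)
	 * assembles, but the s32 imm sign-extends to a 64-bit value far above
	 * U32_MAX, which the 32-bit reciprocal sequence cannot represent.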
+ */ + if (is_mbpf_div(meta)) { + if (meta->umax_dst > U32_MAX) { + pr_vlog(env, "dividend is not within u32 value range\n"); + return -EINVAL; + } + if (mbpf_src(meta) == BPF_X) { + if (meta->umin_src != meta->umax_src) { + pr_vlog(env, "divisor is not constant\n"); + return -EINVAL; + } + if (meta->umax_src > U32_MAX) { + pr_vlog(env, "divisor is not within u32 value range\n"); + return -EINVAL; + } + } + if (mbpf_src(meta) == BPF_K && meta->insn.imm < 0) { + pr_vlog(env, "divide by negative constant is not supported\n"); + return -EINVAL; + } + } + + return 0; +} + +static int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; @@ -551,13 +634,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) if (is_mbpf_xadd(meta)) return nfp_bpf_check_xadd(nfp_prog, meta, env); - if (is_mbpf_indir_shift(meta)) { - const struct bpf_reg_state *sreg = - cur_regs(env) + meta->insn.src_reg; - - meta->umin = min(meta->umin, sreg->umin_value); - meta->umax = max(meta->umax, sreg->umax_value); - } + if (is_mbpf_alu(meta)) + return nfp_bpf_check_alu(nfp_prog, meta, env); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 4a6d2db75071..0ba0356ec4e6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -32,8 +32,10 @@ */ #include <linux/bitfield.h> +#include <net/geneve.h> #include <net/pkt_cls.h> #include <net/switchdev.h> +#include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_pedit.h> @@ -44,6 +46,16 @@ #include "main.h" #include "../nfp_net_repr.h" +/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable + * to change. Such changes will break our FW ABI. + */ +#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) +#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) +#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) +#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ + NFP_FL_TUNNEL_KEY | \ + NFP_FL_TUNNEL_GENEVE_OPT) + static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) { size_t act_size = sizeof(struct nfp_fl_pop_vlan); @@ -226,7 +238,71 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) } static int -nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, +nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, + const struct tc_action *action) +{ + struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + int opt_len, opt_cnt, act_start, tot_push_len; + u8 *src = ip_tunnel_info_opts(ip_tun); + + /* We need to populate the options in reverse order for HW. + * Therefore we go through the options, calculating the + * number of options and the total size, then we populate + * them in reverse order in the action list. 
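+	 * E.g. for TC options [A][B][C], the second pass below walks A, B, C
+	 * forward again but writes each at act_start + (remaining push length),
+	 * so the action list ends up ordered [C][B][A] for the HW.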
+ */ + opt_cnt = 0; + tot_push_len = 0; + opt_len = ip_tun->options_len; + while (opt_len > 0) { + struct geneve_opt *opt = (struct geneve_opt *)src; + + opt_cnt++; + if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) + return -EOPNOTSUPP; + + tot_push_len += sizeof(struct nfp_fl_push_geneve) + + opt->length * 4; + if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) + return -EOPNOTSUPP; + + opt_len -= sizeof(struct geneve_opt) + opt->length * 4; + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + act_start = *list_len; + *list_len += tot_push_len; + src = ip_tunnel_info_opts(ip_tun); + while (opt_cnt) { + struct geneve_opt *opt = (struct geneve_opt *)src; + struct nfp_fl_push_geneve *push; + size_t act_size, len; + + opt_cnt--; + act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4; + tot_push_len -= act_size; + len = act_start + tot_push_len; + + push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len]; + push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE; + push->head.len_lw = act_size >> NFP_FL_LW_SIZ; + push->reserved = 0; + push->class = opt->opt_class; + push->type = opt->type; + push->length = opt->length; + memcpy(&push->opt_data, opt->opt_data, opt->length * 4); + + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +static int +nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, + struct nfp_fl_set_ipv4_udp_tun *set_tun, const struct tc_action *action, struct nfp_fl_pre_tunnel *pre_tun, enum nfp_flower_tun_type tun_type, @@ -234,16 +310,19 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, { size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; - struct net *net; - if (ip_tun->options_len) + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || + NFP_FL_TUNNEL_KEY != TUNNEL_KEY || + NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); + if (ip_tun->options_len && + (tun_type != NFP_FL_TUNNEL_GENEVE || + !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) return -EOPNOTSUPP; - net = dev_net(netdev); - set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -254,7 +333,42 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + + if (ip_tun->key.ttl) { + set_tun->ttl = ip_tun->key.ttl; + } else { + struct net *net = dev_net(netdev); + struct flowi4 flow = {}; + struct rtable *rt; + int err; + + /* Do a route lookup to determine ttl - if fails then use + * default. Note that CONFIG_INET is a requirement of + * CONFIG_NET_SWITCHDEV so must be defined here. 
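+		 * Only daddr and the protocol are filled into the flow key;
+		 * that should be enough for ip_route_output_key() to pick an
+		 * egress dst whose hoplimit we copy.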
+ */ + flow.daddr = ip_tun->key.u.ipv4.dst; + flow.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(net, &flow); + err = PTR_ERR_OR_ZERO(rt); + if (!err) { + set_tun->ttl = ip4_dst_hoplimit(&rt->dst); + ip_rt_put(rt); + } else { + set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + } + } + + set_tun->tos = ip_tun->key.tos; + + if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || + ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) + return -EOPNOTSUPP; + set_tun->tun_flags = ip_tun->key.tun_flags; + + if (tun_type == NFP_FL_TUNNEL_GENEVE) { + set_tun->tun_proto = htons(ETH_P_TEB); + set_tun->tun_len = ip_tun->options_len / 4; + } /* Complete pre_tunnel action. */ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; @@ -398,8 +512,27 @@ nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off, return 0; } +static u32 nfp_fl_csum_l4_to_flag(u8 ip_proto) +{ + switch (ip_proto) { + case 0: + /* Filter doesn't force proto match, + * both TCP and UDP will be updated if encountered + */ + return TCA_CSUM_UPDATE_FLAG_TCP | TCA_CSUM_UPDATE_FLAG_UDP; + case IPPROTO_TCP: + return TCA_CSUM_UPDATE_FLAG_TCP; + case IPPROTO_UDP: + return TCA_CSUM_UPDATE_FLAG_UDP; + default: + /* All other protocols will be ignored by FW */ + return 0; + } +} + static int -nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) +nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, + char *nfp_action, int *a_len, u32 *csum_updated) { struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src; struct nfp_fl_set_ip4_addrs set_ip_addr; @@ -409,6 +542,7 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) int idx, nkeys, err; size_t act_size; u32 offset, cmd; + u8 ip_proto = 0; memset(&set_ip6_dst, 0, sizeof(set_ip6_dst)); memset(&set_ip6_src, 0, sizeof(set_ip6_src)); @@ -451,6 +585,15 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) return err; } + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *basic; + + basic = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_BASIC, + flow->key); + ip_proto = basic->ip_proto; + } + if (set_eth.head.len_lw) { act_size = sizeof(set_eth); memcpy(nfp_action, &set_eth, act_size); @@ -459,6 +602,10 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) act_size = sizeof(set_ip_addr); memcpy(nfp_action, &set_ip_addr, act_size); *a_len += act_size; + + /* Hardware will automatically fix IPv4 and TCP/UDP checksum. */ + *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | + nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { /* TC compiles set src and dst IPv6 address as a single action, * the hardware requires this to be 2 separate actions. @@ -471,18 +618,30 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst, act_size); *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_dst.head.len_lw) { act_size = sizeof(set_ip6_dst); memcpy(nfp_action, &set_ip6_dst, act_size); *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. 
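+		 * (Each flag OR-ed into *csum_updated feeds the act_csum
+		 * handling in nfp_flower_loop_action(): a requested recalc we
+		 * already cover is dropped from the list, and any flag left
+		 * over makes the offload bail with -EOPNOTSUPP.)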
*/ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_src.head.len_lw) { act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_tport.head.len_lw) { act_size = sizeof(set_tport); memcpy(nfp_action, &set_tport, act_size); *a_len += act_size; + + /* Hardware will automatically fix TCP/UDP checksum. */ + *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } return 0; @@ -493,12 +652,18 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, bool last, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, - int *out_cnt) + int *out_cnt, u32 *csum_updated) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_output *output; int err, prelag_size; + /* If csum_updated has not been reset by now, it means HW will + * incorrectly update csums when they are not requested. + */ + if (*csum_updated) + return -EOPNOTSUPP; + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) return -EOPNOTSUPP; @@ -529,10 +694,11 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, static int nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, + struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, - int *out_cnt) + int *out_cnt, u32 *csum_updated) { struct nfp_fl_set_ipv4_udp_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; @@ -545,14 +711,14 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, } else if (is_tcf_mirred_egress_redirect(a)) { err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev, true, tun_type, tun_out_cnt, - out_cnt); + out_cnt, csum_updated); if (err) return err; } else if (is_tcf_mirred_egress_mirror(a)) { err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev, false, tun_type, tun_out_cnt, - out_cnt); + out_cnt, csum_updated); if (err) return err; @@ -592,9 +758,13 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); *a_len += sizeof(struct nfp_fl_pre_tunnel); + err = nfp_fl_push_geneve_options(nfp_fl, a_len, a); + if (err) + return err; + set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type, - netdev); + err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun, + *tun_type, netdev); if (err) return err; *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); @@ -602,8 +772,17 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, /* Tunnel decap is handled by default so accept action. */ return 0; } else if (is_tcf_pedit(a)) { - if (nfp_fl_pedit(a, &nfp_fl->action_data[*a_len], a_len)) + if (nfp_fl_pedit(a, flow, &nfp_fl->action_data[*a_len], + a_len, csum_updated)) + return -EOPNOTSUPP; + } else if (is_tcf_csum(a)) { + /* csum action requests recalc of something we have not fixed */ + if (tcf_csum_update_flags(a) & ~*csum_updated) return -EOPNOTSUPP; + /* If we will correctly fix the csum we can remove it from the + * csum update list. Which will later be used to check support. + */ + *csum_updated &= ~tcf_csum_update_flags(a); } else { /* Currently we do not handle any other actions. 
*/ return -EOPNOTSUPP; @@ -620,6 +799,7 @@ int nfp_flower_compile_action(struct nfp_app *app, int act_len, act_cnt, err, tun_out_cnt, out_cnt; enum nfp_flower_tun_type tun_type; const struct tc_action *a; + u32 csum_updated = 0; LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); @@ -632,8 +812,9 @@ int nfp_flower_compile_action(struct nfp_app *app, tcf_exts_to_list(flow->exts, &actions); list_for_each_entry(a, &actions, list) { - err = nfp_flower_loop_action(app, a, nfp_flow, &act_len, netdev, - &tun_type, &tun_out_cnt, &out_cnt); + err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len, + netdev, &tun_type, &tun_out_cnt, + &out_cnt, &csum_updated); if (err) return err; act_cnt++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 4a7f3510a296..325954b829c8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -37,6 +37,7 @@ #include <linux/bitfield.h> #include <linux/skbuff.h> #include <linux/types.h> +#include <net/geneve.h> #include "../nfp_app.h" #include "../nfpcore/nfp_cpp.h" @@ -51,6 +52,7 @@ #define NFP_FLOWER_LAYER_VXLAN BIT(7) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) +#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) #define NFP_FLOWER_MASK_VLAN_CFI BIT(12) @@ -81,6 +83,11 @@ #define NFP_FL_MAX_A_SIZ 1216 #define NFP_FL_LW_SIZ 2 +/* Maximum allowed geneve options */ +#define NFP_FL_MAX_GENEVE_OPT_ACT 32 +#define NFP_FL_MAX_GENEVE_OPT_CNT 64 +#define NFP_FL_MAX_GENEVE_OPT_KEY 32 + /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 #define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 @@ -94,6 +101,7 @@ #define NFP_FL_ACTION_OPCODE_SET_TCP 15 #define NFP_FL_ACTION_OPCODE_PRE_LAG 16 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 #define NFP_FL_ACTION_OPCODE_NUM 32 #define NFP_FL_OUT_FLAGS_LAST BIT(15) @@ -203,10 +211,22 @@ struct nfp_fl_set_ipv4_udp_tun { __be16 reserved; __be64 tun_id __packed; __be32 tun_type_index; - __be16 reserved2; + __be16 tun_flags; u8 ttl; - u8 reserved3; - __be32 extra[2]; + u8 tos; + __be32 extra; + u8 tun_len; + u8 res2; + __be16 tun_proto; +}; + +struct nfp_fl_push_geneve { + struct nfp_fl_act_head head; + __be16 reserved; + __be16 class; + u8 type; + u8 length; + u8 opt_data[]; }; /* Metadata with L2 (1W/4B) @@ -346,7 +366,7 @@ struct nfp_flower_ipv6 { * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ipv4_addr_dst | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | Reserved | + * | Reserved | tos | ttl | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -356,10 +376,17 @@ struct nfp_flower_ipv6 { struct nfp_flower_ipv4_udp_tun { __be32 ip_src; __be32 ip_dst; - __be32 reserved[2]; + __be16 reserved1; + u8 tos; + u8 ttl; + __be32 reserved2; __be32 tun_id; }; +struct nfp_flower_geneve_options { + u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; +}; + #define NFP_FL_TUN_VNI_OFFSET 8 /* The base header for a control message packet. 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index 0c4c957717ea..bf10598f66ae 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -564,8 +564,9 @@ nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag, if (lag_upper_info && lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH || - (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 && - lag_upper_info->hash_type != NETDEV_LAG_HASH_E34))) { + (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) { can_offload = false; nfp_flower_cmsg_warn(priv->app, "Unable to offload tx_type %u hash %u\n", diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index bbe5764d26cb..85f8209bf007 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -69,11 +69,12 @@ struct nfp_app; /* Extra features bitmap. */ #define NFP_FL_FEATS_GENEVE BIT(0) #define NFP_FL_NBI_MTU_SETTING BIT(1) +#define NFP_FL_FEATS_GENEVE_OPT BIT(2) #define NFP_FL_FEATS_LAG BIT(31) struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; - struct timespec64 *last_used; + ktime_t *last_used; u8 init_unallocated; }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 84f7a5dbea9d..a0c72f277faa 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -262,6 +262,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version); } +static int +nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_enc_opts *opts; + + opts = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + target); + memcpy(key_buf, opts->data, opts->len); + + return 0; +} + static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, struct tc_cls_flower_offload *flow, @@ -270,6 +285,7 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, struct fl_flow_key *target = mask_version ? 
flow->mask : flow->key; struct flow_dissector_key_ipv4_addrs *tun_ips; struct flow_dissector_key_keyid *vni; + struct flow_dissector_key_ip *ip; memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); @@ -293,6 +309,14 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, frame->ip_src = tun_ips->src; frame->ip_dst = tun_ips->dst; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { + ip = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IP, + target); + frame->tos = ip->tos; + frame->ttl = ip->ttl; + } } int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, @@ -415,6 +439,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, nfp_flow->nfp_tun_ipv4_addr = tun_dst; nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst); } + + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + err = nfp_flower_compile_geneve_opt(ext, flow, false); + if (err) + return err; + + err = nfp_flower_compile_geneve_opt(msk, flow, true); + if (err) + return err; + } } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 93fb809f50d1..c098730544b7 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -158,7 +158,6 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id) { struct nfp_flower_priv *priv = app->priv; struct circ_buf *ring; - struct timespec64 now; ring = &priv->mask_ids.mask_id_free_list; /* Checking if buffer is full. */ @@ -169,8 +168,7 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id) ring->head = (ring->head + NFP_FLOWER_MASK_ELEMENT_RS) % (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS); - getnstimeofday64(&now); - priv->mask_ids.last_used[mask_id] = now; + priv->mask_ids.last_used[mask_id] = ktime_get(); return 0; } @@ -178,7 +176,7 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id) static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id) { struct nfp_flower_priv *priv = app->priv; - struct timespec64 delta, now; + ktime_t reuse_timeout; struct circ_buf *ring; u8 temp_id, freed_id; @@ -198,10 +196,10 @@ static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id) memcpy(&temp_id, &ring->buf[ring->tail], NFP_FLOWER_MASK_ELEMENT_RS); *mask_id = temp_id; - getnstimeofday64(&now); - delta = timespec64_sub(now, priv->mask_ids.last_used[*mask_id]); + reuse_timeout = ktime_add_ns(priv->mask_ids.last_used[*mask_id], + NFP_FL_MASK_REUSE_TIME_NS); - if (timespec64_to_ns(&delta) < NFP_FL_MASK_REUSE_TIME_NS) + if (ktime_before(ktime_get(), reuse_timeout)) goto err_not_found; memcpy(&ring->buf[ring->tail], &freed_id, NFP_FLOWER_MASK_ELEMENT_RS); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 525057bee0ed..2edab01c3beb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -66,6 +66,8 @@ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \ BIT(FLOW_DISSECTOR_KEY_MPLS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) @@ -74,7 +76,9 @@ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ - BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ 
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP)) #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ @@ -139,6 +143,21 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) } static int +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, + u32 *key_layer_two, int *key_size) +{ + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) + return -EOPNOTSUPP; + + if (enc_opts->len > 0) { + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; + *key_size += sizeof(struct nfp_flower_geneve_options); + } + + return 0; +} + +static int nfp_flower_calculate_key_layers(struct nfp_app *app, struct nfp_fl_key_ls *ret_key_ls, struct tc_cls_flower_offload *flow, @@ -151,6 +170,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, u32 key_layer_two; u8 key_layer; int key_size; + int err; if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) return -EOPNOTSUPP; @@ -176,6 +196,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; struct flow_dissector_key_ports *mask_enc_ports = NULL; + struct flow_dissector_key_enc_opts *enc_op = NULL; struct flow_dissector_key_ports *enc_ports = NULL; struct flow_dissector_key_control *mask_enc_ctl = skb_flow_dissector_target(flow->dissector, @@ -212,11 +233,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (mask_enc_ports->dst != cpu_to_be16(~0)) return -EOPNOTSUPP; + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS)) { + enc_op = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + flow->key); + } + switch (enc_ports->dst) { case htons(NFP_FL_VXLAN_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; key_layer |= NFP_FLOWER_LAYER_VXLAN; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (enc_op) + return -EOPNOTSUPP; break; case htons(NFP_FL_GENEVE_PORT): if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) @@ -226,6 +257,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, key_size += sizeof(struct nfp_flower_ext_meta); key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (!enc_op) + break; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) + return -EOPNOTSUPP; + err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two, + &key_size); + if (err) + return err; break; default: return -EOPNOTSUPP; @@ -584,9 +624,9 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, return nfp_flower_del_offload(app, netdev, flower, egress); case TC_CLSFLOWER_STATS: return nfp_flower_get_stats(app, netdev, flower, egress); + default: + return -EOPNOTSUPP; } - - return -EOPNOTSUPP; } int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data, @@ -631,14 +671,11 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev, if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; - if (tcf_block_shared(f->block)) - return -EOPNOTSUPP; - switch (f->command) { case TC_BLOCK_BIND: return tcf_block_cb_register(f->block, nfp_flower_setup_tc_block_cb, - repr, repr); + repr, repr, f->extack); case TC_BLOCK_UNBIND: tcf_block_cb_unregister(f->block, nfp_flower_setup_tc_block_cb, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index f28b244f4ee7..8607d09ab732 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ 
-86,6 +86,23 @@ const char *nfp_app_mip_name(struct nfp_app *app) return nfp_mip_name(app->pf->mip); } +int nfp_app_ndo_init(struct net_device *netdev) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + + if (!app || !app->type->ndo_init) + return 0; + return app->type->ndo_init(app, netdev); +} + +void nfp_app_ndo_uninit(struct net_device *netdev) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + + if (app && app->type->ndo_uninit) + app->type->ndo_uninit(app, netdev); +} + u64 *nfp_app_port_get_stats(struct nfp_port *port, u64 *data) { if (!port || !port->app || !port->app->type->port_get_stats) @@ -155,6 +172,8 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc)) return ERR_PTR(-EINVAL); + if (WARN_ON(!apps[id]->ctrl_msg_rx && apps[id]->ctrl_msg_rx_raw)) + return ERR_PTR(-EINVAL); app = kzalloc(sizeof(*app), GFP_KERNEL); if (!app) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index ee74caacb015..4e1eb3395648 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -78,6 +78,8 @@ extern const struct nfp_app_type app_abm; * @init: perform basic app checks and init * @clean: clean app state * @extra_cap: extra capabilities string + * @ndo_init: vNIC and repr netdev .ndo_init + * @ndo_uninit: vNIC and repr netdev .ndo_unint * @vnic_alloc: allocate vNICs (assign port types, etc.) * @vnic_free: free up app's vNIC state * @vnic_init: vNIC netdev was registered @@ -96,6 +98,7 @@ extern const struct nfp_app_type app_abm; * @start: start application logic * @stop: stop application logic * @ctrl_msg_rx: control message handler + * @ctrl_msg_rx_raw: handler for control messages from data queues * @setup_tc: setup TC ndo * @bpf: BPF ndo offload-related calls * @xdp_offload: offload an XDP program @@ -117,6 +120,9 @@ struct nfp_app_type { const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn); + int (*ndo_init)(struct nfp_app *app, struct net_device *netdev); + void (*ndo_uninit)(struct nfp_app *app, struct net_device *netdev); + int (*vnic_alloc)(struct nfp_app *app, struct nfp_net *nn, unsigned int id); void (*vnic_free)(struct nfp_app *app, struct nfp_net *nn); @@ -145,6 +151,8 @@ struct nfp_app_type { void (*stop)(struct nfp_app *app); void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb); + void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data, + unsigned int len); int (*setup_tc)(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data); @@ -200,6 +208,9 @@ static inline void nfp_app_clean(struct nfp_app *app) app->type->clean(app); } +int nfp_app_ndo_init(struct net_device *netdev); +void nfp_app_ndo_uninit(struct net_device *netdev); + static inline int nfp_app_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) { @@ -310,6 +321,11 @@ static inline bool nfp_app_ctrl_has_meta(struct nfp_app *app) return app->type->ctrl_has_meta; } +static inline bool nfp_app_ctrl_uses_data_vnics(struct nfp_app *app) +{ + return app && app->type->ctrl_msg_rx_raw; +} + static inline const char *nfp_app_extra_cap(struct nfp_app *app, struct nfp_net *nn) { @@ -373,6 +389,16 @@ static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb) app->type->ctrl_msg_rx(app, skb); } +static inline void +nfp_app_ctrl_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + if (!app || !app->type->ctrl_msg_rx_raw) + return; + 
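+	/* Trace the raw payload first, then dispatch to the app's handler,
+	 * presumably mirroring the skb-based nfp_app_ctrl_rx() path.
+	 */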
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index ee74caacb015..4e1eb3395648 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -78,6 +78,8 @@ extern const struct nfp_app_type app_abm;
 * @init:	perform basic app checks and init
 * @clean:	clean app state
 * @extra_cap:	extra capabilities string
+ * @ndo_init:	vNIC and repr netdev .ndo_init
+ * @ndo_uninit:	vNIC and repr netdev .ndo_uninit
 * @vnic_alloc:	allocate vNICs (assign port types, etc.)
 * @vnic_free:	free up app's vNIC state
 * @vnic_init:	vNIC netdev was registered
@@ -96,6 +98,7 @@ extern const struct nfp_app_type app_abm;
 * @start:	start application logic
 * @stop:	stop application logic
 * @ctrl_msg_rx:	control message handler
+ * @ctrl_msg_rx_raw:	handler for control messages from data queues
 * @setup_tc:	setup TC ndo
 * @bpf:	BPF ndo offload-related calls
 * @xdp_offload:	offload an XDP program
@@ -117,6 +120,9 @@ struct nfp_app_type {
 	const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn);
 
+	int (*ndo_init)(struct nfp_app *app, struct net_device *netdev);
+	void (*ndo_uninit)(struct nfp_app *app, struct net_device *netdev);
+
 	int (*vnic_alloc)(struct nfp_app *app, struct nfp_net *nn,
 			  unsigned int id);
 	void (*vnic_free)(struct nfp_app *app, struct nfp_net *nn);
@@ -145,6 +151,8 @@ struct nfp_app_type {
 	void (*stop)(struct nfp_app *app);
 
 	void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb);
+	void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data,
+				unsigned int len);
 
 	int (*setup_tc)(struct nfp_app *app, struct net_device *netdev,
 			enum tc_setup_type type, void *type_data);
@@ -200,6 +208,9 @@ static inline void nfp_app_clean(struct nfp_app *app)
 		app->type->clean(app);
 }
 
+int nfp_app_ndo_init(struct net_device *netdev);
+void nfp_app_ndo_uninit(struct net_device *netdev);
+
 static inline int nfp_app_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
 				     unsigned int id)
 {
@@ -310,6 +321,11 @@ static inline bool nfp_app_ctrl_has_meta(struct nfp_app *app)
 	return app->type->ctrl_has_meta;
 }
 
+static inline bool nfp_app_ctrl_uses_data_vnics(struct nfp_app *app)
+{
+	return app && app->type->ctrl_msg_rx_raw;
+}
+
 static inline const char *nfp_app_extra_cap(struct nfp_app *app,
 					    struct nfp_net *nn)
 {
@@ -373,6 +389,16 @@ static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb)
 	app->type->ctrl_msg_rx(app, skb);
 }
 
+static inline void
+nfp_app_ctrl_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
+{
+	if (!app || !app->type->ctrl_msg_rx_raw)
+		return;
+
+	trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, data, len);
+	app->type->ctrl_msg_rx_raw(app, data, len);
+}
+
 static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode)
 {
 	if (!app->type->eswitch_mode_get)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index f6677bc9875a..fad0e62a910c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -93,6 +93,7 @@ enum br_mask {
 	BR_BNE = 0x01,
 	BR_BMI = 0x02,
 	BR_BHS = 0x04,
+	BR_BCC = 0x05,
 	BR_BLO = 0x05,
 	BR_BGE = 0x08,
 	BR_BLT = 0x09,
@@ -426,4 +427,32 @@ static inline u32 nfp_get_ind_csr_ctx_ptr_offs(u32 read_offset)
 	return (read_offset & ~NFP_IND_ME_CTX_PTR_BASE_MASK) | NFP_CSR_CTX_PTR;
 }
 
+enum mul_type {
+	MUL_TYPE_START = 0x00,
+	MUL_TYPE_STEP_24x8 = 0x01,
+	MUL_TYPE_STEP_16x16 = 0x02,
+	MUL_TYPE_STEP_32x32 = 0x03,
+};
+
+enum mul_step {
+	MUL_STEP_1 = 0x00,
+	MUL_STEP_NONE = MUL_STEP_1,
+	MUL_STEP_2 = 0x01,
+	MUL_STEP_3 = 0x02,
+	MUL_STEP_4 = 0x03,
+	MUL_LAST = 0x04,
+	MUL_LAST_2 = 0x05,
+};
+
+#define OP_MUL_BASE		0x0f800000000ULL
+#define OP_MUL_A_SRC		0x000000003ffULL
+#define OP_MUL_B_SRC		0x000000ffc00ULL
+#define OP_MUL_STEP		0x00000700000ULL
+#define OP_MUL_DST_AB		0x00000800000ULL
+#define OP_MUL_SW		0x00040000000ULL
+#define OP_MUL_TYPE		0x00180000000ULL
+#define OP_MUL_WR_AB		0x20000000000ULL
+#define OP_MUL_SRC_LMEXTN	0x40000000000ULL
+#define OP_MUL_DST_LMEXTN	0x80000000000ULL
+
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 152283d7e59c..4a540c5e27fe 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -236,16 +236,20 @@ static int nfp_pcie_sriov_read_nfd_limit(struct nfp_pf *pf)
 	int err;
 
 	pf->limit_vfs = nfp_rtsym_read_le(pf->rtbl, "nfd_vf_cfg_max_vfs", &err);
-	if (!err)
-		return pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs);
+	if (err) {
+		/* For backwards compatibility if symbol not found allow all */
+		pf->limit_vfs = ~0;
+		if (err == -ENOENT)
+			return 0;
 
-	pf->limit_vfs = ~0;
-	/* Allow any setting for backwards compatibility if symbol not found */
-	if (err == -ENOENT)
-		return 0;
+		nfp_warn(pf->cpp, "Warning: VF limit read failed: %d\n", err);
+		return err;
+	}
 
-	nfp_warn(pf->cpp, "Warning: VF limit read failed: %d\n", err);
-	return err;
+	err = pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs);
+	if (err)
+		nfp_warn(pf->cpp, "Failed to set VF count in sysfs: %d\n", err);
+	return 0;
 }
 
 static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs)
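Condensing the reworked nfp_pcie_sriov_read_nfd_limit() above into its policy: a missing "nfd_vf_cfg_max_vfs" symbol (-ENOENT, i.e. older firmware) means "no VF limit", any other read error is still propagated, and a pci_sriov_set_totalvfs() failure is now only worth a warning. A sketch of that policy (the function name is illustrative):

	static int read_vf_limit(struct nfp_pf *pf)
	{
		int err;

		pf->limit_vfs = nfp_rtsym_read_le(pf->rtbl, "nfd_vf_cfg_max_vfs",
						  &err);
		if (err) {
			pf->limit_vfs = ~0;		  /* allow any setting */
			return err == -ENOENT ? 0 : err;  /* old FW is fine */
		}

		/* best effort: VFs still work if sysfs refuses the hint */
		if (pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs))
			nfp_warn(pf->cpp, "totalvfs update failed\n");
		return 0;
	}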
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 2a71a9ffd095..439e6ffe2f05 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -250,7 +250,7 @@ struct nfp_net_tx_ring {
 	struct nfp_net_tx_desc *txds;
 
 	dma_addr_t dma;
-	unsigned int size;
+	size_t size;
 	bool is_xdp;
 } ____cacheline_aligned;
 
@@ -350,9 +350,9 @@ struct nfp_net_rx_buf {
 * @qcp_fl:	Pointer to base of the QCP freelist queue
 * @rxbufs:	Array of transmitted FL/RX buffers
 * @rxds:	Virtual address of FL/RX ring in host memory
+ * @xdp_rxq:	RX-ring info avail for XDP
 * @dma:	DMA address of the FL/RX ring
 * @size:	Size, in bytes, of the FL/RX ring (needed to free)
- * @xdp_rxq:	RX-ring info avail for XDP
 */
struct nfp_net_rx_ring {
	struct nfp_net_r_vector *r_vec;
@@ -364,14 +364,15 @@ struct nfp_net_rx_ring {
 	u32 idx;
 
 	int fl_qcidx;
-	unsigned int size;
 	u8 __iomem *qcp_fl;
 
 	struct nfp_net_rx_buf *rxbufs;
 	struct nfp_net_rx_desc *rxds;
-	dma_addr_t dma;
 
 	struct xdp_rxq_info xdp_rxq;
+
+	dma_addr_t dma;
+	size_t size;
 } ____cacheline_aligned;
 
/**
@@ -485,7 +486,6 @@ struct nfp_stat_pair {
 * @dev:	Backpointer to struct device
 * @netdev:	Backpointer to net_device structure
 * @is_vf:	Is the driver attached to a VF?
- * @bpf_offload_xdp:	Offloaded BPF program is XDP
 * @chained_metadata_format:	Firmware will use new metadata format
 * @rx_dma_dir:	Mapping direction for RX buffers
 * @rx_dma_off:	Offset at which DMA packets (for XDP headroom)
@@ -510,7 +510,6 @@ struct nfp_net_dp {
 	struct net_device *netdev;
 
 	u8 is_vf:1;
-	u8 bpf_offload_xdp:1;
 	u8 chained_metadata_format:1;
 
 	u8 rx_dma_dir;
@@ -553,8 +552,8 @@ struct nfp_net_dp {
 * @rss_cfg:	RSS configuration
 * @rss_key:	RSS secret key
 * @rss_itbl:	RSS indirection table
- * @xdp_flags:	Flags with which XDP prog was loaded
- * @xdp_prog:	XDP prog (for ctrl path, both DRV and HW modes)
+ * @xdp:	Information about the driver XDP program
+ * @xdp_hw:	Information about the HW XDP program
 * @max_r_vecs:	Number of allocated interrupt vectors for RX/TX
 * @max_tx_rings:	Maximum number of TX rings supported by the Firmware
 * @max_rx_rings:	Maximum number of RX rings supported by the Firmware
@@ -610,8 +609,8 @@ struct nfp_net {
 	u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
 	u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
 
-	u32 xdp_flags;
-	struct bpf_prog *xdp_prog;
+	struct xdp_attachment_info xdp;
+	struct xdp_attachment_info xdp_hw;
 
 	unsigned int max_tx_rings;
 	unsigned int max_rx_rings;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index d4c27f849f9b..a8b9fbab5f73 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -53,6 +53,8 @@
 #include <linux/interrupt.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/mm.h>
+#include <linux/overflow.h>
 #include <linux/page_ref.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
@@ -945,11 +947,10 @@ err_free:
 
 /**
 * nfp_net_tx_complete() - Handle completed TX packets
- * @tx_ring:	TX ring structure
- *
- * Return: Number of completed TX descriptors
+ * @tx_ring:	TX ring structure
+ * @budget:	NAPI budget (only used as bool to determine if in NAPI context)
 */
-static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
+static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
 {
 	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
@@ -999,7 +1000,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 
 			/* check for last gather fragment */
 			if (fidx == nr_frags - 1)
-				dev_consume_skb_any(skb);
+				napi_consume_skb(skb, budget);
 
 			tx_ring->txbufs[idx].dma_addr = 0;
 			tx_ring->txbufs[idx].skb = NULL;
@@ -1077,7 +1078,7 @@ static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
 * @dp:		NFP Net data path struct
 * @tx_ring:	TX ring structure
 *
- * Assumes that the device is stopped
+ * Assumes that the device is stopped, must be idempotent.
 */
static void
nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
@@ -1119,7 +1120,7 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
 		tx_ring->rd_p++;
 	}
 
-	memset(tx_ring->txds, 0, sizeof(*tx_ring->txds) * tx_ring->cnt);
+	memset(tx_ring->txds, 0, tx_ring->size);
 	tx_ring->wr_p = 0;
 	tx_ring->rd_p = 0;
 	tx_ring->qcp_rd_p = 0;
@@ -1279,13 +1280,18 @@ static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
 * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
 * @rx_ring:	RX ring structure
 *
- * Warning: Do *not* call if ring buffers were never put on the FW freelist
- *	    (i.e. device was not enabled)!
+ * Assumes that the device is stopped, must be idempotent.
 */
static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
{
	unsigned int wr_idx, last_idx;

+	/* wr_p == rd_p means ring was never fed FL bufs.  RX rings are always
+	 * kept at cnt - 1 FL bufs.
+	 */
+	if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0)
+		return;
+
	/* Move the empty entry to the end of the list */
	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
	last_idx = rx_ring->cnt - 1;
@@ -1294,7 +1300,7 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
	rx_ring->rxbufs[last_idx].dma_addr = 0;
	rx_ring->rxbufs[last_idx].frag = NULL;

-	memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt);
+	memset(rx_ring->rxds, 0, rx_ring->size);
	rx_ring->wr_p = 0;
	rx_ring->rd_p = 0;
}
@@ -1709,8 +1715,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
			}
		}

-		if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
-				  dp->bpf_offload_xdp) && !meta.portid) {
+		if (xdp_prog && !meta.portid) {
			void *orig_data = rxbuf->frag + pkt_off;
			unsigned int dma_off;
			int act;
@@ -1752,6 +1757,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
			}
		}

+		if (likely(!meta.portid)) {
+			netdev = dp->netdev;
+		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
+			struct nfp_net *nn = netdev_priv(dp->netdev);
+
+			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
+					    pkt_len);
+			nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
+					    rxbuf->dma_addr);
+			continue;
+		} else {
+			struct nfp_net *nn;
+
+			nn = netdev_priv(dp->netdev);
+			netdev = nfp_app_repr_get(nn->app, meta.portid);
+			if (unlikely(!netdev)) {
+				nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
+						NULL);
+				continue;
+			}
+			nfp_repr_inc_rx_stats(netdev, pkt_len);
+		}
+
		skb = build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
@@ -1767,20 +1795,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
		nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

-		if (likely(!meta.portid)) {
-			netdev = dp->netdev;
-		} else {
-			struct nfp_net *nn;
-
-			nn = netdev_priv(dp->netdev);
-			netdev = nfp_app_repr_get(nn->app, meta.portid);
-			if (unlikely(!netdev)) {
-				nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb);
-				continue;
-			}
-			nfp_repr_inc_rx_stats(netdev, pkt_len);
-		}
-
		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);
@@ -1828,7 +1842,7 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
-		nfp_net_tx_complete(r_vec->tx_ring);
+		nfp_net_tx_complete(r_vec->tx_ring, budget);
	if (r_vec->rx_ring)
		pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
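The switch from dev_consume_skb_any() to napi_consume_skb() is what the new budget parameter exists for: a non-zero budget tells the helper it runs inside a NAPI ->poll() and may use the lockless per-CPU bulk-free cache, while budget == 0 signals any other context, where it falls back to dev_consume_skb_any(). Hence nfp_net_poll() above forwards its budget and the ctrl vNIC tasklet below passes 0. As a sketch of the calling convention (not driver code):

	static void tx_free_skb(struct sk_buff *skb, int budget)
	{
		/* budget > 0:  NAPI context, bulk free allowed
		 * budget == 0: tasklet/netpoll context, degrades to
		 *		dev_consume_skb_any()
		 */
		napi_consume_skb(skb, budget);
	}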
@@ -2062,7 +2076,7 @@ static void nfp_ctrl_poll(unsigned long arg)
 	struct nfp_net_r_vector *r_vec = (void *)arg;
 
 	spin_lock_bh(&r_vec->lock);
-	nfp_net_tx_complete(r_vec->tx_ring);
+	nfp_net_tx_complete(r_vec->tx_ring, 0);
 	__nfp_ctrl_tx_queued(r_vec);
 	spin_unlock_bh(&r_vec->lock);
@@ -2121,7 +2135,7 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
 	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
 
-	kfree(tx_ring->txbufs);
+	kvfree(tx_ring->txbufs);
 
 	if (tx_ring->txds)
 		dma_free_coherent(dp->dev, tx_ring->size,
@@ -2145,18 +2159,17 @@ static int
nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
-	int sz;

	tx_ring->cnt = dp->txd_cnt;
-	tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
+	tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds));
	tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size,
					    &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->txds)
		goto err_alloc;

-	sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt;
-	tx_ring->txbufs = kzalloc(sz, GFP_KERNEL);
+	tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs),
+				   GFP_KERNEL);
	if (!tx_ring->txbufs)
		goto err_alloc;
@@ -2270,7 +2283,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 	if (dp->netdev)
 		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 
-	kfree(rx_ring->rxbufs);
+	kvfree(rx_ring->rxbufs);
 
 	if (rx_ring->rxds)
 		dma_free_coherent(dp->dev, rx_ring->size,
@@ -2293,7 +2306,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
static int
nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
{
-	int sz, err;
+	int err;

	if (dp->netdev) {
		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
@@ -2303,14 +2316,14 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
	}

	rx_ring->cnt = dp->rxd_cnt;
-	rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
+	rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
	rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size,
					    &rx_ring->dma, GFP_KERNEL);
	if (!rx_ring->rxds)
		goto err_alloc;

-	sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt;
-	rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL);
+	rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs),
+				   GFP_KERNEL);
	if (!rx_ring->rxbufs)
		goto err_alloc;
@@ -2508,6 +2521,8 @@ static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
 /**
 * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
 * @nn:      NFP Net device to reconfigure
+ *
+ * Warning: must be fully idempotent.
 */
static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
{
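Two related themes run through the ring-memory changes in this file: ring sizes are computed once with array_size(), which saturates to SIZE_MAX on multiplication overflow so an absurd descriptor count makes the allocation fail cleanly instead of yielding an undersized buffer (and the stored ->size is what the reset paths now memset()), and the host-side buffer arrays moved to kvcalloc()/kvfree(), which may fall back to vmalloc() for large rings since these arrays are never handed to DMA. A condensed sketch, with struct my_ring and its members standing in for the driver's ring structs:

	#include <linux/mm.h>		/* kvcalloc(), kvfree() */
	#include <linux/overflow.h>	/* array_size() */

	struct my_desc { __le64 qw; };	/* placeholder descriptor */
	struct my_buf { void *frag; };	/* placeholder buffer info */

	struct my_ring {
		unsigned int cnt;
		size_t size;
		struct my_desc *txds;	/* DMA descriptor array */
		struct my_buf *txbufs;	/* host-only metadata */
		dma_addr_t dma;
	};

	static int my_ring_alloc(struct device *dev, struct my_ring *ring)
	{
		ring->size = array_size(ring->cnt, sizeof(*ring->txds));
		ring->txds = dma_zalloc_coherent(dev, ring->size, &ring->dma,
						 GFP_KERNEL);
		if (!ring->txds)
			return -ENOMEM;

		ring->txbufs = kvcalloc(ring->cnt, sizeof(*ring->txbufs),
					GFP_KERNEL);
		if (!ring->txbufs)
			return -ENOMEM;	/* caller unwinds txds */
		return 0;
	}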
@@ -3115,6 +3130,21 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
 	return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL);
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void nfp_net_netpoll(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int i;
+
+	/* nfp_net's NAPIs are statically allocated so even if there is a race
+	 * with reconfig path this will simply try to schedule some disabled
+	 * NAPI instances.
+	 */
+	for (i = 0; i < nn->dp.num_stack_tx_rings; i++)
+		napi_schedule_irqoff(&nn->r_vecs[i].napi);
+}
+#endif
+
 static void nfp_net_stat64(struct net_device *netdev,
 			   struct rtnl_link_stats64 *stats)
 {
@@ -3377,14 +3407,18 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev,
 	nfp_net_set_vxlan_port(nn, idx, 0);
 }
 
-static int
-nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog,
-		      struct netlink_ext_ack *extack)
+static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf)
 {
+	struct bpf_prog *prog = bpf->prog;
 	struct nfp_net_dp *dp;
+	int err;
+
+	if (!xdp_attachment_flags_ok(&nn->xdp, bpf))
+		return -EBUSY;
 
 	if (!prog == !nn->dp.xdp_prog) {
 		WRITE_ONCE(nn->dp.xdp_prog, prog);
+		xdp_attachment_setup(&nn->xdp, bpf);
 		return 0;
 	}
 
@@ -3398,38 +3432,26 @@ nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog,
 	dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0;
 
 	/* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
-	return nfp_net_ring_reconfig(nn, dp, extack);
+	err = nfp_net_ring_reconfig(nn, dp, bpf->extack);
+	if (err)
+		return err;
+
+	xdp_attachment_setup(&nn->xdp, bpf);
+	return 0;
 }
 
-static int
-nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags,
-		  struct netlink_ext_ack *extack)
+static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf)
 {
-	struct bpf_prog *drv_prog, *offload_prog;
 	int err;
 
-	if (nn->xdp_prog && (flags ^ nn->xdp_flags) & XDP_FLAGS_MODES)
+	if (!xdp_attachment_flags_ok(&nn->xdp_hw, bpf))
 		return -EBUSY;
 
-	/* Load both when no flags set to allow easy activation of driver path
-	 * when program is replaced by one which can't be offloaded.
-	 */
-	drv_prog     = flags & XDP_FLAGS_HW_MODE  ? NULL : prog;
-	offload_prog = flags & XDP_FLAGS_DRV_MODE ? NULL : prog;
-
-	err = nfp_net_xdp_setup_drv(nn, drv_prog, extack);
+	err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack);
 	if (err)
 		return err;
 
-	err = nfp_app_xdp_offload(nn->app, nn, offload_prog, extack);
-	if (err && flags & XDP_FLAGS_HW_MODE)
-		return err;
-
-	if (nn->xdp_prog)
-		bpf_prog_put(nn->xdp_prog);
-	nn->xdp_prog = prog;
-	nn->xdp_flags = flags;
-
+	xdp_attachment_setup(&nn->xdp_hw, bpf);
 	return 0;
 }
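The driver's private xdp_flags/xdp_prog bookkeeping is gone; both setup paths now lean on the generic xdp_attachment helpers from <net/xdp.h>, with separate attachment points for the driver datapath (nn->xdp) and hardware offload (nn->xdp_hw). Roughly, the pattern a converted driver follows is sketched below, where my_dev and my_dev_reconfig() are placeholders:

	static int my_xdp_setup(struct my_dev *md, struct netdev_bpf *bpf)
	{
		int err;

		/* refuse a replace whose flags differ from attach-time ones */
		if (!xdp_attachment_flags_ok(&md->xdp, bpf))
			return -EBUSY;

		err = my_dev_reconfig(md, bpf->prog);	/* swap the program in */
		if (err)
			return err;

		/* record prog + flags; releases the ref on the old prog */
		xdp_attachment_setup(&md->xdp, bpf);
		return 0;
	}

The query side then collapses to xdp_attachment_query(), as the reworked nfp_net_xdp() switch just below shows.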
@@ -3439,16 +3461,13 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
+		return nfp_net_xdp_setup_drv(nn, xdp);
 	case XDP_SETUP_PROG_HW:
-		return nfp_net_xdp_setup(nn, xdp->prog, xdp->flags,
-					 xdp->extack);
+		return nfp_net_xdp_setup_hw(nn, xdp);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!nn->xdp_prog;
-		if (nn->dp.bpf_offload_xdp)
-			xdp->prog_attached = XDP_ATTACHED_HW;
-		xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
-		xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0;
-		return 0;
+		return xdp_attachment_query(&nn->xdp, xdp);
+	case XDP_QUERY_PROG_HW:
+		return xdp_attachment_query(&nn->xdp_hw, xdp);
 	default:
 		return nfp_app_bpf(nn->app, nn, xdp);
 	}
@@ -3476,12 +3495,17 @@ static int nfp_net_set_mac_address(struct net_device *netdev, void *addr)
 }
 
 const struct net_device_ops nfp_net_netdev_ops = {
+	.ndo_init		= nfp_app_ndo_init,
+	.ndo_uninit		= nfp_app_ndo_uninit,
 	.ndo_open		= nfp_net_netdev_open,
 	.ndo_stop		= nfp_net_netdev_close,
 	.ndo_start_xmit		= nfp_net_tx,
 	.ndo_get_stats64	= nfp_net_stat64,
 	.ndo_vlan_rx_add_vid	= nfp_net_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= nfp_net_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= nfp_net_netpoll,
+#endif
 	.ndo_set_vf_mac         = nfp_app_set_vf_mac,
 	.ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
 	.ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
@@ -3840,6 +3864,9 @@ int nfp_net_init(struct nfp_net *nn)
 	nn->dp.mtu = NFP_NET_DEFAULT_MTU;
 	nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
 
+	if (nfp_app_ctrl_uses_data_vnics(nn->app))
+		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA;
+
 	if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
 		nfp_net_rss_init(nn);
 		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index bb63c115537d..44d3ea75d043 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -127,6 +127,7 @@
 #define NFP_NET_CFG_CTRL_GATHER	  (0x1 <<  9) /* Gather DMA */
 #define NFP_NET_CFG_CTRL_LSO	  (0x1 << 10) /* LSO/TSO (version 1) */
 #define NFP_NET_CFG_CTRL_CTAG_FILTER	  (0x1 << 11) /* VLAN CTAG filtering */
+#define NFP_NET_CFG_CTRL_CMSG_DATA	  (0x1 << 12) /* RX cmsgs on data Qs */
 #define NFP_NET_CFG_CTRL_RINGCFG  (0x1 << 16) /* Ring runtime changes */
 #define NFP_NET_CFG_CTRL_RSS	  (0x1 << 17) /* RSS (version 1) */
 #define NFP_NET_CFG_CTRL_IRQMOD   (0x1 << 18) /* Interrupt moderation */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 26d1cc4e2906..6a79c8e4a7a4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -233,12 +233,10 @@ nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 static void
 nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 {
-	struct nfp_app *app;
-
-	app = nfp_app_from_netdev(netdev);
-	if (!app)
-		return;
+	struct nfp_app *app = nfp_app_from_netdev(netdev);
 
+	strlcpy(drvinfo->bus_info, pci_name(app->pdev),
+		sizeof(drvinfo->bus_info));
 	nfp_get_drvinfo(app, app->pdev, "*", drvinfo);
 }
 
@@ -452,7 +450,7 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 
-	return NN_RVEC_GATHER_STATS + nn->dp.num_r_vecs * NN_RVEC_PER_Q_STATS;
+	return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS;
 }
 
 static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
@@ -460,7 +458,7 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
 	struct nfp_net *nn = netdev_priv(netdev);
 	int i;
 
-	for (i = 0; i < nn->dp.num_r_vecs; i++) {
+	for (i = 0; i < nn->max_r_vecs; i++) {
 		data = nfp_pr_et(data, "rvec_%u_rx_pkts", i);
 		data = nfp_pr_et(data, "rvec_%u_tx_pkts", i);
 		data = nfp_pr_et(data, "rvec_%u_tx_busy", i);
@@ -486,7 +484,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
 	u64 tmp[NN_RVEC_GATHER_STATS];
 	unsigned int i, j;
 
-	for (i = 0; i < nn->dp.num_r_vecs; i++) {
+	for (i = 0; i < nn->max_r_vecs; i++) {
 		unsigned int start;
 
 		do {
@@ -521,15 +519,13 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
 	return data;
 }
 
-static unsigned int
-nfp_vnic_get_hw_stats_count(unsigned int rx_rings, unsigned int tx_rings)
+static unsigned int nfp_vnic_get_hw_stats_count(unsigned int num_vecs)
 {
-	return NN_ET_GLOBAL_STATS_LEN + (rx_rings + tx_rings) * 2;
+	return NN_ET_GLOBAL_STATS_LEN + num_vecs * 4;
 }
 
 static u8 *
-nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int rx_rings,
-			      unsigned int tx_rings, bool repr)
+nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int num_vecs, bool repr)
 {
 	int swap_off, i;
 
@@ -549,36 +545,29 @@ nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int rx_rings,
 	for (i = NN_ET_SWITCH_STATS_LEN * 2; i < NN_ET_GLOBAL_STATS_LEN; i++)
 		data = nfp_pr_et(data, nfp_net_et_stats[i].name);
 
-	for (i = 0; i < tx_rings; i++) {
-		data = nfp_pr_et(data, "txq_%u_pkts", i);
-		data = nfp_pr_et(data, "txq_%u_bytes", i);
-	}
-
-	for (i = 0; i < rx_rings; i++) {
+	for (i = 0; i < num_vecs; i++) {
 		data = nfp_pr_et(data, "rxq_%u_pkts", i);
 		data = nfp_pr_et(data, "rxq_%u_bytes", i);
+		data = nfp_pr_et(data, "txq_%u_pkts", i);
+		data = nfp_pr_et(data, "txq_%u_bytes", i);
 	}
 
 	return data;
 }
 
 static u64 *
-nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem,
-		      unsigned int rx_rings, unsigned int tx_rings)
+nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs)
 {
 	unsigned int i;
 
 	for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++)
 		*data++ = readq(mem + nfp_net_et_stats[i].off);
 
-	for (i = 0; i < tx_rings; i++) {
-		*data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i));
-		*data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8);
-	}
-
-	for (i = 0; i < rx_rings; i++) {
+	for (i = 0; i < num_vecs; i++) {
 		*data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i));
 		*data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8);
+		*data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i));
+		*data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8);
 	}
 
 	return data;
@@ -633,8 +622,7 @@ static void nfp_net_get_strings(struct net_device *netdev,
 	switch (stringset) {
 	case ETH_SS_STATS:
 		data = nfp_vnic_get_sw_stats_strings(netdev, data);
-		data = nfp_vnic_get_hw_stats_strings(data, nn->dp.num_rx_rings,
-						     nn->dp.num_tx_rings,
+		data = nfp_vnic_get_hw_stats_strings(data, nn->max_r_vecs,
 						     false);
 		data = nfp_mac_get_stats_strings(netdev, data);
 		data = nfp_app_port_get_stats_strings(nn->port, data);
@@ -649,8 +637,7 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
 	struct nfp_net *nn = netdev_priv(netdev);
 
 	data = nfp_vnic_get_sw_stats(netdev, data);
-	data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar,
-				     nn->dp.num_rx_rings, nn->dp.num_tx_rings);
+	data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, nn->max_r_vecs);
 	data = nfp_mac_get_stats(netdev, data);
 	data = nfp_app_port_get_stats(nn->port, data);
 }
@@ -662,8 +649,7 @@ static int nfp_net_get_sset_count(struct net_device *netdev, int sset)
 	switch (sset) {
 	case ETH_SS_STATS:
 		return nfp_vnic_get_sw_stats_count(netdev) +
-		       nfp_vnic_get_hw_stats_count(nn->dp.num_rx_rings,
-						   nn->dp.num_tx_rings) +
+		       nfp_vnic_get_hw_stats_count(nn->max_r_vecs) +
 		       nfp_mac_get_stats_count(netdev) +
 		       nfp_app_port_get_stats_count(nn->port);
 	default:
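Keying the per-queue stats to max_r_vecs instead of the currently active ring counts keeps the ethtool string set stable when ring counts are reconfigured at runtime, and the rxq/txq counters are now interleaved per vector. For a vNIC with max_r_vecs == 2 the per-queue tail of the set would read, as a worked example:

	/* 4 counters per vector, after the NN_ET_GLOBAL_STATS_LEN globals:
	 *   rxq_0_pkts, rxq_0_bytes, txq_0_pkts, txq_0_bytes,
	 *   rxq_1_pkts, rxq_1_bytes, txq_1_pkts, txq_1_bytes
	 * which matches nfp_vnic_get_hw_stats_count():
	 *   NN_ET_GLOBAL_STATS_LEN + num_vecs * 4
	 */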
@@ -679,7 +665,7 @@ static void nfp_port_get_strings(struct net_device *netdev,
 	switch (stringset) {
 	case ETH_SS_STATS:
 		if (nfp_port_is_vnic(port))
-			data = nfp_vnic_get_hw_stats_strings(data, 0, 0, true);
+			data = nfp_vnic_get_hw_stats_strings(data, 0, true);
 		else
 			data = nfp_mac_get_stats_strings(netdev, data);
 		data = nfp_app_port_get_stats_strings(port, data);
@@ -694,7 +680,7 @@
 nfp_port_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
 	struct nfp_port *port = nfp_port_from_netdev(netdev);
 
 	if (nfp_port_is_vnic(port))
-		data = nfp_vnic_get_hw_stats(data, port->vnic, 0, 0);
+		data = nfp_vnic_get_hw_stats(data, port->vnic, 0);
 	else
 		data = nfp_mac_get_stats(netdev, data);
 	data = nfp_app_port_get_stats(port, data);
@@ -708,7 +694,7 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset)
 	switch (sset) {
 	case ETH_SS_STATS:
 		if (nfp_port_is_vnic(port))
-			count = nfp_vnic_get_hw_stats_count(0, 0);
+			count = nfp_vnic_get_hw_stats_count(0);
 		else
 			count = nfp_mac_get_stats_count(netdev);
 		count += nfp_app_port_get_stats_count(port);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index d7b712f6362f..18a09cdcd9c6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -262,6 +262,8 @@ err_port_disable:
 }
 
 const struct net_device_ops nfp_repr_netdev_ops = {
+	.ndo_init		= nfp_app_ndo_init,
+	.ndo_uninit		= nfp_app_ndo_uninit,
 	.ndo_open		= nfp_repr_open,
 	.ndo_stop		= nfp_repr_stop,
 	.ndo_start_xmit		= nfp_repr_xmit,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index 749655c329b2..c8d0b1016a64 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -1248,7 +1248,7 @@ static void nfp6000_free(struct nfp_cpp *cpp)
 	kfree(nfp);
 }
 
-static void nfp6000_read_serial(struct device *dev, u8 *serial)
+static int nfp6000_read_serial(struct device *dev, u8 *serial)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	int pos;
@@ -1256,25 +1256,29 @@ static void nfp6000_read_serial(struct device *dev, u8 *serial)
 
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN);
 	if (!pos) {
-		memset(serial, 0, NFP_SERIAL_LEN);
-		return;
+		dev_err(dev, "can't find PCIe Serial Number Capability\n");
+		return -EINVAL;
 	}
 
 	pci_read_config_dword(pdev, pos + 4, &reg);
 	put_unaligned_be16(reg >> 16, serial + 4);
 	pci_read_config_dword(pdev, pos + 8, &reg);
 	put_unaligned_be32(reg, serial);
+
+	return 0;
 }
 
-static u16 nfp6000_get_interface(struct device *dev)
+static int nfp6000_get_interface(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	int pos;
 	u32 reg;
 
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN);
-	if (!pos)
-		return NFP_CPP_INTERFACE(NFP_CPP_INTERFACE_TYPE_PCI, 0, 0xff);
+	if (!pos) {
+		dev_err(dev, "can't find PCIe Serial Number Capability\n");
+		return -EINVAL;
+	}
 
 	pci_read_config_dword(pdev, pos + 4, &reg);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index b0da3d436850..c338d539fa96 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -364,8 +364,8 @@ struct nfp_cpp_operations {
 	int (*init)(struct nfp_cpp *cpp);
 	void (*free)(struct nfp_cpp *cpp);
 
-	void (*read_serial)(struct device *dev, u8 *serial);
-	u16 (*get_interface)(struct device *dev);
+	int (*read_serial)(struct device *dev, u8 *serial);
+	int (*get_interface)(struct device *dev);
 
 	int (*area_init)(struct nfp_cpp_area *area,
 			 u32 dest, unsigned long long address,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index ef30597aa319..73de57a09800 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -1163,10 +1163,10 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
 {
 	const u32 arm = NFP_CPP_ID(NFP_CPP_TARGET_ARM, NFP_CPP_ACTION_RW, 0);
 	struct nfp_cpp *cpp;
+	int ifc, err;
 	u32 mask[2];
 	u32 xpbaddr;
 	size_t tgt;
-	int err;
 
 	cpp = kzalloc(sizeof(*cpp), GFP_KERNEL);
 	if (!cpp) {
@@ -1176,9 +1176,19 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
 
 	cpp->op = ops;
 	cpp->priv = priv;
-	cpp->interface = ops->get_interface(parent);
-	if (ops->read_serial)
-		ops->read_serial(parent, cpp->serial);
+
+	ifc = ops->get_interface(parent);
+	if (ifc < 0) {
+		err = ifc;
+		goto err_free_cpp;
+	}
+	cpp->interface = ifc;
+	if (ops->read_serial) {
+		err = ops->read_serial(parent, cpp->serial);
+		if (err)
+			goto err_free_cpp;
+	}
+
 	rwlock_init(&cpp->resource_lock);
 	init_waitqueue_head(&cpp->waitq);
 	lockdep_set_class(&cpp->resource_lock, &nfp_cpp_resource_lock_key);
@@ -1191,7 +1201,7 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
 	err = device_register(&cpp->dev);
 	if (err < 0) {
 		put_device(&cpp->dev);
-		goto err_dev;
+		goto err_free_cpp;
 	}
 
 	dev_set_drvdata(&cpp->dev, cpp);
@@ -1238,7 +1248,7 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
 
 err_out:
 	device_unregister(&cpp->dev);
-err_dev:
+err_free_cpp:
 	kfree(cpp);
 err_malloc:
 	return ERR_PTR(err);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
index 37a6d7822a38..40510860341b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
@@ -207,7 +207,7 @@ nffw_res_fwinfos(struct nfp_nffw_info_data *fwinf, struct nffw_fwinfo **arr)
 * nfp_nffw_info_open() - Acquire the lock on the NFFW table
 * @cpp:	NFP CPP handle
 *
- * Return: 0, or -ERRNO
+ * Return: pointer to nfp_nffw_info object or ERR_PTR()
 */
 struct nfp_nffw_info *nfp_nffw_info_open(struct nfp_cpp *cpp)
 {
@@ -253,10 +253,8 @@ err_free:
 }
 
 /**
- * nfp_nffw_info_release() - Release the lock on the NFFW table
+ * nfp_nffw_info_close() - Release the lock on the NFFW table and free state
 * @state:	NFP FW info state
- *
- * Return: 0, or -ERRNO
 */
 void nfp_nffw_info_close(struct nfp_nffw_info *state)
 {
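With the kernel-doc fixed up above, the contract is explicit: nfp_nffw_info_open() returns a pointer to be tested with IS_ERR(), and nfp_nffw_info_close() both drops the NFFW lock and frees the state. A hypothetical caller (the "consult" step is illustrative) would look like:

	static int nffw_query_example(struct nfp_cpp *cpp)
	{
		struct nfp_nffw_info *state;

		state = nfp_nffw_info_open(cpp);
		if (IS_ERR(state))
			return PTR_ERR(state);

		/* ... consult the NFFW table via state ... */

		nfp_nffw_info_close(state);	/* unlock + kfree */
		return 0;
	}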