diff options
163 files changed, 4499 insertions, 1521 deletions
diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst index 4e8c656b539a..3722537d1384 100644 --- a/Documentation/bpf/libbpf/index.rst +++ b/Documentation/bpf/libbpf/index.rst @@ -6,14 +6,13 @@ libbpf .. toctree:: :maxdepth: 1 + API Documentation <https://libbpf.readthedocs.io/en/latest/api.html> libbpf_naming_convention libbpf_build This is documentation for libbpf, a userspace library for loading and interacting with bpf programs. -For API documentation see the `versioned API documentation site <https://libbpf.readthedocs.io/en/latest/api.html>`_. - All general BPF questions, including kernel functionality, libbpf APIs and their application, should be sent to bpf@vger.kernel.org mailing list. You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index f42d9cd3b64d..2a3715bf29fe 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -535,6 +535,43 @@ static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); } +static inline u32 rv_amoand_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0xc, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +static inline u32 rv_amoor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x8, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +static inline u32 rv_amoxor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x4, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +static inline u32 rv_amoswap_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x1, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +static inline u32 rv_lr_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x2, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +static inline u32 rv_sc_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x3, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +static inline u32 rv_fence(u8 pred, u8 succ) +{ + u16 imm11_0 = pred << 4 | succ; + + return rv_i_insn(imm11_0, 0, 0, 0, 0xf); +} + /* RVC instrutions. */ static inline u16 rvc_addi4spn(u8 rd, u32 imm10) @@ -753,6 +790,36 @@ static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); } +static inline u32 rv_amoand_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0xc, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + +static inline u32 rv_amoor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x8, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + +static inline u32 rv_amoxor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x4, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + +static inline u32 rv_amoswap_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x1, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + +static inline u32 rv_lr_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x2, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + +static inline u32 rv_sc_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0x3, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + /* RV64-only RVC instructions. */ static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 0bcda99d1d68..00df3a8f92ac 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -455,6 +455,90 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) return 0; } +static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, + struct rv_jit_context *ctx) +{ + u8 r0; + int jmp_offset; + + if (off) { + if (is_12b_int(off)) { + emit_addi(RV_REG_T1, rd, off, ctx); + } else { + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + } + rd = RV_REG_T1; + } + + switch (imm) { + /* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */ + case BPF_ADD: + emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) : + rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); + break; + case BPF_AND: + emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) : + rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); + break; + case BPF_OR: + emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) : + rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); + break; + case BPF_XOR: + emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) : + rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); + break; + /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */ + case BPF_ADD | BPF_FETCH: + emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) : + rv_amoadd_w(rs, rs, rd, 0, 0), ctx); + if (!is64) + emit_zext_32(rs, ctx); + break; + case BPF_AND | BPF_FETCH: + emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) : + rv_amoand_w(rs, rs, rd, 0, 0), ctx); + if (!is64) + emit_zext_32(rs, ctx); + break; + case BPF_OR | BPF_FETCH: + emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) : + rv_amoor_w(rs, rs, rd, 0, 0), ctx); + if (!is64) + emit_zext_32(rs, ctx); + break; + case BPF_XOR | BPF_FETCH: + emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) : + rv_amoxor_w(rs, rs, rd, 0, 0), ctx); + if (!is64) + emit_zext_32(rs, ctx); + break; + /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */ + case BPF_XCHG: + emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) : + rv_amoswap_w(rs, rs, rd, 0, 0), ctx); + if (!is64) + emit_zext_32(rs, ctx); + break; + /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */ + case BPF_CMPXCHG: + r0 = bpf_to_rv_reg(BPF_REG_0, ctx); + emit(is64 ? rv_addi(RV_REG_T2, r0, 0) : + rv_addiw(RV_REG_T2, r0, 0), ctx); + emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) : + rv_lr_w(r0, 0, rd, 0, 0), ctx); + jmp_offset = ninsns_rvoff(8); + emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx); + emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) : + rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx); + jmp_offset = ninsns_rvoff(-6); + emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx); + emit(rv_fence(0x3, 0x3), ctx); + break; + } +} + #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) @@ -1146,30 +1230,8 @@ out_be: break; case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: - if (insn->imm != BPF_ADD) { - pr_err("bpf-jit: not supported: atomic operation %02x ***\n", - insn->imm); - return -EINVAL; - } - - /* atomic_add: lock *(u32 *)(dst + off) += src - * atomic_add: lock *(u64 *)(dst + off) += src - */ - - if (off) { - if (is_12b_int(off)) { - emit_addi(RV_REG_T1, rd, off, ctx); - } else { - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); - } - - rd = RV_REG_T1; - } - - emit(BPF_SIZE(code) == BPF_W ? - rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) : - rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx); + emit_atomic(rd, rs, off, imm, + BPF_SIZE(code) == BPF_DW, ctx); break; default: pr_err("bpf-jit: unknown opcode %02x\n", code); diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 3eff08d7b8e5..fe17c7f98e81 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -216,8 +216,12 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample) raw->bpf_sample = sample; - if (raw->progs) - BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, bpf_prog_run); + if (raw->progs) { + rcu_read_lock(); + bpf_prog_run_array(rcu_dereference(raw->progs), + &raw->bpf_sample, bpf_prog_run); + rcu_read_unlock(); + } } /* diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h index 19da3b22160f..8c5118c8baaf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h @@ -20,6 +20,7 @@ void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val); #define I40E_XDP_CONSUMED BIT(0) #define I40E_XDP_TX BIT(1) #define I40E_XDP_REDIR BIT(2) +#define I40E_XDP_EXIT BIT(3) /* * build_ctob - Builds the Tx descriptor (cmd, offset and type) qword diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index c1d25b0b0ca2..af3e7e6afc85 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -161,9 +161,13 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) if (likely(act == XDP_REDIRECT)) { err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - if (err) - goto out_failure; - return I40E_XDP_REDIR; + if (!err) + return I40E_XDP_REDIR; + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + result = I40E_XDP_EXIT; + else + result = I40E_XDP_CONSUMED; + goto out_failure; } switch (act) { @@ -175,16 +179,16 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) if (result == I40E_XDP_CONSUMED) goto out_failure; break; + case XDP_DROP: + result = I40E_XDP_CONSUMED; + break; default: bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); fallthrough; case XDP_ABORTED: + result = I40E_XDP_CONSUMED; out_failure: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); - fallthrough; /* handle aborts by dropping packet */ - case XDP_DROP: - result = I40E_XDP_CONSUMED; - break; } return result; } @@ -271,7 +275,8 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, unsigned int *rx_packets, unsigned int *rx_bytes, unsigned int size, - unsigned int xdp_res) + unsigned int xdp_res, + bool *failure) { struct sk_buff *skb; @@ -281,11 +286,15 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX) return; + if (xdp_res == I40E_XDP_EXIT) { + *failure = true; + return; + } + if (xdp_res == I40E_XDP_CONSUMED) { xsk_buff_free(xdp_buff); return; } - if (xdp_res == I40E_XDP_PASS) { /* NB! We are not checking for errors using * i40e_test_staterr with @@ -371,7 +380,9 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) xdp_res = i40e_run_xdp_zc(rx_ring, bi); i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets, - &rx_bytes, size, xdp_res); + &rx_bytes, size, xdp_res, &failure); + if (failure) + break; total_rx_packets += rx_packets; total_rx_bytes += rx_bytes; xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR); @@ -382,7 +393,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask; if (cleaned_count >= I40E_RX_BUFFER_WRITE) - failure = !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); + failure |= !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); i40e_finalize_xdp_rx(rx_ring, xdp_xmit); i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); @@ -594,13 +605,13 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) return -ENETDOWN; if (!i40e_enabled_xdp_vsi(vsi)) - return -ENXIO; + return -EINVAL; if (queue_id >= vsi->num_queue_pairs) - return -ENXIO; + return -EINVAL; if (!vsi->xdp_rings[queue_id]->xsk_pool) - return -ENXIO; + return -EINVAL; ring = vsi->xdp_rings[queue_id]; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cead3eb149bd..f5a906c03669 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -133,6 +133,7 @@ static inline int ice_skb_pad(void) #define ICE_XDP_CONSUMED BIT(0) #define ICE_XDP_TX BIT(1) #define ICE_XDP_REDIR BIT(2) +#define ICE_XDP_EXIT BIT(3) #define ICE_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 9dd38f667059..49ba8bfdbf04 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -545,9 +545,13 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (likely(act == XDP_REDIRECT)) { err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - if (err) - goto out_failure; - return ICE_XDP_REDIR; + if (!err) + return ICE_XDP_REDIR; + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + result = ICE_XDP_EXIT; + else + result = ICE_XDP_CONSUMED; + goto out_failure; } switch (act) { @@ -558,15 +562,16 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (result == ICE_XDP_CONSUMED) goto out_failure; break; + case XDP_DROP: + result = ICE_XDP_CONSUMED; + break; default: bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); fallthrough; case XDP_ABORTED: + result = ICE_XDP_CONSUMED; out_failure: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); - fallthrough; - case XDP_DROP: - result = ICE_XDP_CONSUMED; break; } @@ -587,6 +592,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) unsigned int xdp_xmit = 0; struct bpf_prog *xdp_prog; bool failure = false; + int entries_to_alloc; /* ZC patch is enabled only when XDP program is set, * so here it can not be NULL @@ -634,18 +640,23 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring); - if (xdp_res) { - if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) - xdp_xmit |= xdp_res; - else - xsk_buff_free(xdp); + if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { + xdp_xmit |= xdp_res; + } else if (xdp_res == ICE_XDP_EXIT) { + failure = true; + break; + } else if (xdp_res == ICE_XDP_CONSUMED) { + xsk_buff_free(xdp); + } else if (xdp_res == ICE_XDP_PASS) { + goto construct_skb; + } - total_rx_bytes += size; - total_rx_packets++; + total_rx_bytes += size; + total_rx_packets++; + + ice_bump_ntc(rx_ring); + continue; - ice_bump_ntc(rx_ring); - continue; - } construct_skb: /* XDP_PASS path */ skb = ice_construct_skb_zc(rx_ring, xdp); @@ -673,7 +684,9 @@ construct_skb: ice_receive_skb(rx_ring, skb, vlan_tag); } - failure = !ice_alloc_rx_bufs_zc(rx_ring, ICE_DESC_UNUSED(rx_ring)); + entries_to_alloc = ICE_DESC_UNUSED(rx_ring); + if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) + failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); ice_finalize_xdp_rx(xdp_ring, xdp_xmit); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); @@ -929,13 +942,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) - return -ENXIO; + return -EINVAL; if (queue_id >= vsi->num_txq) - return -ENXIO; + return -EINVAL; if (!vsi->xdp_rings[queue_id]->xsk_pool) - return -ENXIO; + return -EINVAL; ring = vsi->xdp_rings[queue_id]; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h index bba3feaf3318..f1f69ce67420 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h @@ -8,6 +8,7 @@ #define IXGBE_XDP_CONSUMED BIT(0) #define IXGBE_XDP_TX BIT(1) #define IXGBE_XDP_REDIR BIT(2) +#define IXGBE_XDP_EXIT BIT(3) #define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ IXGBE_TXD_CMD_RS) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index dd7ff66d422f..1703c640a434 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -109,9 +109,13 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, if (likely(act == XDP_REDIRECT)) { err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - if (err) - goto out_failure; - return IXGBE_XDP_REDIR; + if (!err) + return IXGBE_XDP_REDIR; + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + result = IXGBE_XDP_EXIT; + else + result = IXGBE_XDP_CONSUMED; + goto out_failure; } switch (act) { @@ -130,16 +134,16 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, if (result == IXGBE_XDP_CONSUMED) goto out_failure; break; + case XDP_DROP: + result = IXGBE_XDP_CONSUMED; + break; default: bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); fallthrough; case XDP_ABORTED: + result = IXGBE_XDP_CONSUMED; out_failure: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); - fallthrough; /* handle aborts by dropping packet */ - case XDP_DROP: - result = IXGBE_XDP_CONSUMED; - break; } return result; } @@ -303,21 +307,26 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool); xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp); - if (xdp_res) { - if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) - xdp_xmit |= xdp_res; - else - xsk_buff_free(bi->xdp); + if (likely(xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))) { + xdp_xmit |= xdp_res; + } else if (xdp_res == IXGBE_XDP_EXIT) { + failure = true; + break; + } else if (xdp_res == IXGBE_XDP_CONSUMED) { + xsk_buff_free(bi->xdp); + } else if (xdp_res == IXGBE_XDP_PASS) { + goto construct_skb; + } - bi->xdp = NULL; - total_rx_packets++; - total_rx_bytes += size; + bi->xdp = NULL; + total_rx_packets++; + total_rx_bytes += size; - cleaned_count++; - ixgbe_inc_ntc(rx_ring); - continue; - } + cleaned_count++; + ixgbe_inc_ntc(rx_ring); + continue; +construct_skb: /* XDP_PASS path */ skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp); if (!skb) { @@ -516,10 +525,10 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) return -ENETDOWN; if (!READ_ONCE(adapter->xdp_prog)) - return -ENXIO; + return -EINVAL; if (qid >= adapter->num_xdp_queues) - return -ENXIO; + return -EINVAL; ring = adapter->xdp_ring[qid]; @@ -527,7 +536,7 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) return -ENETDOWN; if (!ring->xsk_pool) - return -ENXIO; + return -EINVAL; if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { u64 eics = BIT_ULL(ring->q_vector->v_idx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 3ec0c17db010..4902ef74fedf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -23,7 +23,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) c = priv->channels.c[ix]; if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) - return -ENXIO; + return -EINVAL; if (!napi_if_scheduled_mark_missed(&c->napi)) { /* To avoid WQE overrun, don't post a NOP if async_icosq is not diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 580cc5d3c4fa..7c834c02e084 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6559,7 +6559,7 @@ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags) return -ENETDOWN; if (!stmmac_xdp_is_enabled(priv)) - return -ENXIO; + return -EINVAL; if (queue >= priv->plat->rx_queues_to_use || queue >= priv->plat->tx_queues_to_use) @@ -6570,7 +6570,7 @@ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags) ch = &priv->channel[queue]; if (!rx_q->xsk_pool && !tx_q->xsk_pool) - return -ENXIO; + return -EINVAL; if (!napi_if_scheduled_mark_missed(&ch->rxtx_napi)) { /* EQoS does not have per-DMA channel SW interrupt, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 88a51b242adc..669d96d074ad 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -225,24 +225,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ ({ \ - u32 __unused_flags; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, \ - &__unused_flags); \ + NULL, NULL); \ __ret; \ }) #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ ({ \ - u32 __unused_flags; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, \ - &__unused_flags); \ + t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bdb5298735ce..be94833d390a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/percpu-refcount.h> #include <linux/bpfptr.h> +#include <linux/btf.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -147,14 +148,48 @@ struct bpf_map_ops { bpf_callback_t callback_fn, void *callback_ctx, u64 flags); - /* BTF name and id of struct allocated by map_alloc */ - const char * const map_btf_name; + /* BTF id of struct allocated by map_alloc */ int *map_btf_id; /* bpf_iter info used to open a seq_file */ const struct bpf_iter_seq_info *iter_seq_info; }; +enum { + /* Support at most 8 pointers in a BPF map value */ + BPF_MAP_VALUE_OFF_MAX = 8, + BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX + + 1 + /* for bpf_spin_lock */ + 1, /* for bpf_timer */ +}; + +enum bpf_kptr_type { + BPF_KPTR_UNREF, + BPF_KPTR_REF, +}; + +struct bpf_map_value_off_desc { + u32 offset; + enum bpf_kptr_type type; + struct { + struct btf *btf; + struct module *module; + btf_dtor_kfunc_t dtor; + u32 btf_id; + } kptr; +}; + +struct bpf_map_value_off { + u32 nr_off; + struct bpf_map_value_off_desc off[]; +}; + +struct bpf_map_off_arr { + u32 cnt; + u32 field_off[BPF_MAP_OFF_ARR_MAX]; + u8 field_sz[BPF_MAP_OFF_ARR_MAX]; +}; + struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -171,6 +206,7 @@ struct bpf_map { u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ + struct bpf_map_value_off *kptr_off_tab; int timer_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; @@ -182,10 +218,7 @@ struct bpf_map { struct mem_cgroup *memcg; #endif char name[BPF_OBJ_NAME_LEN]; - bool bypass_spec_v1; - bool frozen; /* write-once; write-protected by freeze_mutex */ - /* 14 bytes hole */ - + struct bpf_map_off_arr *off_arr; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ @@ -205,6 +238,8 @@ struct bpf_map { bool jited; bool xdp_has_frags; } owner; + bool bypass_spec_v1; + bool frozen; /* write-once; write-protected by freeze_mutex */ }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -217,43 +252,44 @@ static inline bool map_value_has_timer(const struct bpf_map *map) return map->timer_off >= 0; } +static inline bool map_value_has_kptrs(const struct bpf_map *map) +{ + return !IS_ERR_OR_NULL(map->kptr_off_tab); +} + static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { if (unlikely(map_value_has_spin_lock(map))) memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); if (unlikely(map_value_has_timer(map))) memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); + if (unlikely(map_value_has_kptrs(map))) { + struct bpf_map_value_off *tab = map->kptr_off_tab; + int i; + + for (i = 0; i < tab->nr_off; i++) + *(u64 *)(dst + tab->off[i].offset) = 0; + } } /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0; + u32 curr_off = 0; + int i; - if (unlikely(map_value_has_spin_lock(map))) { - s_off = map->spin_lock_off; - s_sz = sizeof(struct bpf_spin_lock); - } - if (unlikely(map_value_has_timer(map))) { - t_off = map->timer_off; - t_sz = sizeof(struct bpf_timer); + if (likely(!map->off_arr)) { + memcpy(dst, src, map->value_size); + return; } - if (unlikely(s_sz || t_sz)) { - if (s_off < t_off || !s_sz) { - swap(s_off, t_off); - swap(s_sz, t_sz); - } - memcpy(dst, src, t_off); - memcpy(dst + t_off + t_sz, - src + t_off + t_sz, - s_off - t_off - t_sz); - memcpy(dst + s_off + s_sz, - src + s_off + s_sz, - map->value_size - s_off - s_sz); - } else { - memcpy(dst, src, map->value_size); + for (i = 0; i < map->off_arr->cnt; i++) { + u32 next_off = map->off_arr->field_off[i]; + + memcpy(dst + curr_off, src + curr_off, next_off - curr_off); + curr_off += map->off_arr->field_sz[i]; } + memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); @@ -342,7 +378,18 @@ enum bpf_type_flag { */ MEM_PERCPU = BIT(4 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_PERCPU, + /* Indicates that the argument will be released. */ + OBJ_RELEASE = BIT(5 + BPF_BASE_TYPE_BITS), + + /* PTR is not trusted. This is only used with PTR_TO_BTF_ID, to mark + * unreferenced and referenced kptr loaded from map value using a load + * instruction, so that they can only be dereferenced but not escape the + * BPF program into the kernel (i.e. cannot be passed as arguments to + * kfunc or bpf helpers). + */ + PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = PTR_UNTRUSTED, }; /* Max number of base types. */ @@ -391,6 +438,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ + ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -400,6 +448,7 @@ enum bpf_arg_type { ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, + ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -1221,7 +1270,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, /* an array of programs to be executed under rcu_lock. * * Typical usage: - * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run); + * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run); * * the structure returned by bpf_prog_array_alloc() should be populated * with program pointers and the last pointer must be NULL. @@ -1315,83 +1364,22 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); -static __always_inline int -BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog, - int retval, u32 *ret_flags) -{ - const struct bpf_prog_array_item *item; - const struct bpf_prog *prog; - const struct bpf_prog_array *array; - struct bpf_run_ctx *old_run_ctx; - struct bpf_cg_run_ctx run_ctx; - u32 func_ret; - - run_ctx.retval = retval; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); - item = &array->items[0]; - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - while ((prog = READ_ONCE(item->prog))) { - run_ctx.prog_item = item; - func_ret = run_prog(prog, ctx); - if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) - run_ctx.retval = -EPERM; - *(ret_flags) |= (func_ret >> 1); - item++; - } - bpf_reset_run_ctx(old_run_ctx); - rcu_read_unlock(); - migrate_enable(); - return run_ctx.retval; -} - -static __always_inline int -BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog, - int retval) -{ - const struct bpf_prog_array_item *item; - const struct bpf_prog *prog; - const struct bpf_prog_array *array; - struct bpf_run_ctx *old_run_ctx; - struct bpf_cg_run_ctx run_ctx; - - run_ctx.retval = retval; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); - item = &array->items[0]; - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - while ((prog = READ_ONCE(item->prog))) { - run_ctx.prog_item = item; - if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) - run_ctx.retval = -EPERM; - item++; - } - bpf_reset_run_ctx(old_run_ctx); - rcu_read_unlock(); - migrate_enable(); - return run_ctx.retval; -} - static __always_inline u32 -BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, +bpf_prog_run_array(const struct bpf_prog_array *array, const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; - const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held"); + if (unlikely(!array)) - goto out; + return ret; + + migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { @@ -1400,50 +1388,10 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, item++; } bpf_reset_run_ctx(old_run_ctx); -out: - rcu_read_unlock(); migrate_enable(); return ret; } -/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs - * so BPF programs can request cwr for TCP packets. - * - * Current cgroup skb programs can only return 0 or 1 (0 to drop the - * packet. This macro changes the behavior so the low order bit - * indicates whether the packet should be dropped (0) or not (1) - * and the next bit is a congestion notification bit. This could be - * used by TCP to call tcp_enter_cwr() - * - * Hence, new allowed return values of CGROUP EGRESS BPF programs are: - * 0: drop packet - * 1: keep packet - * 2: drop packet and cn - * 3: keep packet and cn - * - * This macro then converts it to one of the NET_XMIT or an error - * code that is then interpreted as drop packet (and no cn): - * 0: NET_XMIT_SUCCESS skb should be transmitted - * 1: NET_XMIT_DROP skb should be dropped and cn - * 2: NET_XMIT_CN skb should be transmitted and cn - * 3: -err skb should be dropped - */ -#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ - ({ \ - u32 _flags = 0; \ - bool _cn; \ - u32 _ret; \ - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ - _cn = _flags & BPF_RET_SET_CN; \ - if (_ret && !IS_ERR_VALUE((long)_ret)) \ - _ret = -EFAULT; \ - if (!_ret) \ - _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ - else \ - _ret = (_cn ? NET_XMIT_DROP : _ret); \ - _ret; \ - }) - #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -1497,6 +1445,12 @@ void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset); +void bpf_map_free_kptr_off_tab(struct bpf_map *map); +struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map); +bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b); +void bpf_map_free_kptrs(struct bpf_map *map, void *map_value); + struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -1793,7 +1747,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, - const struct btf *need_btf, u32 need_type_id); + const struct btf *need_btf, u32 need_type_id, + bool strict); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 493e63258497..7ea18d4da84b 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -143,9 +143,9 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_omem); + bool uncharge_omem, bool use_trace_rcu); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem); +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3a9d2d7cc6b7..1f1e7f2ea967 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -523,8 +523,7 @@ int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type, - bool is_release_func); + enum bpf_arg_type arg_type); int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, diff --git a/include/linux/btf.h b/include/linux/btf.h index 36bc09b8e890..2611cea2c2b6 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -17,6 +17,7 @@ enum btf_kfunc_type { BTF_KFUNC_TYPE_ACQUIRE, BTF_KFUNC_TYPE_RELEASE, BTF_KFUNC_TYPE_RET_NULL, + BTF_KFUNC_TYPE_KPTR_ACQUIRE, BTF_KFUNC_TYPE_MAX, }; @@ -35,11 +36,19 @@ struct btf_kfunc_id_set { struct btf_id_set *acquire_set; struct btf_id_set *release_set; struct btf_id_set *ret_null_set; + struct btf_id_set *kptr_acquire_set; }; struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; }; }; +struct btf_id_dtor_kfunc { + u32 btf_id; + u32 kfunc_btf_id; +}; + +typedef void (*btf_dtor_kfunc_t)(void *); + extern const struct file_operations btf_fops; void btf_get(struct btf *btf); @@ -123,6 +132,8 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); +struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, + const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, @@ -344,6 +355,9 @@ bool btf_kfunc_id_set_contains(const struct btf *btf, enum btf_kfunc_type type, u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); +s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); +int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, + struct module *owner); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -367,6 +381,15 @@ static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, { return 0; } +static inline s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) +{ + return -ENOENT; +} +static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, + u32 add_cnt, struct module *owner) +{ + return 0; +} #endif #endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5cbc184ca685..dc4b3e1cf21b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3925,7 +3925,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); -int skb_ensure_writable(struct sk_buff *skb, int write_len); +int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d14b10b85e51..444fe6f1cf35 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5143,6 +5143,17 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. *ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5350,7 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(kptr_xchg), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 7f145aefbff8..b3bf31fd9458 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -11,6 +11,7 @@ #include <linux/perf_event.h> #include <uapi/linux/btf.h> #include <linux/rcupdate_trace.h> +#include <linux/btf_ids.h> #include "map_in_map.h" @@ -287,10 +288,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key return 0; } -static void check_and_free_timer_in_array(struct bpf_array *arr, void *val) +static void check_and_free_fields(struct bpf_array *arr, void *val) { - if (unlikely(map_value_has_timer(&arr->map))) + if (map_value_has_timer(&arr->map)) bpf_timer_cancel_and_free(val + arr->map.timer_off); + if (map_value_has_kptrs(&arr->map)) + bpf_map_free_kptrs(&arr->map, val); } /* Called from syscall or from eBPF program */ @@ -327,7 +330,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, copy_map_value_locked(map, val, value, false); else copy_map_value(map, val, value); - check_and_free_timer_in_array(array, val); + check_and_free_fields(array, val); } return 0; } @@ -386,7 +389,8 @@ static void array_map_free_timers(struct bpf_map *map) struct bpf_array *array = container_of(map, struct bpf_array, map); int i; - if (likely(!map_value_has_timer(map))) + /* We don't reset or free kptr on uref dropping to zero. */ + if (!map_value_has_timer(map)) return; for (i = 0; i < array->map.max_entries; i++) @@ -398,6 +402,13 @@ static void array_map_free_timers(struct bpf_map *map) static void array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + if (map_value_has_kptrs(map)) { + for (i = 0; i < array->map.max_entries; i++) + bpf_map_free_kptrs(map, array->value + array->elem_size * i); + bpf_map_free_kptr_off_tab(map); + } if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); @@ -680,7 +691,7 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_ return num_elems; } -static int array_map_btf_id; +BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array) const struct bpf_map_ops array_map_ops = { .map_meta_equal = array_map_meta_equal, .map_alloc_check = array_map_alloc_check, @@ -701,12 +712,10 @@ const struct bpf_map_ops array_map_ops = { .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, - .map_btf_name = "bpf_array", - .map_btf_id = &array_map_btf_id, + .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; -static int percpu_array_map_btf_id; const struct bpf_map_ops percpu_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = array_map_alloc_check, @@ -722,8 +731,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, - .map_btf_name = "bpf_array", - .map_btf_id = &percpu_array_map_btf_id, + .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; @@ -1102,7 +1110,6 @@ static void prog_array_map_free(struct bpf_map *map) * Thus, prog_array_map cannot be used as an inner_map * and map_meta_equal is not implemented. */ -static int prog_array_map_btf_id; const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = prog_array_map_alloc, @@ -1118,8 +1125,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = prog_array_map_clear, .map_seq_show_elem = prog_array_map_seq_show_elem, - .map_btf_name = "bpf_array", - .map_btf_id = &prog_array_map_btf_id, + .map_btf_id = &array_map_btf_ids[0], }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, @@ -1208,7 +1214,6 @@ static void perf_event_fd_array_map_free(struct bpf_map *map) fd_array_map_free(map); } -static int perf_event_array_map_btf_id; const struct bpf_map_ops perf_event_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = fd_array_map_alloc_check, @@ -1221,8 +1226,7 @@ const struct bpf_map_ops perf_event_array_map_ops = { .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_array", - .map_btf_id = &perf_event_array_map_btf_id, + .map_btf_id = &array_map_btf_ids[0], }; #ifdef CONFIG_CGROUPS @@ -1245,7 +1249,6 @@ static void cgroup_fd_array_free(struct bpf_map *map) fd_array_map_free(map); } -static int cgroup_array_map_btf_id; const struct bpf_map_ops cgroup_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = fd_array_map_alloc_check, @@ -1257,8 +1260,7 @@ const struct bpf_map_ops cgroup_array_map_ops = { .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_array", - .map_btf_id = &cgroup_array_map_btf_id, + .map_btf_id = &array_map_btf_ids[0], }; #endif @@ -1332,7 +1334,6 @@ static int array_of_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } -static int array_of_maps_map_btf_id; const struct bpf_map_ops array_of_maps_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, @@ -1345,6 +1346,5 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_array", - .map_btf_id = &array_of_maps_map_btf_id, + .map_btf_id = &array_map_btf_ids[0], }; diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index b141a1346f72..b9ea539a5561 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -7,6 +7,7 @@ #include <linux/err.h> #include <linux/jhash.h> #include <linux/random.h> +#include <linux/btf_ids.h> #define BLOOM_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK) @@ -192,7 +193,7 @@ static int bloom_map_check_btf(const struct bpf_map *map, return btf_type_is_void(key_type) ? 0 : -EINVAL; } -static int bpf_bloom_map_btf_id; +BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = bloom_map_alloc, @@ -205,6 +206,5 @@ const struct bpf_map_ops bloom_filter_map_ops = { .map_update_elem = bloom_map_update_elem, .map_delete_elem = bloom_map_delete_elem, .map_check_btf = bloom_map_check_btf, - .map_btf_name = "bpf_bloom_filter", - .map_btf_id = &bpf_bloom_map_btf_id, + .map_btf_id = &bpf_bloom_map_btf_ids[0], }; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 96be8d518885..5f7683b19199 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -90,7 +90,7 @@ void bpf_inode_storage_free(struct inode *inode) */ bpf_selem_unlink_map(selem); free_inode_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, false); + local_storage, selem, false, false); } raw_spin_unlock_bh(&local_storage->lock); rcu_read_unlock(); @@ -149,7 +149,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata)); + bpf_selem_unlink(SELEM(sdata), true); return 0; } @@ -245,7 +245,8 @@ static void inode_storage_map_free(struct bpf_map *map) bpf_local_storage_map_free(smap, NULL); } -static int inode_storage_map_btf_id; +BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct, + bpf_local_storage_map) const struct bpf_map_ops inode_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, @@ -256,8 +257,7 @@ const struct bpf_map_ops inode_storage_map_ops = { .map_update_elem = bpf_fd_inode_storage_update_elem, .map_delete_elem = bpf_fd_inode_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, - .map_btf_name = "bpf_local_storage_map", - .map_btf_id = &inode_storage_map_btf_id, + .map_btf_id = &inode_storage_map_btf_ids[0], .map_owner_storage_ptr = inode_storage_ptr, }; diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index dea920b3b840..d5d96ceca105 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -545,7 +545,7 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); link->tinfo = tinfo; - err = bpf_link_prime(&link->link, &link_primer); + err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); return err; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 01aa2b51ec4d..8ce40fd869f6 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -106,7 +106,7 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) */ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_mem) + bool uncharge_mem, bool use_trace_rcu) { struct bpf_local_storage_map *smap; bool free_local_storage; @@ -150,11 +150,16 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); + if (use_trace_rcu) + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); + else + kfree_rcu(selem, rcu); + return free_local_storage; } -static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) +static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, + bool use_trace_rcu) { struct bpf_local_storage *local_storage; bool free_local_storage = false; @@ -169,12 +174,16 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true); + local_storage, selem, true, use_trace_rcu); raw_spin_unlock_irqrestore(&local_storage->lock, flags); - if (free_local_storage) - call_rcu_tasks_trace(&local_storage->rcu, + if (free_local_storage) { + if (use_trace_rcu) + call_rcu_tasks_trace(&local_storage->rcu, bpf_local_storage_free_rcu); + else + kfree_rcu(local_storage, rcu); + } } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -214,14 +223,14 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, raw_spin_unlock_irqrestore(&b->lock, flags); } -void bpf_selem_unlink(struct bpf_local_storage_elem *selem) +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) { /* Always unlink from map before unlinking from local_storage * because selem will be freed after successfully unlinked from * the local_storage. */ bpf_selem_unlink_map(selem); - __bpf_selem_unlink_storage(selem); + __bpf_selem_unlink_storage(selem, use_trace_rcu); } struct bpf_local_storage_data * @@ -466,7 +475,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (old_sdata) { bpf_selem_unlink_map(SELEM(old_sdata)); bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), - false); + false, true); } unlock: @@ -548,7 +557,7 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, migrate_disable(); __this_cpu_inc(*busy_counter); } - bpf_selem_unlink(selem); + bpf_selem_unlink(selem, false); if (busy_counter) { __this_cpu_dec(*busy_counter); migrate_enable(); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 21069dbe9138..3a0103ad97bc 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -10,6 +10,7 @@ #include <linux/seq_file.h> #include <linux/refcount.h> #include <linux/mutex.h> +#include <linux/btf_ids.h> enum bpf_struct_ops_state { BPF_STRUCT_OPS_STATE_INIT, @@ -263,7 +264,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, /* No lock is needed. state and refcnt do not need * to be updated together under atomic context. */ - uvalue = (struct bpf_struct_ops_value *)value; + uvalue = value; memcpy(uvalue, st_map->uvalue, map->value_size); uvalue->state = state; refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt)); @@ -353,7 +354,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (err) return err; - uvalue = (struct bpf_struct_ops_value *)value; + uvalue = value; err = check_zero_holes(t, uvalue->data); if (err) return err; @@ -612,7 +613,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) return map; } -static int bpf_struct_ops_map_btf_id; +BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map) const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_alloc_check = bpf_struct_ops_map_alloc_check, .map_alloc = bpf_struct_ops_map_alloc, @@ -622,8 +623,7 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_delete_elem = bpf_struct_ops_map_delete_elem, .map_update_elem = bpf_struct_ops_map_update_elem, .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, - .map_btf_name = "bpf_struct_ops_map", - .map_btf_id = &bpf_struct_ops_map_btf_id, + .map_btf_id = &bpf_struct_ops_map_btf_ids[0], }; /* "const void *" because some subsystem is diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 6638a0ecc3d2..e9014dc62682 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -102,7 +102,7 @@ void bpf_task_storage_free(struct task_struct *task) */ bpf_selem_unlink_map(selem); free_task_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, false); + local_storage, selem, false, false); } raw_spin_unlock_irqrestore(&local_storage->lock, flags); bpf_task_storage_unlock(); @@ -192,7 +192,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata)); + bpf_selem_unlink(SELEM(sdata), true); return 0; } @@ -307,7 +307,7 @@ static void task_storage_map_free(struct bpf_map *map) bpf_local_storage_map_free(smap, &bpf_task_storage_busy); } -static int task_storage_map_btf_id; +BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map) const struct bpf_map_ops task_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, @@ -318,8 +318,7 @@ const struct bpf_map_ops task_storage_map_ops = { .map_update_elem = bpf_pid_task_storage_update_elem, .map_delete_elem = bpf_pid_task_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, - .map_btf_name = "bpf_local_storage_map", - .map_btf_id = &task_storage_map_btf_id, + .map_btf_id = &task_storage_map_btf_ids[0], .map_owner_storage_ptr = task_storage_ptr, }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0918a39279f6..2f0b0440131c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -207,12 +207,18 @@ enum btf_kfunc_hook { enum { BTF_KFUNC_SET_MAX_CNT = 32, + BTF_DTOR_KFUNC_MAX_CNT = 256, }; struct btf_kfunc_set_tab { struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; }; +struct btf_id_dtor_kfunc_tab { + u32 cnt; + struct btf_id_dtor_kfunc dtors[]; +}; + struct btf { void *data; struct btf_type **types; @@ -228,6 +234,7 @@ struct btf { u32 id; struct rcu_head rcu; struct btf_kfunc_set_tab *kfunc_set_tab; + struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; /* split BTF support */ struct btf *base_btf; @@ -1616,8 +1623,19 @@ free_tab: btf->kfunc_set_tab = NULL; } +static void btf_free_dtor_kfunc_tab(struct btf *btf) +{ + struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; + + if (!tab) + return; + kfree(tab); + btf->dtor_kfunc_tab = NULL; +} + static void btf_free(struct btf *btf) { + btf_free_dtor_kfunc_tab(btf); btf_free_kfunc_set_tab(btf); kvfree(btf->types); kvfree(btf->resolved_sizes); @@ -3163,24 +3181,86 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } +enum btf_field_type { + BTF_FIELD_SPIN_LOCK, + BTF_FIELD_TIMER, + BTF_FIELD_KPTR, +}; + +enum { + BTF_FIELD_IGNORE = 0, + BTF_FIELD_FOUND = 1, +}; + +struct btf_field_info { + u32 type_id; + u32 off; + enum bpf_kptr_type type; +}; + +static int btf_find_struct(const struct btf *btf, const struct btf_type *t, + u32 off, int sz, struct btf_field_info *info) +{ + if (!__btf_type_is_struct(t)) + return BTF_FIELD_IGNORE; + if (t->size != sz) + return BTF_FIELD_IGNORE; + info->off = off; + return BTF_FIELD_FOUND; +} + +static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, + u32 off, int sz, struct btf_field_info *info) +{ + enum bpf_kptr_type type; + u32 res_id; + + /* For PTR, sz is always == 8 */ + if (!btf_type_is_ptr(t)) + return BTF_FIELD_IGNORE; + t = btf_type_by_id(btf, t->type); + + if (!btf_type_is_type_tag(t)) + return BTF_FIELD_IGNORE; + /* Reject extra tags */ + if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) + return -EINVAL; + if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_UNREF; + else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_REF; + else + return -EINVAL; + + /* Get the base type */ + t = btf_type_skip_modifiers(btf, t->type, &res_id); + /* Only pointer to struct is allowed */ + if (!__btf_type_is_struct(t)) + return -EINVAL; + + info->type_id = res_id; + info->off = off; + info->type = type; + return BTF_FIELD_FOUND; +} + static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t, - const char *name, int sz, int align) + const char *name, int sz, int align, + enum btf_field_type field_type, + struct btf_field_info *info, int info_cnt) { const struct btf_member *member; - u32 i, off = -ENOENT; + struct btf_field_info tmp; + int ret, idx = 0; + u32 i, off; for_each_member(i, t, member) { const struct btf_type *member_type = btf_type_by_id(btf, member->type); - if (!__btf_type_is_struct(member_type)) - continue; - if (member_type->size != sz) - continue; - if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name)) + + if (name && strcmp(__btf_name_by_offset(btf, member_type->name_off), name)) continue; - if (off != -ENOENT) - /* only one such field is allowed */ - return -E2BIG; + off = __btf_member_bit_offset(t, member); if (off % 8) /* valid C code cannot generate such BTF */ @@ -3188,46 +3268,115 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t off /= 8; if (off % align) return -EINVAL; + + switch (field_type) { + case BTF_FIELD_SPIN_LOCK: + case BTF_FIELD_TIMER: + ret = btf_find_struct(btf, member_type, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; + case BTF_FIELD_KPTR: + ret = btf_find_kptr(btf, member_type, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; + default: + return -EFAULT; + } + + if (ret == BTF_FIELD_IGNORE) + continue; + if (idx >= info_cnt) + return -E2BIG; + ++idx; } - return off; + return idx; } static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, - const char *name, int sz, int align) + const char *name, int sz, int align, + enum btf_field_type field_type, + struct btf_field_info *info, int info_cnt) { const struct btf_var_secinfo *vsi; - u32 i, off = -ENOENT; + struct btf_field_info tmp; + int ret, idx = 0; + u32 i, off; for_each_vsi(i, t, vsi) { const struct btf_type *var = btf_type_by_id(btf, vsi->type); const struct btf_type *var_type = btf_type_by_id(btf, var->type); - if (!__btf_type_is_struct(var_type)) - continue; - if (var_type->size != sz) + off = vsi->offset; + + if (name && strcmp(__btf_name_by_offset(btf, var_type->name_off), name)) continue; if (vsi->size != sz) continue; - if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name)) - continue; - if (off != -ENOENT) - /* only one such field is allowed */ - return -E2BIG; - off = vsi->offset; if (off % align) return -EINVAL; + + switch (field_type) { + case BTF_FIELD_SPIN_LOCK: + case BTF_FIELD_TIMER: + ret = btf_find_struct(btf, var_type, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; + case BTF_FIELD_KPTR: + ret = btf_find_kptr(btf, var_type, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; + default: + return -EFAULT; + } + + if (ret == BTF_FIELD_IGNORE) + continue; + if (idx >= info_cnt) + return -E2BIG; + ++idx; } - return off; + return idx; } static int btf_find_field(const struct btf *btf, const struct btf_type *t, - const char *name, int sz, int align) + enum btf_field_type field_type, + struct btf_field_info *info, int info_cnt) { + const char *name; + int sz, align; + + switch (field_type) { + case BTF_FIELD_SPIN_LOCK: + name = "bpf_spin_lock"; + sz = sizeof(struct bpf_spin_lock); + align = __alignof__(struct bpf_spin_lock); + break; + case BTF_FIELD_TIMER: + name = "bpf_timer"; + sz = sizeof(struct bpf_timer); + align = __alignof__(struct bpf_timer); + break; + case BTF_FIELD_KPTR: + name = NULL; + sz = sizeof(u64); + align = 8; + break; + default: + return -EFAULT; + } if (__btf_type_is_struct(t)) - return btf_find_struct_field(btf, t, name, sz, align); + return btf_find_struct_field(btf, t, name, sz, align, field_type, info, info_cnt); else if (btf_type_is_datasec(t)) - return btf_find_datasec_var(btf, t, name, sz, align); + return btf_find_datasec_var(btf, t, name, sz, align, field_type, info, info_cnt); return -EINVAL; } @@ -3237,16 +3386,130 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t, */ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) { - return btf_find_field(btf, t, "bpf_spin_lock", - sizeof(struct bpf_spin_lock), - __alignof__(struct bpf_spin_lock)); + struct btf_field_info info; + int ret; + + ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, &info, 1); + if (ret < 0) + return ret; + if (!ret) + return -ENOENT; + return info.off; } int btf_find_timer(const struct btf *btf, const struct btf_type *t) { - return btf_find_field(btf, t, "bpf_timer", - sizeof(struct bpf_timer), - __alignof__(struct bpf_timer)); + struct btf_field_info info; + int ret; + + ret = btf_find_field(btf, t, BTF_FIELD_TIMER, &info, 1); + if (ret < 0) + return ret; + if (!ret) + return -ENOENT; + return info.off; +} + +struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, + const struct btf_type *t) +{ + struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX]; + struct bpf_map_value_off *tab; + struct btf *kernel_btf = NULL; + struct module *mod = NULL; + int ret, i, nr_off; + + ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr)); + if (ret < 0) + return ERR_PTR(ret); + if (!ret) + return NULL; + + nr_off = ret; + tab = kzalloc(offsetof(struct bpf_map_value_off, off[nr_off]), GFP_KERNEL | __GFP_NOWARN); + if (!tab) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < nr_off; i++) { + const struct btf_type *t; + s32 id; + + /* Find type in map BTF, and use it to look up the matching type + * in vmlinux or module BTFs, by name and kind. + */ + t = btf_type_by_id(btf, info_arr[i].type_id); + id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), + &kernel_btf); + if (id < 0) { + ret = id; + goto end; + } + + /* Find and stash the function pointer for the destruction function that + * needs to be eventually invoked from the map free path. + */ + if (info_arr[i].type == BPF_KPTR_REF) { + const struct btf_type *dtor_func; + const char *dtor_func_name; + unsigned long addr; + s32 dtor_btf_id; + + /* This call also serves as a whitelist of allowed objects that + * can be used as a referenced pointer and be stored in a map at + * the same time. + */ + dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id); + if (dtor_btf_id < 0) { + ret = dtor_btf_id; + goto end_btf; + } + + dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id); + if (!dtor_func) { + ret = -ENOENT; + goto end_btf; + } + + if (btf_is_module(kernel_btf)) { + mod = btf_try_get_module(kernel_btf); + if (!mod) { + ret = -ENXIO; + goto end_btf; + } + } + + /* We already verified dtor_func to be btf_type_is_func + * in register_btf_id_dtor_kfuncs. + */ + dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off); + addr = kallsyms_lookup_name(dtor_func_name); + if (!addr) { + ret = -EINVAL; + goto end_mod; + } + tab->off[i].kptr.dtor = (void *)addr; + } + + tab->off[i].offset = info_arr[i].off; + tab->off[i].type = info_arr[i].type; + tab->off[i].kptr.btf_id = id; + tab->off[i].kptr.btf = kernel_btf; + tab->off[i].kptr.module = mod; + } + tab->nr_off = nr_off; + return tab; +end_mod: + module_put(mod); +end_btf: + btf_put(kernel_btf); +end: + while (i--) { + btf_put(tab->off[i].kptr.btf); + if (tab->off[i].kptr.module) + module_put(tab->off[i].kptr.module); + } + kfree(tab); + return ERR_PTR(ret); } static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, @@ -4541,6 +4804,48 @@ static int btf_parse_hdr(struct btf_verifier_env *env) return 0; } +static int btf_check_type_tags(struct btf_verifier_env *env, + struct btf *btf, int start_id) +{ + int i, n, good_id = start_id - 1; + bool in_tags; + + n = btf_nr_types(btf); + for (i = start_id; i < n; i++) { + const struct btf_type *t; + u32 cur_id = i; + + t = btf_type_by_id(btf, i); + if (!t) + return -EINVAL; + if (!btf_type_is_modifier(t)) + continue; + + cond_resched(); + + in_tags = btf_type_is_type_tag(t); + while (btf_type_is_modifier(t)) { + if (btf_type_is_type_tag(t)) { + if (!in_tags) { + btf_verifier_log(env, "Type tags don't precede modifiers"); + return -EINVAL; + } + } else if (in_tags) { + in_tags = false; + } + if (cur_id <= good_id) + break; + /* Move to next type */ + cur_id = t->type; + t = btf_type_by_id(btf, cur_id); + if (!t) + return -EINVAL; + } + good_id = i; + } + return 0; +} + static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, u32 log_level, char __user *log_ubuf, u32 log_size) { @@ -4608,6 +4913,10 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, if (err) goto errout; + err = btf_check_type_tags(env, btf, 1); + if (err) + goto errout; + if (log->level && bpf_verifier_log_full(log)) { err = -ENOSPC; goto errout; @@ -4716,41 +5025,6 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, return ctx_type; } -static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = { -#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) -#define BPF_LINK_TYPE(_id, _name) -#define BPF_MAP_TYPE(_id, _ops) \ - [_id] = &_ops, -#include <linux/bpf_types.h> -#undef BPF_PROG_TYPE -#undef BPF_LINK_TYPE -#undef BPF_MAP_TYPE -}; - -static int btf_vmlinux_map_ids_init(const struct btf *btf, - struct bpf_verifier_log *log) -{ - const struct bpf_map_ops *ops; - int i, btf_id; - - for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) { - ops = btf_vmlinux_map_ops[i]; - if (!ops || (!ops->map_btf_name && !ops->map_btf_id)) - continue; - if (!ops->map_btf_name || !ops->map_btf_id) { - bpf_log(log, "map type %d is misconfigured\n", i); - return -EINVAL; - } - btf_id = btf_find_by_name_kind(btf, ops->map_btf_name, - BTF_KIND_STRUCT); - if (btf_id < 0) - return btf_id; - *ops->map_btf_id = btf_id; - } - - return 0; -} - static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, struct btf *btf, const struct btf_type *t, @@ -4809,14 +5083,13 @@ struct btf *btf_parse_vmlinux(void) if (err) goto errout; + err = btf_check_type_tags(env, btf, 1); + if (err) + goto errout; + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); - /* find bpf map structs for map_ptr access checking */ - err = btf_vmlinux_map_ids_init(btf, log); - if (err < 0) - goto errout; - bpf_struct_ops_init(btf, log); refcount_set(&btf->refcnt, 1); @@ -4894,6 +5167,10 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u if (err) goto errout; + err = btf_check_type_tags(env, btf, btf_nr_types(base_btf)); + if (err) + goto errout; + btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; @@ -5429,7 +5706,8 @@ static bool btf_types_are_same(const struct btf *btf1, u32 id1, bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, - const struct btf *need_btf, u32 need_type_id) + const struct btf *need_btf, u32 need_type_id, + bool strict) { const struct btf_type *type; enum bpf_type_flag flag; @@ -5438,7 +5716,12 @@ bool btf_struct_ids_match(struct bpf_verifier_log *log, /* Are we already done? */ if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id)) return true; - + /* In case of strict type match, we do not walk struct, the top level + * type match must succeed. When strict is true, off should have already + * been 0. + */ + if (strict) + return false; again: type = btf_type_by_id(btf, id); if (!type) @@ -5772,11 +6055,11 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, struct bpf_verifier_log *log = &env->log; u32 i, nargs, ref_id, ref_obj_id = 0; bool is_kfunc = btf_is_kernel(btf); + bool rel = false, kptr_get = false; const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; int ref_regno = 0, ret; - bool rel = false; t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { @@ -5802,14 +6085,19 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } - /* Only kfunc can be release func */ - if (is_kfunc) + if (is_kfunc) { + /* Only kfunc can be release func */ rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), BTF_KFUNC_TYPE_RELEASE, func_id); + kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id); + } + /* check that BTF function arguments match actual types that the * verifier sees. */ for (i = 0; i < nargs; i++) { + enum bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1; struct bpf_reg_state *reg = ®s[regno]; @@ -5830,12 +6118,58 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE, rel); + if (rel && reg->ref_obj_id) + arg_type |= OBJ_RELEASE; + ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) return ret; - if (btf_get_prog_ctx_type(log, btf, t, - env->prog->type, i)) { + /* kptr_get is only true for kfunc */ + if (i == 0 && kptr_get) { + struct bpf_map_value_off_desc *off_desc; + + if (reg->type != PTR_TO_MAP_VALUE) { + bpf_log(log, "arg#0 expected pointer to map value\n"); + return -EINVAL; + } + + /* check_func_arg_reg_off allows var_off for + * PTR_TO_MAP_VALUE, but we need fixed offset to find + * off_desc. + */ + if (!tnum_is_const(reg->var_off)) { + bpf_log(log, "arg#0 must have constant offset\n"); + return -EINVAL; + } + + off_desc = bpf_map_kptr_off_contains(reg->map_ptr, reg->off + reg->var_off.value); + if (!off_desc || off_desc->type != BPF_KPTR_REF) { + bpf_log(log, "arg#0 no referenced kptr at map value offset=%llu\n", + reg->off + reg->var_off.value); + return -EINVAL; + } + + if (!btf_type_is_ptr(ref_t)) { + bpf_log(log, "arg#0 BTF type must be a double pointer\n"); + return -EINVAL; + } + + ref_t = btf_type_skip_modifiers(btf, ref_t->type, &ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + + if (!btf_type_is_struct(ref_t)) { + bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", + func_name, i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + if (!btf_struct_ids_match(log, btf, ref_id, 0, off_desc->kptr.btf, + off_desc->kptr.btf_id, true)) { + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n", + func_name, i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + /* rest of the arguments can be anything, like normal kfunc */ + } else if (btf_get_prog_ctx_type(log, btf, t, env->prog->type, i)) { /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX. */ @@ -5862,11 +6196,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (reg->type == PTR_TO_BTF_ID) { reg_btf = reg->btf; reg_ref_id = reg->btf_id; - /* Ensure only one argument is referenced - * PTR_TO_BTF_ID, check_func_arg_reg_off relies - * on only one referenced register being allowed - * for kfuncs. - */ + /* Ensure only one argument is referenced PTR_TO_BTF_ID */ if (reg->ref_obj_id) { if (ref_obj_id) { bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", @@ -5886,7 +6216,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, - reg->off, btf, ref_id)) { + reg->off, btf, ref_id, rel && reg->ref_obj_id)) { bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", func_name, i, btf_type_str(ref_t), ref_tname, @@ -6832,6 +7162,138 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, } EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); +s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) +{ + struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; + struct btf_id_dtor_kfunc *dtor; + + if (!tab) + return -ENOENT; + /* Even though the size of tab->dtors[0] is > sizeof(u32), we only need + * to compare the first u32 with btf_id, so we can reuse btf_id_cmp_func. + */ + BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0); + dtor = bsearch(&btf_id, tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func); + if (!dtor) + return -ENOENT; + return dtor->kfunc_btf_id; +} + +static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt) +{ + const struct btf_type *dtor_func, *dtor_func_proto, *t; + const struct btf_param *args; + s32 dtor_btf_id; + u32 nr_args, i; + + for (i = 0; i < cnt; i++) { + dtor_btf_id = dtors[i].kfunc_btf_id; + + dtor_func = btf_type_by_id(btf, dtor_btf_id); + if (!dtor_func || !btf_type_is_func(dtor_func)) + return -EINVAL; + + dtor_func_proto = btf_type_by_id(btf, dtor_func->type); + if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto)) + return -EINVAL; + + /* Make sure the prototype of the destructor kfunc is 'void func(type *)' */ + t = btf_type_by_id(btf, dtor_func_proto->type); + if (!t || !btf_type_is_void(t)) + return -EINVAL; + + nr_args = btf_type_vlen(dtor_func_proto); + if (nr_args != 1) + return -EINVAL; + args = btf_params(dtor_func_proto); + t = btf_type_by_id(btf, args[0].type); + /* Allow any pointer type, as width on targets Linux supports + * will be same for all pointer types (i.e. sizeof(void *)) + */ + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + } + return 0; +} + +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, + struct module *owner) +{ + struct btf_id_dtor_kfunc_tab *tab; + struct btf *btf; + u32 tab_cnt; + int ret; + + btf = btf_get_module_btf(owner); + if (!btf) { + if (!owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { + pr_err("missing vmlinux BTF, cannot register dtor kfuncs\n"); + return -ENOENT; + } + if (owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) { + pr_err("missing module BTF, cannot register dtor kfuncs\n"); + return -ENOENT; + } + return 0; + } + if (IS_ERR(btf)) + return PTR_ERR(btf); + + if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) { + pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT); + ret = -E2BIG; + goto end; + } + + /* Ensure that the prototype of dtor kfuncs being registered is sane */ + ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt); + if (ret < 0) + goto end; + + tab = btf->dtor_kfunc_tab; + /* Only one call allowed for modules */ + if (WARN_ON_ONCE(tab && btf_is_module(btf))) { + ret = -EINVAL; + goto end; + } + + tab_cnt = tab ? tab->cnt : 0; + if (tab_cnt > U32_MAX - add_cnt) { + ret = -EOVERFLOW; + goto end; + } + if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) { + pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT); + ret = -E2BIG; + goto end; + } + + tab = krealloc(btf->dtor_kfunc_tab, + offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]), + GFP_KERNEL | __GFP_NOWARN); + if (!tab) { + ret = -ENOMEM; + goto end; + } + + if (!btf->dtor_kfunc_tab) + tab->cnt = 0; + btf->dtor_kfunc_tab = tab; + + memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0])); + tab->cnt += add_cnt; + + sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); + + return 0; +end: + btf_free_dtor_kfunc_tab(btf); + btf_put(btf); + return ret; +} +EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs); + #define MAX_TYPES_ARE_COMPAT_DEPTH 2 static diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 128028efda64..afb414b26d01 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -22,6 +22,45 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE); EXPORT_SYMBOL(cgroup_bpf_enabled_key); +/* __always_inline is necessary to prevent indirect call through run_prog + * function pointer. + */ +static __always_inline int +bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp, + enum cgroup_bpf_attach_type atype, + const void *ctx, bpf_prog_run_fn run_prog, + int retval, u32 *ret_flags) +{ + const struct bpf_prog_array_item *item; + const struct bpf_prog *prog; + const struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + u32 func_ret; + + run_ctx.retval = retval; + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(cgrp->effective[atype]); + item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + while ((prog = READ_ONCE(item->prog))) { + run_ctx.prog_item = item; + func_ret = run_prog(prog, ctx); + if (ret_flags) { + *(ret_flags) |= (func_ret >> 1); + func_ret &= 1; + } + if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval)) + run_ctx.retval = -EPERM; + item++; + } + bpf_reset_run_ctx(old_run_ctx); + rcu_read_unlock(); + migrate_enable(); + return run_ctx.retval; +} + void cgroup_bpf_offline(struct cgroup *cgrp) { cgroup_get(cgrp); @@ -1075,11 +1114,38 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, bpf_compute_and_save_data_end(skb, &saved_data_end); if (atype == CGROUP_INET_EGRESS) { - ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY( - cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb); + u32 flags = 0; + bool cn; + + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb, + __bpf_prog_run_save_cb, 0, &flags); + + /* Return values of CGROUP EGRESS BPF programs are: + * 0: drop packet + * 1: keep packet + * 2: drop packet and cn + * 3: keep packet and cn + * + * The returned value is then converted to one of the NET_XMIT + * or an error code that is then interpreted as drop packet + * (and no cn): + * 0: NET_XMIT_SUCCESS skb should be transmitted + * 1: NET_XMIT_DROP skb should be dropped and cn + * 2: NET_XMIT_CN skb should be transmitted and cn + * 3: -err skb should be dropped + */ + + cn = flags & BPF_RET_SET_CN; + if (ret && !IS_ERR_VALUE((long)ret)) + ret = -EFAULT; + if (!ret) + ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); + else + ret = (cn ? NET_XMIT_DROP : ret); } else { - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb, - __bpf_prog_run_save_cb, 0); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, + skb, __bpf_prog_run_save_cb, 0, + NULL); if (ret && !IS_ERR_VALUE((long)ret)) ret = -EFAULT; } @@ -1109,8 +1175,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, - bpf_prog_run, 0); + return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0, + NULL); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); @@ -1155,8 +1221,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, - bpf_prog_run, 0, flags); + return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, + 0, flags); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); @@ -1182,8 +1248,8 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, - bpf_prog_run, 0); + return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run, + 0, NULL); } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); @@ -1200,8 +1266,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, rcu_read_lock(); cgrp = task_dfl_cgroup(current); - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, - bpf_prog_run, 0); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0, + NULL); rcu_read_unlock(); return ret; @@ -1366,8 +1432,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, rcu_read_lock(); cgrp = task_dfl_cgroup(current); - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, - bpf_prog_run, 0); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0, + NULL); rcu_read_unlock(); kfree(ctx.cur_val); @@ -1459,8 +1525,8 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, } lock_sock(sk); - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT], - &ctx, bpf_prog_run, 0); + ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT, + &ctx, bpf_prog_run, 0, NULL); release_sock(sk); if (ret) @@ -1559,8 +1625,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, } lock_sock(sk); - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], - &ctx, bpf_prog_run, retval); + ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT, + &ctx, bpf_prog_run, retval, NULL); release_sock(sk); if (ret < 0) @@ -1608,8 +1674,8 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, * be called if that data shouldn't be "exported". */ - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], - &ctx, bpf_prog_run, retval); + ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT, + &ctx, bpf_prog_run, retval, NULL); if (ret < 0) return ret; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 650e5d21f90d..f4860ac756cd 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -27,6 +27,7 @@ #include <linux/kthread.h> #include <linux/capability.h> #include <trace/events/xdp.h> +#include <linux/btf_ids.h> #include <linux/netdevice.h> /* netif_receive_skb_list */ #include <linux/etherdevice.h> /* eth_type_trans */ @@ -673,7 +674,7 @@ static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags) __cpu_map_lookup_elem); } -static int cpu_map_btf_id; +BTF_ID_LIST_SINGLE(cpu_map_btf_ids, struct, bpf_cpu_map) const struct bpf_map_ops cpu_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = cpu_map_alloc, @@ -683,8 +684,7 @@ const struct bpf_map_ops cpu_map_ops = { .map_lookup_elem = cpu_map_lookup_elem, .map_get_next_key = cpu_map_get_next_key, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_cpu_map", - .map_btf_id = &cpu_map_btf_id, + .map_btf_id = &cpu_map_btf_ids[0], .map_redirect = cpu_map_redirect, }; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 038f6d7a83e4..c2867068e5bd 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -48,6 +48,7 @@ #include <net/xdp.h> #include <linux/filter.h> #include <trace/events/xdp.h> +#include <linux/btf_ids.h> #define DEV_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) @@ -1005,7 +1006,7 @@ static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags) __dev_map_hash_lookup_elem); } -static int dev_map_btf_id; +BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab) const struct bpf_map_ops dev_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = dev_map_alloc, @@ -1015,12 +1016,10 @@ const struct bpf_map_ops dev_map_ops = { .map_update_elem = dev_map_update_elem, .map_delete_elem = dev_map_delete_elem, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_dtab", - .map_btf_id = &dev_map_btf_id, + .map_btf_id = &dev_map_btf_ids[0], .map_redirect = dev_map_redirect, }; -static int dev_map_hash_map_btf_id; const struct bpf_map_ops dev_map_hash_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = dev_map_alloc, @@ -1030,8 +1029,7 @@ const struct bpf_map_ops dev_map_hash_ops = { .map_update_elem = dev_map_hash_update_elem, .map_delete_elem = dev_map_hash_delete_elem, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_dtab", - .map_btf_id = &dev_map_hash_map_btf_id, + .map_btf_id = &dev_map_btf_ids[0], .map_redirect = dev_hash_map_redirect, }; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 65877967f414..3e00e62b2218 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -10,6 +10,7 @@ #include <linux/random.h> #include <uapi/linux/btf.h> #include <linux/rcupdate_trace.h> +#include <linux/btf_ids.h> #include "percpu_freelist.h" #include "bpf_lru_list.h" #include "map_in_map.h" @@ -238,7 +239,7 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab) u32 num_entries = htab->map.max_entries; int i; - if (likely(!map_value_has_timer(&htab->map))) + if (!map_value_has_timer(&htab->map)) return; if (htab_has_extra_elems(htab)) num_entries += num_possible_cpus(); @@ -254,6 +255,25 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab) } } +static void htab_free_prealloced_kptrs(struct bpf_htab *htab) +{ + u32 num_entries = htab->map.max_entries; + int i; + + if (!map_value_has_kptrs(&htab->map)) + return; + if (htab_has_extra_elems(htab)) + num_entries += num_possible_cpus(); + + for (i = 0; i < num_entries; i++) { + struct htab_elem *elem; + + elem = get_htab_elem(htab, i); + bpf_map_free_kptrs(&htab->map, elem->key + round_up(htab->map.key_size, 8)); + cond_resched(); + } +} + static void htab_free_elems(struct bpf_htab *htab) { int i; @@ -725,12 +745,15 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } -static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem) +static void check_and_free_fields(struct bpf_htab *htab, + struct htab_elem *elem) { - if (unlikely(map_value_has_timer(&htab->map))) - bpf_timer_cancel_and_free(elem->key + - round_up(htab->map.key_size, 8) + - htab->map.timer_off); + void *map_value = elem->key + round_up(htab->map.key_size, 8); + + if (map_value_has_timer(&htab->map)) + bpf_timer_cancel_and_free(map_value + htab->map.timer_off); + if (map_value_has_kptrs(&htab->map)) + bpf_map_free_kptrs(&htab->map, map_value); } /* It is called from the bpf_lru_list when the LRU needs to delete @@ -738,7 +761,7 @@ static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem) */ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) { - struct bpf_htab *htab = (struct bpf_htab *)arg; + struct bpf_htab *htab = arg; struct htab_elem *l = NULL, *tgt_l; struct hlist_nulls_head *head; struct hlist_nulls_node *n; @@ -757,7 +780,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { hlist_nulls_del_rcu(&l->hash_node); - check_and_free_timer(htab, l); + check_and_free_fields(htab, l); break; } @@ -829,7 +852,7 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); - check_and_free_timer(htab, l); + check_and_free_fields(htab, l); kfree(l); } @@ -857,7 +880,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) htab_put_fd_value(htab, l); if (htab_is_prealloc(htab)) { - check_and_free_timer(htab, l); + check_and_free_fields(htab, l); __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); @@ -1104,7 +1127,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (!htab_is_prealloc(htab)) free_htab_elem(htab, l_old); else - check_and_free_timer(htab, l_old); + check_and_free_fields(htab, l_old); } ret = 0; err: @@ -1114,7 +1137,7 @@ err: static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) { - check_and_free_timer(htab, elem); + check_and_free_fields(htab, elem); bpf_lru_push_free(&htab->lru, &elem->lru_node); } @@ -1419,8 +1442,14 @@ static void htab_free_malloced_timers(struct bpf_htab *htab) struct hlist_nulls_node *n; struct htab_elem *l; - hlist_nulls_for_each_entry(l, n, head, hash_node) - check_and_free_timer(htab, l); + hlist_nulls_for_each_entry(l, n, head, hash_node) { + /* We don't reset or free kptr on uref dropping to zero, + * hence just free timer. + */ + bpf_timer_cancel_and_free(l->key + + round_up(htab->map.key_size, 8) + + htab->map.timer_off); + } cond_resched_rcu(); } rcu_read_unlock(); @@ -1430,7 +1459,8 @@ static void htab_map_free_timers(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - if (likely(!map_value_has_timer(&htab->map))) + /* We don't reset or free kptr on uref dropping to zero. */ + if (!map_value_has_timer(&htab->map)) return; if (!htab_is_prealloc(htab)) htab_free_malloced_timers(htab); @@ -1453,11 +1483,14 @@ static void htab_map_free(struct bpf_map *map) * not have executed. Wait for them. */ rcu_barrier(); - if (!htab_is_prealloc(htab)) + if (!htab_is_prealloc(htab)) { delete_all_elements(htab); - else + } else { + htab_free_prealloced_kptrs(htab); prealloc_destroy(htab); + } + bpf_map_free_kptr_off_tab(map); free_percpu(htab->extra_elems); bpf_map_area_free(htab->buckets); for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) @@ -2105,7 +2138,7 @@ out: return num_elems; } -static int htab_map_btf_id; +BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab) const struct bpf_map_ops htab_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = htab_map_alloc_check, @@ -2122,12 +2155,10 @@ const struct bpf_map_ops htab_map_ops = { .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, BATCH_OPS(htab), - .map_btf_name = "bpf_htab", - .map_btf_id = &htab_map_btf_id, + .map_btf_id = &htab_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; -static int htab_lru_map_btf_id; const struct bpf_map_ops htab_lru_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = htab_map_alloc_check, @@ -2145,8 +2176,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, BATCH_OPS(htab_lru), - .map_btf_name = "bpf_htab", - .map_btf_id = &htab_lru_map_btf_id, + .map_btf_id = &htab_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; @@ -2252,7 +2282,6 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } -static int htab_percpu_map_btf_id; const struct bpf_map_ops htab_percpu_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = htab_map_alloc_check, @@ -2267,12 +2296,10 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, BATCH_OPS(htab_percpu), - .map_btf_name = "bpf_htab", - .map_btf_id = &htab_percpu_map_btf_id, + .map_btf_id = &htab_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; -static int htab_lru_percpu_map_btf_id; const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = htab_map_alloc_check, @@ -2287,8 +2314,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, BATCH_OPS(htab_lru_percpu), - .map_btf_name = "bpf_htab", - .map_btf_id = &htab_lru_percpu_map_btf_id, + .map_btf_id = &htab_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; @@ -2412,7 +2438,6 @@ static void htab_of_map_free(struct bpf_map *map) fd_htab_map_free(map); } -static int htab_of_maps_map_btf_id; const struct bpf_map_ops htab_of_maps_map_ops = { .map_alloc_check = fd_htab_map_alloc_check, .map_alloc = htab_of_map_alloc, @@ -2425,6 +2450,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = htab_of_map_gen_lookup, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_htab", - .map_btf_id = &htab_of_maps_map_btf_id, + .map_btf_id = &htab_map_btf_ids[0], }; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 315053ef6a75..3e709fed5306 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1374,6 +1374,28 @@ out: kfree(t); } +BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) +{ + unsigned long *kptr = map_value; + + return xchg(kptr, (unsigned long)ptr); +} + +/* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg() + * helper is determined dynamically by the verifier. + */ +#define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) + +const struct bpf_func_proto bpf_kptr_xchg_proto = { + .func = bpf_kptr_xchg, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = BPF_PTR_POISON, + .arg1_type = ARG_PTR_TO_KPTR, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, + .arg2_btf_id = BPF_PTR_POISON, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1452,6 +1474,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_timer_start_proto; case BPF_FUNC_timer_cancel: return &bpf_timer_cancel_proto; + case BPF_FUNC_kptr_xchg: + return &bpf_kptr_xchg_proto; default: break; } diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 497916060ac7..8654fc97f5fe 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -9,6 +9,7 @@ #include <linux/rbtree.h> #include <linux/slab.h> #include <uapi/linux/btf.h> +#include <linux/btf_ids.h> #ifdef CONFIG_CGROUP_BPF @@ -446,7 +447,8 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } -static int cgroup_storage_map_btf_id; +BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, + bpf_cgroup_storage_map) const struct bpf_map_ops cgroup_storage_map_ops = { .map_alloc = cgroup_storage_map_alloc, .map_free = cgroup_storage_map_free, @@ -456,8 +458,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = { .map_delete_elem = cgroup_storage_delete_elem, .map_check_btf = cgroup_storage_check_btf, .map_seq_show_elem = cgroup_storage_seq_show_elem, - .map_btf_name = "bpf_cgroup_storage_map", - .map_btf_id = &cgroup_storage_map_btf_id, + .map_btf_id = &cgroup_storage_map_btf_ids[0], }; int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 5763cc7ac4f1..f0d05a3cc4b9 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -14,6 +14,7 @@ #include <linux/vmalloc.h> #include <net/ipv6.h> #include <uapi/linux/btf.h> +#include <linux/btf_ids.h> /* Intermediate node */ #define LPM_TREE_NODE_FLAG_IM BIT(0) @@ -719,7 +720,7 @@ static int trie_check_btf(const struct bpf_map *map, -EINVAL : 0; } -static int trie_map_btf_id; +BTF_ID_LIST_SINGLE(trie_map_btf_ids, struct, lpm_trie) const struct bpf_map_ops trie_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = trie_alloc, @@ -732,6 +733,5 @@ const struct bpf_map_ops trie_map_ops = { .map_update_batch = generic_map_update_batch, .map_delete_batch = generic_map_delete_batch, .map_check_btf = trie_check_btf, - .map_btf_name = "lpm_trie", - .map_btf_id = &trie_map_btf_id, + .map_btf_id = &trie_map_btf_ids[0], }; diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 5cd8f5277279..135205d0d560 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -52,6 +52,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->max_entries = inner_map->max_entries; inner_map_meta->spin_lock_off = inner_map->spin_lock_off; inner_map_meta->timer_off = inner_map->timer_off; + inner_map_meta->kptr_off_tab = bpf_map_copy_kptr_off_tab(inner_map); if (inner_map->btf) { btf_get(inner_map->btf); inner_map_meta->btf = inner_map->btf; @@ -71,6 +72,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) void bpf_map_meta_free(struct bpf_map *map_meta) { + bpf_map_free_kptr_off_tab(map_meta); btf_put(map_meta->btf); kfree(map_meta); } @@ -83,7 +85,8 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, meta0->key_size == meta1->key_size && meta0->value_size == meta1->value_size && meta0->timer_off == meta1->timer_off && - meta0->map_flags == meta1->map_flags; + meta0->map_flags == meta1->map_flags && + bpf_map_equal_kptr_off_tab(meta0, meta1); } void *bpf_map_fd_get_ptr(struct bpf_map *map, diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index f9c734aaa990..a1c0794ae49d 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -8,6 +8,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/capability.h> +#include <linux/btf_ids.h> #include "percpu_freelist.h" #define QUEUE_STACK_CREATE_FLAG_MASK \ @@ -247,7 +248,7 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, return -EINVAL; } -static int queue_map_btf_id; +BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack) const struct bpf_map_ops queue_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = queue_stack_map_alloc_check, @@ -260,11 +261,9 @@ const struct bpf_map_ops queue_map_ops = { .map_pop_elem = queue_map_pop_elem, .map_peek_elem = queue_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, - .map_btf_name = "bpf_queue_stack", - .map_btf_id = &queue_map_btf_id, + .map_btf_id = &queue_map_btf_ids[0], }; -static int stack_map_btf_id; const struct bpf_map_ops stack_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = queue_stack_map_alloc_check, @@ -277,6 +276,5 @@ const struct bpf_map_ops stack_map_ops = { .map_pop_elem = stack_map_pop_elem, .map_peek_elem = stack_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, - .map_btf_name = "bpf_queue_stack", - .map_btf_id = &stack_map_btf_id, + .map_btf_id = &queue_map_btf_ids[0], }; diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 8251243022a2..e2618fb5870e 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -6,6 +6,7 @@ #include <linux/err.h> #include <linux/sock_diag.h> #include <net/sock_reuseport.h> +#include <linux/btf_ids.h> struct reuseport_array { struct bpf_map map; @@ -337,7 +338,7 @@ static int reuseport_array_get_next_key(struct bpf_map *map, void *key, return 0; } -static int reuseport_array_map_btf_id; +BTF_ID_LIST_SINGLE(reuseport_array_map_btf_ids, struct, reuseport_array) const struct bpf_map_ops reuseport_array_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = reuseport_array_alloc_check, @@ -346,6 +347,5 @@ const struct bpf_map_ops reuseport_array_ops = { .map_lookup_elem = reuseport_array_lookup_elem, .map_get_next_key = reuseport_array_get_next_key, .map_delete_elem = reuseport_array_delete_elem, - .map_btf_name = "reuseport_array", - .map_btf_id = &reuseport_array_map_btf_id, + .map_btf_id = &reuseport_array_map_btf_ids[0], }; diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 710ba9de12ce..311264ab80c4 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -10,6 +10,7 @@ #include <linux/poll.h> #include <linux/kmemleak.h> #include <uapi/linux/btf.h> +#include <linux/btf_ids.h> #define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE) @@ -263,7 +264,7 @@ static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp, return 0; } -static int ringbuf_map_btf_id; +BTF_ID_LIST_SINGLE(ringbuf_map_btf_ids, struct, bpf_ringbuf_map) const struct bpf_map_ops ringbuf_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = ringbuf_map_alloc, @@ -274,8 +275,7 @@ const struct bpf_map_ops ringbuf_map_ops = { .map_update_elem = ringbuf_map_update_elem, .map_delete_elem = ringbuf_map_delete_elem, .map_get_next_key = ringbuf_map_get_next_key, - .map_btf_name = "bpf_ringbuf_map", - .map_btf_id = &ringbuf_map_btf_id, + .map_btf_id = &ringbuf_map_btf_ids[0], }; /* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself, @@ -404,7 +404,7 @@ BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags) const struct bpf_func_proto bpf_ringbuf_submit_proto = { .func = bpf_ringbuf_submit, .ret_type = RET_VOID, - .arg1_type = ARG_PTR_TO_ALLOC_MEM, + .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE, .arg2_type = ARG_ANYTHING, }; @@ -417,7 +417,7 @@ BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags) const struct bpf_func_proto bpf_ringbuf_discard_proto = { .func = bpf_ringbuf_discard, .ret_type = RET_VOID, - .arg1_type = ARG_PTR_TO_ALLOC_MEM, + .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE, .arg2_type = ARG_ANYTHING, }; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 1dd5266fbebb..1adbe67cdb95 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -654,7 +654,7 @@ static void stack_map_free(struct bpf_map *map) put_callchain_buffers(); } -static int stack_trace_map_btf_id; +BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map) const struct bpf_map_ops stack_trace_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = stack_map_alloc, @@ -664,6 +664,5 @@ const struct bpf_map_ops stack_trace_map_ops = { .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_stack_map", - .map_btf_id = &stack_trace_map_btf_id, + .map_btf_id = &stack_trace_map_btf_ids[0], }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cdaa1152436a..e0aead17dff4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6,6 +6,7 @@ #include <linux/bpf_trace.h> #include <linux/bpf_lirc.h> #include <linux/bpf_verifier.h> +#include <linux/bsearch.h> #include <linux/btf.h> #include <linux/syscalls.h> #include <linux/slab.h> @@ -29,6 +30,7 @@ #include <linux/pgtable.h> #include <linux/bpf_lsm.h> #include <linux/poll.h> +#include <linux/sort.h> #include <linux/bpf-netns.h> #include <linux/rcupdate_trace.h> #include <linux/memcontrol.h> @@ -473,14 +475,128 @@ static void bpf_map_release_memcg(struct bpf_map *map) } #endif +static int bpf_map_kptr_off_cmp(const void *a, const void *b) +{ + const struct bpf_map_value_off_desc *off_desc1 = a, *off_desc2 = b; + + if (off_desc1->offset < off_desc2->offset) + return -1; + else if (off_desc1->offset > off_desc2->offset) + return 1; + return 0; +} + +struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset) +{ + /* Since members are iterated in btf_find_field in increasing order, + * offsets appended to kptr_off_tab are in increasing order, so we can + * do bsearch to find exact match. + */ + struct bpf_map_value_off *tab; + + if (!map_value_has_kptrs(map)) + return NULL; + tab = map->kptr_off_tab; + return bsearch(&offset, tab->off, tab->nr_off, sizeof(tab->off[0]), bpf_map_kptr_off_cmp); +} + +void bpf_map_free_kptr_off_tab(struct bpf_map *map) +{ + struct bpf_map_value_off *tab = map->kptr_off_tab; + int i; + + if (!map_value_has_kptrs(map)) + return; + for (i = 0; i < tab->nr_off; i++) { + if (tab->off[i].kptr.module) + module_put(tab->off[i].kptr.module); + btf_put(tab->off[i].kptr.btf); + } + kfree(tab); + map->kptr_off_tab = NULL; +} + +struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map) +{ + struct bpf_map_value_off *tab = map->kptr_off_tab, *new_tab; + int size, i; + + if (!map_value_has_kptrs(map)) + return ERR_PTR(-ENOENT); + size = offsetof(struct bpf_map_value_off, off[tab->nr_off]); + new_tab = kmemdup(tab, size, GFP_KERNEL | __GFP_NOWARN); + if (!new_tab) + return ERR_PTR(-ENOMEM); + /* Do a deep copy of the kptr_off_tab */ + for (i = 0; i < tab->nr_off; i++) { + btf_get(tab->off[i].kptr.btf); + if (tab->off[i].kptr.module && !try_module_get(tab->off[i].kptr.module)) { + while (i--) { + if (tab->off[i].kptr.module) + module_put(tab->off[i].kptr.module); + btf_put(tab->off[i].kptr.btf); + } + kfree(new_tab); + return ERR_PTR(-ENXIO); + } + } + return new_tab; +} + +bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b) +{ + struct bpf_map_value_off *tab_a = map_a->kptr_off_tab, *tab_b = map_b->kptr_off_tab; + bool a_has_kptr = map_value_has_kptrs(map_a), b_has_kptr = map_value_has_kptrs(map_b); + int size; + + if (!a_has_kptr && !b_has_kptr) + return true; + if (a_has_kptr != b_has_kptr) + return false; + if (tab_a->nr_off != tab_b->nr_off) + return false; + size = offsetof(struct bpf_map_value_off, off[tab_a->nr_off]); + return !memcmp(tab_a, tab_b, size); +} + +/* Caller must ensure map_value_has_kptrs is true. Note that this function can + * be called on a map value while the map_value is visible to BPF programs, as + * it ensures the correct synchronization, and we already enforce the same using + * the bpf_kptr_xchg helper on the BPF program side for referenced kptrs. + */ +void bpf_map_free_kptrs(struct bpf_map *map, void *map_value) +{ + struct bpf_map_value_off *tab = map->kptr_off_tab; + unsigned long *btf_id_ptr; + int i; + + for (i = 0; i < tab->nr_off; i++) { + struct bpf_map_value_off_desc *off_desc = &tab->off[i]; + unsigned long old_ptr; + + btf_id_ptr = map_value + off_desc->offset; + if (off_desc->type == BPF_KPTR_UNREF) { + u64 *p = (u64 *)btf_id_ptr; + + WRITE_ONCE(p, 0); + continue; + } + old_ptr = xchg(btf_id_ptr, 0); + off_desc->kptr.dtor((void *)old_ptr); + } +} + /* called from workqueue */ static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); security_bpf_map_free(map); + kfree(map->off_arr); bpf_map_release_memcg(map); - /* implementation dependent freeing */ + /* implementation dependent freeing, map_free callback also does + * bpf_map_free_kptr_off_tab, if needed. + */ map->ops->map_free(map); } @@ -640,7 +756,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) int err; if (!map->ops->map_mmap || map_value_has_spin_lock(map) || - map_value_has_timer(map)) + map_value_has_timer(map) || map_value_has_kptrs(map)) return -ENOTSUPP; if (!(vma->vm_flags & VM_SHARED)) @@ -767,6 +883,84 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } +static int map_off_arr_cmp(const void *_a, const void *_b, const void *priv) +{ + const u32 a = *(const u32 *)_a; + const u32 b = *(const u32 *)_b; + + if (a < b) + return -1; + else if (a > b) + return 1; + return 0; +} + +static void map_off_arr_swap(void *_a, void *_b, int size, const void *priv) +{ + struct bpf_map *map = (struct bpf_map *)priv; + u32 *off_base = map->off_arr->field_off; + u32 *a = _a, *b = _b; + u8 *sz_a, *sz_b; + + sz_a = map->off_arr->field_sz + (a - off_base); + sz_b = map->off_arr->field_sz + (b - off_base); + + swap(*a, *b); + swap(*sz_a, *sz_b); +} + +static int bpf_map_alloc_off_arr(struct bpf_map *map) +{ + bool has_spin_lock = map_value_has_spin_lock(map); + bool has_timer = map_value_has_timer(map); + bool has_kptrs = map_value_has_kptrs(map); + struct bpf_map_off_arr *off_arr; + u32 i; + + if (!has_spin_lock && !has_timer && !has_kptrs) { + map->off_arr = NULL; + return 0; + } + + off_arr = kmalloc(sizeof(*map->off_arr), GFP_KERNEL | __GFP_NOWARN); + if (!off_arr) + return -ENOMEM; + map->off_arr = off_arr; + + off_arr->cnt = 0; + if (has_spin_lock) { + i = off_arr->cnt; + + off_arr->field_off[i] = map->spin_lock_off; + off_arr->field_sz[i] = sizeof(struct bpf_spin_lock); + off_arr->cnt++; + } + if (has_timer) { + i = off_arr->cnt; + + off_arr->field_off[i] = map->timer_off; + off_arr->field_sz[i] = sizeof(struct bpf_timer); + off_arr->cnt++; + } + if (has_kptrs) { + struct bpf_map_value_off *tab = map->kptr_off_tab; + u32 *off = &off_arr->field_off[off_arr->cnt]; + u8 *sz = &off_arr->field_sz[off_arr->cnt]; + + for (i = 0; i < tab->nr_off; i++) { + *off++ = tab->off[i].offset; + *sz++ = sizeof(u64); + } + off_arr->cnt += tab->nr_off; + } + + if (off_arr->cnt == 1) + return 0; + sort_r(off_arr->field_off, off_arr->cnt, sizeof(off_arr->field_off[0]), + map_off_arr_cmp, map_off_arr_swap, map); + return 0; +} + static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { @@ -820,10 +1014,34 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, return -EOPNOTSUPP; } - if (map->ops->map_check_btf) + map->kptr_off_tab = btf_parse_kptrs(btf, value_type); + if (map_value_has_kptrs(map)) { + if (!bpf_capable()) { + ret = -EPERM; + goto free_map_tab; + } + if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { + ret = -EACCES; + goto free_map_tab; + } + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) { + ret = -EOPNOTSUPP; + goto free_map_tab; + } + } + + if (map->ops->map_check_btf) { ret = map->ops->map_check_btf(map, btf, key_type, value_type); + if (ret < 0) + goto free_map_tab; + } return ret; +free_map_tab: + bpf_map_free_kptr_off_tab(map); + return ret; } #define BPF_MAP_CREATE_LAST_FIELD map_extra @@ -912,10 +1130,14 @@ static int map_create(union bpf_attr *attr) attr->btf_vmlinux_value_type_id; } - err = security_bpf_map_alloc(map); + err = bpf_map_alloc_off_arr(map); if (err) goto free_map; + err = security_bpf_map_alloc(map); + if (err) + goto free_map_off_arr; + err = bpf_map_alloc_id(map); if (err) goto free_map_sec; @@ -938,6 +1160,8 @@ static int map_create(union bpf_attr *attr) free_map_sec: security_bpf_map_free(map); +free_map_off_arr: + kfree(map->off_arr); free_map: btf_put(map->btf); map->ops->map_free(map); @@ -1639,7 +1863,7 @@ static int map_freeze(const union bpf_attr *attr) return PTR_ERR(map); if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || - map_value_has_timer(map)) { + map_value_has_timer(map) || map_value_has_kptrs(map)) { fdput(f); return -ENOTSUPP; } @@ -3030,66 +3254,45 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro } #endif /* CONFIG_PERF_EVENTS */ -#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd - -static int bpf_raw_tracepoint_open(const union bpf_attr *attr) +static int bpf_raw_tp_link_attach(struct bpf_prog *prog, + const char __user *user_tp_name) { struct bpf_link_primer link_primer; struct bpf_raw_tp_link *link; struct bpf_raw_event_map *btp; - struct bpf_prog *prog; const char *tp_name; char buf[128]; int err; - if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) - return -EINVAL; - - prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - switch (prog->type) { case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_EXT: case BPF_PROG_TYPE_LSM: - if (attr->raw_tracepoint.name) { + if (user_tp_name) /* The attach point for this category of programs * should be specified via btf_id during program load. */ - err = -EINVAL; - goto out_put_prog; - } + return -EINVAL; if (prog->type == BPF_PROG_TYPE_TRACING && prog->expected_attach_type == BPF_TRACE_RAW_TP) { tp_name = prog->aux->attach_func_name; break; } - err = bpf_tracing_prog_attach(prog, 0, 0); - if (err >= 0) - return err; - goto out_put_prog; + return bpf_tracing_prog_attach(prog, 0, 0); case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: - if (strncpy_from_user(buf, - u64_to_user_ptr(attr->raw_tracepoint.name), - sizeof(buf) - 1) < 0) { - err = -EFAULT; - goto out_put_prog; - } + if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) + return -EFAULT; buf[sizeof(buf) - 1] = 0; tp_name = buf; break; default: - err = -EINVAL; - goto out_put_prog; + return -EINVAL; } btp = bpf_get_raw_tracepoint(tp_name); - if (!btp) { - err = -ENOENT; - goto out_put_prog; - } + if (!btp) + return -ENOENT; link = kzalloc(sizeof(*link), GFP_USER); if (!link) { @@ -3116,11 +3319,29 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) out_put_btp: bpf_put_raw_tracepoint(btp); -out_put_prog: - bpf_prog_put(prog); return err; } +#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd + +static int bpf_raw_tracepoint_open(const union bpf_attr *attr) +{ + struct bpf_prog *prog; + int fd; + + if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) + return -EINVAL; + + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name)); + if (fd < 0) + bpf_prog_put(prog); + return fd; +} + static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, enum bpf_attach_type attach_type) { @@ -3189,7 +3410,13 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; case BPF_TRACE_ITER: + case BPF_TRACE_RAW_TP: + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: return BPF_PROG_TYPE_TRACING; + case BPF_LSM_MAC: + return BPF_PROG_TYPE_LSM; case BPF_SK_LOOKUP: return BPF_PROG_TYPE_SK_LOOKUP; case BPF_XDP: @@ -4246,21 +4473,6 @@ err_put: return err; } -static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr, - struct bpf_prog *prog) -{ - if (attr->link_create.attach_type != prog->expected_attach_type) - return -EINVAL; - - if (prog->expected_attach_type == BPF_TRACE_ITER) - return bpf_iter_link_attach(attr, uattr, prog); - else if (prog->type == BPF_PROG_TYPE_EXT) - return bpf_tracing_prog_attach(prog, - attr->link_create.target_fd, - attr->link_create.target_btf_id); - return -EINVAL; -} - #define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies static int link_create(union bpf_attr *attr, bpfptr_t uattr) { @@ -4282,15 +4494,13 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) switch (prog->type) { case BPF_PROG_TYPE_EXT: - ret = tracing_bpf_link_attach(attr, uattr, prog); - goto out; + break; case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: if (attr->link_create.attach_type != BPF_PERF_EVENT) { ret = -EINVAL; goto out; } - ptype = prog->type; break; case BPF_PROG_TYPE_KPROBE: if (attr->link_create.attach_type != BPF_PERF_EVENT && @@ -4298,7 +4508,6 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) ret = -EINVAL; goto out; } - ptype = prog->type; break; default: ptype = attach_type_to_prog_type(attr->link_create.attach_type); @@ -4309,7 +4518,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) break; } - switch (ptype) { + switch (prog->type) { case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: @@ -4319,8 +4528,25 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: ret = cgroup_bpf_link_attach(attr, prog); break; + case BPF_PROG_TYPE_EXT: + ret = bpf_tracing_prog_attach(prog, + attr->link_create.target_fd, + attr->link_create.target_btf_id); + break; + case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_TRACING: - ret = tracing_bpf_link_attach(attr, uattr, prog); + if (attr->link_create.attach_type != prog->expected_attach_type) { + ret = -EINVAL; + goto out; + } + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) + ret = bpf_raw_tp_link_attach(prog, NULL); + else if (prog->expected_attach_type == BPF_TRACE_ITER) + ret = bpf_iter_link_attach(attr, uattr, prog); + else + ret = bpf_tracing_prog_attach(prog, + attr->link_create.target_fd, + attr->link_create.target_btf_id); break; case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_SK_LOOKUP: @@ -4908,3 +5134,90 @@ const struct bpf_verifier_ops bpf_syscall_verifier_ops = { const struct bpf_prog_ops bpf_syscall_prog_ops = { .test_run = bpf_prog_test_run_syscall, }; + +#ifdef CONFIG_SYSCTL +static int bpf_stats_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct static_key *key = (struct static_key *)table->data; + static int saved_val; + int val, ret; + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(val), + .mode = table->mode, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&bpf_stats_enabled_mutex); + val = saved_val; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret && val != saved_val) { + if (val) + static_key_slow_inc(key); + else + static_key_slow_dec(key); + saved_val = val; + } + mutex_unlock(&bpf_stats_enabled_mutex); + return ret; +} + +void __weak unpriv_ebpf_notify(int new_state) +{ +} + +static int bpf_unpriv_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, unpriv_enable = *(int *)table->data; + bool locked_state = unpriv_enable == 1; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &unpriv_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (locked_state && unpriv_enable != 1) + return -EPERM; + *(int *)table->data = unpriv_enable; + } + + unpriv_ebpf_notify(unpriv_enable); + + return ret; +} + +static struct ctl_table bpf_syscall_table[] = { + { + .procname = "unprivileged_bpf_disabled", + .data = &sysctl_unprivileged_bpf_disabled, + .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), + .mode = 0644, + .proc_handler = bpf_unpriv_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { + .procname = "bpf_stats_enabled", + .data = &bpf_stats_enabled_key.key, + .maxlen = sizeof(bpf_stats_enabled_key), + .mode = 0644, + .proc_handler = bpf_stats_handler, + }, + { } +}; + +static int __init bpf_syscall_sysctl_init(void) +{ + register_sysctl_init("kernel", bpf_syscall_table); + return 0; +} +late_initcall(bpf_syscall_sysctl_init); +#endif /* CONFIG_SYSCTL */ diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index d94696198ef8..8c921799def4 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -99,7 +99,6 @@ static int __task_seq_show(struct seq_file *seq, struct task_struct *task, if (!prog) return 0; - meta.seq = seq; ctx.meta = &meta; ctx.task = task; return bpf_iter_run_prog(prog, &ctx); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9c1a02b82ecd..813f6ee80419 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -245,6 +245,7 @@ struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; bool pkt_access; + u8 release_regno; int regno; int access_size; int mem_size; @@ -257,6 +258,7 @@ struct bpf_call_arg_meta { struct btf *ret_btf; u32 ret_btf_id; u32 subprogno; + struct bpf_map_value_off_desc *kptr_off_desc; }; struct btf *btf_vmlinux; @@ -471,17 +473,6 @@ static bool type_may_be_null(u32 type) return type & PTR_MAYBE_NULL; } -/* Determine whether the function releases some resources allocated by another - * function call. The first reference type argument will be assumed to be - * released by release_reference(). - */ -static bool is_release_function(enum bpf_func_id func_id) -{ - return func_id == BPF_FUNC_sk_release || - func_id == BPF_FUNC_ringbuf_submit || - func_id == BPF_FUNC_ringbuf_discard; -} - static bool may_be_acquire_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_lookup_tcp || @@ -499,7 +490,8 @@ static bool is_acquire_function(enum bpf_func_id func_id, if (func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp || func_id == BPF_FUNC_skc_lookup_tcp || - func_id == BPF_FUNC_ringbuf_reserve) + func_id == BPF_FUNC_ringbuf_reserve || + func_id == BPF_FUNC_kptr_xchg) return true; if (func_id == BPF_FUNC_map_lookup_elem && @@ -575,6 +567,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env, strncpy(prefix, "user_", 32); if (type & MEM_PERCPU) strncpy(prefix, "percpu_", 32); + if (type & PTR_UNTRUSTED) + strncpy(prefix, "untrusted_", 32); snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); @@ -3211,7 +3205,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, return 0; } -enum stack_access_src { +enum bpf_access_src { ACCESS_DIRECT = 1, /* the access is performed by an instruction */ ACCESS_HELPER = 2, /* the access is performed by a helper */ }; @@ -3219,7 +3213,7 @@ enum stack_access_src { static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size, bool zero_size_allowed, - enum stack_access_src type, + enum bpf_access_src type, struct bpf_call_arg_meta *meta); static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) @@ -3469,9 +3463,175 @@ static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, return 0; } +static int __check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + bool fixed_off_ok) +{ + /* Access to this pointer-typed register or passing it to a helper + * is only allowed in its original, unmodified form. + */ + + if (reg->off < 0) { + verbose(env, "negative offset %s ptr R%d off=%d disallowed\n", + reg_type_str(env, reg->type), regno, reg->off); + return -EACCES; + } + + if (!fixed_off_ok && reg->off) { + verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", + reg_type_str(env, reg->type), regno, reg->off); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "variable %s access var_off=%s disallowed\n", + reg_type_str(env, reg->type), tn_buf); + return -EACCES; + } + + return 0; +} + +int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) +{ + return __check_ptr_off_reg(env, reg, regno, false); +} + +static int map_kptr_match_type(struct bpf_verifier_env *env, + struct bpf_map_value_off_desc *off_desc, + struct bpf_reg_state *reg, u32 regno) +{ + const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id); + int perm_flags = PTR_MAYBE_NULL; + const char *reg_name = ""; + + /* Only unreferenced case accepts untrusted pointers */ + if (off_desc->type == BPF_KPTR_UNREF) + perm_flags |= PTR_UNTRUSTED; + + if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags)) + goto bad_type; + + if (!btf_is_kernel(reg->btf)) { + verbose(env, "R%d must point to kernel BTF\n", regno); + return -EINVAL; + } + /* We need to verify reg->type and reg->btf, before accessing reg->btf */ + reg_name = kernel_type_name(reg->btf, reg->btf_id); + + /* For ref_ptr case, release function check should ensure we get one + * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the + * normal store of unreferenced kptr, we must ensure var_off is zero. + * Since ref_ptr cannot be accessed directly by BPF insns, checks for + * reg->off and reg->ref_obj_id are not needed here. + */ + if (__check_ptr_off_reg(env, reg, regno, true)) + return -EACCES; + + /* A full type match is needed, as BTF can be vmlinux or module BTF, and + * we also need to take into account the reg->off. + * + * We want to support cases like: + * + * struct foo { + * struct bar br; + * struct baz bz; + * }; + * + * struct foo *v; + * v = func(); // PTR_TO_BTF_ID + * val->foo = v; // reg->off is zero, btf and btf_id match type + * val->bar = &v->br; // reg->off is still zero, but we need to retry with + * // first member type of struct after comparison fails + * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked + * // to match type + * + * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off + * is zero. We must also ensure that btf_struct_ids_match does not walk + * the struct to match type against first member of struct, i.e. reject + * second case from above. Hence, when type is BPF_KPTR_REF, we set + * strict mode to true for type match. + */ + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, + off_desc->kptr.btf, off_desc->kptr.btf_id, + off_desc->type == BPF_KPTR_REF)) + goto bad_type; + return 0; +bad_type: + verbose(env, "invalid kptr access, R%d type=%s%s ", regno, + reg_type_str(env, reg->type), reg_name); + verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name); + if (off_desc->type == BPF_KPTR_UNREF) + verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED), + targ_name); + else + verbose(env, "\n"); + return -EINVAL; +} + +static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, + int value_regno, int insn_idx, + struct bpf_map_value_off_desc *off_desc) +{ + struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; + int class = BPF_CLASS(insn->code); + struct bpf_reg_state *val_reg; + + /* Things we already checked for in check_map_access and caller: + * - Reject cases where variable offset may touch kptr + * - size of access (must be BPF_DW) + * - tnum_is_const(reg->var_off) + * - off_desc->offset == off + reg->var_off.value + */ + /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */ + if (BPF_MODE(insn->code) != BPF_MEM) { + verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n"); + return -EACCES; + } + + /* We only allow loading referenced kptr, since it will be marked as + * untrusted, similar to unreferenced kptr. + */ + if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) { + verbose(env, "store to referenced kptr disallowed\n"); + return -EACCES; + } + + if (class == BPF_LDX) { + val_reg = reg_state(env, value_regno); + /* We can simply mark the value_regno receiving the pointer + * value from map as PTR_TO_BTF_ID, with the correct type. + */ + mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf, + off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED); + /* For mark_ptr_or_null_reg */ + val_reg->id = ++env->id_gen; + } else if (class == BPF_STX) { + val_reg = reg_state(env, value_regno); + if (!register_is_null(val_reg) && + map_kptr_match_type(env, off_desc, val_reg, value_regno)) + return -EACCES; + } else if (class == BPF_ST) { + if (insn->imm) { + verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n", + off_desc->offset); + return -EACCES; + } + } else { + verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n"); + return -EACCES; + } + return 0; +} + /* check read/write into a map element with possible variable offset */ static int check_map_access(struct bpf_verifier_env *env, u32 regno, - int off, int size, bool zero_size_allowed) + int off, int size, bool zero_size_allowed, + enum bpf_access_src src) { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_func_state *state = vstate->frame[vstate->curframe]; @@ -3507,6 +3667,36 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, return -EACCES; } } + if (map_value_has_kptrs(map)) { + struct bpf_map_value_off *tab = map->kptr_off_tab; + int i; + + for (i = 0; i < tab->nr_off; i++) { + u32 p = tab->off[i].offset; + + if (reg->smin_value + off < p + sizeof(u64) && + p < reg->umax_value + off + size) { + if (src != ACCESS_DIRECT) { + verbose(env, "kptr cannot be accessed indirectly by helper\n"); + return -EACCES; + } + if (!tnum_is_const(reg->var_off)) { + verbose(env, "kptr access cannot have variable offset\n"); + return -EACCES; + } + if (p != off + reg->var_off.value) { + verbose(env, "kptr access misaligned expected=%u off=%llu\n", + p, off + reg->var_off.value); + return -EACCES; + } + if (size != bpf_size_to_bytes(BPF_DW)) { + verbose(env, "kptr access size must be BPF_DW\n"); + return -EACCES; + } + break; + } + } + } return err; } @@ -3980,44 +4170,6 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, } #endif -static int __check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno, - bool fixed_off_ok) -{ - /* Access to this pointer-typed register or passing it to a helper - * is only allowed in its original, unmodified form. - */ - - if (reg->off < 0) { - verbose(env, "negative offset %s ptr R%d off=%d disallowed\n", - reg_type_str(env, reg->type), regno, reg->off); - return -EACCES; - } - - if (!fixed_off_ok && reg->off) { - verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", - reg_type_str(env, reg->type), regno, reg->off); - return -EACCES; - } - - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable %s access var_off=%s disallowed\n", - reg_type_str(env, reg->type), tn_buf); - return -EACCES; - } - - return 0; -} - -int check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno) -{ - return __check_ptr_off_reg(env, reg, regno, false); -} - static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg, @@ -4224,6 +4376,12 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (ret < 0) return ret; + /* If this is an untrusted pointer, all pointers formed by walking it + * also inherit the untrusted flag. + */ + if (type_flag(reg->type) & PTR_UNTRUSTED) + flag |= PTR_UNTRUSTED; + if (atype == BPF_READ && value_regno >= 0) mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); @@ -4316,7 +4474,7 @@ static int check_stack_slot_within_bounds(int off, static int check_stack_access_within_bounds( struct bpf_verifier_env *env, int regno, int off, int access_size, - enum stack_access_src src, enum bpf_access_type type) + enum bpf_access_src src, enum bpf_access_type type) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; @@ -4412,6 +4570,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_MAP_VALUE) { + struct bpf_map_value_off_desc *kptr_off_desc = NULL; + if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into map\n", value_regno); @@ -4420,8 +4580,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_map_access_type(env, regno, off, size, t); if (err) return err; - err = check_map_access(env, regno, off, size, false); - if (!err && t == BPF_READ && value_regno >= 0) { + err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT); + if (err) + return err; + if (tnum_is_const(reg->var_off)) + kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr, + off + reg->var_off.value); + if (kptr_off_desc) { + err = check_map_kptr_access(env, regno, value_regno, insn_idx, + kptr_off_desc); + } else if (t == BPF_READ && value_regno >= 0) { struct bpf_map *map = reg->map_ptr; /* if map is read-only, track its contents as scalars */ @@ -4724,7 +4892,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i static int check_stack_range_initialized( struct bpf_verifier_env *env, int regno, int off, int access_size, bool zero_size_allowed, - enum stack_access_src type, struct bpf_call_arg_meta *meta) + enum bpf_access_src type, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *reg = reg_state(env, regno); struct bpf_func_state *state = func(env, reg); @@ -4874,7 +5042,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, BPF_READ)) return -EACCES; return check_map_access(env, regno, reg->off, access_size, - zero_size_allowed); + zero_size_allowed, ACCESS_HELPER); case PTR_TO_MEM: if (type_is_rdonly_mem(reg->type)) { if (meta && meta->raw_mode) { @@ -5163,6 +5331,53 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, return 0; } +static int process_kptr_func(struct bpf_verifier_env *env, int regno, + struct bpf_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_map_value_off_desc *off_desc; + struct bpf_map *map_ptr = reg->map_ptr; + u32 kptr_off; + int ret; + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. kptr has to be at the constant offset\n", + regno); + return -EINVAL; + } + if (!map_ptr->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", + map_ptr->name); + return -EINVAL; + } + if (!map_value_has_kptrs(map_ptr)) { + ret = PTR_ERR(map_ptr->kptr_off_tab); + if (ret == -E2BIG) + verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name, + BPF_MAP_VALUE_OFF_MAX); + else if (ret == -EEXIST) + verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name); + else + verbose(env, "map '%s' has no valid kptr\n", map_ptr->name); + return -EINVAL; + } + + meta->map_ptr = map_ptr; + kptr_off = reg->off + reg->var_off.value; + off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off); + if (!off_desc) { + verbose(env, "off=%d doesn't point to kptr\n", kptr_off); + return -EACCES; + } + if (off_desc->type != BPF_KPTR_REF) { + verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off); + return -EACCES; + } + meta->kptr_off_desc = off_desc; + return 0; +} + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { return base_type(type) == ARG_PTR_TO_MEM || @@ -5186,6 +5401,11 @@ static bool arg_type_is_int_ptr(enum bpf_arg_type type) type == ARG_PTR_TO_LONG; } +static bool arg_type_is_release(enum bpf_arg_type type) +{ + return type & OBJ_RELEASE; +} + static int int_ptr_type_to_size(enum bpf_arg_type type) { if (type == ARG_PTR_TO_INT) @@ -5298,6 +5518,7 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -5325,11 +5546,13 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_STACK] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, + [ARG_PTR_TO_KPTR] = &kptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, - const u32 *arg_btf_id) + const u32 *arg_btf_id, + struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; enum bpf_reg_type expected, type = reg->type; @@ -5374,6 +5597,13 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, found: if (reg->type == PTR_TO_BTF_ID) { + /* For bpf_sk_release, it needs to match against first member + * 'struct sock_common', hence make an exception for it. This + * allows bpf_sk_release to work for multiple socket types. + */ + bool strict_type_match = arg_type_is_release(arg_type) && + meta->func_id != BPF_FUNC_sk_release; + if (!arg_btf_id) { if (!compatible->btf_id) { verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); @@ -5382,8 +5612,12 @@ found: arg_btf_id = compatible->btf_id; } - if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, - btf_vmlinux, *arg_btf_id)) { + if (meta->func_id == BPF_FUNC_kptr_xchg) { + if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno)) + return -EACCES; + } else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, + btf_vmlinux, *arg_btf_id, + strict_type_match)) { verbose(env, "R%d is of type %s but %s is expected\n", regno, kernel_type_name(reg->btf, reg->btf_id), kernel_type_name(btf_vmlinux, *arg_btf_id)); @@ -5396,11 +5630,10 @@ found: int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type, - bool is_release_func) + enum bpf_arg_type arg_type) { - bool fixed_off_ok = false, release_reg; enum bpf_reg_type type = reg->type; + bool fixed_off_ok = false; switch ((u32)type) { case SCALAR_VALUE: @@ -5418,7 +5651,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, /* Some of the argument types nevertheless require a * zero register offset. */ - if (arg_type != ARG_PTR_TO_ALLOC_MEM) + if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM) return 0; break; /* All the rest must be rejected, except PTR_TO_BTF_ID which allows @@ -5426,19 +5659,17 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, */ case PTR_TO_BTF_ID: /* When referenced PTR_TO_BTF_ID is passed to release function, - * it's fixed offset must be 0. We rely on the property that - * only one referenced register can be passed to BPF helpers and - * kfuncs. In the other cases, fixed offset can be non-zero. + * it's fixed offset must be 0. In the other cases, fixed offset + * can be non-zero. */ - release_reg = is_release_func && reg->ref_obj_id; - if (release_reg && reg->off) { + if (arg_type_is_release(arg_type) && reg->off) { verbose(env, "R%d must have zero offset when passed to release func\n", regno); return -EINVAL; } - /* For release_reg == true, fixed_off_ok must be false, but we - * already checked and rejected reg->off != 0 above, so set to - * true to allow fixed offset for all other cases. + /* For arg is release pointer, fixed_off_ok must be false, but + * we already checked and rejected reg->off != 0 above, so set + * to true to allow fixed offset for all other cases. */ fixed_off_ok = true; break; @@ -5493,18 +5724,28 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, */ goto skip_type_check; - err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]); + err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta); if (err) return err; - err = check_func_arg_reg_off(env, reg, regno, arg_type, is_release_function(meta->func_id)); + err = check_func_arg_reg_off(env, reg, regno, arg_type); if (err) return err; skip_type_check: - /* check_func_arg_reg_off relies on only one referenced register being - * allowed for BPF helpers. - */ + if (arg_type_is_release(arg_type)) { + if (!reg->ref_obj_id && !register_is_null(reg)) { + verbose(env, "R%d must be referenced when passed to release function\n", + regno); + return -EINVAL; + } + if (meta->release_regno) { + verbose(env, "verifier internal error: more than one release argument\n"); + return -EFAULT; + } + meta->release_regno = regno; + } + if (reg->ref_obj_id) { if (meta->ref_obj_id) { verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", @@ -5642,7 +5883,8 @@ skip_type_check: } err = check_map_access(env, regno, reg->off, - map->value_size - reg->off, false); + map->value_size - reg->off, false, + ACCESS_HELPER); if (err) return err; @@ -5658,6 +5900,9 @@ skip_type_check: verbose(env, "string is not zero-terminated\n"); return -EINVAL; } + } else if (arg_type == ARG_PTR_TO_KPTR) { + if (process_kptr_func(env, regno, meta)) + return -EACCES; } return err; @@ -6000,17 +6245,18 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) int i; for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { - if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) return false; - if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) return false; } return true; } -static int check_func_proto(const struct bpf_func_proto *fn, int func_id) +static int check_func_proto(const struct bpf_func_proto *fn, int func_id, + struct bpf_call_arg_meta *meta) { return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && @@ -6694,7 +6940,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; - err = check_func_proto(fn, func_id); + err = check_func_proto(fn, func_id, &meta); if (err) { verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id); @@ -6727,8 +6973,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return err; } - if (is_release_function(func_id)) { - err = release_reference(env, meta.ref_obj_id); + regs = cur_regs(env); + + if (meta.release_regno) { + err = -EINVAL; + if (meta.ref_obj_id) + err = release_reference(env, meta.ref_obj_id); + /* meta.ref_obj_id can only be 0 if register that is meant to be + * released is NULL, which must be > R0. + */ + else if (register_is_null(®s[meta.release_regno])) + err = 0; if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", func_id_name(func_id), func_id); @@ -6736,8 +6991,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } } - regs = cur_regs(env); - switch (func_id) { case BPF_FUNC_tail_call: err = check_reference_leak(env); @@ -6861,21 +7114,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].btf_id = meta.ret_btf_id; } } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { + struct btf *ret_btf; int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; - ret_btf_id = *fn->ret_btf_id; + if (func_id == BPF_FUNC_kptr_xchg) { + ret_btf = meta.kptr_off_desc->kptr.btf; + ret_btf_id = meta.kptr_off_desc->kptr.btf_id; + } else { + ret_btf = btf_vmlinux; + ret_btf_id = *fn->ret_btf_id; + } if (ret_btf_id == 0) { verbose(env, "invalid return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id); return -EINVAL; } - /* current BPF helper definitions are only coming from - * built-in code with type IDs from vmlinux BTF - */ - regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].btf = ret_btf; regs[BPF_REG_0].btf_id = ret_btf_id; } else { verbose(env, "unknown return type %u of func %s#%d\n", @@ -7462,7 +7719,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env, return -EACCES; break; case PTR_TO_MAP_VALUE: - if (check_map_access(env, dst, dst_reg->off, 1, false)) { + if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) { verbose(env, "R%d pointer arithmetic of map value goes out of range, " "prohibited for !root\n", dst); return -EACCES; @@ -12851,7 +13108,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (!ctx_access) continue; - switch (env->insn_aux_data[i + delta].ptr_type) { + switch ((int)env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) continue; @@ -12868,6 +13125,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) convert_ctx_access = bpf_xdp_sock_convert_ctx_access; break; case PTR_TO_BTF_ID: + case PTR_TO_BTF_ID | PTR_UNTRUSTED: if (type == BPF_READ) { insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 830aaf8ca08e..47139877f62d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -62,7 +62,6 @@ #include <linux/binfmts.h> #include <linux/sched/sysctl.h> #include <linux/kexec.h> -#include <linux/bpf.h> #include <linux/mount.h> #include <linux/userfaultfd_k.h> #include <linux/latencytop.h> @@ -148,66 +147,6 @@ static const int max_extfrag_threshold = 1000; #endif /* CONFIG_SYSCTL */ -#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL) -static int bpf_stats_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct static_key *key = (struct static_key *)table->data; - static int saved_val; - int val, ret; - struct ctl_table tmp = { - .data = &val, - .maxlen = sizeof(val), - .mode = table->mode, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }; - - if (write && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - mutex_lock(&bpf_stats_enabled_mutex); - val = saved_val; - ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); - if (write && !ret && val != saved_val) { - if (val) - static_key_slow_inc(key); - else - static_key_slow_dec(key); - saved_val = val; - } - mutex_unlock(&bpf_stats_enabled_mutex); - return ret; -} - -void __weak unpriv_ebpf_notify(int new_state) -{ -} - -static int bpf_unpriv_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret, unpriv_enable = *(int *)table->data; - bool locked_state = unpriv_enable == 1; - struct ctl_table tmp = *table; - - if (write && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - tmp.data = &unpriv_enable; - ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); - if (write && !ret) { - if (locked_state && unpriv_enable != 1) - return -EPERM; - *(int *)table->data = unpriv_enable; - } - - unpriv_ebpf_notify(unpriv_enable); - - return ret; -} -#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ - /* * /proc/sys support */ @@ -2299,24 +2238,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_ONE, }, #endif -#ifdef CONFIG_BPF_SYSCALL - { - .procname = "unprivileged_bpf_disabled", - .data = &sysctl_unprivileged_bpf_disabled, - .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), - .mode = 0644, - .proc_handler = bpf_unpriv_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO, - }, - { - .procname = "bpf_stats_enabled", - .data = &bpf_stats_enabled_key.key, - .maxlen = sizeof(bpf_stats_enabled_key), - .mode = 0644, - .proc_handler = bpf_stats_handler, - }, -#endif #if defined(CONFIG_TREE_RCU) { .procname = "panic_on_rcu_stall", diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b26f3da943de..f15b826f9899 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -129,7 +129,10 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) * out of events when it was updated in between this and the * rcu_dereference() which is accepted risk. */ - ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run); + rcu_read_lock(); + ret = bpf_prog_run_array(rcu_dereference(call->prog_array), + ctx, bpf_prog_run); + rcu_read_unlock(); out: __this_cpu_dec(bpf_prog_active); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index e7b9c2636d10..7a1579c91432 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -550,8 +550,13 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) return sk; } +struct prog_test_member1 { + int a; +}; + struct prog_test_member { - u64 c; + struct prog_test_member1 m; + int c; }; struct prog_test_ref_kfunc { @@ -576,6 +581,12 @@ bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) return &prog_test_struct; } +noinline struct prog_test_member * +bpf_kfunc_call_memb_acquire(void) +{ + return &prog_test_struct.memb; +} + noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { } @@ -584,6 +595,16 @@ noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } +noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) +{ +} + +noinline struct prog_test_ref_kfunc * +bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) +{ + return &prog_test_struct; +} + struct prog_test_pass1 { int x0; struct { @@ -667,8 +688,11 @@ BTF_ID(func, bpf_kfunc_call_test1) BTF_ID(func, bpf_kfunc_call_test2) BTF_ID(func, bpf_kfunc_call_test3) BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_memb_acquire) BTF_ID(func, bpf_kfunc_call_test_release) BTF_ID(func, bpf_kfunc_call_memb_release) +BTF_ID(func, bpf_kfunc_call_memb1_release) +BTF_ID(func, bpf_kfunc_call_test_kptr_get) BTF_ID(func, bpf_kfunc_call_test_pass_ctx) BTF_ID(func, bpf_kfunc_call_test_pass1) BTF_ID(func, bpf_kfunc_call_test_pass2) @@ -682,17 +706,26 @@ BTF_SET_END(test_sk_check_kfunc_ids) BTF_SET_START(test_sk_acquire_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_memb_acquire) +BTF_ID(func, bpf_kfunc_call_test_kptr_get) BTF_SET_END(test_sk_acquire_kfunc_ids) BTF_SET_START(test_sk_release_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test_release) BTF_ID(func, bpf_kfunc_call_memb_release) +BTF_ID(func, bpf_kfunc_call_memb1_release) BTF_SET_END(test_sk_release_kfunc_ids) BTF_SET_START(test_sk_ret_null_kfunc_ids) BTF_ID(func, bpf_kfunc_call_test_acquire) +BTF_ID(func, bpf_kfunc_call_memb_acquire) +BTF_ID(func, bpf_kfunc_call_test_kptr_get) BTF_SET_END(test_sk_ret_null_kfunc_ids) +BTF_SET_START(test_sk_kptr_acquire_kfunc_ids) +BTF_ID(func, bpf_kfunc_call_test_kptr_get) +BTF_SET_END(test_sk_kptr_acquire_kfunc_ids) + static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) { @@ -1579,14 +1612,36 @@ out: static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { .owner = THIS_MODULE, - .check_set = &test_sk_check_kfunc_ids, - .acquire_set = &test_sk_acquire_kfunc_ids, - .release_set = &test_sk_release_kfunc_ids, - .ret_null_set = &test_sk_ret_null_kfunc_ids, + .check_set = &test_sk_check_kfunc_ids, + .acquire_set = &test_sk_acquire_kfunc_ids, + .release_set = &test_sk_release_kfunc_ids, + .ret_null_set = &test_sk_ret_null_kfunc_ids, + .kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids }; +BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids) +BTF_ID(struct, prog_test_ref_kfunc) +BTF_ID(func, bpf_kfunc_call_test_release) +BTF_ID(struct, prog_test_member) +BTF_ID(func, bpf_kfunc_call_memb_release) + static int __init bpf_prog_test_run_init(void) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); + const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = { + { + .btf_id = bpf_prog_test_dtor_kfunc_ids[0], + .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1] + }, + { + .btf_id = bpf_prog_test_dtor_kfunc_ids[2], + .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3], + }, + }; + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); + return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc, + ARRAY_SIZE(bpf_prog_test_dtor_kfunc), + THIS_MODULE); } late_initcall(bpf_prog_test_run_init); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index e3ac36380520..a25ec93729b9 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata)); + bpf_selem_unlink(SELEM(sdata), true); return 0; } @@ -75,8 +75,8 @@ void bpf_sk_storage_free(struct sock *sk) * sk_storage. */ bpf_selem_unlink_map(selem); - free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage, - selem, true); + free_sk_storage = bpf_selem_unlink_storage_nolock( + sk_storage, selem, true, false); } raw_spin_unlock_bh(&sk_storage->lock); rcu_read_unlock(); @@ -338,7 +338,7 @@ bpf_sk_storage_ptr(void *owner) return &sk->sk_bpf_storage; } -static int sk_storage_map_btf_id; +BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map) const struct bpf_map_ops sk_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, @@ -349,8 +349,7 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_update_elem = bpf_fd_sk_storage_update_elem, .map_delete_elem = bpf_fd_sk_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, - .map_btf_name = "bpf_local_storage_map", - .map_btf_id = &sk_storage_map_btf_id, + .map_btf_id = &sk_storage_map_btf_ids[0], .map_local_storage_charge = bpf_sk_storage_charge, .map_local_storage_uncharge = bpf_sk_storage_uncharge, .map_owner_storage_ptr = bpf_sk_storage_ptr, diff --git a/net/core/filter.c b/net/core/filter.c index 2ed81c48c282..b741b9f7e6a9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1687,7 +1687,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) return -EINVAL; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; @@ -1722,7 +1722,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, { void *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); @@ -6621,7 +6621,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = { .func = bpf_sk_release, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE, }; BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 028a280fbabd..00980e62415b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5602,7 +5602,7 @@ err_free: } EXPORT_SYMBOL(skb_vlan_untag); -int skb_ensure_writable(struct sk_buff *skb, int write_len) +int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) { if (!pskb_may_pull(skb, write_len)) return -ENOMEM; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 2d213c4011db..81d4b4756a02 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -793,7 +793,7 @@ static const struct bpf_iter_seq_info sock_map_iter_seq_info = { .seq_priv_size = sizeof(struct sock_map_seq_info), }; -static int sock_map_btf_id; +BTF_ID_LIST_SINGLE(sock_map_btf_ids, struct, bpf_stab) const struct bpf_map_ops sock_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = sock_map_alloc, @@ -805,8 +805,7 @@ const struct bpf_map_ops sock_map_ops = { .map_lookup_elem = sock_map_lookup, .map_release_uref = sock_map_release_progs, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_stab", - .map_btf_id = &sock_map_btf_id, + .map_btf_id = &sock_map_btf_ids[0], .iter_seq_info = &sock_map_iter_seq_info, }; @@ -1385,7 +1384,7 @@ static const struct bpf_iter_seq_info sock_hash_iter_seq_info = { .seq_priv_size = sizeof(struct sock_hash_seq_info), }; -static int sock_hash_map_btf_id; +BTF_ID_LIST_SINGLE(sock_hash_map_btf_ids, struct, bpf_shtab) const struct bpf_map_ops sock_hash_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = sock_hash_alloc, @@ -1397,8 +1396,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, - .map_btf_name = "bpf_shtab", - .map_btf_id = &sock_hash_map_btf_id, + .map_btf_id = &sock_hash_map_btf_ids[0], .iter_seq_info = &sock_hash_iter_seq_info, }; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 2c34caee0fd1..040c73345b7c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -184,7 +184,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) xsk_xdp = xsk_buff_alloc(xs->pool); if (!xsk_xdp) { xs->rx_dropped++; - return -ENOSPC; + return -ENOMEM; } xsk_copy_xdp(xsk_xdp, xdp, len); @@ -217,7 +217,7 @@ static bool xsk_is_bound(struct xdp_sock *xs) static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp) { if (!xsk_is_bound(xs)) - return -EINVAL; + return -ENXIO; if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 801cda5d1938..a794410989cc 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -263,7 +263,7 @@ static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max) static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) { - return xskq_cons_nb_entries(q, cnt) >= cnt ? true : false; + return xskq_cons_nb_entries(q, cnt) >= cnt; } static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr) @@ -382,7 +382,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q, u32 idx; if (xskq_prod_is_full(q)) - return -ENOSPC; + return -ENOBUFS; /* A, matches D */ idx = q->cached_prod++ & q->ring_mask; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 65b53fb3de13..acc8e52a4f5f 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -9,6 +9,7 @@ #include <net/xdp_sock.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/btf_ids.h> #include "xsk.h" @@ -254,7 +255,7 @@ static bool xsk_map_meta_equal(const struct bpf_map *meta0, bpf_map_meta_equal(meta0, meta1); } -static int xsk_map_btf_id; +BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map) const struct bpf_map_ops xsk_map_ops = { .map_meta_equal = xsk_map_meta_equal, .map_alloc = xsk_map_alloc, @@ -266,7 +267,6 @@ const struct bpf_map_ops xsk_map_ops = { .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, - .map_btf_name = "xsk_map", - .map_btf_id = &xsk_map_btf_id, + .map_btf_id = &xsk_map_btf_ids[0], .map_redirect = xsk_map_redirect, }; diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c index 96675985e9e0..ab90bb08a2b4 100644 --- a/samples/bpf/cpustat_user.c +++ b/samples/bpf/cpustat_user.c @@ -13,7 +13,6 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/time.h> -#include <sys/resource.h> #include <sys/wait.h> #include <bpf/bpf.h> diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 1fe5bcafb3bc..516fbac28b71 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -34,7 +34,6 @@ #include <stdio.h> #include <stdlib.h> #include <assert.h> -#include <sys/resource.h> #include <sys/time.h> #include <unistd.h> #include <errno.h> @@ -46,7 +45,6 @@ #include <bpf/bpf.h> #include <getopt.h> -#include "bpf_rlimit.h" #include "cgroup_helpers.h" #include "hbm.h" #include "bpf_util.h" @@ -510,5 +508,8 @@ int main(int argc, char **argv) prog = argv[optind]; printf("HBM prog: %s\n", prog != NULL ? prog : "NULL"); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + return run_bpf_prog(prog, cg_id); } diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c index 0746ca516097..d074c978aac7 100644 --- a/samples/bpf/ibumad_user.c +++ b/samples/bpf/ibumad_user.c @@ -19,7 +19,6 @@ #include <sys/types.h> #include <limits.h> -#include <sys/resource.h> #include <getopt.h> #include <net/if.h> diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index e69651a6902f..b6fc174ab1f2 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -13,7 +13,6 @@ #include <signal.h> #include <string.h> #include <time.h> -#include <sys/resource.h> #include <arpa/inet.h> #include <errno.h> diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 73a986876c1a..b6eedcb98fb9 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -8,7 +8,6 @@ #include <linux/perf_event.h> #include <errno.h> #include <stdbool.h> -#include <sys/resource.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> #include "trace_helpers.h" diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 6a3fd369d3fc..2c18471336f0 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -7,7 +7,6 @@ #include "sock_example.h" #include <unistd.h> #include <arpa/inet.h> -#include <sys/resource.h> struct pair { __u64 packets; diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 6ae99ecc766c..cd6fa79df900 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -6,7 +6,6 @@ #include "sock_example.h" #include <unistd.h> #include <arpa/inet.h> -#include <sys/resource.h> struct flow_key_record { __be32 src; diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 0d7e1e5a8658..aadac14f748a 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -3,7 +3,6 @@ #include <unistd.h> #include <string.h> #include <assert.h> -#include <sys/resource.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> #include "trace_helpers.h" diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index c55383068384..7a788bb837fc 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -8,7 +8,6 @@ #include <string.h> #include <linux/perf_event.h> #include <errno.h> -#include <sys/resource.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index c9a0ca8351fd..424718c0872c 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -10,7 +10,6 @@ #include <fcntl.h> #include <linux/bpf.h> #include <sys/ioctl.h> -#include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> #include <linux/perf_event.h> diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index 75e877853596..be98ccb4952f 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -13,7 +13,6 @@ #include <sched.h> #include <sys/wait.h> #include <sys/stat.h> -#include <sys/resource.h> #include <fcntl.h> #include <stdlib.h> #include <time.h> diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index 472d65c70354..e8b4cc184ac9 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -2,7 +2,6 @@ /* * Copyright (c) 2017 Facebook */ -#include <sys/resource.h> #include <sys/socket.h> #include <arpa/inet.h> #include <stdint.h> diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index 4821f9d99c1f..88717f8ec6ac 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -16,7 +16,6 @@ #include <linux/bpf.h> #include <string.h> #include <time.h> -#include <sys/resource.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 1626d51dfffd..dd6205c6b6a7 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -4,7 +4,6 @@ #include <stdlib.h> #include <signal.h> #include <string.h> -#include <sys/resource.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index 33e16ba39f25..d5eebace31e6 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -7,7 +7,6 @@ #include <unistd.h> #include <stdbool.h> #include <string.h> -#include <sys/resource.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index 566e6440e8c2..227b05a0bc88 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -8,7 +8,6 @@ #include <stdbool.h> #include <string.h> #include <time.h> -#include <sys/resource.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index 08dfdc77ad2a..e910dc265c31 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -7,7 +7,6 @@ #include <sys/prctl.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include <sys/resource.h> #include "trace_helpers.h" #ifdef __mips__ diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 28296f40c133..8e83bf2a84a4 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -8,7 +8,6 @@ #include <stdio.h> #include <stdlib.h> #include <sys/ioctl.h> -#include <sys/resource.h> #include <sys/time.h> #include <sys/types.h> #include <sys/wait.h> diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 631f0cabe139..ac370e638fa3 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -11,7 +11,6 @@ #include <string.h> #include <unistd.h> #include <libgen.h> -#include <sys/resource.h> #include <net/if.h> #include "bpf_util.h" @@ -161,7 +160,7 @@ int main(int argc, char **argv) } prog_id = info.id; - poll_stats(map_fd, 2); + poll_stats(map_fd, 1); return 0; } diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index b3f6e49676ed..167646077c8f 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -14,7 +14,6 @@ #include <stdlib.h> #include <string.h> #include <net/if.h> -#include <sys/resource.h> #include <arpa/inet.h> #include <netinet/ether.h> #include <unistd.h> diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index fb9391a5ec62..58015eb2ffae 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -17,7 +17,6 @@ static const char *__doc_err_only__= #include <ctype.h> #include <unistd.h> #include <locale.h> -#include <sys/resource.h> #include <getopt.h> #include <net/if.h> #include <time.h> diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 5f74a70a9021..a12381c37d2b 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -21,7 +21,6 @@ static const char *__doc__ = #include <string.h> #include <unistd.h> #include <locale.h> -#include <sys/resource.h> #include <sys/sysinfo.h> #include <getopt.h> #include <net/if.h> diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c index 315314716121..9e24f2705b67 100644 --- a/samples/bpf/xdp_redirect_map_multi_user.c +++ b/samples/bpf/xdp_redirect_map_multi_user.c @@ -15,7 +15,6 @@ static const char *__doc__ = #include <net/if.h> #include <unistd.h> #include <libgen.h> -#include <sys/resource.h> #include <sys/ioctl.h> #include <sys/types.h> #include <sys/socket.h> diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 7af5b07a7523..8663dd631b6e 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -18,7 +18,6 @@ static const char *__doc__ = #include <unistd.h> #include <libgen.h> #include <getopt.h> -#include <sys/resource.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> #include "bpf_util.h" diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index f32bbd5c32bf..294fc15ad1cb 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -22,7 +22,6 @@ #include <sys/syscall.h> #include "bpf_util.h" #include <bpf/libbpf.h> -#include <sys/resource.h> #include <libgen.h> #include <getopt.h> #include <pthread.h> diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index f2d90cba5164..05a24a712d7d 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -14,7 +14,6 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n" #include <string.h> #include <unistd.h> #include <locale.h> -#include <sys/resource.h> #include <getopt.h> #include <net/if.h> #include <time.h> diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 0a2b3e997aed..7df7163239ac 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -12,7 +12,6 @@ #include <signal.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> -#include <sys/resource.h> #include <libgen.h> #include <linux/if_link.h> diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index c4332d068b91..158682852162 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -25,7 +25,6 @@ #include <string.h> #include <sys/ioctl.h> #include <sys/mman.h> -#include <sys/resource.h> #include <sys/signalfd.h> #include <sys/sysinfo.h> #include <sys/timerfd.h> diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 2e811e4331cc..307baef6861a 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -10,7 +10,6 @@ #include <stdlib.h> #include <string.h> #include <net/if.h> -#include <sys/resource.h> #include <arpa/inet.h> #include <netinet/ether.h> #include <unistd.h> diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 6f3fe30ad283..be7d2572e3e6 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -25,7 +25,6 @@ #include <string.h> #include <sys/capability.h> #include <sys/mman.h> -#include <sys/resource.h> #include <sys/socket.h> #include <sys/types.h> #include <sys/un.h> @@ -1886,7 +1885,6 @@ int main(int argc, char **argv) { struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 }; struct __user_cap_data_struct data[2] = { { 0 } }; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; bool rx = false, tx = false; struct sched_param schparam; struct xsk_umem_info *umem; @@ -1917,11 +1915,8 @@ int main(int argc, char **argv) data[1].effective, data[1].inheritable, data[1].permitted); } } else { - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", - strerror(errno)); - exit(EXIT_FAILURE); - } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); if (opt_num_xsks > 1) load_xdp_program(argv, &obj); diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c index 2220509588a0..2324e18ccc7e 100644 --- a/samples/bpf/xsk_fwd.c +++ b/samples/bpf/xsk_fwd.c @@ -10,7 +10,6 @@ #include <stdlib.h> #include <string.h> #include <sys/mman.h> -#include <sys/resource.h> #include <sys/socket.h> #include <sys/types.h> #include <time.h> @@ -131,7 +130,6 @@ static struct bpool * bpool_init(struct bpool_params *params, struct xsk_umem_config *umem_cfg) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved; u64 slabs_size, slabs_reserved_size; u64 buffers_size, buffers_reserved_size; @@ -140,9 +138,8 @@ bpool_init(struct bpool_params *params, u8 *p; int status; - /* mmap prep. */ - if (setrlimit(RLIMIT_MEMLOCK, &r)) - return NULL; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); /* bpool internals dimensioning. */ n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) / diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 0c1e06cf50b9..c740142c24d8 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -17,7 +17,6 @@ #include <linux/magic.h> #include <net/if.h> #include <sys/mount.h> -#include <sys/resource.h> #include <sys/stat.h> #include <sys/vfs.h> @@ -119,13 +118,6 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } -void set_max_rlimit(void) -{ - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; - - setrlimit(RLIMIT_MEMLOCK, &rinf); -} - static int mnt_fs(const char *target, const char *type, char *buff, size_t bufflen) { diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index f041c4a6a1f2..be130e35462f 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -1136,8 +1136,6 @@ static int do_probe(int argc, char **argv) __u32 ifindex = 0; char *ifname; - set_max_rlimit(); - while (argc) { if (is_prefix(*argv, "kernel")) { if (target != COMPONENT_UNSPEC) { diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index e81227761f5d..9062ef2b8767 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -507,9 +507,9 @@ int main(int argc, char **argv) * It will still be rejected if users use LIBBPF_STRICT_ALL * mode for loading generated skeleton. */ - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); - if (ret) - p_err("failed to enable libbpf strict mode: %d", ret); + libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); + } else { + libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK); } argc -= optind; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 6e9277ffc68c..aa99ffab451a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -102,8 +102,6 @@ int detect_common_prefix(const char *arg, ...); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __noreturn; -void set_max_rlimit(void); - int mount_tracefs(const char *target); struct obj_ref { diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index c26378f20831..877387ef79c7 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1342,8 +1342,6 @@ static int do_create(int argc, char **argv) goto exit; } - set_max_rlimit(); - fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index 50de087b0db7..226ec2c39052 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -11,7 +11,7 @@ #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> -#include <ftw.h> +#include <dirent.h> #include <bpf/bpf.h> @@ -147,81 +147,83 @@ static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type, } } -static int show_proc(const char *fpath, const struct stat *sb, - int tflag, struct FTW *ftwbuf) +static int show_proc(void) { + struct dirent *proc_de, *pid_fd_de; __u64 probe_offset, probe_addr; __u32 len, prog_id, fd_type; - int err, pid = 0, fd = 0; + DIR *proc, *pid_fd; + int err, pid, fd; const char *pch; char buf[4096]; - /* prefix always /proc */ - pch = fpath + 5; - if (*pch == '\0') - return 0; + proc = opendir("/proc"); + if (!proc) + return -1; - /* pid should be all numbers */ - pch++; - while (isdigit(*pch)) { - pid = pid * 10 + *pch - '0'; - pch++; - } - if (*pch == '\0') - return 0; - if (*pch != '/') - return FTW_SKIP_SUBTREE; - - /* check /proc/<pid>/fd directory */ - pch++; - if (strncmp(pch, "fd", 2)) - return FTW_SKIP_SUBTREE; - pch += 2; - if (*pch == '\0') - return 0; - if (*pch != '/') - return FTW_SKIP_SUBTREE; - - /* check /proc/<pid>/fd/<fd_num> */ - pch++; - while (isdigit(*pch)) { - fd = fd * 10 + *pch - '0'; - pch++; - } - if (*pch != '\0') - return FTW_SKIP_SUBTREE; + while ((proc_de = readdir(proc))) { + pid = 0; + pch = proc_de->d_name; - /* query (pid, fd) for potential perf events */ - len = sizeof(buf); - err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type, - &probe_offset, &probe_addr); - if (err < 0) - return 0; + /* pid should be all numbers */ + while (isdigit(*pch)) { + pid = pid * 10 + *pch - '0'; + pch++; + } + if (*pch != '\0') + continue; - if (json_output) - print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset, - probe_addr); - else - print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset, - probe_addr); + err = snprintf(buf, sizeof(buf), "/proc/%s/fd", proc_de->d_name); + if (err < 0 || err >= (int)sizeof(buf)) + continue; + + pid_fd = opendir(buf); + if (!pid_fd) + continue; + while ((pid_fd_de = readdir(pid_fd))) { + fd = 0; + pch = pid_fd_de->d_name; + + /* fd should be all numbers */ + while (isdigit(*pch)) { + fd = fd * 10 + *pch - '0'; + pch++; + } + if (*pch != '\0') + continue; + + /* query (pid, fd) for potential perf events */ + len = sizeof(buf); + err = bpf_task_fd_query(pid, fd, 0, buf, &len, + &prog_id, &fd_type, + &probe_offset, &probe_addr); + if (err < 0) + continue; + + if (json_output) + print_perf_json(pid, fd, prog_id, fd_type, buf, + probe_offset, probe_addr); + else + print_perf_plain(pid, fd, prog_id, fd_type, buf, + probe_offset, probe_addr); + } + closedir(pid_fd); + } + closedir(proc); return 0; } static int do_show(int argc, char **argv) { - int flags = FTW_ACTIONRETVAL | FTW_PHYS; - int err = 0, nopenfd = 16; + int err; if (!has_perf_query_support()) return -1; if (json_output) jsonw_start_array(json_wtr); - if (nftw("/proc", show_proc, nopenfd, flags) == -1) { - p_err("%s", strerror(errno)); - err = -1; - } + err = show_proc(); if (json_output) jsonw_end_array(json_wtr); diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index bb6c969a114a..e2d00d3cd868 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -108,7 +108,6 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) p_err("failed to create hashmap for PID references"); return -1; } - set_max_rlimit(); skel = pid_iter_bpf__open(); if (!skel) { diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 8643b37d4e43..5c2c63df92e8 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1604,8 +1604,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } } - set_max_rlimit(); - if (verifier_logs) /* log_level1 + log_level2 + stats, but not stable UAPI */ open_opts.kernel_log_level = 1 + 2 + 4; @@ -2303,7 +2301,6 @@ static int do_profile(int argc, char **argv) } } - set_max_rlimit(); err = profiler_bpf__load(profile_obj); if (err) { p_err("failed to load profile_obj"); diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index e08a6ff2866c..2535f079ed67 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -501,8 +501,6 @@ static int do_register(int argc, char **argv) if (libbpf_get_error(obj)) return -1; - set_max_rlimit(); - if (bpf_object__load(obj)) { bpf_object__close(obj); return -1; diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c index e80a5c79b38f..bf1f02212797 100644 --- a/tools/bpf/bpftool/tracelog.c +++ b/tools/bpf/bpftool/tracelog.c @@ -9,7 +9,7 @@ #include <string.h> #include <unistd.h> #include <linux/magic.h> -#include <sys/fcntl.h> +#include <fcntl.h> #include <sys/vfs.h> #include "main.h" diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index d78f4148597f..83c5993a139a 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -4,7 +4,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/resource.h> #include <time.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> @@ -88,16 +87,6 @@ int libbpf_print_fn(enum libbpf_print_level level, return vfprintf(stderr, format, args); } -static int bump_memlock_rlimit(void) -{ - struct rlimit rlim_new = { - .rlim_cur = RLIM_INFINITY, - .rlim_max = RLIM_INFINITY, - }; - - return setrlimit(RLIMIT_MEMLOCK, &rlim_new); -} - void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { const struct runq_event *e = data; @@ -133,11 +122,8 @@ int main(int argc, char **argv) libbpf_set_print(libbpf_print_fn); - err = bump_memlock_rlimit(); - if (err) { - fprintf(stderr, "failed to increase rlimit: %d", err); - return 1; - } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); obj = runqslower_bpf__open(); if (!obj) { diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h index 39acc149d843..d7dfeab0d71a 100644 --- a/tools/include/uapi/asm/bpf_perf_event.h +++ b/tools/include/uapi/asm/bpf_perf_event.h @@ -1,5 +1,7 @@ #if defined(__aarch64__) #include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" +#elif defined(__arc__) +#include "../../arch/arc/include/uapi/asm/bpf_perf_event.h" #elif defined(__s390__) #include "../../arch/s390/include/uapi/asm/bpf_perf_event.h" #elif defined(__riscv) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d14b10b85e51..444fe6f1cf35 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5143,6 +5143,17 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. *ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5350,7 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(kptr_xchg), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index cf27251adb92..a9d292c106c2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -817,7 +817,7 @@ int bpf_link_create(int prog_fd, int target_fd, { __u32 target_btf_id, iter_info_len; union bpf_attr attr; - int fd; + int fd, err; if (!OPTS_VALID(opts, bpf_link_create_opts)) return libbpf_err(-EINVAL); @@ -870,7 +870,37 @@ int bpf_link_create(int prog_fd, int target_fd, } proceed: fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); - return libbpf_err_errno(fd); + if (fd >= 0) + return fd; + /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry + * and other similar programs + */ + err = -errno; + if (err != -EINVAL) + return libbpf_err(err); + + /* if user used features not supported by + * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately + */ + if (attr.link_create.target_fd || attr.link_create.target_btf_id) + return libbpf_err(err); + if (!OPTS_ZEROED(opts, sz)) + return libbpf_err(err); + + /* otherwise, for few select kinds of programs that can be + * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as + * a fallback for older kernels + */ + switch (attach_type) { + case BPF_TRACE_RAW_TP: + case BPF_LSM_MAC: + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + return bpf_raw_tracepoint_open(NULL, prog_fd); + default: + return libbpf_err(err); + } } int bpf_link_detach(int link_fd) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 44df982d2a5c..5de3eb267125 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -149,6 +149,13 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) +#if __has_attribute(btf_type_tag) +#define __kptr __attribute__((btf_type_tag("kptr"))) +#define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) +#else +#define __kptr +#define __kptr_ref +#endif #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index e3a8c947e89f..01ce121c302d 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -27,6 +27,9 @@ #elif defined(__TARGET_ARCH_riscv) #define bpf_target_riscv #define bpf_target_defined +#elif defined(__TARGET_ARCH_arc) + #define bpf_target_arc + #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -54,6 +57,9 @@ #elif defined(__riscv) && __riscv_xlen == 64 #define bpf_target_riscv #define bpf_target_defined +#elif defined(__arc__) + #define bpf_target_arc + #define bpf_target_defined #endif /* no compiler target */ #endif @@ -233,6 +239,23 @@ struct pt_regs___arm64 { /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ #define PT_REGS_SYSCALL_REGS(ctx) ctx +#elif defined(bpf_target_arc) + +/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */ +#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) +#define __PT_PARM1_REG scratch.r0 +#define __PT_PARM2_REG scratch.r1 +#define __PT_PARM3_REG scratch.r2 +#define __PT_PARM4_REG scratch.r3 +#define __PT_PARM5_REG scratch.r4 +#define __PT_RET_REG scratch.blink +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG scratch.r0 +#define __PT_SP_REG scratch.sp +#define __PT_IP_REG scratch.ret +/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx + #endif #if defined(bpf_target_defined) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d124e9e533f0..bb1e06eb1eca 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2626,6 +2626,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; + size_t sec_cnt = 0; /* The start of the info sec (including the __u32 record_size). */ void *info; @@ -2689,8 +2690,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - total_record_size = sec_hdrlen + - (__u64)num_records * record_size; + total_record_size = sec_hdrlen + (__u64)num_records * record_size; if (info_left < total_record_size) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); @@ -2699,12 +2699,14 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, info_left -= total_record_size; sinfo = (void *)sinfo + total_record_size; + sec_cnt++; } ext_info = ext_sec->ext_info; ext_info->len = ext_sec->len - sizeof(__u32); ext_info->rec_size = record_size; ext_info->info = info + sizeof(__u32); + ext_info->sec_cnt = sec_cnt; return 0; } @@ -2788,6 +2790,9 @@ void btf_ext__free(struct btf_ext *btf_ext) { if (IS_ERR_OR_NULL(btf_ext)) return; + free(btf_ext->func_info.sec_idxs); + free(btf_ext->line_info.sec_idxs); + free(btf_ext->core_relo_info.sec_idxs); free(btf_ext->data); free(btf_ext); } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 465b7c0996f1..73a5192defb3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -302,7 +302,7 @@ struct bpf_program { void *priv; bpf_program_clear_priv_t clear_priv; - bool load; + bool autoload; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; @@ -672,7 +672,18 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->insns_cnt = prog->sec_insn_cnt; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->load = true; + + /* libbpf's convention for SEC("?abc...") is that it's just like + * SEC("abc...") but the corresponding bpf_program starts out with + * autoload set to false. + */ + if (sec_name[0] == '?') { + prog->autoload = false; + /* from now on forget there was ? in section name */ + sec_name++; + } else { + prog->autoload = true; + } prog->instances.fds = NULL; prog->instances.nr = -1; @@ -1222,10 +1233,8 @@ static void bpf_object__elf_finish(struct bpf_object *obj) if (!obj->efile.elf) return; - if (obj->efile.elf) { - elf_end(obj->efile.elf); - obj->efile.elf = NULL; - } + elf_end(obj->efile.elf); + obj->efile.elf = NULL; obj->efile.symbols = NULL; obj->efile.st_ops_data = NULL; @@ -2756,6 +2765,9 @@ static int bpf_object__init_btf(struct bpf_object *obj, btf__set_pointer_size(obj->btf, 8); } if (btf_ext_data) { + struct btf_ext_info *ext_segs[3]; + int seg_num, sec_num; + if (!obj->btf) { pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", BTF_EXT_ELF_SEC, BTF_ELF_SEC); @@ -2769,6 +2781,43 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = NULL; goto out; } + + /* setup .BTF.ext to ELF section mapping */ + ext_segs[0] = &obj->btf_ext->func_info; + ext_segs[1] = &obj->btf_ext->line_info; + ext_segs[2] = &obj->btf_ext->core_relo_info; + for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { + struct btf_ext_info *seg = ext_segs[seg_num]; + const struct btf_ext_info_sec *sec; + const char *sec_name; + Elf_Scn *scn; + + if (seg->sec_cnt == 0) + continue; + + seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); + if (!seg->sec_idxs) { + err = -ENOMEM; + goto out; + } + + sec_num = 0; + for_each_btf_ext_sec(seg, sec) { + /* preventively increment index to avoid doing + * this before every continue below + */ + sec_num++; + + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) + continue; + scn = elf_sec_by_name(obj, sec_name); + if (!scn) + continue; + + seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); + } + } } out: if (err && libbpf_needs_btf(obj)) { @@ -2927,7 +2976,7 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) } bpf_object__for_each_program(prog, obj) { - if (!prog->load) + if (!prog->autoload) continue; if (prog_needs_vmlinux_btf(prog)) return true; @@ -4594,7 +4643,7 @@ static int probe_kern_probe_read_kernel(void) }; int fd, insn_cnt = ARRAY_SIZE(insns); - fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); return probe_fd(fd); } @@ -5577,6 +5626,22 @@ static int record_relo_core(struct bpf_program *prog, return 0; } +static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) +{ + struct reloc_desc *relo; + int i; + + for (i = 0; i < prog->nr_reloc; i++) { + relo = &prog->reloc_desc[i]; + if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) + continue; + + return relo->core_relo; + } + + return NULL; +} + static int bpf_core_resolve_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, @@ -5633,7 +5698,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) struct bpf_program *prog; struct bpf_insn *insn; const char *sec_name; - int i, err = 0, insn_idx, sec_idx; + int i, err = 0, insn_idx, sec_idx, sec_num; if (obj->btf_ext->core_relo_info.len == 0) return 0; @@ -5654,32 +5719,18 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) } seg = &obj->btf_ext->core_relo_info; + sec_num = 0; for_each_btf_ext_sec(seg, sec) { + sec_idx = seg->sec_idxs[sec_num]; + sec_num++; + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { err = -EINVAL; goto out; } - /* bpf_object's ELF is gone by now so it's not easy to find - * section index by section name, but we can find *any* - * bpf_program within desired section name and use it's - * prog->sec_idx to do a proper search by section index and - * instruction offset - */ - prog = NULL; - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (strcmp(prog->sec_name, sec_name) == 0) - break; - } - if (!prog) { - pr_warn("sec '%s': failed to find a BPF program\n", sec_name); - return -ENOENT; - } - sec_idx = prog->sec_idx; - pr_debug("sec '%s': found %d CO-RE relocations\n", - sec_name, sec->num_info); + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { if (rec->insn_off % BPF_INSN_SZ) @@ -5702,7 +5753,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) /* no need to apply CO-RE relocation if the program is * not going to be loaded */ - if (!prog->load) + if (!prog->autoload) continue; /* adjust insn_idx from section frame of reference to the local @@ -5714,16 +5765,16 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) return -EINVAL; insn = &prog->insns[insn_idx]; - if (prog->obj->gen_loader) { - err = record_relo_core(prog, rec, insn_idx); - if (err) { - pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", - prog->name, i, err); - goto out; - } - continue; + err = record_relo_core(prog, rec, insn_idx); + if (err) { + pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", + prog->name, i, err); + goto out; } + if (prog->obj->gen_loader) + continue; + err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", @@ -5863,14 +5914,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj, void *rec, *rec_end, *new_prog_info; const struct btf_ext_info_sec *sec; size_t old_sz, new_sz; - const char *sec_name; - int i, off_adj; + int i, sec_num, sec_idx, off_adj; + sec_num = 0; for_each_btf_ext_sec(ext_info, sec) { - sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); - if (!sec_name) - return -EINVAL; - if (strcmp(sec_name, prog->sec_name) != 0) + sec_idx = ext_info->sec_idxs[sec_num]; + sec_num++; + if (prog->sec_idx != sec_idx) continue; for_each_btf_ext_rec(ext_info, sec, i, rec) { @@ -6265,7 +6315,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) if (err) return err; - return 0; } @@ -6326,8 +6375,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err); return err; } - if (obj->gen_loader) - bpf_object__sort_relos(obj); + bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6363,7 +6411,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_calls(obj, prog); @@ -6378,7 +6426,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_data(obj, prog); if (err) { @@ -6387,8 +6435,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - if (!obj->gen_loader) - bpf_object__free_relocs(obj); + return 0; } @@ -6665,6 +6712,8 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, return 0; } +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); + static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, const char *license, __u32 kern_version, @@ -6811,6 +6860,10 @@ retry_load: goto retry_load; ret = -errno; + + /* post-process verifier log to improve error descriptions */ + fixup_verifier_log(prog, log_buf, log_buf_size); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); pr_perm_msg(ret); @@ -6819,10 +6872,6 @@ retry_load: pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", prog->name, log_buf); } - if (insns_cnt >= BPF_MAXINSNS) { - pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", - prog->name, insns_cnt, BPF_MAXINSNS); - } out: if (own_log_buf) @@ -6830,6 +6879,128 @@ out: return ret; } +static char *find_prev_line(char *buf, char *cur) +{ + char *p; + + if (cur == buf) /* end of a log buf */ + return NULL; + + p = cur - 1; + while (p - 1 >= buf && *(p - 1) != '\n') + p--; + + return p; +} + +static void patch_log(char *buf, size_t buf_sz, size_t log_sz, + char *orig, size_t orig_sz, const char *patch) +{ + /* size of the remaining log content to the right from the to-be-replaced part */ + size_t rem_sz = (buf + log_sz) - (orig + orig_sz); + size_t patch_sz = strlen(patch); + + if (patch_sz != orig_sz) { + /* If patch line(s) are longer than original piece of verifier log, + * shift log contents by (patch_sz - orig_sz) bytes to the right + * starting from after to-be-replaced part of the log. + * + * If patch line(s) are shorter than original piece of verifier log, + * shift log contents by (orig_sz - patch_sz) bytes to the left + * starting from after to-be-replaced part of the log + * + * We need to be careful about not overflowing available + * buf_sz capacity. If that's the case, we'll truncate the end + * of the original log, as necessary. + */ + if (patch_sz > orig_sz) { + if (orig + patch_sz >= buf + buf_sz) { + /* patch is big enough to cover remaining space completely */ + patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; + rem_sz = 0; + } else if (patch_sz - orig_sz > buf_sz - log_sz) { + /* patch causes part of remaining log to be truncated */ + rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); + } + } + /* shift remaining log to the right by calculated amount */ + memmove(orig + patch_sz, orig + orig_sz, rem_sz); + } + + memcpy(orig, patch, patch_sz); +} + +static void fixup_log_failed_core_relo(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#195896080 + * line2 -> invalid func unknown#195896080 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. We extract + * instruction index to find corresponding CO-RE relocation and + * replace this part of the log with more relevant information about + * failed CO-RE relocation. + */ + const struct bpf_core_relo *relo; + struct bpf_core_spec spec; + char patch[512], spec_buf[256]; + int insn_idx, err; + + if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) + return; + + relo = find_relo_core(prog, insn_idx); + if (!relo) + return; + + err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); + if (err) + return; + + bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + snprintf(patch, sizeof(patch), + "%d: <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation %s\n", + insn_idx, spec_buf); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) +{ + /* look for familiar error patterns in last N lines of the log */ + const size_t max_last_line_cnt = 10; + char *prev_line, *cur_line, *next_line; + size_t log_sz; + int i; + + if (!buf) + return; + + log_sz = strlen(buf) + 1; + next_line = buf + log_sz - 1; + + for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { + cur_line = find_prev_line(buf, next_line); + if (!cur_line) + return; + + /* failed CO-RE relocation case */ + if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } + } +} + static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; @@ -6975,7 +7146,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) { + if (!prog->autoload) { pr_debug("prog '%s': skipped loading\n", prog->name); continue; } @@ -6984,8 +7155,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } - if (obj->gen_loader) - bpf_object__free_relocs(obj); + + bpf_object__free_relocs(obj); return 0; } @@ -7005,8 +7176,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object continue; } - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); + prog->type = prog->sec_def->prog_type; + prog->expected_attach_type = prog->sec_def->expected_attach_type; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -8455,7 +8626,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) bool bpf_program__autoload(const struct bpf_program *prog) { - return prog->load; + return prog->autoload; } int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) @@ -8463,7 +8634,7 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) if (prog->obj->loaded) return libbpf_err(-EINVAL); - prog->load = autoload; + prog->autoload = autoload; return 0; } @@ -8551,9 +8722,13 @@ enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) return prog->type; } -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) +int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + prog->type = type; + return 0; } static bool bpf_program__is_type(const struct bpf_program *prog, @@ -8567,8 +8742,7 @@ int bpf_program__set_##NAME(struct bpf_program *prog) \ { \ if (!prog) \ return libbpf_err(-EINVAL); \ - bpf_program__set_type(prog, TYPE); \ - return 0; \ + return bpf_program__set_type(prog, TYPE); \ } \ \ bool bpf_program__is_##NAME(const struct bpf_program *prog) \ @@ -8598,10 +8772,14 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program return prog->expected_attach_type; } -void bpf_program__set_expected_attach_type(struct bpf_program *prog, +int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + prog->expected_attach_type = type; + return 0; } __u32 bpf_program__flags(const struct bpf_program *prog) @@ -9671,9 +9849,8 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, * bpf_object__open guessed */ if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { - bpf_program__set_type(prog, attr->prog_type); - bpf_program__set_expected_attach_type(prog, - attach_type); + prog->type = attr->prog_type; + prog->expected_attach_type = attach_type; } if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) { /* @@ -10982,7 +11159,7 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, char resolved_path[512]; struct bpf_object *obj = prog->obj; struct bpf_link *link; - long usdt_cookie; + __u64 usdt_cookie; int err; if (!OPTS_VALID(opts, bpf_uprobe_opts)) @@ -11245,7 +11422,8 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - pfd = bpf_raw_tracepoint_open(NULL, prog_fd); + /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ + pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), NULL); if (pfd < 0) { pfd = -errno; free(link); @@ -11254,7 +11432,7 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro return libbpf_err_ptr(pfd); } link->fd = pfd; - return (struct bpf_link *)link; + return link; } struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) @@ -12665,7 +12843,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) struct bpf_program *prog = *s->progs[i].prog; struct bpf_link **link = s->progs[i].link; - if (!prog->load) + if (!prog->autoload) continue; /* auto-attaching not supported for this program */ diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 63d66f1adf1a..cdbfee60ea3e 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -378,7 +378,31 @@ struct bpf_link; LIBBPF_API struct bpf_link *bpf_link__open(const char *path); LIBBPF_API int bpf_link__fd(const struct bpf_link *link); LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); +/** + * @brief **bpf_link__pin()** pins the BPF link to a file + * in the BPF FS specified by a path. This increments the links + * reference count, allowing it to stay loaded after the process + * which loaded it has exited. + * + * @param link BPF link to pin, must already be loaded + * @param path file path in a BPF file system + * @return 0, on success; negative error code, otherwise + */ + LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); + +/** + * @brief **bpf_link__unpin()** unpins the BPF link from a file + * in the BPFFS specified by a path. This decrements the links + * reference count. + * + * The file pinning the BPF link can also be unlinked by a different + * process in which case this function will return an error. + * + * @param prog BPF program to unpin + * @param path file path to the pin in a BPF file system + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int bpf_link__unpin(struct bpf_link *link); LIBBPF_API int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog); @@ -386,6 +410,22 @@ LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__detach(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); +/** + * @brief **bpf_program__attach()** is a generic function for attaching + * a BPF program based on auto-detection of program type, attach type, + * and extra paremeters, where applicable. + * + * @param prog BPF program to attach + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + * + * This is supported for: + * - kprobe/kretprobe (depends on SEC() definition) + * - uprobe/uretprobe (depends on SEC() definition) + * - tracepoint + * - raw tracepoint + * - tracing programs (typed raw TP/fentry/fexit/fmod_ret) + */ LIBBPF_API struct bpf_link * bpf_program__attach(const struct bpf_program *prog); @@ -686,12 +726,37 @@ LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); -LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, - enum bpf_prog_type type); + +/** + * @brief **bpf_program__set_type()** sets the program + * type of the passed BPF program. + * @param prog BPF program to set the program type for + * @param type program type to set the BPF map to have + * @return error code; or 0 if no error. An error occurs + * if the object is already loaded. + * + * This must be called before the BPF object is loaded, + * otherwise it has no effect and an error is returned. + */ +LIBBPF_API int bpf_program__set_type(struct bpf_program *prog, + enum bpf_prog_type type); LIBBPF_API enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog); -LIBBPF_API void + +/** + * @brief **bpf_program__set_expected_attach_type()** sets the + * attach type of the passed BPF program. This is used for + * auto-detection of attachment when programs are loaded. + * @param prog BPF program to set the attach type for + * @param type attach type to set the BPF map to have + * @return error code; or 0 if no error. An error occurs + * if the object is already loaded. + * + * This must be called before the BPF object is loaded, + * otherwise it has no effect and an error is returned. + */ +LIBBPF_API int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); @@ -707,6 +772,17 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); +/** + * @brief **bpf_program__set_attach_target()** sets BTF-based attach target + * for supported BPF program types: + * - BTF-aware raw tracepoints (tp_btf); + * - fentry/fexit/fmod_ret; + * - lsm; + * - freplace. + * @param prog BPF program to set the attach type for + * @param type attach type to set the BPF map to have + * @return error code; or 0 if no error occurred. + */ LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 080272421f6c..4abdbe2fea9d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -376,6 +376,13 @@ struct btf_ext_info { void *info; __u32 rec_size; __u32 len; + /* optional (maintained internally by libbpf) mapping between .BTF.ext + * section and corresponding ELF section. This is used to join + * information like CO-RE relocation records with corresponding BPF + * programs defined in ELF sections + */ + __u32 *sec_idxs; + int sec_cnt; }; #define for_each_btf_ext_sec(seg, sec) \ @@ -571,6 +578,6 @@ struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, pid_t pid, const char *path, const char *usdt_provider, const char *usdt_name, - long usdt_cookie); + __u64 usdt_cookie); #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index f946f23eab20..ba4453dfd1ed 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -178,29 +178,28 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access * string to specify enumerator's value index that need to be relocated. */ -static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, - __u32 type_id, - const char *spec_str, - enum bpf_core_relo_kind relo_kind, - struct bpf_core_spec *spec) +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec) { int access_idx, parsed_len, i; struct bpf_core_accessor *acc; const struct btf_type *t; - const char *name; + const char *name, *spec_str; __u32 id; __s64 sz; + spec_str = btf__name_by_offset(btf, relo->access_str_off); if (str_is_empty(spec_str) || *spec_str == ':') return -EINVAL; memset(spec, 0, sizeof(*spec)); spec->btf = btf; - spec->root_type_id = type_id; - spec->relo_kind = relo_kind; + spec->root_type_id = relo->type_id; + spec->relo_kind = relo->kind; /* type-based relocations don't have a field access string */ - if (core_relo_is_type_based(relo_kind)) { + if (core_relo_is_type_based(relo->kind)) { if (strcmp(spec_str, "0")) return -EINVAL; return 0; @@ -221,7 +220,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, if (spec->raw_len == 0) return -EINVAL; - t = skip_mods_and_typedefs(btf, type_id, &id); + t = skip_mods_and_typedefs(btf, relo->type_id, &id); if (!t) return -EINVAL; @@ -231,7 +230,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, acc->idx = access_idx; spec->len++; - if (core_relo_is_enumval_based(relo_kind)) { + if (core_relo_is_enumval_based(relo->kind)) { if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) return -EINVAL; @@ -240,7 +239,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, return 0; } - if (!core_relo_is_field_based(relo_kind)) + if (!core_relo_is_field_based(relo->kind)) return -EINVAL; sz = btf__resolve_size(btf, id); @@ -301,7 +300,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, spec->bit_offset += access_idx * sz * 8; } else { pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - prog_name, type_id, spec_str, i, id, btf_kind_str(t)); + prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } @@ -1055,51 +1054,66 @@ poison: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) { const struct btf_type *t; const struct btf_enum *e; const char *s; __u32 type_id; - int i; + int i, len = 0; + +#define append_buf(fmt, args...) \ + ({ \ + int r; \ + r = snprintf(buf, buf_sz, fmt, ##args); \ + len += r; \ + if (r >= buf_sz) \ + r = buf_sz; \ + buf += r; \ + buf_sz -= r; \ + }) type_id = spec->root_type_id; t = btf_type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); + append_buf("<%s> [%u] %s %s", + core_relo_kind_str(spec->relo_kind), + type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); if (core_relo_is_type_based(spec->relo_kind)) - return; + return len; if (core_relo_is_enumval_based(spec->relo_kind)) { t = skip_mods_and_typedefs(spec->btf, type_id, NULL); e = btf_enum(t) + spec->raw_spec[0]; s = btf__name_by_offset(spec->btf, e->name_off); - libbpf_print(level, "::%s = %u", s, e->val); - return; + append_buf("::%s = %u", s, e->val); + return len; } if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); + append_buf(".%s", spec->spec[i].name); else if (i > 0 || spec->spec[i].idx > 0) - libbpf_print(level, "[%u]", spec->spec[i].idx); + append_buf("[%u]", spec->spec[i].idx); } - libbpf_print(level, " ("); + append_buf(" ("); for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); + append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); if (spec->bit_offset % 8) - libbpf_print(level, " @ offset %u.%u)", - spec->bit_offset / 8, spec->bit_offset % 8); + append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8); else - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); - return; + append_buf(" @ offset %u)", spec->bit_offset / 8); + return len; } + + return len; +#undef append_buf } /* @@ -1167,7 +1181,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, const struct btf_type *local_type; const char *local_name; __u32 local_id; - const char *spec_str; + char spec_buf[256]; int i, j, err; local_id = relo->type_id; @@ -1176,24 +1190,20 @@ int bpf_core_calc_relo_insn(const char *prog_name, if (!local_name) return -EINVAL; - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - if (str_is_empty(spec_str)) - return -EINVAL; - - err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, - relo->kind, local_spec); + err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec); if (err) { + const char *spec_str; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), str_is_empty(local_name) ? "<anon>" : local_name, - spec_str, err); + spec_str ?: "<?>", err); return -EINVAL; } - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, - relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec); + pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { @@ -1207,7 +1217,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, } /* libbpf doesn't support candidate search for anonymous types */ - if (str_is_empty(spec_str)) { + if (str_is_empty(local_name)) { pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); return -EOPNOTSUPP; @@ -1217,17 +1227,15 @@ int bpf_core_calc_relo_insn(const char *prog_name, err = bpf_core_spec_match(local_spec, cands->cands[i].btf, cands->cands[i].id, cand_spec); if (err < 0) { - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", - prog_name, relo_idx, i); - bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); - libbpf_print(LIBBPF_WARN, ": %d\n", err); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", + prog_name, relo_idx, i, spec_buf, err); return err; } - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, - relo_idx, err == 0 ? "non-matching" : "matching", i); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf); if (err == 0) continue; diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index a28bf3711ce2..073039d8ca4f 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -84,4 +84,10 @@ int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, int insn_idx, const struct bpf_core_relo *relo, int relo_idx, const struct bpf_core_relo_res *res); +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec); + +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec); + #endif diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index acf2d99a9e77..f1c9339cfbbc 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -10,6 +10,11 @@ #include <linux/ptrace.h> #include <linux/kernel.h> +/* s8 will be marked as poison while it's a reg of riscv */ +#if defined(__riscv) +#define rv_s8 s8 +#endif + #include "bpf.h" #include "libbpf.h" #include "libbpf_common.h" @@ -557,10 +562,10 @@ static int parse_usdt_note(Elf *elf, const char *path, long base_addr, GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, struct usdt_note *usdt_note); -static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie); +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, - const char *usdt_provider, const char *usdt_name, long usdt_cookie, + const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, struct usdt_target **out_targets, size_t *out_target_cnt) { size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; @@ -939,7 +944,7 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, pid_t pid, const char *path, const char *usdt_provider, const char *usdt_name, - long usdt_cookie) + __u64 usdt_cookie) { int i, fd, err, spec_map_fd, ip_map_fd; LIBBPF_OPTS(bpf_uprobe_opts, opts); @@ -1141,7 +1146,7 @@ static int parse_usdt_note(Elf *elf, const char *path, long base_addr, static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); -static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie) +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie) { const char *s; int len; @@ -1324,6 +1329,184 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return len; } +#elif defined(__aarch64__) + +static int calc_pt_regs_off(const char *reg_name) +{ + int reg_num; + + if (sscanf(reg_name, "x%d", ®_num) == 1) { + if (reg_num >= 0 && reg_num < 31) + return offsetof(struct user_pt_regs, regs[reg_num]); + } else if (strcmp(reg_name, "sp") == 0) { + return offsetof(struct user_pt_regs, sp); + } + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, ®_name, &off, &len) == 3) { + /* Memory dereference case, e.g., -4@[sp, 96] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, ®_name, &len) == 2) { + /* Memory dereference case, e.g., -4@[sp] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, ®_name, &len) == 2) { + /* Register read case, e.g., -8@x4 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + +#elif defined(__riscv) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *name; + size_t pt_regs_off; + } reg_map[] = { + { "ra", offsetof(struct user_regs_struct, ra) }, + { "sp", offsetof(struct user_regs_struct, sp) }, + { "gp", offsetof(struct user_regs_struct, gp) }, + { "tp", offsetof(struct user_regs_struct, tp) }, + { "a0", offsetof(struct user_regs_struct, a0) }, + { "a1", offsetof(struct user_regs_struct, a1) }, + { "a2", offsetof(struct user_regs_struct, a2) }, + { "a3", offsetof(struct user_regs_struct, a3) }, + { "a4", offsetof(struct user_regs_struct, a4) }, + { "a5", offsetof(struct user_regs_struct, a5) }, + { "a6", offsetof(struct user_regs_struct, a6) }, + { "a7", offsetof(struct user_regs_struct, a7) }, + { "s0", offsetof(struct user_regs_struct, s0) }, + { "s1", offsetof(struct user_regs_struct, s1) }, + { "s2", offsetof(struct user_regs_struct, s2) }, + { "s3", offsetof(struct user_regs_struct, s3) }, + { "s4", offsetof(struct user_regs_struct, s4) }, + { "s5", offsetof(struct user_regs_struct, s5) }, + { "s6", offsetof(struct user_regs_struct, s6) }, + { "s7", offsetof(struct user_regs_struct, s7) }, + { "s8", offsetof(struct user_regs_struct, rv_s8) }, + { "s9", offsetof(struct user_regs_struct, s9) }, + { "s10", offsetof(struct user_regs_struct, s10) }, + { "s11", offsetof(struct user_regs_struct, s11) }, + { "t0", offsetof(struct user_regs_struct, t0) }, + { "t1", offsetof(struct user_regs_struct, t1) }, + { "t2", offsetof(struct user_regs_struct, t2) }, + { "t3", offsetof(struct user_regs_struct, t3) }, + { "t4", offsetof(struct user_regs_struct, t4) }, + { "t5", offsetof(struct user_regs_struct, t5) }, + { "t6", offsetof(struct user_regs_struct, t6) }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + if (strcmp(reg_name, reg_map[i].name) == 0) + return reg_map[i].pt_regs_off; + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, ®_name, &len) == 3) { + /* Memory dereference case, e.g., -8@-88(s0) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, ®_name, &len) == 2) { + /* Register read case, e.g., -8@a1 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + #else static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index f973320e6dbf..f061cc20e776 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -8,7 +8,6 @@ #include <fcntl.h> #include <pthread.h> #include <sys/sysinfo.h> -#include <sys/resource.h> #include <signal.h> #include "bench.h" #include "testing_helpers.h" diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h deleted file mode 100644 index 9dac9b30f8ef..000000000000 --- a/tools/testing/selftests/bpf/bpf_rlimit.h +++ /dev/null @@ -1,28 +0,0 @@ -#include <sys/resource.h> -#include <stdio.h> - -static __attribute__((constructor)) void bpf_rlimit_ctor(void) -{ - struct rlimit rlim_old, rlim_new = { - .rlim_cur = RLIM_INFINITY, - .rlim_max = RLIM_INFINITY, - }; - - getrlimit(RLIMIT_MEMLOCK, &rlim_old); - /* For the sake of running the test cases, we temporarily - * set rlimit to infinity in order for kernel to focus on - * errors from actual test cases and not getting noise - * from hitting memlock limits. The limit is on per-process - * basis and not a global one, hence destructor not really - * needed here. - */ - if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) { - perror("Unable to lift memlock rlimit"); - /* Trying out lower limit, but expect potential test - * case failures from this! - */ - rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); - rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); - setrlimit(RLIMIT_MEMLOCK, &rlim_new); - } -} diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 87fd1aa323a9..c8be6406777f 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -11,7 +11,6 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "bpf_rlimit.h" #include "flow_dissector_load.h" const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; @@ -25,9 +24,8 @@ static void load_and_attach_program(void) int prog_fd, ret; struct bpf_object *obj; - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - if (ret) - error(1, 0, "failed to enable libbpf strict mode: %d", ret); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name, cfg_map_name, NULL, &prog_fd, NULL); diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index 3a7b82bd9e94..e021cc67dc02 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -20,7 +20,6 @@ #include "cgroup_helpers.h" #include "testing_helpers.h" -#include "bpf_rlimit.h" #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ @@ -67,6 +66,9 @@ int main(int argc, char **argv) if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) return 1; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) goto cleanup_cgroup_env; diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c new file mode 100644 index 000000000000..b17bfa0e0aac --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#include "test_progs.h" +#include "testing_helpers.h" + +static void init_test_filter_set(struct test_filter_set *set) +{ + set->cnt = 0; + set->tests = NULL; +} + +static void free_test_filter_set(struct test_filter_set *set) +{ + int i, j; + + for (i = 0; i < set->cnt; i++) { + for (j = 0; j < set->tests[i].subtest_cnt; j++) + free((void *)set->tests[i].subtests[j]); + free(set->tests[i].subtests); + free(set->tests[i].name); + } + + free(set->tests); + init_test_filter_set(set); +} + +static void test_parse_test_list(void) +{ + struct test_filter_set set; + + init_test_filter_set(&set); + + ASSERT_OK(parse_test_list("arg_parsing", &set, true), "parsing"); + if (!ASSERT_EQ(set.cnt, 1, "test filters count")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "subtest name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("arg_parsing,bpf_cookie", &set, true), + "parsing"); + if (!ASSERT_EQ(set.cnt, 2, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count"); + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("arg_parsing/arg_parsing,bpf_cookie", + &set, + true), + "parsing"); + if (!ASSERT_EQ(set.cnt, 2, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) + goto error; + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]), + "subtest name"); + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("arg_parsing/arg_parsing", &set, true), + "parsing"); + ASSERT_OK(parse_test_list("bpf_cookie", &set, true), "parsing"); + ASSERT_OK(parse_test_list("send_signal", &set, true), "parsing"); + if (!ASSERT_EQ(set.cnt, 3, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) + goto error; + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); + ASSERT_EQ(set.tests[2].subtest_cnt, 0, "subtest filters count"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]), + "subtest name"); + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); + ASSERT_OK(strcmp("send_signal", set.tests[2].name), "test name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("bpf_cookie/trace", &set, false), "parsing"); + if (!ASSERT_EQ(set.cnt, 1, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) + goto error; + ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name"); +error: + free_test_filter_set(&set); +} + +void test_arg_parsing(void) +{ + if (test__start_subtest("test_parse_test_list")) + test_parse_test_list(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 5142a7d130b2..2c403ddc8076 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -1192,8 +1192,6 @@ static void str_strip_first_line(char *str) *dst = '\0'; } -#define min(a, b) ((a) < (b) ? (a) : (b)) - static void test_task_vma(void) { int err, iter_fd = -1, proc_maps_fd = -1; @@ -1229,7 +1227,7 @@ static void test_task_vma(void) len = 0; while (len < CMP_BUFFER_SIZE) { err = read_fd_into_buffer(iter_fd, task_vma_output + len, - min(read_size, CMP_BUFFER_SIZE - len)); + MIN(read_size, CMP_BUFFER_SIZE - len)); if (!err) break; if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index d43f548c572c..a4d0cc9d3367 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -36,13 +36,13 @@ struct test_config { void (*bpf_destroy)(void *); }; -enum test_state { +enum bpf_test_state { _TS_INVALID, TS_MODULE_LOAD, TS_MODULE_LOAD_FAIL, }; -static _Atomic enum test_state state = _TS_INVALID; +static _Atomic enum bpf_test_state state = _TS_INVALID; static int sys_finit_module(int fd, const char *param_values, int flags) { diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 8f7a1cef7d87..e9a9a31b2ffe 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -10,8 +10,6 @@ #include "bpf_tcp_nogpl.skel.h" #include "bpf_dctcp_release.skel.h" -#define min(a, b) ((a) < (b) ? (a) : (b)) - #ifndef ENOTSUPP #define ENOTSUPP 524 #endif @@ -53,7 +51,7 @@ static void *server(void *arg) while (bytes < total_bytes && !READ_ONCE(stop)) { nr_sent = send(fd, &batch, - min(total_bytes - bytes, sizeof(batch)), 0); + MIN(total_bytes - bytes, sizeof(batch)), 0); if (nr_sent == -1 && errno == EINTR) continue; if (nr_sent == -1) { @@ -146,7 +144,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) /* recv total_bytes */ while (bytes < total_bytes && !READ_ONCE(stop)) { nr_recv = recv(fd, &batch, - min(total_bytes - bytes, sizeof(batch)), 0); + MIN(total_bytes - bytes, sizeof(batch)), 0); if (nr_recv == -1 && errno == EINTR) continue; if (nr_recv == -1) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index ec823561b912..ba5bde53d418 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -8,7 +8,6 @@ #include <linux/filter.h> #include <linux/unistd.h> #include <bpf/bpf.h> -#include <sys/resource.h> #include <libelf.h> #include <gelf.h> #include <string.h> @@ -3974,6 +3973,105 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 1, .max_entries = 1, }, +{ + .descr = "type_tag test #2, type tag order", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_CONST_ENC(3), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Type tags don't precede modifiers", +}, +{ + .descr = "type_tag test #3, type tag order", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Type tags don't precede modifiers", +}, +{ + .descr = "type_tag test #4, type tag order", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Type tags don't precede modifiers", +}, +{ + .descr = "type_tag test #5, type tag order", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ + BTF_CONST_ENC(1), /* [3] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ + .descr = "type_tag test #6, type tag order", + .raw_types = { + BTF_PTR_ENC(2), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */ + BTF_PTR_ENC(6), /* [5] */ + BTF_CONST_ENC(2), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Type tags don't precede modifiers", +}, }; /* struct btf_raw_test raw_tests[] */ diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index 3ee2107bbf7a..fe1f0f26ea14 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -53,7 +53,7 @@ void test_fexit_stress(void) &trace_opts); if (!ASSERT_GE(fexit_fd[i], 0, "fexit load")) goto out; - link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]); + link_fd[i] = bpf_link_create(fexit_fd[i], 0, BPF_TRACE_FEXIT, NULL); if (!ASSERT_GE(link_fd[i], 0, "fexit attach")) goto out; } diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c index e1de5f80c3b2..0354f9b82c65 100644 --- a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c +++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c @@ -6,11 +6,10 @@ void test_helper_restricted(void) { int prog_i = 0, prog_cnt; - int duration = 0; do { struct test_helper_restricted *test; - int maybeOK; + int err; test = test_helper_restricted__open(); if (!ASSERT_OK_PTR(test, "open")) @@ -21,12 +20,11 @@ void test_helper_restricted(void) for (int j = 0; j < prog_cnt; ++j) { struct bpf_program *prog = *test->skeleton->progs[j].prog; - maybeOK = bpf_program__set_autoload(prog, prog_i == j); - ASSERT_OK(maybeOK, "set autoload"); + bpf_program__set_autoload(prog, true); } - maybeOK = test_helper_restricted__load(test); - CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted"); + err = test_helper_restricted__load(test); + ASSERT_ERR(err, "load_should_fail"); test_helper_restricted__destroy(test); } while (++prog_i < prog_cnt); diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c index e9916f2817ec..cad664546912 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c @@ -14,6 +14,12 @@ void test_linked_funcs(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + /* handler1 and handler2 are marked as SEC("?raw_tp/sys_enter") and + * are set to not autoload by default + */ + bpf_program__set_autoload(skel->progs.handler1, true); + bpf_program__set_autoload(skel->progs.handler2, true); + skel->rodata->my_tid = syscall(SYS_gettid); skel->bss->syscall_id = SYS_getpgid; diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c new file mode 100644 index 000000000000..be3a956cb3a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <bpf/btf.h> + +#include "test_log_fixup.skel.h" + +enum trunc_type { + TRUNC_NONE, + TRUNC_PARTIAL, + TRUNC_FULL, +}; + +static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type) +{ + char log_buf[8 * 1024]; + struct test_log_fixup* skel; + int err; + + skel = test_log_fixup__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.bad_relo, true); + memset(log_buf, 0, sizeof(log_buf)); + bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf)); + + err = test_log_fixup__load(skel); + if (!ASSERT_ERR(err, "load_fail")) + goto cleanup; + + ASSERT_HAS_SUBSTR(log_buf, + "0: <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation <byte_sz> ", + "log_buf_part1"); + + switch (trunc_type) { + case TRUNC_NONE: + ASSERT_HAS_SUBSTR(log_buf, + "struct task_struct___bad.fake_field (0:1 @ offset 4)\n", + "log_buf_part2"); + ASSERT_HAS_SUBSTR(log_buf, + "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n", + "log_buf_end"); + break; + case TRUNC_PARTIAL: + /* we should get full libbpf message patch */ + ASSERT_HAS_SUBSTR(log_buf, + "struct task_struct___bad.fake_field (0:1 @ offset 4)\n", + "log_buf_part2"); + /* we shouldn't get full end of BPF verifier log */ + ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"), + "log_buf_end"); + break; + case TRUNC_FULL: + /* we shouldn't get second part of libbpf message patch */ + ASSERT_NULL(strstr(log_buf, "struct task_struct___bad.fake_field (0:1 @ offset 4)\n"), + "log_buf_part2"); + /* we shouldn't get full end of BPF verifier log */ + ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"), + "log_buf_end"); + break; + } + + if (env.verbosity > VERBOSE_NONE) + printf("LOG: \n=================\n%s=================\n", log_buf); +cleanup: + test_log_fixup__destroy(skel); +} + +static void bad_core_relo_subprog(void) +{ + char log_buf[8 * 1024]; + struct test_log_fixup* skel; + int err; + + skel = test_log_fixup__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.bad_relo_subprog, true); + bpf_program__set_log_buf(skel->progs.bad_relo_subprog, log_buf, sizeof(log_buf)); + + err = test_log_fixup__load(skel); + if (!ASSERT_ERR(err, "load_fail")) + goto cleanup; + + /* there should be no prog loading log because we specified per-prog log buf */ + ASSERT_HAS_SUBSTR(log_buf, + ": <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation <byte_off> ", + "log_buf"); + ASSERT_HAS_SUBSTR(log_buf, + "struct task_struct___bad.fake_field_subprog (0:2 @ offset 8)\n", + "log_buf"); + + if (env.verbosity > VERBOSE_NONE) + printf("LOG: \n=================\n%s=================\n", log_buf); + +cleanup: + test_log_fixup__destroy(skel); +} + +void test_log_fixup(void) +{ + if (test__start_subtest("bad_core_relo_trunc_none")) + bad_core_relo(0, TRUNC_NONE /* full buf */); + if (test__start_subtest("bad_core_relo_trunc_partial")) + bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); + if (test__start_subtest("bad_core_relo_trunc_full")) + bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */); + if (test__start_subtest("bad_core_relo_subprog")) + bad_core_relo_subprog(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c new file mode 100644 index 000000000000..9e2fbda64a65 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +#include "map_kptr.skel.h" + +void test_map_kptr(void) +{ + struct map_kptr *skel; + int key = 0, ret; + char buf[24]; + + skel = map_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) + return; + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); + ASSERT_OK(ret, "array_map update"); + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); + ASSERT_OK(ret, "array_map update2"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_map), &key, buf, 0); + ASSERT_OK(ret, "hash_map update"); + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_map), &key); + ASSERT_OK(ret, "hash_map delete"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key, buf, 0); + ASSERT_OK(ret, "hash_malloc_map update"); + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key); + ASSERT_OK(ret, "hash_malloc_map delete"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.lru_hash_map), &key, buf, 0); + ASSERT_OK(ret, "lru_hash_map update"); + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.lru_hash_map), &key); + ASSERT_OK(ret, "lru_hash_map delete"); + + map_kptr__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c new file mode 100644 index 000000000000..14f2796076e0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#include "test_progs.h" +#include "testing_helpers.h" + +static void clear_test_state(struct test_state *state) +{ + state->error_cnt = 0; + state->sub_succ_cnt = 0; + state->skip_cnt = 0; +} + +void test_prog_tests_framework(void) +{ + struct test_state *state = env.test_state; + + /* in all the ASSERT calls below we need to return on the first + * error due to the fact that we are cleaning the test state after + * each dummy subtest + */ + + /* test we properly count skipped tests with subtests */ + if (test__start_subtest("test_good_subtest")) + test__end_subtest(); + if (!ASSERT_EQ(state->skip_cnt, 0, "skip_cnt_check")) + return; + if (!ASSERT_EQ(state->error_cnt, 0, "error_cnt_check")) + return; + if (!ASSERT_EQ(state->subtest_num, 1, "subtest_num_check")) + return; + clear_test_state(state); + + if (test__start_subtest("test_skip_subtest")) { + test__skip(); + test__end_subtest(); + } + if (test__start_subtest("test_skip_subtest")) { + test__skip(); + test__end_subtest(); + } + if (!ASSERT_EQ(state->skip_cnt, 2, "skip_cnt_check")) + return; + if (!ASSERT_EQ(state->subtest_num, 3, "subtest_num_check")) + return; + clear_test_state(state); + + if (test__start_subtest("test_fail_subtest")) { + test__fail(); + test__end_subtest(); + } + if (!ASSERT_EQ(state->error_cnt, 1, "error_cnt_check")) + return; + if (!ASSERT_EQ(state->subtest_num, 4, "subtest_num_check")) + return; + clear_test_state(state); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 873323fb18ba..739d2ea6ca55 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -1,21 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -static void toggle_object_autoload_progs(const struct bpf_object *obj, - const char *name_load) -{ - struct bpf_program *prog; - - bpf_object__for_each_program(prog, obj) { - const char *name = bpf_program__name(prog); - - if (!strcmp(name_load, name)) - bpf_program__set_autoload(prog, true); - else - bpf_program__set_autoload(prog, false); - } -} - void test_reference_tracking(void) { const char *file = "test_sk_lookup_kern.o"; @@ -39,6 +24,7 @@ void test_reference_tracking(void) goto cleanup; bpf_object__for_each_program(prog, obj_iter) { + struct bpf_program *p; const char *name; name = bpf_program__name(prog); @@ -49,7 +35,12 @@ void test_reference_tracking(void) if (!ASSERT_OK_PTR(obj, "obj_open_file")) goto cleanup; - toggle_object_autoload_progs(obj, name); + /* all programs are not loaded by default, so just set + * autoload to true for the single prog under test + */ + p = bpf_object__find_program_by_name(obj, name); + bpf_program__set_autoload(p, true); + /* Expect verifier failure if test name has 'err' */ if (strncmp(name, "err_", sizeof("err_") - 1) == 0) { libbpf_print_fn_t old_print_fn; diff --git a/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c new file mode 100644 index 000000000000..d7f83c0a40a5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include "skb_load_bytes.skel.h" + +void test_skb_load_bytes(void) +{ + struct skb_load_bytes *skel; + int err, prog_fd, test_result; + struct __sk_buff skb = { 0 }; + + LIBBPF_OPTS(bpf_test_run_opts, tattr, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + ); + + skel = skb_load_bytes__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.skb_process); + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) + goto out; + + skel->bss->load_offset = (uint32_t)(-1); + err = bpf_prog_test_run_opts(prog_fd, &tattr); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + test_result = skel->bss->test_result; + if (!ASSERT_EQ(test_result, -EFAULT, "offset -1")) + goto out; + + skel->bss->load_offset = (uint32_t)10; + err = bpf_prog_test_run_opts(prog_fd, &tattr); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + test_result = skel->bss->test_result; + if (!ASSERT_EQ(test_result, 0, "offset 10")) + goto out; + +out: + skb_load_bytes__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 394ebfc3bbf3..4be6fdb78c6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -83,8 +83,6 @@ cleanup: test_snprintf__destroy(skel); } -#define min(a, b) ((a) < (b) ? (a) : (b)) - /* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */ static int load_single_snprintf(char *fmt) { @@ -95,7 +93,7 @@ static int load_single_snprintf(char *fmt) if (!skel) return -EINVAL; - memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10)); + memcpy(skel->rodata->fmt, fmt, MIN(strlen(fmt) + 1, 10)); ret = test_snprintf_single__load(skel); test_snprintf_single__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 7ad66a247c02..958dae769c52 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -949,7 +949,6 @@ fail: return -1; } -#define MAX(a, b) ((a) > (b) ? (a) : (b)) enum { SRC_TO_TARGET = 0, TARGET_TO_SRC = 1, diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c index b57a3009465f..7ddd6615b7e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c +++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c @@ -44,16 +44,12 @@ static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name, static void test_strncmp_ret(void) { struct strncmp_test *skel; - struct bpf_program *prog; int err, got; skel = strncmp_test__open(); if (!ASSERT_OK_PTR(skel, "strncmp_test open")) return; - bpf_object__for_each_program(prog, skel->obj) - bpf_program__set_autoload(prog, false); - bpf_program__set_autoload(skel->progs.do_strncmp, true); err = strncmp_test__load(skel); @@ -91,18 +87,13 @@ out: static void test_strncmp_bad_not_const_str_size(void) { struct strncmp_test *skel; - struct bpf_program *prog; int err; skel = strncmp_test__open(); if (!ASSERT_OK_PTR(skel, "strncmp_test open")) return; - bpf_object__for_each_program(prog, skel->obj) - bpf_program__set_autoload(prog, false); - - bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, - true); + bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, true); err = strncmp_test__load(skel); ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size"); @@ -113,18 +104,13 @@ static void test_strncmp_bad_not_const_str_size(void) static void test_strncmp_bad_writable_target(void) { struct strncmp_test *skel; - struct bpf_program *prog; int err; skel = strncmp_test__open(); if (!ASSERT_OK_PTR(skel, "strncmp_test open")) return; - bpf_object__for_each_program(prog, skel->obj) - bpf_program__set_autoload(prog, false); - - bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, - true); + bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, true); err = strncmp_test__load(skel); ASSERT_ERR(err, "strncmp_test load bad_writable_target"); @@ -135,18 +121,13 @@ static void test_strncmp_bad_writable_target(void) static void test_strncmp_bad_not_null_term_target(void) { struct strncmp_test *skel; - struct bpf_program *prog; int err; skel = strncmp_test__open(); if (!ASSERT_OK_PTR(skel, "strncmp_test open")) return; - bpf_object__for_each_program(prog, skel->obj) - bpf_program__set_autoload(prog, false); - - bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, - true); + bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, true); err = strncmp_test__load(skel); ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target"); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c index d6003dc8cc99..35b87c7ba5be 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c @@ -34,7 +34,6 @@ void test_uprobe_autoattach(void) /* trigger & validate shared library u[ret]probes attached by name */ mem = malloc(malloc_sz); - free(mem); ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1"); ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran"); @@ -44,6 +43,8 @@ void test_uprobe_autoattach(void) ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran"); ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc"); ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran"); + + free(mem); cleanup: test_uprobe_autoattach__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c index f5ca142abf8f..dd9b30a0f0fc 100644 --- a/tools/testing/selftests/bpf/progs/exhandler_kern.c +++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c @@ -7,6 +7,8 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) + char _license[] SEC("license") = "GPL"; unsigned int exception_triggered; @@ -37,7 +39,16 @@ int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags) */ work = task->task_works; func = work->func; - if (!work && !func) - exception_triggered++; + /* Currently verifier will fail for `btf_ptr |= btf_ptr` * instruction. + * To workaround the issue, use barrier_var() and rewrite as below to + * prevent compiler from generating verifier-unfriendly code. + */ + barrier_var(work); + if (work) + return 0; + barrier_var(func); + if (func) + return 0; + exception_triggered++; return 0; } diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index 963b393c37e8..b05571bc67d5 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -61,12 +61,17 @@ extern int set_output_val2(int x); /* here we'll force set_output_ctx2() to be __hidden in the final obj file */ __hidden extern void set_output_ctx2(__u64 *ctx); -SEC("raw_tp/sys_enter") +SEC("?raw_tp/sys_enter") int BPF_PROG(handler1, struct pt_regs *regs, long id) { + static volatile int whatever; + if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) return 0; + /* make sure we have CO-RE relocations in main program */ + whatever = bpf_core_type_size(struct task_struct); + set_output_val2(1000); set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */ diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index db195872f4eb..ee7e3848ee4f 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -61,12 +61,17 @@ extern int set_output_val1(int x); /* here we'll force set_output_ctx1() to be __hidden in the final obj file */ __hidden extern void set_output_ctx1(__u64 *ctx); -SEC("raw_tp/sys_enter") +SEC("?raw_tp/sys_enter") int BPF_PROG(handler2, struct pt_regs *regs, long id) { + static volatile int whatever; + if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) return 0; + /* make sure we have CO-RE relocations in main program */ + whatever = bpf_core_type_size(struct task_struct); + set_output_val1(2000); set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */ diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c new file mode 100644 index 000000000000..1b0e0409eaa5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct map_value { + struct prog_test_ref_kfunc __kptr *unref_ptr; + struct prog_test_ref_kfunc __kptr_ref *ref_ptr; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} array_map SEC(".maps"); + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} hash_map SEC(".maps"); + +struct hash_malloc_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); + __uint(map_flags, BPF_F_NO_PREALLOC); +} hash_malloc_map SEC(".maps"); + +struct lru_hash_map { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} lru_hash_map SEC(".maps"); + +#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \ + struct { \ + __uint(type, map_type); \ + __uint(max_entries, 1); \ + __uint(key_size, sizeof(int)); \ + __uint(value_size, sizeof(int)); \ + __array(values, struct inner_map_type); \ + } name SEC(".maps") = { \ + .values = { [0] = &inner_map_type }, \ + } + +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); + +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern struct prog_test_ref_kfunc * +bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; + +static void test_kptr_unref(struct map_value *v) +{ + struct prog_test_ref_kfunc *p; + + p = v->unref_ptr; + /* store untrusted_ptr_or_null_ */ + v->unref_ptr = p; + if (!p) + return; + if (p->a + p->b > 100) + return; + /* store untrusted_ptr_ */ + v->unref_ptr = p; + /* store NULL */ + v->unref_ptr = NULL; +} + +static void test_kptr_ref(struct map_value *v) +{ + struct prog_test_ref_kfunc *p; + + p = v->ref_ptr; + /* store ptr_or_null_ */ + v->unref_ptr = p; + if (!p) + return; + if (p->a + p->b > 100) + return; + /* store NULL */ + p = bpf_kptr_xchg(&v->ref_ptr, NULL); + if (!p) + return; + if (p->a + p->b > 100) { + bpf_kfunc_call_test_release(p); + return; + } + /* store ptr_ */ + v->unref_ptr = p; + bpf_kfunc_call_test_release(p); + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return; + /* store ptr_ */ + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (!p) + return; + if (p->a + p->b > 100) { + bpf_kfunc_call_test_release(p); + return; + } + bpf_kfunc_call_test_release(p); +} + +static void test_kptr_get(struct map_value *v) +{ + struct prog_test_ref_kfunc *p; + + p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); + if (!p) + return; + if (p->a + p->b > 100) { + bpf_kfunc_call_test_release(p); + return; + } + bpf_kfunc_call_test_release(p); +} + +static void test_kptr(struct map_value *v) +{ + test_kptr_unref(v); + test_kptr_ref(v); + test_kptr_get(v); +} + +SEC("tc") +int test_map_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + int i, key = 0; + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return 0; \ + test_kptr(v) + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + +#undef TEST + return 0; +} + +SEC("tc") +int test_map_in_map_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + int i, key = 0; + void *map; + +#define TEST(map_in_map) \ + map = bpf_map_lookup_elem(&map_in_map, &key); \ + if (!map) \ + return 0; \ + v = bpf_map_lookup_elem(map, &key); \ + if (!v) \ + return 0; \ + test_kptr(v) + + TEST(array_of_array_maps); + TEST(array_of_hash_maps); + TEST(array_of_hash_malloc_maps); + TEST(array_of_lru_hash_maps); + TEST(hash_of_array_maps); + TEST(hash_of_hash_maps); + TEST(hash_of_hash_malloc_maps); + TEST(hash_of_lru_hash_maps); + +#undef TEST + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 1ed28882daf3..5d3dc4d66d47 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -299,7 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) #ifdef NO_UNROLL #pragma clang loop unroll(disable) #else +#ifdef UNROLL_COUNT +#pragma clang loop unroll_count(UNROLL_COUNT) +#else #pragma clang loop unroll(full) +#endif #endif /* NO_UNROLL */ /* Unwind python stack */ for (int i = 0; i < STACK_MAX_LEN; ++i) { diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c index cb49b89e37cd..ce1aa5189cc4 100644 --- a/tools/testing/selftests/bpf/progs/pyperf600.c +++ b/tools/testing/selftests/bpf/progs/pyperf600.c @@ -1,9 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 600 -/* clang will not unroll the loop 600 times. - * Instead it will unroll it to the amount it deemed - * appropriate, but the loop will still execute 600 times. - * Total program size is around 90k insns +/* Full unroll of 600 iterations will have total + * program size close to 298k insns and this may + * cause BPF_JMP insn out of 16-bit integer range. + * So limit the unroll size to 150 so the + * total program size is around 80k insns but + * the loop will still execute 600 times. */ +#define UNROLL_COUNT 150 #include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/skb_load_bytes.c b/tools/testing/selftests/bpf/progs/skb_load_bytes.c new file mode 100644 index 000000000000..e4252fd973be --- /dev/null +++ b/tools/testing/selftests/bpf/progs/skb_load_bytes.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u32 load_offset = 0; +int test_result = 0; + +SEC("tc") +int skb_process(struct __sk_buff *skb) +{ + char buf[16]; + + test_result = bpf_skb_load_bytes(skb, load_offset, buf, 10); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c index 900d930d48a8..769668feed48 100644 --- a/tools/testing/selftests/bpf/progs/strncmp_test.c +++ b/tools/testing/selftests/bpf/progs/strncmp_test.c @@ -19,7 +19,7 @@ unsigned int no_const_str_size = STRNCMP_STR_SZ; char _license[] SEC("license") = "GPL"; -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int do_strncmp(void *ctx) { if ((bpf_get_current_pid_tgid() >> 32) != target_pid) @@ -29,7 +29,7 @@ int do_strncmp(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int strncmp_bad_not_const_str_size(void *ctx) { /* The value of string size is not const, so will fail */ @@ -37,7 +37,7 @@ int strncmp_bad_not_const_str_size(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int strncmp_bad_writable_target(void *ctx) { /* Compared target is not read-only, so will fail */ @@ -45,7 +45,7 @@ int strncmp_bad_writable_target(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int strncmp_bad_not_null_term_target(void *ctx) { /* Compared target is not null-terminated, so will fail */ diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c index 68d64c365f90..20ef9d433b97 100644 --- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c +++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c @@ -56,7 +56,7 @@ static void spin_lock_work(void) } } -SEC("raw_tp/sys_enter") +SEC("?raw_tp/sys_enter") int raw_tp_timer(void *ctx) { timer_work(); @@ -64,7 +64,7 @@ int raw_tp_timer(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int tp_timer(void *ctx) { timer_work(); @@ -72,7 +72,7 @@ int tp_timer(void *ctx) return 0; } -SEC("kprobe/sys_nanosleep") +SEC("?kprobe/sys_nanosleep") int kprobe_timer(void *ctx) { timer_work(); @@ -80,7 +80,7 @@ int kprobe_timer(void *ctx) return 0; } -SEC("perf_event") +SEC("?perf_event") int perf_event_timer(void *ctx) { timer_work(); @@ -88,7 +88,7 @@ int perf_event_timer(void *ctx) return 0; } -SEC("raw_tp/sys_enter") +SEC("?raw_tp/sys_enter") int raw_tp_spin_lock(void *ctx) { spin_lock_work(); @@ -96,7 +96,7 @@ int raw_tp_spin_lock(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int tp_spin_lock(void *ctx) { spin_lock_work(); @@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx) return 0; } -SEC("kprobe/sys_nanosleep") +SEC("?kprobe/sys_nanosleep") int kprobe_spin_lock(void *ctx) { spin_lock_work(); @@ -112,7 +112,7 @@ int kprobe_spin_lock(void *ctx) return 0; } -SEC("perf_event") +SEC("?perf_event") int perf_event_spin_lock(void *ctx) { spin_lock_work(); diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c new file mode 100644 index 000000000000..a78980d897b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +struct task_struct___bad { + int pid; + int fake_field; + void *fake_field_subprog; +} __attribute__((preserve_access_index)); + +SEC("?raw_tp/sys_enter") +int bad_relo(const void *ctx) +{ + static struct task_struct___bad *t; + + return bpf_core_field_size(t->fake_field); +} + +static __noinline int bad_subprog(void) +{ + static struct task_struct___bad *t; + + /* ugliness below is a field offset relocation */ + return (void *)&t->fake_field_subprog - (void *)t; +} + +SEC("?raw_tp/sys_enter") +int bad_relo_subprog(const void *ctx) +{ + static struct task_struct___bad *t; + + return bad_subprog() + bpf_core_field_size(t->pid); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index 40f161480a2f..b502e5c92e33 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -52,7 +52,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, return result; } -SEC("tc") +SEC("?tc") int sk_lookup_success(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; @@ -78,7 +78,7 @@ int sk_lookup_success(struct __sk_buff *skb) return sk ? TC_ACT_OK : TC_ACT_UNSPEC; } -SEC("tc") +SEC("?tc") int sk_lookup_success_simple(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -90,7 +90,7 @@ int sk_lookup_success_simple(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_use_after_free(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -105,7 +105,7 @@ int err_use_after_free(struct __sk_buff *skb) return family; } -SEC("tc") +SEC("?tc") int err_modify_sk_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -120,7 +120,7 @@ int err_modify_sk_pointer(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_modify_sk_or_null_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -134,7 +134,7 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_no_release(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -143,7 +143,7 @@ int err_no_release(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_release_twice(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -155,7 +155,7 @@ int err_release_twice(struct __sk_buff *skb) return 0; } -SEC("tc") +SEC("?tc") int err_release_unchecked(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb) bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } -SEC("tc") +SEC("?tc") int err_no_release_subcall(struct __sk_buff *skb) { lookup_no_release(skb); diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 2ffa08198d1c..0861ea60dcdd 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -6,7 +6,6 @@ #include <stdlib.h> #include <sys/sysinfo.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" #include "testing_helpers.h" @@ -52,6 +51,9 @@ int main(int argc, char **argv) goto err; } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key), sizeof(value), 0, NULL); if (map_fd < 0) { diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index c299d3452695..7886265846a0 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -15,7 +15,6 @@ #include "cgroup_helpers.h" #include "testing_helpers.h" -#include "bpf_rlimit.h" #define DEV_CGROUP_PROG "./dev_cgroup.o" @@ -28,6 +27,9 @@ int main(int argc, char **argv) int prog_fd, cgroup_fd; __u32 prog_cnt; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index aa294612e0a7..c028d621c744 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -26,7 +26,6 @@ #include <bpf/bpf.h> #include "bpf_util.h" -#include "bpf_rlimit.h" struct tlpm_node { struct tlpm_node *next; @@ -409,16 +408,13 @@ static void test_lpm_ipaddr(void) /* Test some lookups that should not match any entry */ inet_pton(AF_INET, "10.0.0.1", key_ipv4->data); - assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT); inet_pton(AF_INET, "11.11.11.11", key_ipv4->data); - assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT); inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data); - assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT); close(map_fd_ipv4); close(map_fd_ipv6); @@ -475,18 +471,15 @@ static void test_lpm_delete(void) /* remove non-existent node */ key->prefixlen = 32; inet_pton(AF_INET, "10.0.0.1", key->data); - assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT); key->prefixlen = 30; // unused prefix so far inet_pton(AF_INET, "192.255.0.0", key->data); - assert(bpf_map_delete_elem(map_fd, key) == -1 && - errno == ENOENT); + assert(bpf_map_delete_elem(map_fd, key) == -ENOENT); key->prefixlen = 16; // same prefix as the root node inet_pton(AF_INET, "192.255.0.0", key->data); - assert(bpf_map_delete_elem(map_fd, key) == -1 && - errno == ENOENT); + assert(bpf_map_delete_elem(map_fd, key) == -ENOENT); /* assert initial lookup */ key->prefixlen = 32; @@ -531,8 +524,7 @@ static void test_lpm_delete(void) key->prefixlen = 32; inet_pton(AF_INET, "192.168.128.1", key->data); - assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT); close(map_fd); } @@ -553,8 +545,7 @@ static void test_lpm_get_next_key(void) assert(map_fd >= 0); /* empty tree. get_next_key should return ENOENT */ - assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT); /* get and verify the first key, get the second one should fail. */ key_p->prefixlen = 16; @@ -566,8 +557,7 @@ static void test_lpm_get_next_key(void) assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && key_p->data[1] == 168); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* no exact matching key should get the first one in post order. */ key_p->prefixlen = 8; @@ -591,8 +581,7 @@ static void test_lpm_get_next_key(void) next_key_p->data[1] == 168); memcpy(key_p, next_key_p, key_size); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* Add one more element (total three) */ key_p->prefixlen = 24; @@ -615,8 +604,7 @@ static void test_lpm_get_next_key(void) next_key_p->data[1] == 168); memcpy(key_p, next_key_p, key_size); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* Add one more element (total four) */ key_p->prefixlen = 24; @@ -644,8 +632,7 @@ static void test_lpm_get_next_key(void) next_key_p->data[1] == 168); memcpy(key_p, next_key_p, key_size); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* Add one more element (total five) */ key_p->prefixlen = 28; @@ -679,8 +666,7 @@ static void test_lpm_get_next_key(void) next_key_p->data[1] == 168); memcpy(key_p, next_key_p, key_size); - assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && - errno == ENOENT); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); /* no exact matching key should return the first one in post order */ key_p->prefixlen = 22; @@ -791,6 +777,9 @@ int main(void) /* we want predictable, pseudo random tests */ srand(0xf00ba1); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + test_lpm_basic(); test_lpm_order(); diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 563bbe18c172..4d0650cfb5cd 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -18,7 +18,6 @@ #include <bpf/libbpf.h> #include "bpf_util.h" -#include "bpf_rlimit.h" #include "../../../include/linux/filter.h" #define LOCAL_FREE_TARGET (128) @@ -176,24 +175,20 @@ static void test_lru_sanity0(int map_type, int map_flags) BPF_NOEXIST)); /* BPF_NOEXIST means: add new element if it doesn't exist */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 - /* key=1 already exists */ - && errno == EEXIST); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST); + /* key=1 already exists */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 && - errno == EINVAL); + assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -EINVAL); /* insert key=2 element */ /* check that key=2 is not found */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* BPF_EXIST means: update existing element */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && - /* key=2 is not there */ - errno == ENOENT); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT); + /* key=2 is not there */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -201,8 +196,7 @@ static void test_lru_sanity0(int map_type, int map_flags) /* check that key=3 is not found */ key = 3; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* check that key=1 can be found and mark the ref bit to * stop LRU from removing key=1 @@ -218,8 +212,7 @@ static void test_lru_sanity0(int map_type, int map_flags) /* key=2 has been removed from the LRU */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* lookup elem key=1 and delete it, then check it doesn't exist */ key = 1; @@ -382,8 +375,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) end_key = 1 + batch_size; value[0] = 4321; for (key = 1; key < end_key; key++) { - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); @@ -563,8 +555,7 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd) assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value)); /* Cannot find the last key because it was removed by LRU */ - assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -ENOENT); } /* Test map with only one element */ @@ -712,21 +703,18 @@ static void test_lru_sanity7(int map_type, int map_flags) BPF_NOEXIST)); /* BPF_NOEXIST means: add new element if it doesn't exist */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 - /* key=1 already exists */ - && errno == EEXIST); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST); + /* key=1 already exists */ /* insert key=2 element */ /* check that key=2 is not found */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* BPF_EXIST means: update existing element */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && - /* key=2 is not there */ - errno == ENOENT); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT); + /* key=2 is not there */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -734,8 +722,7 @@ static void test_lru_sanity7(int map_type, int map_flags) /* check that key=3 is not found */ key = 3; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* check that key=1 can be found and mark the ref bit to * stop LRU from removing key=1 @@ -758,8 +745,7 @@ static void test_lru_sanity7(int map_type, int map_flags) /* key=2 has been removed from the LRU */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); assert(map_equal(lru_map_fd, expected_map_fd)); @@ -806,21 +792,18 @@ static void test_lru_sanity8(int map_type, int map_flags) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); /* BPF_NOEXIST means: add new element if it doesn't exist */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 - /* key=1 already exists */ - && errno == EEXIST); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST); + /* key=1 already exists */ /* insert key=2 element */ /* check that key=2 is not found */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* BPF_EXIST means: update existing element */ - assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && - /* key=2 is not there */ - errno == ENOENT); + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT); + /* key=2 is not there */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, @@ -830,8 +813,7 @@ static void test_lru_sanity8(int map_type, int map_flags) /* check that key=3 is not found */ key = 3; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); /* check that key=1 can be found and do _not_ mark ref bit. * this will be evicted on next update. @@ -854,8 +836,7 @@ static void test_lru_sanity8(int map_type, int map_flags) /* key=1 has been removed from the LRU */ key = 1; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && - errno == ENOENT); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT); assert(map_equal(lru_map_fd, expected_map_fd)); @@ -878,6 +859,9 @@ int main(int argc, char **argv) assert(nr_cpus != -1); printf("nr_cpus:%d\n\n", nr_cpus); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + for (f = 0; f < ARRAY_SIZE(map_flags); f++) { unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ? PERCPU_FREE_TARGET : LOCAL_FREE_TARGET; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0a4b45d7b515..c536d1d29d57 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -3,6 +3,7 @@ */ #define _GNU_SOURCE #include "test_progs.h" +#include "testing_helpers.h" #include "cgroup_helpers.h" #include <argp.h> #include <pthread.h> @@ -50,19 +51,8 @@ struct prog_test_def { int test_num; void (*run_test)(void); void (*run_serial_test)(void); - bool force_log; - int error_cnt; - int skip_cnt; - int sub_succ_cnt; bool should_run; - bool tested; bool need_cgroup_cleanup; - - char *subtest_name; - int subtest_num; - - /* store counts before subtest started */ - int old_error_cnt; }; /* Override C runtime library's usleep() implementation to ensure nanosleep() @@ -84,12 +74,13 @@ static bool should_run(struct test_selector *sel, int num, const char *name) int i; for (i = 0; i < sel->blacklist.cnt; i++) { - if (glob_match(name, sel->blacklist.strs[i])) + if (glob_match(name, sel->blacklist.tests[i].name) && + !sel->blacklist.tests[i].subtest_cnt) return false; } for (i = 0; i < sel->whitelist.cnt; i++) { - if (glob_match(name, sel->whitelist.strs[i])) + if (glob_match(name, sel->whitelist.tests[i].name)) return true; } @@ -99,32 +90,69 @@ static bool should_run(struct test_selector *sel, int num, const char *name) return num < sel->num_set_len && sel->num_set[num]; } -static void dump_test_log(const struct prog_test_def *test, bool failed) +static bool should_run_subtest(struct test_selector *sel, + struct test_selector *subtest_sel, + int subtest_num, + const char *test_name, + const char *subtest_name) { - if (stdout == env.stdout) - return; + int i, j; - /* worker always holds log */ - if (env.worker_id != -1) - return; + for (i = 0; i < sel->blacklist.cnt; i++) { + if (glob_match(test_name, sel->blacklist.tests[i].name)) { + if (!sel->blacklist.tests[i].subtest_cnt) + return false; + + for (j = 0; j < sel->blacklist.tests[i].subtest_cnt; j++) { + if (glob_match(subtest_name, + sel->blacklist.tests[i].subtests[j])) + return false; + } + } + } - fflush(stdout); /* exports env.log_buf & env.log_cnt */ + for (i = 0; i < sel->whitelist.cnt; i++) { + if (glob_match(test_name, sel->whitelist.tests[i].name)) { + if (!sel->whitelist.tests[i].subtest_cnt) + return true; - if (env.verbosity > VERBOSE_NONE || test->force_log || failed) { - if (env.log_cnt) { - env.log_buf[env.log_cnt] = '\0'; - fprintf(env.stdout, "%s", env.log_buf); - if (env.log_buf[env.log_cnt - 1] != '\n') - fprintf(env.stdout, "\n"); + for (j = 0; j < sel->whitelist.tests[i].subtest_cnt; j++) { + if (glob_match(subtest_name, + sel->whitelist.tests[i].subtests[j])) + return true; + } } } + + if (!sel->whitelist.cnt && !subtest_sel->num_set) + return true; + + return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num]; } -static void skip_account(void) +static void dump_test_log(const struct prog_test_def *test, + const struct test_state *test_state, + bool force_failed) { - if (env.test->skip_cnt) { - env.skip_cnt++; - env.test->skip_cnt = 0; + bool failed = test_state->error_cnt > 0 || force_failed; + + /* worker always holds log */ + if (env.worker_id != -1) + return; + + fflush(stdout); /* exports test_state->log_buf & test_state->log_cnt */ + + fprintf(env.stdout, "#%-3d %s:%s\n", + test->test_num, test->test_name, + failed ? "FAIL" : (test_state->skip_cnt ? "SKIP" : "OK")); + + if (env.verbosity > VERBOSE_NONE || test_state->force_log || failed) { + if (test_state->log_cnt) { + test_state->log_buf[test_state->log_cnt] = '\0'; + fprintf(env.stdout, "%s", test_state->log_buf); + if (test_state->log_buf[test_state->log_cnt - 1] != '\n') + fprintf(env.stdout, "\n"); + } } } @@ -135,7 +163,6 @@ static void stdio_restore(void); */ static void reset_affinity(void) { - cpu_set_t cpuset; int i, err; @@ -178,68 +205,78 @@ static void restore_netns(void) void test__end_subtest(void) { struct prog_test_def *test = env.test; - int sub_error_cnt = test->error_cnt - test->old_error_cnt; - - dump_test_log(test, sub_error_cnt); + struct test_state *state = env.test_state; + int sub_error_cnt = state->error_cnt - state->old_error_cnt; fprintf(stdout, "#%d/%d %s/%s:%s\n", - test->test_num, test->subtest_num, test->test_name, test->subtest_name, - sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); + test->test_num, state->subtest_num, test->test_name, state->subtest_name, + sub_error_cnt ? "FAIL" : (state->subtest_skip_cnt ? "SKIP" : "OK")); - if (sub_error_cnt) - test->error_cnt++; - else if (test->skip_cnt == 0) - test->sub_succ_cnt++; - skip_account(); + if (sub_error_cnt == 0) { + if (state->subtest_skip_cnt == 0) { + state->sub_succ_cnt++; + } else { + state->subtest_skip_cnt = 0; + state->skip_cnt++; + } + } - free(test->subtest_name); - test->subtest_name = NULL; + free(state->subtest_name); + state->subtest_name = NULL; } -bool test__start_subtest(const char *name) +bool test__start_subtest(const char *subtest_name) { struct prog_test_def *test = env.test; + struct test_state *state = env.test_state; - if (test->subtest_name) + if (state->subtest_name) test__end_subtest(); - test->subtest_num++; + state->subtest_num++; - if (!name || !name[0]) { + if (!subtest_name || !subtest_name[0]) { fprintf(env.stderr, "Subtest #%d didn't provide sub-test name!\n", - test->subtest_num); + state->subtest_num); return false; } - if (!should_run(&env.subtest_selector, test->subtest_num, name)) + if (!should_run_subtest(&env.test_selector, + &env.subtest_selector, + state->subtest_num, + test->test_name, + subtest_name)) return false; - test->subtest_name = strdup(name); - if (!test->subtest_name) { + state->subtest_name = strdup(subtest_name); + if (!state->subtest_name) { fprintf(env.stderr, "Subtest #%d: failed to copy subtest name!\n", - test->subtest_num); + state->subtest_num); return false; } - env.test->old_error_cnt = env.test->error_cnt; + state->old_error_cnt = state->error_cnt; return true; } void test__force_log(void) { - env.test->force_log = true; + env.test_state->force_log = true; } void test__skip(void) { - env.test->skip_cnt++; + if (env.test_state->subtest_name) + env.test_state->subtest_skip_cnt++; + else + env.test_state->skip_cnt++; } void test__fail(void) { - env.test->error_cnt++; + env.test_state->error_cnt++; } int test__join_cgroup(const char *path) @@ -472,8 +509,11 @@ static struct prog_test_def prog_test_defs[] = { #include <prog_tests/tests.h> #undef DEFINE_TEST }; + static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); +static struct test_state test_states[ARRAY_SIZE(prog_test_defs)]; + const char *argp_program_version = "test_progs 0.1"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; static const char argp_program_doc[] = "BPF selftests test runner"; @@ -527,63 +567,29 @@ static int libbpf_print_fn(enum libbpf_print_level level, return 0; } -static void free_str_set(const struct str_set *set) +static void free_test_filter_set(const struct test_filter_set *set) { - int i; + int i, j; if (!set) return; - for (i = 0; i < set->cnt; i++) - free((void *)set->strs[i]); - free(set->strs); -} - -static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern) -{ - char *input, *state = NULL, *next, **tmp, **strs = NULL; - int i, cnt = 0; + for (i = 0; i < set->cnt; i++) { + free((void *)set->tests[i].name); + for (j = 0; j < set->tests[i].subtest_cnt; j++) + free((void *)set->tests[i].subtests[j]); - input = strdup(s); - if (!input) - return -ENOMEM; - - while ((next = strtok_r(state ? NULL : input, ",", &state))) { - tmp = realloc(strs, sizeof(*strs) * (cnt + 1)); - if (!tmp) - goto err; - strs = tmp; - - if (is_glob_pattern) { - strs[cnt] = strdup(next); - if (!strs[cnt]) - goto err; - } else { - strs[cnt] = malloc(strlen(next) + 2 + 1); - if (!strs[cnt]) - goto err; - sprintf(strs[cnt], "*%s*", next); - } - - cnt++; + free((void *)set->tests[i].subtests); } - tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt)); - if (!tmp) - goto err; - memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt); - set->strs = (const char **)tmp; - set->cnt += cnt; + free((void *)set->tests); +} - free(input); - free(strs); - return 0; -err: - for (i = 0; i < cnt; i++) - free(strs[i]); - free(strs); - free(input); - return -ENOMEM; +static void free_test_selector(struct test_selector *test_selector) +{ + free_test_filter_set(&test_selector->blacklist); + free_test_filter_set(&test_selector->whitelist); + free(test_selector->num_set); } extern int extra_prog_load_log_flags; @@ -615,33 +621,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } case ARG_TEST_NAME_GLOB_ALLOWLIST: case ARG_TEST_NAME: { - char *subtest_str = strchr(arg, '/'); - - if (subtest_str) { - *subtest_str = '\0'; - if (parse_str_list(subtest_str + 1, - &env->subtest_selector.whitelist, - key == ARG_TEST_NAME_GLOB_ALLOWLIST)) - return -ENOMEM; - } - if (parse_str_list(arg, &env->test_selector.whitelist, - key == ARG_TEST_NAME_GLOB_ALLOWLIST)) + if (parse_test_list(arg, + &env->test_selector.whitelist, + key == ARG_TEST_NAME_GLOB_ALLOWLIST)) return -ENOMEM; break; } case ARG_TEST_NAME_GLOB_DENYLIST: case ARG_TEST_NAME_BLACKLIST: { - char *subtest_str = strchr(arg, '/'); - - if (subtest_str) { - *subtest_str = '\0'; - if (parse_str_list(subtest_str + 1, - &env->subtest_selector.blacklist, - key == ARG_TEST_NAME_GLOB_DENYLIST)) - return -ENOMEM; - } - if (parse_str_list(arg, &env->test_selector.blacklist, - key == ARG_TEST_NAME_GLOB_DENYLIST)) + if (parse_test_list(arg, + &env->test_selector.blacklist, + key == ARG_TEST_NAME_GLOB_DENYLIST)) return -ENOMEM; break; } @@ -706,7 +696,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return 0; } -static void stdio_hijack(void) +static void stdio_hijack(char **log_buf, size_t *log_cnt) { #ifdef __GLIBC__ env.stdout = stdout; @@ -720,7 +710,7 @@ static void stdio_hijack(void) /* stdout and stderr -> buffer */ fflush(stdout); - stdout = open_memstream(&env.log_buf, &env.log_cnt); + stdout = open_memstream(log_buf, log_cnt); if (!stdout) { stdout = env.stdout; perror("open_memstream"); @@ -823,7 +813,7 @@ void crash_handler(int signum) sz = backtrace(bt, ARRAY_SIZE(bt)); if (env.test) - dump_test_log(env.test, true); + dump_test_log(env.test, env.test_state, true); if (env.stdout) stdio_restore(); if (env.worker_id != -1) @@ -845,17 +835,6 @@ static int current_test_idx; static pthread_mutex_t current_test_lock; static pthread_mutex_t stdout_output_lock; -struct test_result { - int error_cnt; - int skip_cnt; - int sub_succ_cnt; - - size_t log_cnt; - char *log_buf; -}; - -static struct test_result test_results[ARRAY_SIZE(prog_test_defs)]; - static inline const char *str_msg(const struct msg *msg, char *buf) { switch (msg->type) { @@ -909,8 +888,12 @@ static int recv_message(int sock, struct msg *msg) static void run_one_test(int test_num) { struct prog_test_def *test = &prog_test_defs[test_num]; + struct test_state *state = &test_states[test_num]; env.test = test; + env.test_state = state; + + stdio_hijack(&state->log_buf, &state->log_cnt); if (test->run_test) test->run_test(); @@ -918,17 +901,19 @@ static void run_one_test(int test_num) test->run_serial_test(); /* ensure last sub-test is finalized properly */ - if (test->subtest_name) + if (state->subtest_name) test__end_subtest(); - test->tested = true; + state->tested = true; - dump_test_log(test, test->error_cnt); + dump_test_log(test, state, false); reset_affinity(); restore_netns(); if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); + + stdio_restore(); } struct dispatch_data { @@ -947,7 +932,7 @@ static void *dispatch_thread(void *ctx) while (true) { int test_to_run = -1; struct prog_test_def *test; - struct test_result *result; + struct test_state *state; /* grab a test */ { @@ -994,16 +979,15 @@ static void *dispatch_thread(void *ctx) if (test_to_run != msg_test_done.test_done.test_num) goto error; - test->tested = true; - result = &test_results[test_to_run]; - - result->error_cnt = msg_test_done.test_done.error_cnt; - result->skip_cnt = msg_test_done.test_done.skip_cnt; - result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; + state = &test_states[test_to_run]; + state->tested = true; + state->error_cnt = msg_test_done.test_done.error_cnt; + state->skip_cnt = msg_test_done.test_done.skip_cnt; + state->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; /* collect all logs */ if (msg_test_done.test_done.have_log) { - log_fp = open_memstream(&result->log_buf, &result->log_cnt); + log_fp = open_memstream(&state->log_buf, &state->log_cnt); if (!log_fp) goto error; @@ -1022,25 +1006,11 @@ static void *dispatch_thread(void *ctx) fclose(log_fp); log_fp = NULL; } - /* output log */ - { - pthread_mutex_lock(&stdout_output_lock); - - if (result->log_cnt) { - result->log_buf[result->log_cnt] = '\0'; - fprintf(stdout, "%s", result->log_buf); - if (result->log_buf[result->log_cnt - 1] != '\n') - fprintf(stdout, "\n"); - } - - fprintf(stdout, "#%d %s:%s\n", - test->test_num, test->test_name, - result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK")); - - pthread_mutex_unlock(&stdout_output_lock); - } - } /* wait for test done */ + + pthread_mutex_lock(&stdout_output_lock); + dump_test_log(test, state, false); + pthread_mutex_unlock(&stdout_output_lock); } /* while (true) */ error: if (env.debug) @@ -1062,38 +1032,50 @@ done: return NULL; } -static void print_all_error_logs(void) +static void calculate_summary_and_print_errors(struct test_env *env) { int i; + int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0; + + for (i = 0; i < prog_test_cnt; i++) { + struct test_state *state = &test_states[i]; + + if (!state->tested) + continue; + + sub_succ_cnt += state->sub_succ_cnt; + skip_cnt += state->skip_cnt; + + if (state->error_cnt) + fail_cnt++; + else + succ_cnt++; + } - if (env.fail_cnt) - fprintf(stdout, "\nAll error logs:\n"); + if (fail_cnt) + printf("\nAll error logs:\n"); /* print error logs again */ for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *test; - struct test_result *result; - - test = &prog_test_defs[i]; - result = &test_results[i]; + struct prog_test_def *test = &prog_test_defs[i]; + struct test_state *state = &test_states[i]; - if (!test->tested || !result->error_cnt) + if (!state->tested || !state->error_cnt) continue; - fprintf(stdout, "\n#%d %s:%s\n", - test->test_num, test->test_name, - result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK")); - - if (result->log_cnt) { - result->log_buf[result->log_cnt] = '\0'; - fprintf(stdout, "%s", result->log_buf); - if (result->log_buf[result->log_cnt - 1] != '\n') - fprintf(stdout, "\n"); - } + dump_test_log(test, state, true); } + + printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt); + + env->succ_cnt = succ_cnt; + env->sub_succ_cnt = sub_succ_cnt; + env->fail_cnt = fail_cnt; + env->skip_cnt = skip_cnt; } -static int server_main(void) +static void server_main(void) { pthread_t *dispatcher_threads; struct dispatch_data *data; @@ -1149,60 +1131,18 @@ static int server_main(void) for (int i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; - struct test_result *result = &test_results[i]; if (!test->should_run || !test->run_serial_test) continue; - stdio_hijack(); - run_one_test(i); - - stdio_restore(); - if (env.log_buf) { - result->log_cnt = env.log_cnt; - result->log_buf = strdup(env.log_buf); - - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; - } - restore_netns(); - - fprintf(stdout, "#%d %s:%s\n", - test->test_num, test->test_name, - test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); - - result->error_cnt = test->error_cnt; - result->skip_cnt = test->skip_cnt; - result->sub_succ_cnt = test->sub_succ_cnt; } /* generate summary */ fflush(stderr); fflush(stdout); - for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *current_test; - struct test_result *result; - - current_test = &prog_test_defs[i]; - result = &test_results[i]; - - if (!current_test->tested) - continue; - - env.succ_cnt += result->error_cnt ? 0 : 1; - env.skip_cnt += result->skip_cnt; - if (result->error_cnt) - env.fail_cnt++; - env.sub_succ_cnt += result->sub_succ_cnt; - } - - print_all_error_logs(); - - fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + calculate_summary_and_print_errors(&env); /* reap all workers */ for (i = 0; i < env.workers; i++) { @@ -1212,8 +1152,6 @@ static int server_main(void) if (pid != env.worker_pids[i]) perror("Unable to reap worker"); } - - return 0; } static int worker_main(int sock) @@ -1234,35 +1172,29 @@ static int worker_main(int sock) env.worker_id); goto out; case MSG_DO_TEST: { - int test_to_run; - struct prog_test_def *test; + int test_to_run = msg.do_test.test_num; + struct prog_test_def *test = &prog_test_defs[test_to_run]; + struct test_state *state = &test_states[test_to_run]; struct msg msg_done; - test_to_run = msg.do_test.test_num; - test = &prog_test_defs[test_to_run]; - if (env.debug) fprintf(stderr, "[%d]: #%d:%s running.\n", env.worker_id, test_to_run + 1, test->test_name); - stdio_hijack(); - run_one_test(test_to_run); - stdio_restore(); - memset(&msg_done, 0, sizeof(msg_done)); msg_done.type = MSG_TEST_DONE; msg_done.test_done.test_num = test_to_run; - msg_done.test_done.error_cnt = test->error_cnt; - msg_done.test_done.skip_cnt = test->skip_cnt; - msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt; + msg_done.test_done.error_cnt = state->error_cnt; + msg_done.test_done.skip_cnt = state->skip_cnt; + msg_done.test_done.sub_succ_cnt = state->sub_succ_cnt; msg_done.test_done.have_log = false; - if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) { - if (env.log_cnt) + if (env.verbosity > VERBOSE_NONE || state->force_log || state->error_cnt) { + if (state->log_cnt) msg_done.test_done.have_log = true; } if (send_message(sock, &msg_done) < 0) { @@ -1275,8 +1207,8 @@ static int worker_main(int sock) char *src; size_t slen; - src = env.log_buf; - slen = env.log_cnt; + src = state->log_buf; + slen = state->log_cnt; while (slen) { struct msg msg_log; char *dest; @@ -1296,10 +1228,10 @@ static int worker_main(int sock) assert(send_message(sock, &msg_log) >= 0); } } - if (env.log_buf) { - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; + if (state->log_buf) { + free(state->log_buf); + state->log_buf = NULL; + state->log_cnt = 0; } if (env.debug) fprintf(stderr, "[%d]: #%d:%s done.\n", @@ -1430,7 +1362,6 @@ int main(int argc, char **argv) for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; - struct test_result *result; if (!test->should_run) continue; @@ -1446,34 +1377,7 @@ int main(int argc, char **argv) continue; } - stdio_hijack(); - run_one_test(i); - - stdio_restore(); - - fprintf(env.stdout, "#%d %s:%s\n", - test->test_num, test->test_name, - test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); - - result = &test_results[i]; - result->error_cnt = test->error_cnt; - if (env.log_buf) { - result->log_buf = strdup(env.log_buf); - result->log_cnt = env.log_cnt; - - free(env.log_buf); - env.log_buf = NULL; - env.log_cnt = 0; - } - - if (test->error_cnt) - env.fail_cnt++; - else - env.succ_cnt++; - - skip_account(); - env.sub_succ_cnt += test->sub_succ_cnt; } if (env.get_test_cnt) { @@ -1484,21 +1388,14 @@ int main(int argc, char **argv) if (env.list_test_names) goto out; - print_all_error_logs(); - - fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + calculate_summary_and_print_errors(&env); close(env.saved_netns_fd); out: if (!env.list_test_names && env.has_testmod) unload_bpf_testmod(); - free_str_set(&env.test_selector.blacklist); - free_str_set(&env.test_selector.whitelist); - free(env.test_selector.num_set); - free_str_set(&env.subtest_selector.blacklist); - free_str_set(&env.subtest_selector.whitelist); - free(env.subtest_selector.num_set); + + free_test_selector(&env.test_selector); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_NO_TEST; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index eec4c7385b14..d3fee3b98888 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -25,6 +25,7 @@ typedef __u16 __sum16; #include <sys/wait.h> #include <sys/types.h> #include <sys/time.h> +#include <sys/param.h> #include <fcntl.h> #include <pthread.h> #include <linux/bpf.h> @@ -37,7 +38,6 @@ typedef __u16 __sum16; #include <bpf/bpf_endian.h> #include "trace_helpers.h" #include "testing_helpers.h" -#include "flow_dissector_load.h" enum verbosity { VERBOSE_NONE, @@ -46,18 +46,43 @@ enum verbosity { VERBOSE_SUPER, }; -struct str_set { - const char **strs; +struct test_filter { + char *name; + char **subtests; + int subtest_cnt; +}; + +struct test_filter_set { + struct test_filter *tests; int cnt; }; struct test_selector { - struct str_set whitelist; - struct str_set blacklist; + struct test_filter_set whitelist; + struct test_filter_set blacklist; bool *num_set; int num_set_len; }; +struct test_state { + bool tested; + bool force_log; + + int error_cnt; + int skip_cnt; + int subtest_skip_cnt; + int sub_succ_cnt; + + char *subtest_name; + int subtest_num; + + /* store counts before subtest started */ + int old_error_cnt; + + size_t log_cnt; + char *log_buf; +}; + struct test_env { struct test_selector test_selector; struct test_selector subtest_selector; @@ -70,12 +95,11 @@ struct test_env { bool get_test_cnt; bool list_test_names; - struct prog_test_def *test; /* current running tests */ + struct prog_test_def *test; /* current running test */ + struct test_state *test_state; /* current running test result */ FILE *stdout; FILE *stderr; - char *log_buf; - size_t log_cnt; int nr_cpus; int succ_cnt; /* successful tests */ @@ -120,11 +144,12 @@ struct msg { extern struct test_env env; -extern void test__force_log(); -extern bool test__start_subtest(const char *name); -extern void test__skip(void); -extern void test__fail(void); -extern int test__join_cgroup(const char *path); +void test__force_log(void); +bool test__start_subtest(const char *name); +void test__end_subtest(void); +void test__skip(void); +void test__fail(void); +int test__join_cgroup(const char *path); #define PRINT_FAIL(format...) \ ({ \ @@ -267,6 +292,17 @@ extern int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_HAS_SUBSTR(str, substr, name) ({ \ + static int duration = 0; \ + const char *___str = str; \ + const char *___substr = substr; \ + bool ___ok = strstr(___str, ___substr) != NULL; \ + CHECK(!___ok, (name), \ + "unexpected %s: '%s' is not a substring of '%s'\n", \ + (name), ___substr, ___str); \ + ___ok; \ +}) + #define ASSERT_OK(res, name) ({ \ static int duration = 0; \ long long ___res = (res); \ diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c index 4a64306728ab..3256de30f563 100644 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c @@ -15,7 +15,6 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "bpf_rlimit.h" #include "cgroup_helpers.h" #define CGROUP_PATH "/skb_cgroup_test" @@ -160,6 +159,9 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + cgfd = cgroup_setup_and_join(CGROUP_PATH); if (cgfd < 0) goto err; diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index fe10f8134278..810c3740b2cc 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -14,7 +14,6 @@ #include "cgroup_helpers.h" #include <bpf/bpf_endian.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #define CG_PATH "/foo" @@ -493,7 +492,7 @@ static int run_test_case(int cgfd, const struct sock_test *test) goto err; } - if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) { + if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) { if (test->result == ATTACH_REJECT) goto out; else @@ -541,6 +540,9 @@ int main(int argc, char **argv) if (cgfd < 0) goto err; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index f3d5d7ac6505..458564fcfc82 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -19,7 +19,6 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" -#include "bpf_rlimit.h" #include "bpf_util.h" #ifndef ENOTSUPP @@ -1418,6 +1417,9 @@ int main(int argc, char **argv) if (cgfd < 0) goto err; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index dfb4f5c0fcb9..0fbaccdc8861 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -18,7 +18,6 @@ #include <sched.h> #include <sys/time.h> -#include <sys/resource.h> #include <sys/types.h> #include <sys/sendfile.h> @@ -37,7 +36,6 @@ #include <bpf/libbpf.h> #include "bpf_util.h" -#include "bpf_rlimit.h" #include "cgroup_helpers.h" int running; @@ -2017,6 +2015,9 @@ int main(int argc, char **argv) cg_created = 1; } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (test == SELFTESTS) { err = test_selftest(cg_fd, &options); goto out; diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index 4f6cf833b522..57620e7c9048 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -14,7 +14,6 @@ #include <bpf/libbpf.h> #include <bpf/bpf_endian.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" #include "testing_helpers.h" @@ -1561,7 +1560,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test) goto err; } - if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) { + if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) { if (test->result == ATTACH_REJECT) goto out; else @@ -1618,6 +1617,9 @@ int main(int argc, char **argv) if (cgfd < 0) goto err; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c index 0851c42ee31c..5546b05a0486 100644 --- a/tools/testing/selftests/bpf/test_tag.c +++ b/tools/testing/selftests/bpf/test_tag.c @@ -20,7 +20,6 @@ #include <bpf/bpf.h> #include "../../../include/linux/filter.h" -#include "bpf_rlimit.h" #include "testing_helpers.h" static struct bpf_insn prog[BPF_MAXINSNS]; @@ -189,6 +188,9 @@ int main(void) uint32_t tests = 0; int i, fd_map; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), sizeof(int), 1, &opts); assert(fd_map > 0); diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index e7775d3bbe08..5c8ef062f760 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -15,7 +15,6 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "bpf_rlimit.h" #include "cgroup_helpers.h" static int start_server(const struct sockaddr *addr, socklen_t len, bool dual) @@ -235,6 +234,9 @@ int main(int argc, char **argv) exit(1); } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp); if (results < 0) { log_err("Can't get map"); diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 4c5114765b23..8284db8b0f13 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -19,7 +19,6 @@ #include <linux/perf_event.h> #include <linux/err.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a2cd236c32eb..372579c9f45e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -53,7 +53,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 22 +#define MAX_NR_MAPS 23 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -101,6 +101,7 @@ struct bpf_test { int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; int fixup_map_timer[MAX_FIXUPS]; + int fixup_map_kptr[MAX_FIXUPS]; struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS]; /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. * Can be a tab-separated sequence of expected strings. An empty string @@ -621,8 +622,15 @@ static int create_cgroup_storage(bool percpu) * struct timer { * struct bpf_timer t; * }; + * struct btf_ptr { + * struct prog_test_ref_kfunc __kptr *ptr; + * struct prog_test_ref_kfunc __kptr_ref *ptr; + * struct prog_test_member __kptr_ref *ptr; + * } */ -static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"; +static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t" + "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref" + "\0prog_test_member"; static __u32 btf_raw_types[] = { /* int */ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -638,6 +646,22 @@ static __u32 btf_raw_types[] = { /* struct timer */ /* [5] */ BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */ + /* struct prog_test_ref_kfunc */ /* [6] */ + BTF_STRUCT_ENC(51, 0, 0), + BTF_STRUCT_ENC(89, 0, 0), /* [7] */ + /* type tag "kptr" */ + BTF_TYPE_TAG_ENC(75, 6), /* [8] */ + /* type tag "kptr_ref" */ + BTF_TYPE_TAG_ENC(80, 6), /* [9] */ + BTF_TYPE_TAG_ENC(80, 7), /* [10] */ + BTF_PTR_ENC(8), /* [11] */ + BTF_PTR_ENC(9), /* [12] */ + BTF_PTR_ENC(10), /* [13] */ + /* struct btf_ptr */ /* [14] */ + BTF_STRUCT_ENC(43, 3, 24), + BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */ + BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */ + BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ }; static int load_btf(void) @@ -727,6 +751,25 @@ static int create_map_timer(void) return fd; } +static int create_map_kptr(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_key_type_id = 1, + .btf_value_type_id = 14, + ); + int fd, btf_fd; + + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; + + opts.btf_fd = btf_fd; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 24, 1, &opts); + if (fd < 0) + printf("Failed to create map with btf_id pointer\n"); + return fd; +} + static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, @@ -754,6 +797,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; int *fixup_map_ringbuf = test->fixup_map_ringbuf; int *fixup_map_timer = test->fixup_map_timer; + int *fixup_map_kptr = test->fixup_map_kptr; struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; if (test->fill_helper) { @@ -947,6 +991,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_timer++; } while (*fixup_map_timer); } + if (*fixup_map_kptr) { + map_fds[22] = create_map_kptr(); + do { + prog[*fixup_map_kptr].imm = map_fds[22]; + fixup_map_kptr++; + } while (*fixup_map_kptr); + } /* Patch in kfunc BTF IDs */ if (fixup_kfunc_btf_id->kfunc) { diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c index 8d6918c3b4a2..70feda97cee5 100644 --- a/tools/testing/selftests/bpf/test_verifier_log.c +++ b/tools/testing/selftests/bpf/test_verifier_log.c @@ -11,8 +11,6 @@ #include <bpf/bpf.h> -#include "bpf_rlimit.h" - #define LOG_SIZE (1 << 20) #define err(str...) printf("ERROR: " str) @@ -141,6 +139,9 @@ int main(int argc, char **argv) memset(log, 1, LOG_SIZE); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + /* Test incorrect attr */ printf("Test log_level 0...\n"); test_log_bad(log, LOG_SIZE, 0); diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 87867f7a78c3..9695318e8132 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -6,6 +6,7 @@ #include <errno.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "test_progs.h" #include "testing_helpers.h" int parse_num_list(const char *s, bool **num_set, int *num_set_len) @@ -69,6 +70,94 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len) return 0; } +int parse_test_list(const char *s, + struct test_filter_set *set, + bool is_glob_pattern) +{ + char *input, *state = NULL, *next; + struct test_filter *tmp, *tests = NULL; + int i, j, cnt = 0; + + input = strdup(s); + if (!input) + return -ENOMEM; + + while ((next = strtok_r(state ? NULL : input, ",", &state))) { + char *subtest_str = strchr(next, '/'); + char *pattern = NULL; + int glob_chars = 0; + + tmp = realloc(tests, sizeof(*tests) * (cnt + 1)); + if (!tmp) + goto err; + tests = tmp; + + tests[cnt].subtest_cnt = 0; + tests[cnt].subtests = NULL; + + if (is_glob_pattern) { + pattern = "%s"; + } else { + pattern = "*%s*"; + glob_chars = 2; + } + + if (subtest_str) { + char **tmp_subtests = NULL; + int subtest_cnt = tests[cnt].subtest_cnt; + + *subtest_str = '\0'; + subtest_str += 1; + tmp_subtests = realloc(tests[cnt].subtests, + sizeof(*tmp_subtests) * + (subtest_cnt + 1)); + if (!tmp_subtests) + goto err; + tests[cnt].subtests = tmp_subtests; + + tests[cnt].subtests[subtest_cnt] = + malloc(strlen(subtest_str) + glob_chars + 1); + if (!tests[cnt].subtests[subtest_cnt]) + goto err; + sprintf(tests[cnt].subtests[subtest_cnt], + pattern, + subtest_str); + + tests[cnt].subtest_cnt++; + } + + tests[cnt].name = malloc(strlen(next) + glob_chars + 1); + if (!tests[cnt].name) + goto err; + sprintf(tests[cnt].name, pattern, next); + + cnt++; + } + + tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt)); + if (!tmp) + goto err; + + memcpy(tmp + set->cnt, tests, sizeof(*tests) * cnt); + set->tests = tmp; + set->cnt += cnt; + + free(tests); + free(input); + return 0; + +err: + for (i = 0; i < cnt; i++) { + for (j = 0; j < tests[i].subtest_cnt; j++) + free(tests[i].subtests[j]); + + free(tests[i].name); + } + free(tests); + free(input); + return -ENOMEM; +} + __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) { __u32 info_len = sizeof(*info); diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index f46ebc476ee8..6ec00bf79cb5 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -12,3 +12,11 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz); + +/* + * below function is exported for testing in prog_test test + */ +struct test_filter_set; +int parse_test_list(const char *s, + struct test_filter_set *test_set, + bool is_glob_pattern); diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 2e03decb11b6..743ed34c1238 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -139,6 +139,26 @@ }, }, { + "calls: invalid kfunc call: don't match first member type when passed to release kfunc", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_memb_acquire", 1 }, + { "bpf_kfunc_call_memb1_release", 5 }, + }, +}, +{ "calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset", .insns = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c new file mode 100644 index 000000000000..9113834640e6 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -0,0 +1,469 @@ +/* Common tests */ +{ + "map_kptr: BPF_ST imm != 0", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "BPF_ST imm must be 0 when storing to kptr at off=0", +}, +{ + "map_kptr: size != bpf_size_to_bytes(BPF_DW)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "kptr access size must be BPF_DW", +}, +{ + "map_kptr: map_value non-const var_off", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "kptr access cannot have variable offset", +}, +{ + "map_kptr: bpf_kptr_xchg non-const var_off", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "R1 doesn't have constant offset. kptr has to be at the constant offset", +}, +{ + "map_kptr: unaligned boundary load/store", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "kptr access misaligned expected=0 off=7", +}, +{ + "map_kptr: reject var_off != 0", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "variable untrusted_ptr_ access var_off=(0x0; 0x7) disallowed", +}, +/* Tests for unreferened PTR_TO_BTF_ID */ +{ + "map_kptr: unref: reject btf_struct_ids_match == false", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test", +}, +{ + "map_kptr: unref: loaded pointer marked as untrusted", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'untrusted_ptr_or_null_'", +}, +{ + "map_kptr: unref: correct in kernel type size", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 24), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "access beyond struct prog_test_ref_kfunc at off 24 size 8", +}, +{ + "map_kptr: unref: inherit PTR_UNTRUSTED on struct walk", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "R1 type=untrusted_ptr_ expected=percpu_ptr_", +}, +{ + "map_kptr: unref: no reference state created", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = ACCEPT, +}, +{ + "map_kptr: unref: bpf_kptr_xchg rejected", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "off=0 kptr isn't referenced kptr", +}, +{ + "map_kptr: unref: bpf_kfunc_call_test_kptr_get rejected", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "arg#0 no referenced kptr at map value offset=0", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_kptr_get", 13 }, + } +}, +/* Tests for referenced PTR_TO_BTF_ID */ +{ + "map_kptr: ref: loaded pointer marked as untrusted", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_", +}, +{ + "map_kptr: ref: reject off != 0", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member", +}, +{ + "map_kptr: ref: reference state created and released on xchg", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "Unreleased reference id=5 alloc_insn=20", + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 15 }, + } +}, +{ + "map_kptr: ref: reject STX", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "store to referenced kptr disallowed", +}, +{ + "map_kptr: ref: reject ST", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 8, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "store to referenced kptr disallowed", +}, +{ + "map_kptr: reject helper access to kptr", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_kptr = { 1 }, + .result = REJECT, + .errstr = "kptr cannot be accessed indirectly by helper", +}, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index fbd682520e47..57a83d763ec1 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -796,7 +796,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "reference has not been acquired before", + .errstr = "R1 must be referenced when passed to release function", }, { /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */ diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 86b24cad27a7..d11d0b28be41 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -417,7 +417,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "reference has not been acquired before", + .errstr = "R1 must be referenced when passed to release function", }, { "bpf_sk_release(bpf_sk_fullsock(skb->sk))", @@ -436,7 +436,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "reference has not been acquired before", + .errstr = "R1 must be referenced when passed to release function", }, { "bpf_sk_release(bpf_tcp_sock(skb->sk))", @@ -455,7 +455,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "reference has not been acquired before", + .errstr = "R1 must be referenced when passed to release function", }, { "sk_storage_get(map, skb->sk, NULL, 0): value == NULL", diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c index aaedbf4955c3..c03b3a75991f 100644 --- a/tools/testing/selftests/bpf/xdp_redirect_multi.c +++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -10,7 +10,6 @@ #include <net/if.h> #include <unistd.h> #include <libgen.h> -#include <sys/resource.h> #include <sys/ioctl.h> #include <sys/types.h> #include <sys/socket.h> diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index c567856fd1bc..5b6f977870f8 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -12,7 +12,6 @@ #include <string.h> #include <unistd.h> #include <libgen.h> -#include <sys/resource.h> #include <net/if.h> #include <sys/types.h> #include <sys/socket.h> @@ -89,7 +88,6 @@ int main(int argc, char **argv) { __u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE; struct addrinfo *a, hints = { .ai_family = AF_INET }; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; __u16 count = XDPING_DEFAULT_COUNT; struct pinginfo pinginfo = { 0 }; const char *optstr = "c:I:NsS"; @@ -167,10 +165,8 @@ int main(int argc, char **argv) freeaddrinfo(a); } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 5f8296d29e77..cfcb031323c5 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -90,7 +90,6 @@ #include <string.h> #include <stddef.h> #include <sys/mman.h> -#include <sys/resource.h> #include <sys/types.h> #include <sys/queue.h> #include <time.h> @@ -1448,14 +1447,13 @@ static void ifobject_delete(struct ifobject *ifobj) int main(int argc, char **argv) { - struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; struct pkt_stream *pkt_stream_default; struct ifobject *ifobj_tx, *ifobj_rx; struct test_spec test; u32 i, j; - if (setrlimit(RLIMIT_MEMLOCK, &_rlim)) - exit_with_error(errno); + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); ifobj_tx = ifobject_create(); if (!ifobj_tx) |