diff options
author | David S. Miller <davem@davemloft.net> | 2018-12-17 02:20:35 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-12-17 02:20:35 +0300 |
commit | ae6750e0a5ef3ec93b01008306183fd7a6d2721f (patch) | |
tree | 79795936c11ca42f4b59ca1f14c0ed3571e6c49e | |
parent | 0634d694b06fb522ec6bc8001b24c4b828897f69 (diff) | |
parent | 5d06a76d9e6d2d00eadba72548636ff0ff6fabde (diff) | |
download | linux-ae6750e0a5ef3ec93b01008306183fd7a6d2721f.tar.xz |
Merge branch 'mlxsw-spectrum_acl-Add-Bloom-filter-support'
Ido Schimmel says:
====================
mlxsw: spectrum_acl: Add Bloom filter support
Nir says:
Spectrum-2 uses Bloom filter to reduce the number of lookups in the
algorithmic TCAM (A-TCAM). HW performs multiple exact match lookups in a
given region using a key composed of { packet & mask, mask ID, region ID }.
The masks which are used in a region are called rule patterns or RP.
When such multiple masks are used, the A-TCAM region uses an eRP
(extended RP) table that describes which rule patterns are in use and
defines the order of the lookup. When eRP table is used in a region, one
way to reduce the number of the lookups is to consult a Bloom filter
before doing the lookup.
A Bloom filter is a space-efficient probabilistic data structure, on
which a query returns either "possibly in set" or "definitely not in
set". HW can skip a lookup if a query on the Bloom filter results a
"definitely not set" response. The mlxsw driver implements a "counting
filter" and when either a new entry is marked or the last entry is
removed it will update the HW. Update of this counting filter occurs
when rule is configured or deleted from a region.
Patch #1 adds PEABFE register which is used for setting Bloom filter
entries.
Patch #2 adds Bloom filter resources.
Patch #3 and patch #4 provide Bloom filter handling within mlxsw, by
adding initialization and logic for updating the Bloom bit vector in HW.
Patch #5 and patch #6 add required calls for Bloom filter update as part
of rule configuration flow.
Patch #7 handles transitions to and from eRP table. It uses a list to
keep A-TCAM rules in order to update rules in Bloom filter, in cases of
transitions from master mask based A-TCAM region to an eRP table based
region and vice versa.
Patch #8 removes a trick done on master RP index to a remaining RP,
since Bloom filter is updated on eRP transitions.
Finally, patch #9 activates Bloom filter mechanism in HW, by cancelling
the bypass that was configured before and the remaining three patches
are selftests that exercise the new code.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
8 files changed, 661 insertions, 9 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 1f77e97e2d7a..bbf45f10c208 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -20,7 +20,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl_tcam.o spectrum_acl_ctcam.o \ spectrum_acl_atcam.o spectrum_acl_erp.o \ spectrum1_acl_tcam.o spectrum2_acl_tcam.o \ - spectrum_acl.o \ + spectrum_acl_bloom_filter.o spectrum_acl.o \ spectrum_flower.o spectrum_cnt.o \ spectrum_fid.o spectrum_ipip.o \ spectrum_acl_flex_actions.o \ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index ace3a897b3f0..9b48dffc9f63 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2743,7 +2743,7 @@ mlxsw_reg_perpt_pack(char *payload, u8 erpt_bank, u8 erpt_index, mlxsw_reg_perpt_erpt_bank_set(payload, erpt_bank); mlxsw_reg_perpt_erpt_index_set(payload, erpt_index); mlxsw_reg_perpt_key_size_set(payload, key_size); - mlxsw_reg_perpt_bf_bypass_set(payload, true); + mlxsw_reg_perpt_bf_bypass_set(payload, false); mlxsw_reg_perpt_erp_id_set(payload, erp_id); mlxsw_reg_perpt_erpt_base_bank_set(payload, erpt_base_bank); mlxsw_reg_perpt_erpt_base_index_set(payload, erpt_base_index); @@ -3006,7 +3006,7 @@ static inline void mlxsw_reg_percr_pack(char *payload, u16 region_id) mlxsw_reg_percr_region_id_set(payload, region_id); mlxsw_reg_percr_atcam_ignore_prune_set(payload, false); mlxsw_reg_percr_ctcam_ignore_prune_set(payload, false); - mlxsw_reg_percr_bf_bypass_set(payload, true); + mlxsw_reg_percr_bf_bypass_set(payload, false); } /* PERERP - Policy-Engine Region eRP Register @@ -3095,6 +3095,72 @@ static inline void mlxsw_reg_pererp_pack(char *payload, u16 region_id, mlxsw_reg_pererp_master_rp_id_set(payload, master_rp_id); } +/* PEABFE - Policy-Engine Algorithmic Bloom Filter Entries Register + * ---------------------------------------------------------------- + * This register configures the Bloom filter entries. + */ +#define MLXSW_REG_PEABFE_ID 0x3022 +#define MLXSW_REG_PEABFE_BASE_LEN 0x10 +#define MLXSW_REG_PEABFE_BF_REC_LEN 0x4 +#define MLXSW_REG_PEABFE_BF_REC_MAX_COUNT 256 +#define MLXSW_REG_PEABFE_LEN (MLXSW_REG_PEABFE_BASE_LEN + \ + MLXSW_REG_PEABFE_BF_REC_LEN * \ + MLXSW_REG_PEABFE_BF_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(peabfe, MLXSW_REG_PEABFE_ID, MLXSW_REG_PEABFE_LEN); + +/* reg_peabfe_size + * Number of BF entries to be updated. + * Range 1..256 + * Access: Op + */ +MLXSW_ITEM32(reg, peabfe, size, 0x00, 0, 9); + +/* reg_peabfe_bf_entry_state + * Bloom filter state + * 0 - Clear + * 1 - Set + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_state, + MLXSW_REG_PEABFE_BASE_LEN, 31, 1, + MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false); + +/* reg_peabfe_bf_entry_bank + * Bloom filter bank ID + * Range 0..cap_max_erp_table_banks-1 + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_bank, + MLXSW_REG_PEABFE_BASE_LEN, 24, 4, + MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false); + +/* reg_peabfe_bf_entry_index + * Bloom filter entry index + * Range 0..2^cap_max_bf_log-1 + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_index, + MLXSW_REG_PEABFE_BASE_LEN, 0, 24, + MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_peabfe_pack(char *payload) +{ + MLXSW_REG_ZERO(peabfe, payload); +} + +static inline void mlxsw_reg_peabfe_rec_pack(char *payload, int rec_index, + u8 state, u8 bank, u32 bf_index) +{ + u8 num_rec = mlxsw_reg_peabfe_size_get(payload); + + if (rec_index >= num_rec) + mlxsw_reg_peabfe_size_set(payload, rec_index + 1); + mlxsw_reg_peabfe_bf_entry_state_set(payload, rec_index, state); + mlxsw_reg_peabfe_bf_entry_bank_set(payload, rec_index, bank); + mlxsw_reg_peabfe_bf_entry_index_set(payload, rec_index, bf_index); +} + /* IEDR - Infrastructure Entry Delete Register * ---------------------------------------------------- * This register is used for deleting entries from the entry tables. @@ -9608,6 +9674,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(pemrbt), MLXSW_REG(ptce2), MLXSW_REG(perpt), + MLXSW_REG(peabfe), MLXSW_REG(perar), MLXSW_REG(ptce3), MLXSW_REG(percr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 99b341539870..b8b3a01c2a9e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -41,6 +41,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB, MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB, MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB, + MLXSW_RES_ID_ACL_MAX_BF_LOG, MLXSW_RES_ID_MAX_CPU_POLICERS, MLXSW_RES_ID_MAX_VRS, MLXSW_RES_ID_MAX_RIFS, @@ -93,6 +94,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB] = 0x2951, [MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB] = 0x2952, [MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB] = 0x2953, + [MLXSW_RES_ID_ACL_MAX_BF_LOG] = 0x2960, [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, [MLXSW_RES_ID_MAX_VRS] = 0x2C01, [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c index 75f11c2d7b97..80fb268d51a5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c @@ -323,6 +323,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, aregion->region = region; aregion->atcam = atcam; mlxsw_sp_acl_atcam_region_type_init(aregion); + INIT_LIST_HEAD(&aregion->entries_list); err = rhashtable_init(&aregion->entries_ht, &mlxsw_sp_acl_atcam_entries_ht_params); @@ -356,6 +357,7 @@ void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion) mlxsw_sp_acl_erp_region_fini(aregion); aregion->ops->fini(aregion); rhashtable_destroy(&aregion->entries_ht); + WARN_ON(!list_empty(&aregion->entries_list)); } void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion, @@ -499,6 +501,12 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_erp_delta_value(delta, aentry->full_enc_key); mlxsw_sp_acl_erp_delta_clear(delta, aentry->ht_key.enc_key); + /* Add rule to the list of A-TCAM rules, assuming this + * rule is intended to A-TCAM. In case this rule does + * not fit into A-TCAM it will be removed from the list. + */ + list_add(&aentry->list, &aregion->entries_list); + /* We can't insert identical rules into the A-TCAM, so fail and * let the rule spill into C-TCAM */ @@ -508,6 +516,13 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, if (err) goto err_rhashtable_insert; + /* Bloom filter must be updated here, before inserting the rule into + * the A-TCAM. + */ + err = mlxsw_sp_acl_erp_bf_insert(mlxsw_sp, aregion, erp_mask, aentry); + if (err) + goto err_bf_insert; + err = mlxsw_sp_acl_atcam_region_entry_insert(mlxsw_sp, aregion, aentry, rulei); if (err) @@ -516,9 +531,12 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, return 0; err_rule_insert: + mlxsw_sp_acl_erp_bf_remove(mlxsw_sp, aregion, erp_mask, aentry); +err_bf_insert: rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node, mlxsw_sp_acl_atcam_entries_ht_params); err_rhashtable_insert: + list_del(&aentry->list); mlxsw_sp_acl_erp_mask_put(aregion, erp_mask); return err; } @@ -529,8 +547,10 @@ __mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_entry *aentry) { mlxsw_sp_acl_atcam_region_entry_remove(mlxsw_sp, aregion, aentry); + mlxsw_sp_acl_erp_bf_remove(mlxsw_sp, aregion, aentry->erp_mask, aentry); rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node, mlxsw_sp_acl_atcam_entries_ht_params); + list_del(&aentry->list); mlxsw_sp_acl_erp_mask_put(aregion, aentry->erp_mask); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c new file mode 100644 index 000000000000..505b87846acc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/refcount.h> + +#include "spectrum.h" +#include "spectrum_acl_tcam.h" + +struct mlxsw_sp_acl_bf { + unsigned int bank_size; + refcount_t refcnt[0]; +}; + +/* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key + * blocks, eRP ID and region ID. In Spectrum-2, region key is combined of up to + * 12 key blocks, so there can be up to 3 chunks in the Bloom filter key, + * depending on the actual number of key blocks used in the region. + * The layout of the Bloom filter key is as follows: + * + * +-------------------------+------------------------+------------------------+ + * | Chunk 2 Key blocks 11-8 | Chunk 1 Key blocks 7-4 | Chunk 0 Key blocks 3-0 | + * +-------------------------+------------------------+------------------------+ + */ +#define MLXSW_BLOOM_KEY_CHUNKS 3 +#define MLXSW_BLOOM_KEY_LEN 69 + +/* Each chunk size is 23 bytes. 18 bytes of it contain 4 key blocks, each is + * 36 bits, 2 bytes which hold eRP ID and region ID, and 3 bytes of zero + * padding. + * The layout of each chunk is as follows: + * + * +---------+----------------------+-----------------------------------+ + * | 3 bytes | 2 bytes | 18 bytes | + * +---------+-----------+----------+-----------------------------------+ + * | 183:158 | 157:148 | 147:144 | 143:0 | + * +---------+-----------+----------+-----------------------------------+ + * | 0 | region ID | eRP ID | 4 Key blocks (18 Bytes) | + * +---------+-----------+----------+-----------------------------------+ + */ +#define MLXSW_BLOOM_CHUNK_PAD_BYTES 3 +#define MLXSW_BLOOM_CHUNK_KEY_BYTES 18 +#define MLXSW_BLOOM_KEY_CHUNK_BYTES 23 + +/* The offset of the key block within a chunk is 5 bytes as it comes after + * 3 bytes of zero padding and 16 bits of region ID and eRP ID. + */ +#define MLXSW_BLOOM_CHUNK_KEY_OFFSET 5 + +/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8, + * and we need to populate it with 4 key blocks copied from the entry encoded + * key. Since the encoded key contains a padding, key block 11 starts at offset + * 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks take + * 18 bytes. + * This array defines key offsets for easy access when copying key blocks from + * entry key to Bloom filter chunk. + */ +static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38}; + +/* This table is just the CRC of each possible byte. It is + * computed, Msbit first, for the Bloom filter polynomial + * which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and + * the implicit x^16). + */ +static const u16 mlxsw_sp_acl_bf_crc_tab[256] = { +0x0000, 0x8529, 0x8f7b, 0x0a52, 0x9bdf, 0x1ef6, 0x14a4, 0x918d, +0xb297, 0x37be, 0x3dec, 0xb8c5, 0x2948, 0xac61, 0xa633, 0x231a, +0xe007, 0x652e, 0x6f7c, 0xea55, 0x7bd8, 0xfef1, 0xf4a3, 0x718a, +0x5290, 0xd7b9, 0xddeb, 0x58c2, 0xc94f, 0x4c66, 0x4634, 0xc31d, +0x4527, 0xc00e, 0xca5c, 0x4f75, 0xdef8, 0x5bd1, 0x5183, 0xd4aa, +0xf7b0, 0x7299, 0x78cb, 0xfde2, 0x6c6f, 0xe946, 0xe314, 0x663d, +0xa520, 0x2009, 0x2a5b, 0xaf72, 0x3eff, 0xbbd6, 0xb184, 0x34ad, +0x17b7, 0x929e, 0x98cc, 0x1de5, 0x8c68, 0x0941, 0x0313, 0x863a, +0x8a4e, 0x0f67, 0x0535, 0x801c, 0x1191, 0x94b8, 0x9eea, 0x1bc3, +0x38d9, 0xbdf0, 0xb7a2, 0x328b, 0xa306, 0x262f, 0x2c7d, 0xa954, +0x6a49, 0xef60, 0xe532, 0x601b, 0xf196, 0x74bf, 0x7eed, 0xfbc4, +0xd8de, 0x5df7, 0x57a5, 0xd28c, 0x4301, 0xc628, 0xcc7a, 0x4953, +0xcf69, 0x4a40, 0x4012, 0xc53b, 0x54b6, 0xd19f, 0xdbcd, 0x5ee4, +0x7dfe, 0xf8d7, 0xf285, 0x77ac, 0xe621, 0x6308, 0x695a, 0xec73, +0x2f6e, 0xaa47, 0xa015, 0x253c, 0xb4b1, 0x3198, 0x3bca, 0xbee3, +0x9df9, 0x18d0, 0x1282, 0x97ab, 0x0626, 0x830f, 0x895d, 0x0c74, +0x91b5, 0x149c, 0x1ece, 0x9be7, 0x0a6a, 0x8f43, 0x8511, 0x0038, +0x2322, 0xa60b, 0xac59, 0x2970, 0xb8fd, 0x3dd4, 0x3786, 0xb2af, +0x71b2, 0xf49b, 0xfec9, 0x7be0, 0xea6d, 0x6f44, 0x6516, 0xe03f, +0xc325, 0x460c, 0x4c5e, 0xc977, 0x58fa, 0xddd3, 0xd781, 0x52a8, +0xd492, 0x51bb, 0x5be9, 0xdec0, 0x4f4d, 0xca64, 0xc036, 0x451f, +0x6605, 0xe32c, 0xe97e, 0x6c57, 0xfdda, 0x78f3, 0x72a1, 0xf788, +0x3495, 0xb1bc, 0xbbee, 0x3ec7, 0xaf4a, 0x2a63, 0x2031, 0xa518, +0x8602, 0x032b, 0x0979, 0x8c50, 0x1ddd, 0x98f4, 0x92a6, 0x178f, +0x1bfb, 0x9ed2, 0x9480, 0x11a9, 0x8024, 0x050d, 0x0f5f, 0x8a76, +0xa96c, 0x2c45, 0x2617, 0xa33e, 0x32b3, 0xb79a, 0xbdc8, 0x38e1, +0xfbfc, 0x7ed5, 0x7487, 0xf1ae, 0x6023, 0xe50a, 0xef58, 0x6a71, +0x496b, 0xcc42, 0xc610, 0x4339, 0xd2b4, 0x579d, 0x5dcf, 0xd8e6, +0x5edc, 0xdbf5, 0xd1a7, 0x548e, 0xc503, 0x402a, 0x4a78, 0xcf51, +0xec4b, 0x6962, 0x6330, 0xe619, 0x7794, 0xf2bd, 0xf8ef, 0x7dc6, +0xbedb, 0x3bf2, 0x31a0, 0xb489, 0x2504, 0xa02d, 0xaa7f, 0x2f56, +0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1, +}; + +static u16 mlxsw_sp_acl_bf_crc_byte(u16 crc, u8 c) +{ + return (crc << 8) ^ mlxsw_sp_acl_bf_crc_tab[(crc >> 8) ^ c]; +} + +static u16 mlxsw_sp_acl_bf_crc(const u8 *buffer, size_t len) +{ + u16 crc = 0; + + while (len--) + crc = mlxsw_sp_acl_bf_crc_byte(crc, *buffer++); + return crc; +} + +static void +mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + char *output, u8 *len) +{ + struct mlxsw_afk_key_info *key_info = aregion->region->key_info; + u8 chunk_index, chunk_count, block_count; + char *chunk = output; + __be16 erp_region_id; + + block_count = mlxsw_afk_key_info_blocks_count_get(key_info); + chunk_count = 1 + ((block_count - 1) >> 2); + erp_region_id = cpu_to_be16(aentry->ht_key.erp_id | + (aregion->region->id << 4)); + for (chunk_index = MLXSW_BLOOM_KEY_CHUNKS - chunk_count; + chunk_index < MLXSW_BLOOM_KEY_CHUNKS; chunk_index++) { + memset(chunk, 0, MLXSW_BLOOM_CHUNK_PAD_BYTES); + memcpy(chunk + MLXSW_BLOOM_CHUNK_PAD_BYTES, &erp_region_id, + sizeof(erp_region_id)); + memcpy(chunk + MLXSW_BLOOM_CHUNK_KEY_OFFSET, + &aentry->ht_key.enc_key[chunk_key_offsets[chunk_index]], + MLXSW_BLOOM_CHUNK_KEY_BYTES); + chunk += MLXSW_BLOOM_KEY_CHUNK_BYTES; + } + *len = chunk_count * MLXSW_BLOOM_KEY_CHUNK_BYTES; +} + +static unsigned int +mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf, + unsigned int erp_bank, + unsigned int bf_index) +{ + return erp_bank * bf->bank_size + bf_index; +} + +static unsigned int +mlxsw_sp_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + char bf_key[MLXSW_BLOOM_KEY_LEN]; + u8 bf_size; + + mlxsw_sp_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size); + return mlxsw_sp_acl_bf_crc(bf_key, bf_size); +} + +int +mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + unsigned int rule_index; + char *peabfe_pl; + u16 bf_index; + int err; + + bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry); + rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank, + bf_index); + + if (refcount_inc_not_zero(&bf->refcnt[rule_index])) + return 0; + + peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL); + if (!peabfe_pl) + return -ENOMEM; + + mlxsw_reg_peabfe_pack(peabfe_pl); + mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 1, erp_bank, bf_index); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl); + kfree(peabfe_pl); + if (err) + return err; + + refcount_set(&bf->refcnt[rule_index], 1); + return 0; +} + +void +mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + unsigned int rule_index; + char *peabfe_pl; + u16 bf_index; + + bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry); + rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank, + bf_index); + + if (refcount_dec_and_test(&bf->refcnt[rule_index])) { + peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL); + if (!peabfe_pl) + return; + + mlxsw_reg_peabfe_pack(peabfe_pl); + mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 0, erp_bank, bf_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl); + kfree(peabfe_pl); + } +} + +struct mlxsw_sp_acl_bf * +mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks) +{ + struct mlxsw_sp_acl_bf *bf; + unsigned int bf_bank_size; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_BF_LOG)) + return ERR_PTR(-EIO); + + /* Bloom filter size per erp_table_bank + * is 2^ACL_MAX_BF_LOG + */ + bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG); + bf = kzalloc(sizeof(*bf) + bf_bank_size * num_erp_banks * + sizeof(*bf->refcnt), GFP_KERNEL); + if (!bf) + return ERR_PTR(-ENOMEM); + + bf->bank_size = bf_bank_size; + return bf; +} + +void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf) +{ + kfree(bf); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index d9a4b7e8434b..1c19feefa5f2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -24,6 +24,7 @@ struct mlxsw_sp_acl_erp_core { unsigned int erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX + 1]; struct gen_pool *erp_tables; struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_acl_bf *bf; unsigned int num_erp_banks; }; @@ -114,6 +115,19 @@ static const struct mlxsw_sp_acl_erp_table_ops erp_no_mask_ops = { .erp_destroy = mlxsw_sp_acl_erp_no_mask_destroy, }; +static bool +mlxsw_sp_acl_erp_table_is_used(const struct mlxsw_sp_acl_erp_table *erp_table) +{ + return erp_table->ops != &erp_single_mask_ops && + erp_table->ops != &erp_no_mask_ops; +} + +static unsigned int +mlxsw_sp_acl_erp_bank_get(const struct mlxsw_sp_acl_erp *erp) +{ + return erp->index % erp->erp_table->erp_core->num_erp_banks; +} + static unsigned int mlxsw_sp_acl_erp_table_entry_size(const struct mlxsw_sp_acl_erp_table *erp_table) { @@ -504,6 +518,48 @@ err_table_relocate: } static int +mlxsw_acl_erp_table_bf_add(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion; + unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + struct mlxsw_sp_acl_atcam_entry *aentry; + int err; + + list_for_each_entry(aentry, &aregion->entries_list, list) { + err = mlxsw_sp_acl_bf_entry_add(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); + if (err) + goto bf_entry_add_err; + } + + return 0; + +bf_entry_add_err: + list_for_each_entry_continue_reverse(aentry, &aregion->entries_list, + list) + mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); + return err; +} + +static void +mlxsw_acl_erp_table_bf_del(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion; + unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + struct mlxsw_sp_acl_atcam_entry *aentry; + + list_for_each_entry_reverse(aentry, &aregion->entries_list, list) + mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + +static int mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table) { struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core; @@ -527,16 +583,24 @@ mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table) goto err_table_master_rp; } - /* Maintain the same eRP bank for the master RP, so that we - * wouldn't need to update the bloom filter + /* Make sure the master RP is using a valid index, as + * only a single eRP row is currently allocated. */ - master_rp->index = master_rp->index % erp_core->num_erp_banks; + master_rp->index = 0; __set_bit(master_rp->index, erp_table->erp_index_bitmap); err = mlxsw_sp_acl_erp_table_erp_add(erp_table, master_rp); if (err) goto err_table_master_rp_add; + /* Update Bloom filter before enabling eRP table, as rules + * on the master RP were not set to Bloom filter up to this + * point. + */ + err = mlxsw_acl_erp_table_bf_add(erp_table, master_rp); + if (err) + goto err_table_bf_add; + err = mlxsw_sp_acl_erp_table_enable(erp_table, false); if (err) goto err_table_enable; @@ -544,6 +608,8 @@ mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table) return 0; err_table_enable: + mlxsw_acl_erp_table_bf_del(erp_table, master_rp); +err_table_bf_add: mlxsw_sp_acl_erp_table_erp_del(master_rp); err_table_master_rp_add: __clear_bit(master_rp->index, erp_table->erp_index_bitmap); @@ -564,6 +630,7 @@ mlxsw_sp_acl_erp_region_master_mask_trans(struct mlxsw_sp_acl_erp_table *erp_tab master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table); if (!master_rp) return; + mlxsw_acl_erp_table_bf_del(erp_table, master_rp); mlxsw_sp_acl_erp_table_erp_del(master_rp); __clear_bit(master_rp->index, erp_table->erp_index_bitmap); mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps, @@ -635,8 +702,7 @@ __mlxsw_sp_acl_erp_table_other_inc(struct mlxsw_sp_acl_erp_table *erp_table, /* If there are C-TCAM eRP or deltas in use we need to transition * the region to use eRP table, if it is not already done */ - if (erp_table->ops != &erp_two_masks_ops && - erp_table->ops != &erp_multiple_masks_ops) { + if (!mlxsw_sp_acl_erp_table_is_used(erp_table)) { err = mlxsw_sp_acl_erp_region_table_trans(erp_table); if (err) return err; @@ -960,6 +1026,44 @@ void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion, objagg_obj_put(aregion->erp_table->objagg, objagg_obj); } +int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); + unsigned int erp_bank; + + ASSERT_RTNL(); + if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table)) + return 0; + + erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + return mlxsw_sp_acl_bf_entry_add(mlxsw_sp, + erp->erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + +void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); + unsigned int erp_bank; + + ASSERT_RTNL(); + if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table)) + return; + + erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + mlxsw_sp_acl_bf_entry_del(mlxsw_sp, + erp->erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + bool mlxsw_sp_acl_erp_mask_is_ctcam(const struct mlxsw_sp_acl_erp_mask *erp_mask) { @@ -1320,6 +1424,12 @@ static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_gen_pool_add; + erp_core->bf = mlxsw_sp_acl_bf_init(mlxsw_sp, erp_core->num_erp_banks); + if (IS_ERR(erp_core->bf)) { + err = PTR_ERR(erp_core->bf); + goto err_bf_init; + } + /* Different regions require masks of different sizes */ err = mlxsw_sp_acl_erp_tables_sizes_query(mlxsw_sp, erp_core); if (err) @@ -1328,6 +1438,8 @@ static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp, return 0; err_erp_tables_sizes_query: + mlxsw_sp_acl_bf_fini(erp_core->bf); +err_bf_init: err_gen_pool_add: gen_pool_destroy(erp_core->erp_tables); return err; @@ -1336,6 +1448,7 @@ err_gen_pool_add: static void mlxsw_sp_acl_erp_tables_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_erp_core *erp_core) { + mlxsw_sp_acl_bf_fini(erp_core->bf); gen_pool_destroy(erp_core->erp_tables); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 95929b9039ec..0f1a9dee63de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -152,6 +152,7 @@ struct mlxsw_sp_acl_atcam { struct mlxsw_sp_acl_atcam_region { struct rhashtable entries_ht; /* A-TCAM only */ + struct list_head entries_list; /* A-TCAM only */ struct mlxsw_sp_acl_ctcam_region cregion; const struct mlxsw_sp_acl_atcam_region_ops *ops; struct mlxsw_sp_acl_tcam_region *region; @@ -174,6 +175,7 @@ struct mlxsw_sp_acl_atcam_chunk { struct mlxsw_sp_acl_atcam_entry { struct rhash_head ht_node; + struct list_head list; /* Member in entries_list */ struct mlxsw_sp_acl_atcam_entry_ht_key ht_key; char full_enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key */ struct { @@ -251,6 +253,14 @@ mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion, const char *mask, bool ctcam); void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_erp_mask *erp_mask); +int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry); +void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry); int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion); void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp, @@ -258,4 +268,22 @@ int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); +struct mlxsw_sp_acl_bf; + +int +mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry); +void +mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry); +struct mlxsw_sp_acl_bf * +mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks); +void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf); + #endif diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 00ae99fbc253..b41d6256b2d0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ - multiple_masks_test ctcam_edge_cases_test delta_simple_test" + multiple_masks_test ctcam_edge_cases_test delta_simple_test \ + bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -404,6 +405,178 @@ delta_simple_test() log_test "delta simple test ($tcflags)" } +bloom_simple_test() +{ + # Bloom filter requires that the eRP table is used. This test + # verifies that Bloom filter is not harming correctness of ACLs. + # First, make sure that eRP table is used and then set rule patterns + # which are distant enough and will result skipping a lookup after + # consulting the Bloom filter. Although some eRP lookups are skipped, + # the correct filter should be hit. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Two filters - did not match highest priority" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Single filter - did not match" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Low prio filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 198.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Two filters - did not match highest priority after add" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower + + log_test "bloom simple test ($tcflags)" +} + +bloom_complex_test() +{ + # Bloom filter index computation is affected from region ID, eRP + # ID and from the region key size. In order to excercise those parts + # of the Bloom filter code, use a series of regions, each with a + # different key size and send packet that should hit all of them. + local index + + RET=0 + NUM_CHAINS=4 + BASE_INDEX=100 + + # Create chain with up to 2 key blocks (ip_proto only) + tc chain add dev $h2 ingress chain 1 protocol ip flower \ + ip_proto tcp &> /dev/null + # Create chain with 2-4 key blocks (ip_proto, src MAC) + tc chain add dev $h2 ingress chain 2 protocol ip flower \ + ip_proto tcp \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null + # Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest) + tc chain add dev $h2 ingress chain 3 protocol ip flower \ + ip_proto tcp \ + dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + dst_ip 0.0.0.0/32 &> /dev/null + # Default chain contains all fields and therefore is 8-12 key blocks + tc chain add dev $h2 ingress chain 4 + + # We need at least 2 rules in every region to have eRP table active + # so create a dummy rule per chain using a different pattern + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX - 1 - i)) + tc filter add dev $h2 ingress chain $i protocol ip \ + pref 2 handle $index flower \ + $tcflags ip_proto tcp action drop + done + + # Add rules to test Bloom filter, each in a different chain + index=$BASE_INDEX + tc filter add dev $h2 ingress protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/16 action goto chain 1 + tc filter add dev $h2 ingress chain 1 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags action goto chain 2 + tc filter add dev $h2 ingress chain 2 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_mac $h1mac action goto chain 3 + tc filter add dev $h2 ingress chain 3 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/8 action goto chain 4 + tc filter add dev $h2 ingress chain 4 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + # Send a packet that is supposed to hit all chains + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX + i + 1)) + tc_check_packets "dev $h2 ingress" $index 1 + check_err $? "Did not match chain $i" + done + + # Rules cleanup + for i in $(eval echo {$NUM_CHAINS..0}); do + index=$((BASE_INDEX - i - 1)) + tc filter del dev $h2 ingress chain $i \ + pref 2 handle $index flower + index=$((BASE_INDEX + i + 1)) + tc filter del dev $h2 ingress chain $i \ + pref 1 handle $index flower + done + + # Chains cleanup + for i in $(eval echo {$NUM_CHAINS..1}); do + tc chain del dev $h2 ingress chain $i + done + + log_test "bloom complex test ($tcflags)" +} + + +bloom_delta_test() +{ + # When multiple masks are used, the eRP table is activated. When + # masks are close enough (delta) the masks reside on the same + # eRP table. This test verifies that the eRP table is correctly + # allocated and used in delta condition and that Bloom filter is + # still functional with delta. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.1.0.0/16 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Single filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.2.1.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Delta filters - did not match second filter" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "bloom delta test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} |