From 6469933605a3ecdfa66b98160cde98ecd256cb3f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 4 Jun 2012 12:44:16 +0000 Subject: ethernet: Remove casts to same type Adding casts of objects to the same type is unnecessary and confusing for a human reader. For example, this cast: int y; int *p = (int *)&y; I used the coccinelle script below to find and remove these unnecessary casts. I manually removed the conversions this script produces of casts with __force, __iomem and __user. @@ type T; T *p; @@ - (T *)p + p A function in atl1e_main.c was passed a const pointer when it actually modified elements of the structure. Change the argument to a non-const pointer. A function in stmmac needed a __force to avoid a sparse warning. Added it. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b45d0e7f6ab0..766b8c5a235e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -779,7 +779,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, r->com.to_state = state; r->com.state = RES_QP_BUSY; if (qp) - *qp = (struct res_qp *)r; + *qp = r; } } @@ -832,7 +832,7 @@ static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index, r->com.to_state = state; r->com.state = RES_MPT_BUSY; if (mpt) - *mpt = (struct res_mpt *)r; + *mpt = r; } } -- cgit v1.2.3 From 90b1ebe7af2592597e7568bb203d59f4918f76ad Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 1 Jul 2012 03:18:51 +0000 Subject: mlx4: set maximal number of default RSS queues Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Cc: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a0313de122de..14d9c762b60f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1539,8 +1540,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int nreq = min_t(int, dev->caps.num_ports * - min_t(int, num_online_cpus() + 1, MAX_MSIX_P_PORT) - + MSIX_LEGACY_SZ, MAX_MSIX); + min_t(int, netif_get_num_default_rss_queues() + 1, + MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); int err; int i; -- cgit v1.2.3 From 4af1c0488da97293c09d37ca49543566406073e6 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:41 +0000 Subject: net/mlx4_core: Change resource tracking mechanism to use red-black tree Change the data structure used for managing the SRIOV resource tracking mechanism from radix tree to red-black tree. This is preparation step for supporting resource IDs which are 64bit long, such as network flow steering rules. Such IDs can't be used as radix-tree keys on 32bit architectures and hence the reason for the change. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 +- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 106 +++++++++++++++------ 2 files changed, 77 insertions(+), 32 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e5d20220762c..1a2f37285ef5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -509,7 +510,7 @@ struct slave_list { struct mlx4_resource_tracker { spinlock_t lock; /* tree for each resources */ - struct radix_tree_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; + struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; /* num_of_slave's lists, one per slave */ struct slave_list *slave_list; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 766b8c5a235e..80c03c802301 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -57,6 +57,7 @@ struct mac_res { struct res_common { struct list_head list; + struct rb_node node; u32 res_id; int owner; int state; @@ -189,6 +190,49 @@ struct res_xrcdn { int port; }; +static void *res_tracker_lookup(struct rb_root *root, u64 res_id) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct res_common *res = container_of(node, struct res_common, + node); + + if (res_id < res->res_id) + node = node->rb_left; + else if (res_id > res->res_id) + node = node->rb_right; + else + return res; + } + return NULL; +} + +static int res_tracker_insert(struct rb_root *root, struct res_common *res) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct res_common *this = container_of(*new, struct res_common, + node); + + parent = *new; + if (res->res_id < this->res_id) + new = &((*new)->rb_left); + else if (res->res_id > this->res_id) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&res->node, parent, new); + rb_insert_color(&res->node, root); + + return 0; +} + /* For Debug uses */ static const char *ResourceType(enum mlx4_resource rt) { @@ -228,8 +272,7 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n", dev->num_slaves); for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) - INIT_RADIX_TREE(&priv->mfunc.master.res_tracker.res_tree[i], - GFP_ATOMIC|__GFP_NOWARN); + priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT; spin_lock_init(&priv->mfunc.master.res_tracker.lock); return 0 ; @@ -277,8 +320,8 @@ static void *find_res(struct mlx4_dev *dev, int res_id, { struct mlx4_priv *priv = mlx4_priv(dev); - return radix_tree_lookup(&priv->mfunc.master.res_tracker.res_tree[type], - res_id); + return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type], + res_id); } static int get_res(struct mlx4_dev *dev, int slave, int res_id, @@ -523,7 +566,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count, struct mlx4_priv *priv = mlx4_priv(dev); struct res_common **res_arr; struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; - struct radix_tree_root *root = &tracker->res_tree[type]; + struct rb_root *root = &tracker->res_tree[type]; res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL); if (!res_arr) @@ -546,7 +589,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count, err = -EEXIST; goto undo; } - err = radix_tree_insert(root, base + i, res_arr[i]); + err = res_tracker_insert(root, res_arr[i]); if (err) goto undo; list_add_tail(&res_arr[i]->list, @@ -559,7 +602,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count, undo: for (--i; i >= base; --i) - radix_tree_delete(&tracker->res_tree[type], i); + rb_erase(&res_arr[i]->node, root); spin_unlock_irq(mlx4_tlock(dev)); @@ -695,7 +738,7 @@ static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count, spin_lock_irq(mlx4_tlock(dev)); for (i = base; i < base + count; ++i) { - r = radix_tree_lookup(&tracker->res_tree[type], i); + r = res_tracker_lookup(&tracker->res_tree[type], i); if (!r) { err = -ENOENT; goto out; @@ -710,8 +753,8 @@ static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count, } for (i = base; i < base + count; ++i) { - r = radix_tree_lookup(&tracker->res_tree[type], i); - radix_tree_delete(&tracker->res_tree[type], i); + r = res_tracker_lookup(&tracker->res_tree[type], i); + rb_erase(&r->node, &tracker->res_tree[type]); list_del(&r->list); kfree(r); } @@ -733,7 +776,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, int err = 0; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_QP], qpn); + r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -797,7 +840,7 @@ static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index, int err = 0; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_MPT], index); + r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -850,7 +893,7 @@ static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, int err = 0; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_EQ], index); + r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -898,7 +941,7 @@ static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn, int err; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_CQ], cqn); + r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -952,7 +995,7 @@ static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, int err = 0; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_SRQ], index); + r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -1001,7 +1044,7 @@ static void res_abort_move(struct mlx4_dev *dev, int slave, struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[type], id); + r = res_tracker_lookup(&tracker->res_tree[type], id); if (r && (r->owner == slave)) r->state = r->from_state; spin_unlock_irq(mlx4_tlock(dev)); @@ -1015,7 +1058,7 @@ static void res_end_move(struct mlx4_dev *dev, int slave, struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[type], id); + r = res_tracker_lookup(&tracker->res_tree[type], id); if (r && (r->owner == slave)) r->state = r->to_state; spin_unlock_irq(mlx4_tlock(dev)); @@ -2817,8 +2860,8 @@ static void rem_slave_qps(struct mlx4_dev *dev, int slave) switch (state) { case RES_QP_RESERVED: spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_QP], - qp->com.res_id); + rb_erase(&qp->com.node, + &tracker->res_tree[RES_QP]); list_del(&qp->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(qp); @@ -2888,8 +2931,8 @@ static void rem_slave_srqs(struct mlx4_dev *dev, int slave) case RES_SRQ_ALLOCATED: __mlx4_srq_free_icm(dev, srqn); spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_SRQ], - srqn); + rb_erase(&srq->com.node, + &tracker->res_tree[RES_SRQ]); list_del(&srq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(srq); @@ -2954,8 +2997,8 @@ static void rem_slave_cqs(struct mlx4_dev *dev, int slave) case RES_CQ_ALLOCATED: __mlx4_cq_free_icm(dev, cqn); spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_CQ], - cqn); + rb_erase(&cq->com.node, + &tracker->res_tree[RES_CQ]); list_del(&cq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(cq); @@ -3017,8 +3060,8 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) case RES_MPT_RESERVED: __mlx4_mr_release(dev, mpt->key); spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_MPT], - mptn); + rb_erase(&mpt->com.node, + &tracker->res_tree[RES_MPT]); list_del(&mpt->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(mpt); @@ -3086,8 +3129,8 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) __mlx4_free_mtt_range(dev, base, mtt->order); spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_MTT], - base); + rb_erase(&mtt->com.node, + &tracker->res_tree[RES_MTT]); list_del(&mtt->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(mtt); @@ -3133,8 +3176,8 @@ static void rem_slave_eqs(struct mlx4_dev *dev, int slave) switch (state) { case RES_EQ_RESERVED: spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_EQ], - eqn); + rb_erase(&eq->com.node, + &tracker->res_tree[RES_EQ]); list_del(&eq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(eq); @@ -3191,7 +3234,8 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave) list_for_each_entry_safe(counter, tmp, counter_list, com.list) { if (counter->com.owner == slave) { index = counter->com.res_id; - radix_tree_delete(&tracker->res_tree[RES_COUNTER], index); + rb_erase(&counter->com.node, + &tracker->res_tree[RES_COUNTER]); list_del(&counter->com.list); kfree(counter); __mlx4_counter_free(dev, index); @@ -3220,7 +3264,7 @@ static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) { if (xrcd->com.owner == slave) { xrcdn = xrcd->com.res_id; - radix_tree_delete(&tracker->res_tree[RES_XRCD], xrcdn); + rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]); list_del(&xrcd->com.list); kfree(xrcd); __mlx4_xrcd_free(dev, xrcdn); -- cgit v1.2.3 From aa1ec3dde1d818dcc94e307e25df98242aff5538 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:42 +0000 Subject: net/mlx4_core: Change resource tracking ID to be 64 bit Currently the IDs used by the resource tracker are of type u32, so far this was ok since all the different resources we were tracking could be encoded in 32bit. As a preparation step for tracking of resources whose IDs need > 32 bits such as network flow steering rules, who are 64 bit in size, move to use 64 bit based resource IDs. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 26 +++++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1a2f37285ef5..a425a984758f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1033,7 +1033,7 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port); /* resource tracker functions*/ int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource resource_type, - int resource_id, int *slave); + u64 resource_id, int *slave); void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id); int mlx4_init_resource_tracker(struct mlx4_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 80c03c802301..6bdac2955f8b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -58,7 +58,7 @@ struct mac_res { struct res_common { struct list_head list; struct rb_node node; - u32 res_id; + u64 res_id; int owner; int state; int from_state; @@ -324,7 +324,7 @@ static void *find_res(struct mlx4_dev *dev, int res_id, res_id); } -static int get_res(struct mlx4_dev *dev, int slave, int res_id, +static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, enum mlx4_resource type, void *res) { @@ -350,7 +350,7 @@ static int get_res(struct mlx4_dev *dev, int slave, int res_id, r->from_state = r->state; r->state = RES_ANY_BUSY; - mlx4_dbg(dev, "res %s id 0x%x to busy\n", + mlx4_dbg(dev, "res %s id 0x%llx to busy\n", ResourceType(type), r->res_id); if (res) @@ -363,7 +363,7 @@ exit: int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource type, - int res_id, int *slave) + u64 res_id, int *slave) { struct res_common *r; @@ -384,7 +384,7 @@ int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, return err; } -static void put_res(struct mlx4_dev *dev, int slave, int res_id, +static void put_res(struct mlx4_dev *dev, int slave, u64 res_id, enum mlx4_resource type) { struct res_common *r; @@ -516,7 +516,7 @@ static struct res_common *alloc_xrcdn_tr(int id) return &ret->com; } -static struct res_common *alloc_tr(int id, enum mlx4_resource type, int slave, +static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, int extra) { struct res_common *ret; @@ -558,7 +558,7 @@ static struct res_common *alloc_tr(int id, enum mlx4_resource type, int slave, return ret; } -static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count, +static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { int i; @@ -727,10 +727,10 @@ static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra) } } -static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count, +static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { - int i; + u64 i; int err; struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; @@ -784,7 +784,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, else { switch (state) { case RES_QP_BUSY: - mlx4_dbg(dev, "%s: failed RES_QP, 0x%x\n", + mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n", __func__, r->com.res_id); err = -EBUSY; break; @@ -793,7 +793,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, if (r->com.state == RES_QP_MAPPED && !alloc) break; - mlx4_dbg(dev, "failed RES_QP, 0x%x\n", r->com.res_id); + mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id); err = -EINVAL; break; @@ -802,7 +802,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, r->com.state == RES_QP_HW) break; else { - mlx4_dbg(dev, "failed RES_QP, 0x%x\n", + mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id); err = -EINVAL; } @@ -2794,7 +2794,7 @@ static int _move_all_busy(struct mlx4_dev *dev, int slave, if (r->state == RES_ANY_BUSY) { if (print) mlx4_dbg(dev, - "%s id 0x%x is busy\n", + "%s id 0x%llx is busy\n", ResourceType(type), r->res_id); ++busy; -- cgit v1.2.3 From 6d19993788e080edb557178cc6aba2d963edce4e Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Thu, 5 Jul 2012 04:03:43 +0000 Subject: net/mlx4_en: Re-design multicast attachments flow Currently, for every change in the net device multicast list, the driver detaches all the addresses from the HW device, and then attaches the updated list. This behavior is wrong from two aspects: first, it causes a load of firmware commands and second, there is period of time where the correct addresses are not attached, which turned into packet loss. To improve - a copy of the multicast list is saved by the driver. For every change in the multicast list, the multicast list copy is used to find the delta between those two lists and add or remove multicast addresses as needed. Reported-by: Shawn Bohrer Cc: Shawn Bohrer Signed-off-by: Hadar Hen Zion Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 143 +++++++++++++++++++------ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 16 ++- 2 files changed, 124 insertions(+), 35 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 073b85b45fc5..bedcbb30d38f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -170,33 +170,81 @@ static void mlx4_en_do_set_mac(struct work_struct *work) static void mlx4_en_clear_list(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_mc_list *tmp, *mc_to_del; - kfree(priv->mc_addrs); - priv->mc_addrs = NULL; - priv->mc_addrs_cnt = 0; + list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { + list_del(&mc_to_del->list); + kfree(mc_to_del); + } } static void mlx4_en_cache_mclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct netdev_hw_addr *ha; - char *mc_addrs; - int mc_addrs_cnt = netdev_mc_count(dev); - int i; + struct mlx4_en_mc_list *tmp; - mc_addrs = kmalloc(mc_addrs_cnt * ETH_ALEN, GFP_ATOMIC); - if (!mc_addrs) { - en_err(priv, "failed to allocate multicast list\n"); - return; - } - i = 0; - netdev_for_each_mc_addr(ha, dev) - memcpy(mc_addrs + i++ * ETH_ALEN, ha->addr, ETH_ALEN); mlx4_en_clear_list(dev); - priv->mc_addrs = mc_addrs; - priv->mc_addrs_cnt = mc_addrs_cnt; + netdev_for_each_mc_addr(ha, dev) { + tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC); + if (!tmp) { + en_err(priv, "failed to allocate multicast list\n"); + mlx4_en_clear_list(dev); + return; + } + memcpy(tmp->addr, ha->addr, ETH_ALEN); + list_add_tail(&tmp->list, &priv->mc_list); + } } +static void update_mclist_flags(struct mlx4_en_priv *priv, + struct list_head *dst, + struct list_head *src) +{ + struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc; + bool found; + + /* Find all the entries that should be removed from dst, + * These are the entries that are not found in src + */ + list_for_each_entry(dst_tmp, dst, list) { + found = false; + list_for_each_entry(src_tmp, src, list) { + if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { + found = true; + break; + } + } + if (!found) + dst_tmp->action = MCLIST_REM; + } + + /* Add entries that exist in src but not in dst + * mark them as need to add + */ + list_for_each_entry(src_tmp, src, list) { + found = false; + list_for_each_entry(dst_tmp, dst, list) { + if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { + dst_tmp->action = MCLIST_NONE; + found = true; + break; + } + } + if (!found) { + new_mc = kmalloc(sizeof(struct mlx4_en_mc_list), + GFP_KERNEL); + if (!new_mc) { + en_err(priv, "Failed to allocate current multicast list\n"); + return; + } + memcpy(new_mc, src_tmp, + sizeof(struct mlx4_en_mc_list)); + new_mc->action = MCLIST_ADD; + list_add_tail(&new_mc->list, dst); + } + } +} static void mlx4_en_set_multicast(struct net_device *dev) { @@ -214,6 +262,7 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) mcast_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; + struct mlx4_en_mc_list *mclist, *tmp; u64 mcast_addr = 0; u8 mc_list[16] = {0}; int err; @@ -336,7 +385,6 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } } else { - int i; /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, @@ -351,13 +399,6 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) if (err) en_err(priv, "Failed disabling multicast filter\n"); - /* Detach our qp from all the multicast addresses */ - for (i = 0; i < priv->mc_addrs_cnt; i++) { - memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN); - mc_list[5] = priv->port; - mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, - mc_list, MLX4_PROT_ETH); - } /* Flush mcast filter and init it with broadcast address */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, 1, MLX4_MCAST_CONFIG); @@ -367,13 +408,8 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) netif_tx_lock_bh(dev); mlx4_en_cache_mclist(dev); netif_tx_unlock_bh(dev); - for (i = 0; i < priv->mc_addrs_cnt; i++) { - mcast_addr = - mlx4_en_mac_to_u64(priv->mc_addrs + i * ETH_ALEN); - memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN); - mc_list[5] = priv->port; - mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, - mc_list, 0, MLX4_PROT_ETH); + list_for_each_entry(mclist, &priv->mc_list, list) { + mcast_addr = mlx4_en_mac_to_u64(mclist->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcast_addr, 0, MLX4_MCAST_CONFIG); } @@ -381,6 +417,38 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); + + update_mclist_flags(priv, &priv->curr_list, &priv->mc_list); + list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { + if (mclist->action == MCLIST_REM) { + /* detach this address and delete from list */ + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + mc_list[5] = priv->port; + err = mlx4_multicast_detach(mdev->dev, + &priv->rss_map.indir_qp, + mc_list, + MLX4_PROT_ETH); + if (err) + en_err(priv, "Fail to detach multicast address\n"); + + /* remove from list */ + list_del(&mclist->list); + kfree(mclist); + } + + if (mclist->action == MCLIST_ADD) { + /* attach the address */ + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + mc_list[5] = priv->port; + err = mlx4_multicast_attach(mdev->dev, + &priv->rss_map.indir_qp, + mc_list, 0, + MLX4_PROT_ETH); + if (err) + en_err(priv, "Fail to attach multicast address\n"); + + } + } } out: mutex_unlock(&mdev->state_lock); @@ -605,6 +673,9 @@ int mlx4_en_start_port(struct net_device *dev) return 0; } + INIT_LIST_HEAD(&priv->mc_list); + INIT_LIST_HEAD(&priv->curr_list); + /* Calculate Rx buf size */ dev->mtu = min(dev->mtu, priv->max_mtu); mlx4_en_calc_rx_buf(dev); @@ -760,6 +831,7 @@ void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_mc_list *mclist, *tmp; int i; u8 mc_list[16] = {0}; @@ -781,13 +853,18 @@ void mlx4_en_stop_port(struct net_device *dev) mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH); - for (i = 0; i < priv->mc_addrs_cnt; i++) { - memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN); + list_for_each_entry(mclist, &priv->curr_list, list) { + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH); } mlx4_en_clear_list(dev); + list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { + list_del(&mclist->list); + kfree(mclist); + } + /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 225c20d47900..1bb00cd22d42 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -404,6 +404,18 @@ struct mlx4_en_perf_stats { #define NUM_PERF_COUNTERS 6 }; +enum mlx4_en_mclist_act { + MCLIST_NONE, + MCLIST_REM, + MCLIST_ADD, +}; + +struct mlx4_en_mc_list { + struct list_head list; + enum mlx4_en_mclist_act action; + u8 addr[ETH_ALEN]; +}; + struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; @@ -489,8 +501,8 @@ struct mlx4_en_priv { struct mlx4_en_pkt_stats pkstats; struct mlx4_en_port_stats port_stats; u64 stats_bitmap; - char *mc_addrs; - int mc_addrs_cnt; + struct list_head mc_list; + struct list_head curr_list; struct mlx4_en_stat_out_mbox hw_stats; int vids[128]; bool wol; -- cgit v1.2.3 From c96d97f4d127b61def87b3ee056bec20cfc265d1 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:44 +0000 Subject: net/mlx4: Set steering mode according to device capabilities Instead of checking the firmware supported steering mode in various places in the code, add a dedicated field in the mlx4 device capabilities structure which is written once during the initialization flow and read across the code. This also set the grounds for add new steering modes. Currently two modes are supported, and are named after the ConnectX HW versions A0 and B0. A0 steering uses mac_index, vlan_index and priority to steer traffic into pre-defined range of QPs. B0 steering uses Ethernet L2 hashing rules and is enabled only if the firmware supports both unicast and multicast B0 steering, The current steering modes are relevant for Ethernet traffic only, such that Infiniband steering remains untouched. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 108 ++++++++++++++++--------- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- drivers/net/ethernet/mellanox/mlx4/main.c | 16 +++- drivers/net/ethernet/mellanox/mlx4/mcg.c | 70 ++++++++-------- drivers/net/ethernet/mellanox/mlx4/port.c | 9 +-- include/linux/mlx4/device.h | 24 ++++++ 6 files changed, 148 insertions(+), 81 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index bedcbb30d38f..44ff7cdb15e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -265,7 +265,7 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) struct mlx4_en_mc_list *mclist, *tmp; u64 mcast_addr = 0; u8 mc_list[16] = {0}; - int err; + int err = 0; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { @@ -300,16 +300,36 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) priv->flags |= MLX4_EN_FLAG_PROMISC; /* Enable promiscouos mode */ - if (!(mdev->dev->caps.flags & - MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, - priv->base_qpn, 1); - else - err = mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn, + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: + err = mlx4_unicast_promisc_add(mdev->dev, + priv->base_qpn, priv->port); - if (err) - en_err(priv, "Failed enabling " - "promiscuous mode\n"); + if (err) + en_err(priv, "Failed enabling unicast promiscuous mode\n"); + + /* Add the default qp number as multicast + * promisc + */ + if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { + err = mlx4_multicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed enabling multicast promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + } + break; + + case MLX4_STEERING_MODE_A0: + err = mlx4_SET_PORT_qpn_calc(mdev->dev, + priv->port, + priv->base_qpn, + 1); + if (err) + en_err(priv, "Failed enabling promiscuous mode\n"); + break; + } /* Disable port multicast filter (unconditionally) */ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, @@ -318,15 +338,6 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) en_err(priv, "Failed disabling " "multicast filter\n"); - /* Add the default qp number as multicast promisc */ - if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { - err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, - priv->port); - if (err) - en_err(priv, "Failed entering multicast promisc mode\n"); - priv->flags |= MLX4_EN_FLAG_MC_PROMISC; - } - /* Disable port VLAN filter */ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) @@ -345,22 +356,31 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ - if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, - priv->base_qpn, 0); - else - err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: + err = mlx4_unicast_promisc_remove(mdev->dev, + priv->base_qpn, priv->port); - if (err) - en_err(priv, "Failed disabling promiscuous mode\n"); + if (err) + en_err(priv, "Failed disabling unicast promiscuous mode\n"); + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + err = mlx4_multicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed disabling multicast promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } + break; - /* Disable Multicast promisc */ - if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { - err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, - priv->port); + case MLX4_STEERING_MODE_A0: + err = mlx4_SET_PORT_qpn_calc(mdev->dev, + priv->port, + priv->base_qpn, 0); if (err) - en_err(priv, "Failed disabling multicast promiscuous mode\n"); - priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + en_err(priv, "Failed disabling promiscuous mode\n"); + break; } /* Enable port VLAN filter */ @@ -378,8 +398,16 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* Add the default qp number as multicast promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { - err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, - priv->port); + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: + err = mlx4_multicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + break; + + case MLX4_STEERING_MODE_A0: + break; + } if (err) en_err(priv, "Failed entering multicast promisc mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; @@ -387,8 +415,16 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) } else { /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { - err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, - priv->port); + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: + err = mlx4_multicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + break; + + case MLX4_STEERING_MODE_A0: + break; + } if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9c83bb8151ea..40e048bac024 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1124,7 +1124,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) MLX4_PUT(inbox, (u8) (1 << 3), INIT_HCA_UC_STEERING_OFFSET); MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 14d9c762b60f..f8125a82c0cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -244,7 +244,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_srqs = dev_cap->reserved_srqs; dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; - dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); /* * Subtract 1 from the limit because we need to allocate a * spare CQE so the HCA HW can tell the difference between an @@ -275,6 +274,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { + dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + } else { + dev->caps.steering_mode = MLX4_STEERING_MODE_A0; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags " + "set to use B0 steering. Falling back to A0 steering mode.\n"); + } + mlx4_dbg(dev, "Steering mode is: %s\n", + mlx4_steering_mode_str(dev->caps.steering_mode)); + dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); + /* Sense port always allowed on supported devices for ConnectX1 and 2 */ if (dev->pdev->device != 0x1003) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index f4a8f98e402a..319c9d45d59a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -868,36 +868,50 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot) { - if (prot == MLX4_PROT_ETH && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)) - return 0; - if (prot == MLX4_PROT_ETH) - gid[7] |= (MLX4_MC_STEER << 1); + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_A0: + if (prot == MLX4_PROT_ETH) + return 0; - if (mlx4_is_mfunc(dev)) - return mlx4_QP_ATTACH(dev, qp, gid, 1, - block_mcast_loopback, prot); + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_MC_STEER << 1); - return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, - prot, MLX4_MC_STEER); + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 1, + block_mcast_loopback, prot); + return mlx4_qp_attach_common(dev, qp, gid, + block_mcast_loopback, prot, + MLX4_MC_STEER); + + default: + return -EINVAL; + } } EXPORT_SYMBOL_GPL(mlx4_multicast_attach); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot) { - if (prot == MLX4_PROT_ETH && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)) - return 0; + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_A0: + if (prot == MLX4_PROT_ETH) + return 0; - if (prot == MLX4_PROT_ETH) - gid[7] |= (MLX4_MC_STEER << 1); + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_MC_STEER << 1); - if (mlx4_is_mfunc(dev)) - return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); + + return mlx4_qp_detach_common(dev, qp, gid, prot, + MLX4_MC_STEER); - return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_MC_STEER); + default: + return -EINVAL; + } } EXPORT_SYMBOL_GPL(mlx4_multicast_detach); @@ -905,10 +919,6 @@ int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot) { - if (prot == MLX4_PROT_ETH && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - return 0; - if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); @@ -924,10 +934,6 @@ EXPORT_SYMBOL_GPL(mlx4_unicast_attach); int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot) { - if (prot == MLX4_PROT_ETH && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - return 0; - if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); @@ -968,9 +974,6 @@ static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn, int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)) - return 0; - if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port); @@ -980,9 +983,6 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)) - return 0; - if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port); @@ -992,9 +992,6 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - return 0; - if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port); @@ -1004,9 +1001,6 @@ EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add); int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - return 0; - if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index a8fb52992c64..58de7237f57a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -155,7 +155,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) return err; } - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) { + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { *qpn = info->base_qpn + index; return 0; } @@ -206,7 +206,7 @@ void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn) (unsigned long long) mac); mlx4_unregister_mac(dev, port, mac); - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) { + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { entry = radix_tree_lookup(&info->mac_tree, qpn); if (entry) { mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx," @@ -359,7 +359,7 @@ int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) int index = qpn - info->base_qpn; int err = 0; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) { + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { entry = radix_tree_lookup(&info->mac_tree, qpn); if (!entry) return -EINVAL; @@ -803,8 +803,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ? MCAST_DIRECT : MCAST_DEFAULT; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER && - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) return 0; mailbox = mlx4_alloc_cmd_mailbox(dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6a8f002b8ed3..7f5c9ee42f96 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -70,6 +70,29 @@ enum { MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) }; +/* Driver supports 2 diffrent device methods to manage traffic steering: + * - B0 steering mode - Common low level API for ib and (if supported) eth. + * - A0 steering mode - Limited low level API for eth. In case of IB, + * B0 mode is in use. + */ +enum { + MLX4_STEERING_MODE_A0, + MLX4_STEERING_MODE_B0 +}; + +static inline const char *mlx4_steering_mode_str(int steering_mode) +{ + switch (steering_mode) { + case MLX4_STEERING_MODE_A0: + return "A0 steering"; + + case MLX4_STEERING_MODE_B0: + return "B0 steering"; + default: + return "Unrecognize steering mode"; + } +} + enum { MLX4_DEV_CAP_FLAG_RC = 1LL << 0, MLX4_DEV_CAP_FLAG_UC = 1LL << 1, @@ -295,6 +318,7 @@ struct mlx4_caps { int num_amgms; int reserved_mcgs; int num_qp_per_mgm; + int steering_mode; int num_pds; int reserved_pds; int max_xrcds; -- cgit v1.2.3 From 8fcfb4db74352d3d447b7a559ad54f7577074d19 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:45 +0000 Subject: net/mlx4_core: Add firmware commands to support device managed flow steering Add support for firmware commands to attach/detach a new device managed steering mode. Such network steering rules allow the user to provide an L2/L3/L4 flow specification to the firmware and have the device to steer traffic that matches that specification to the provided QP. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 19 ++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mcg.c | 29 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 10 ++++++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 24 ++++++++++++++++++ include/linux/mlx4/cmd.h | 4 +++ 5 files changed, 86 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 842c8ce9494e..7e94987d030c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1080,6 +1080,25 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = NULL }, + /* flow steering commands */ + { + .opcode = MLX4_QP_FLOW_STEERING_ATTACH, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = true, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper + }, + { + .opcode = MLX4_QP_FLOW_STEERING_DETACH, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper + }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 319c9d45d59a..3c59a33a98a5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -62,6 +62,35 @@ int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2); } +static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox, + u32 size, + u64 *reg_id) +{ + u64 imm; + int err = 0; + + err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + return err; + *reg_id = imm; + + return err; +} + +static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid) +{ + int err = 0; + + err = mlx4_cmd(dev, regid, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + return err; +} + static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index, struct mlx4_cmd_mailbox *mailbox) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index a425a984758f..c07e882e8369 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1118,6 +1118,16 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_get_mgm_entry_size(struct mlx4_dev *dev); int mlx4_get_qp_per_mgm(struct mlx4_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 6bdac2955f8b..a8ca960f4620 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2738,6 +2738,30 @@ ex_put: return err; } +int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, + vhcr->in_modifier, 0, + MLX4_QP_FLOW_STEERING_ATTACH, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); +} + +int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return mlx4_cmd(dev, vhcr->in_param, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); +} + enum { BUSY_MAX_RETRIES = 10 }; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 1f3860a8a109..260695186256 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -154,6 +154,10 @@ enum { /* set port opcode modifiers */ MLX4_SET_PORT_PRIO2TC = 0x8, MLX4_SET_PORT_SCHEDULER = 0x9, + + /* register/delete flow steering network rules */ + MLX4_QP_FLOW_STEERING_ATTACH = 0x65, + MLX4_QP_FLOW_STEERING_DETACH = 0x66, }; enum { -- cgit v1.2.3 From 0ff1fb654bec0cff62ddf81a8a8edec4263604a0 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:46 +0000 Subject: {NET, IB}/mlx4: Add device managed flow steering firmware API The driver is modified to support three operation modes. If supported by firmware use the device managed flow steering API, that which we call device managed steering mode. Else, if the firmware supports the B0 steering mode use it, and finally, if none of the above, use the A0 steering mode. When the steering mode is device managed, the code is modified such that L2 based rules set by the mlx4_en driver for Ethernet unicast and multicast, and the IB stack multicast attach calls done through the mlx4_ib driver are all routed to use the device managed API. When attaching rule using device managed flow steering API, the firmware returns a 64 bit registration id, which is to be provided during detach. Currently the firmware is always programmed during HCA initialization to use standard L2 hashing. Future work should be done to allow configuring the flow-steering hash function with common, non proprietary means. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 62 +++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + drivers/infiniband/hw/mlx4/qp.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 21 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 91 ++++- drivers/net/ethernet/mellanox/mlx4/fw.h | 3 + drivers/net/ethernet/mellanox/mlx4/main.c | 56 +++- drivers/net/ethernet/mellanox/mlx4/mcg.c | 365 ++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 13 + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 + drivers/net/ethernet/mellanox/mlx4/port.c | 98 ++++-- drivers/net/ethernet/mellanox/mlx4/profile.c | 12 +- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 6 + include/linux/mlx4/device.h | 108 +++++- 14 files changed, 758 insertions(+), 81 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3530c41fcd1f..8a3a2037b005 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -718,26 +718,53 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, return ret; } +struct mlx4_ib_steering { + struct list_head list; + u64 reg_id; + union ib_gid gid; +}; + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u64 reg_id; + struct mlx4_ib_steering *ib_steering = NULL; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); + if (!ib_steering) + return -ENOMEM; + } - err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, - !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - MLX4_PROT_IB_IPV6); + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, + !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + MLX4_PROT_IB_IPV6, ®_id); if (err) - return err; + goto err_malloc; err = add_gid_entry(ibqp, gid); if (err) goto err_add; + if (ib_steering) { + memcpy(ib_steering->gid.raw, gid->raw, 16); + ib_steering->reg_id = reg_id; + mutex_lock(&mqp->mutex); + list_add(&ib_steering->list, &mqp->steering_rules); + mutex_unlock(&mqp->mutex); + } return 0; err_add: - mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); +err_malloc: + kfree(ib_steering); + return err; } @@ -765,9 +792,30 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) u8 mac[6]; struct net_device *ndev; struct mlx4_ib_gid_entry *ge; + u64 reg_id = 0; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + struct mlx4_ib_steering *ib_steering; + + mutex_lock(&mqp->mutex); + list_for_each_entry(ib_steering, &mqp->steering_rules, list) { + if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) { + list_del(&ib_steering->list); + break; + } + } + mutex_unlock(&mqp->mutex); + if (&ib_steering->list == &mqp->steering_rules) { + pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); + return -EINVAL; + } + reg_id = ib_steering->reg_id; + kfree(ib_steering); + } - err = mlx4_multicast_detach(mdev->dev, - &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); if (err) return err; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ff36655d23d3..42df4f7a6a5b 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -163,6 +163,7 @@ struct mlx4_ib_qp { u8 state; int mlx_type; struct list_head gid_list; + struct list_head steering_rules; }; struct mlx4_ib_srq { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8d4ed24aef93..6af19f6c2b11 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -495,6 +495,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 44ff7cdb15e5..eb5ed8e39873 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -463,7 +463,8 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) err = mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, - MLX4_PROT_ETH); + MLX4_PROT_ETH, + mclist->reg_id); if (err) en_err(priv, "Fail to detach multicast address\n"); @@ -475,11 +476,14 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) if (mclist->action == MCLIST_ADD) { /* attach the address */ memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + /* needed for B0 steering support */ mc_list[5] = priv->port; err = mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, - mc_list, 0, - MLX4_PROT_ETH); + mc_list, + priv->port, 0, + MLX4_PROT_ETH, + &mclist->reg_id); if (err) en_err(priv, "Fail to attach multicast address\n"); @@ -827,9 +831,10 @@ int mlx4_en_start_port(struct net_device *dev) /* Attach rx QP to bradcast address */ memset(&mc_list[10], 0xff, ETH_ALEN); - mc_list[5] = priv->port; + mc_list[5] = priv->port; /* needed for B0 steering support */ if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, - 0, MLX4_PROT_ETH)) + priv->port, 0, MLX4_PROT_ETH, + &priv->broadcast_id)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); /* Must redo promiscuous mode setup. */ @@ -886,14 +891,14 @@ void mlx4_en_stop_port(struct net_device *dev) /* Detach All multicasts */ memset(&mc_list[10], 0xff, ETH_ALEN); - mc_list[5] = priv->port; + mc_list[5] = priv->port; /* needed for B0 steering support */ mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, - MLX4_PROT_ETH); + MLX4_PROT_ETH, priv->broadcast_id); list_for_each_entry(mclist, &priv->curr_list, list) { memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, - mc_list, MLX4_PROT_ETH); + mc_list, MLX4_PROT_ETH, mclist->reg_id); } mlx4_en_clear_list(dev); list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 40e048bac024..1d70657058a5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -123,7 +123,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) static const char * const fname[] = { [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", - [2] = "RSS XOR Hash Function support" + [2] = "RSS XOR Hash Function support", + [3] = "Device manage flow steering support" }; int i; @@ -391,6 +392,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 +#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 +#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 @@ -474,6 +477,12 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->num_ports = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); dev_cap->max_msg_sz = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; + dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); + dev_cap->fs_max_num_qp_per_entry = field; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); @@ -1061,6 +1070,15 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) +#define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 +#define INIT_HCA_FS_PARAM_OFFSET 0x1d0 +#define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) +#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) +#define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) +#define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) +#define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22) +#define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25) +#define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) #define INIT_HCA_TPT_OFFSET 0x0f0 #define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) #define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) @@ -1119,14 +1137,44 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); - /* multicast attributes */ - - MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); - MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); - MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) - MLX4_PUT(inbox, (u8) (1 << 3), INIT_HCA_UC_STEERING_OFFSET); - MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + /* steering attributes */ + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= + cpu_to_be32(1 << + INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN); + + MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET); + MLX4_PUT(inbox, param->log_mc_entry_sz, + INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_table_sz, + INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + /* Enable Ethernet flow steering + * with udp unicast and tcp unicast + */ + MLX4_PUT(inbox, param->fs_hash_enable_bits, + INIT_HCA_FS_ETH_BITS_OFFSET); + MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, + INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET); + /* Enable IPoIB flow steering + * with udp unicast and tcp unicast + */ + MLX4_PUT(inbox, param->fs_hash_enable_bits, + INIT_HCA_FS_IB_BITS_OFFSET); + MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, + INIT_HCA_FS_IB_NUM_ADDRS_OFFSET); + } else { + MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_mc_entry_sz, + INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_hash_sz, + INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_table_sz, + INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) + MLX4_PUT(inbox, (u8) (1 << 3), + INIT_HCA_UC_STEERING_OFFSET); + } /* TPT attributes */ @@ -1188,15 +1236,24 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); - /* multicast attributes */ + /* steering attributes */ + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { - MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_hash_sz, outbox, - INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); + MLX4_GET(param->log_mc_entry_sz, outbox, + INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + MLX4_GET(param->log_mc_table_sz, outbox, + INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + } else { + MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); + MLX4_GET(param->log_mc_entry_sz, outbox, + INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MLX4_GET(param->log_mc_hash_sz, outbox, + INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + MLX4_GET(param->log_mc_table_sz, outbox, + INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + } /* TPT attributes */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 64c0399e4b78..83fcbbf1b169 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -78,6 +78,8 @@ struct mlx4_dev_cap { u16 wavelength[MLX4_MAX_PORTS + 1]; u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; + int fs_log_max_ucast_qp_range_size; + int fs_max_num_qp_per_entry; u64 flags; u64 flags2; int reserved_uars; @@ -165,6 +167,7 @@ struct mlx4_init_hca_param { u8 log_mpt_sz; u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ + u8 fs_hash_enable_bits; }; struct mlx4_init_ib_param { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index f8125a82c0cb..42645166bae2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -91,7 +91,9 @@ module_param_named(log_num_mgm_entry_size, MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" " 10 gives 248.range: 9<=" - " log_num_mgm_entry_size <= 12"); + " log_num_mgm_entry_size <= 12." + " Not in use with device managed" + " flow steering"); #define MLX4_VF (1 << 0) @@ -274,20 +276,27 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { - dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { + dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; } else { - dev->caps.steering_mode = MLX4_STEERING_MODE_A0; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { + dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + } else { + dev->caps.steering_mode = MLX4_STEERING_MODE_A0; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) - mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags " - "set to use B0 steering. Falling back to A0 steering mode.\n"); + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags " + "set to use B0 steering. Falling back to A0 steering mode.\n"); + } + dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); } mlx4_dbg(dev, "Steering mode is: %s\n", mlx4_steering_mode_str(dev->caps.steering_mode)); - dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); /* Sense port always allowed on supported devices for ConnectX1 and 2 */ if (dev->pdev->device != 0x1003) @@ -982,9 +991,11 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, } /* - * It's not strictly required, but for simplicity just map the - * whole multicast group table now. The table isn't very big - * and it's a lot easier than trying to track ref counts. + * For flow steering device managed mode it is required to use + * mlx4_init_icm_table. For B0 steering mode it's not strictly + * required, but for simplicity just map the whole multicast + * group table now. The table isn't very big and it's a lot + * easier than trying to track ref counts. */ err = mlx4_init_icm_table(dev, &priv->mcg_table.table, init_hca->mc_base, @@ -1220,7 +1231,26 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + priv->fs_hash_mode = MLX4_FS_L2_HASH; + + switch (priv->fs_hash_mode) { + case MLX4_FS_L2_HASH: + init_hca.fs_hash_enable_bits = 0; + break; + + case MLX4_FS_L2_L3_L4_HASH: + /* Enable flow steering with + * udp unicast and tcp unicast + */ + init_hca.fs_hash_enable_bits = + MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN; + break; + } + profile = default_profile; + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + profile.num_mcg = MLX4_FS_NUM_MCG; icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 3c59a33a98a5..768a2a4530e8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -41,6 +41,7 @@ #define MGM_QPN_MASK 0x00FFFFFF #define MGM_BLCK_LB_BIT 30 +#define MLX4_MAC_MASK 0xffffffffffffULL static const u8 zero_gid[16]; /* automatically initialized to 0 */ @@ -54,7 +55,12 @@ struct mlx4_mgm { int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { - return min((1 << mlx4_log_num_mgm_entry_size), MLX4_MAX_MGM_ENTRY_SIZE); + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE; + else + return min((1 << mlx4_log_num_mgm_entry_size), + MLX4_MAX_MGM_ENTRY_SIZE); } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) @@ -643,6 +649,311 @@ static int find_entry(struct mlx4_dev *dev, u8 port, return err; } +struct mlx4_net_trans_rule_hw_ctrl { + __be32 ctrl; + __be32 vf_vep_port; + __be32 qpn; + __be32 reserved; +}; + +static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, + struct mlx4_net_trans_rule_hw_ctrl *hw) +{ + static const u8 __promisc_mode[] = { + [MLX4_FS_PROMISC_NONE] = 0x0, + [MLX4_FS_PROMISC_UPLINK] = 0x1, + [MLX4_FS_PROMISC_FUNCTION_PORT] = 0x2, + [MLX4_FS_PROMISC_ALL_MULTI] = 0x3, + }; + + u32 dw = 0; + + dw = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; + dw |= ctrl->exclusive ? (1 << 2) : 0; + dw |= ctrl->allow_loopback ? (1 << 3) : 0; + dw |= __promisc_mode[ctrl->promisc_mode] << 8; + dw |= ctrl->priority << 16; + + hw->ctrl = cpu_to_be32(dw); + hw->vf_vep_port = cpu_to_be32(ctrl->port); + hw->qpn = cpu_to_be32(ctrl->qpn); +} + +struct mlx4_net_trans_rule_hw_ib { + u8 size; + u8 rsvd1; + __be16 id; + u32 rsvd2; + __be32 qpn; + __be32 qpn_mask; + u8 dst_gid[16]; + u8 dst_gid_msk[16]; +} __packed; + +struct mlx4_net_trans_rule_hw_eth { + u8 size; + u8 rsvd; + __be16 id; + u8 rsvd1[6]; + u8 dst_mac[6]; + u16 rsvd2; + u8 dst_mac_msk[6]; + u16 rsvd3; + u8 src_mac[6]; + u16 rsvd4; + u8 src_mac_msk[6]; + u8 rsvd5; + u8 ether_type_enable; + __be16 ether_type; + __be16 vlan_id_msk; + __be16 vlan_id; +} __packed; + +struct mlx4_net_trans_rule_hw_tcp_udp { + u8 size; + u8 rsvd; + __be16 id; + __be16 rsvd1[3]; + __be16 dst_port; + __be16 rsvd2; + __be16 dst_port_msk; + __be16 rsvd3; + __be16 src_port; + __be16 rsvd4; + __be16 src_port_msk; +} __packed; + +struct mlx4_net_trans_rule_hw_ipv4 { + u8 size; + u8 rsvd; + __be16 id; + __be32 rsvd1; + __be32 dst_ip; + __be32 dst_ip_msk; + __be32 src_ip; + __be32 src_ip_msk; +} __packed; + +struct _rule_hw { + union { + struct { + u8 size; + u8 rsvd; + __be16 id; + }; + struct mlx4_net_trans_rule_hw_eth eth; + struct mlx4_net_trans_rule_hw_ib ib; + struct mlx4_net_trans_rule_hw_ipv4 ipv4; + struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp; + }; +}; + +static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, + struct _rule_hw *rule_hw) +{ + static const u16 __sw_id_hw[] = { + [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001, + [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005, + [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003, + [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002, + [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004, + [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006 + }; + + static const size_t __rule_hw_sz[] = { + [MLX4_NET_TRANS_RULE_ID_ETH] = + sizeof(struct mlx4_net_trans_rule_hw_eth), + [MLX4_NET_TRANS_RULE_ID_IB] = + sizeof(struct mlx4_net_trans_rule_hw_ib), + [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, + [MLX4_NET_TRANS_RULE_ID_IPV4] = + sizeof(struct mlx4_net_trans_rule_hw_ipv4), + [MLX4_NET_TRANS_RULE_ID_TCP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), + [MLX4_NET_TRANS_RULE_ID_UDP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) + }; + if (spec->id > MLX4_NET_TRANS_RULE_NUM) { + mlx4_err(dev, "Invalid network rule id. id = %d\n", spec->id); + return -EINVAL; + } + memset(rule_hw, 0, __rule_hw_sz[spec->id]); + rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]); + rule_hw->size = __rule_hw_sz[spec->id] >> 2; + + switch (spec->id) { + case MLX4_NET_TRANS_RULE_ID_ETH: + memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN); + memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk, + ETH_ALEN); + memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN); + memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk, + ETH_ALEN); + if (spec->eth.ether_type_enable) { + rule_hw->eth.ether_type_enable = 1; + rule_hw->eth.ether_type = spec->eth.ether_type; + } + rule_hw->eth.vlan_id = spec->eth.vlan_id; + rule_hw->eth.vlan_id_msk = spec->eth.vlan_id_msk; + break; + + case MLX4_NET_TRANS_RULE_ID_IB: + rule_hw->ib.qpn = spec->ib.r_qpn; + rule_hw->ib.qpn_mask = spec->ib.qpn_msk; + memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16); + memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16); + break; + + case MLX4_NET_TRANS_RULE_ID_IPV6: + return -EOPNOTSUPP; + + case MLX4_NET_TRANS_RULE_ID_IPV4: + rule_hw->ipv4.src_ip = spec->ipv4.src_ip; + rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk; + rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip; + rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk; + break; + + case MLX4_NET_TRANS_RULE_ID_TCP: + case MLX4_NET_TRANS_RULE_ID_UDP: + rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port; + rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk; + rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port; + rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk; + break; + + default: + return -EINVAL; + } + + return __rule_hw_sz[spec->id]; +} + +static void mlx4_err_rule(struct mlx4_dev *dev, char *str, + struct mlx4_net_trans_rule *rule) +{ +#define BUF_SIZE 256 + struct mlx4_spec_list *cur; + char buf[BUF_SIZE]; + int len = 0; + + mlx4_err(dev, "%s", str); + len += snprintf(buf + len, BUF_SIZE - len, + "port = %d prio = 0x%x qp = 0x%x ", + rule->port, rule->priority, rule->qpn); + + list_for_each_entry(cur, &rule->list, list) { + switch (cur->id) { + case MLX4_NET_TRANS_RULE_ID_ETH: + len += snprintf(buf + len, BUF_SIZE - len, + "dmac = %pM ", &cur->eth.dst_mac); + if (cur->eth.ether_type) + len += snprintf(buf + len, BUF_SIZE - len, + "ethertype = 0x%x ", + be16_to_cpu(cur->eth.ether_type)); + if (cur->eth.vlan_id) + len += snprintf(buf + len, BUF_SIZE - len, + "vlan-id = %d ", + be16_to_cpu(cur->eth.vlan_id)); + break; + + case MLX4_NET_TRANS_RULE_ID_IPV4: + if (cur->ipv4.src_ip) + len += snprintf(buf + len, BUF_SIZE - len, + "src-ip = %pI4 ", + &cur->ipv4.src_ip); + if (cur->ipv4.dst_ip) + len += snprintf(buf + len, BUF_SIZE - len, + "dst-ip = %pI4 ", + &cur->ipv4.dst_ip); + break; + + case MLX4_NET_TRANS_RULE_ID_TCP: + case MLX4_NET_TRANS_RULE_ID_UDP: + if (cur->tcp_udp.src_port) + len += snprintf(buf + len, BUF_SIZE - len, + "src-port = %d ", + be16_to_cpu(cur->tcp_udp.src_port)); + if (cur->tcp_udp.dst_port) + len += snprintf(buf + len, BUF_SIZE - len, + "dst-port = %d ", + be16_to_cpu(cur->tcp_udp.dst_port)); + break; + + case MLX4_NET_TRANS_RULE_ID_IB: + len += snprintf(buf + len, BUF_SIZE - len, + "dst-gid = %pI6\n", cur->ib.dst_gid); + len += snprintf(buf + len, BUF_SIZE - len, + "dst-gid-mask = %pI6\n", + cur->ib.dst_gid_msk); + break; + + case MLX4_NET_TRANS_RULE_ID_IPV6: + break; + + default: + break; + } + } + len += snprintf(buf + len, BUF_SIZE - len, "\n"); + mlx4_err(dev, "%s", buf); + + if (len >= BUF_SIZE) + mlx4_err(dev, "Network rule error message was truncated, print buffer is too small.\n"); +} + +int mlx4_flow_attach(struct mlx4_dev *dev, + struct mlx4_net_trans_rule *rule, u64 *reg_id) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_spec_list *cur; + u32 size = 0; + int ret; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, sizeof(struct mlx4_net_trans_rule_hw_ctrl)); + trans_rule_ctrl_to_hw(rule, mailbox->buf); + + size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); + + list_for_each_entry(cur, &rule->list, list) { + ret = parse_trans_rule(dev, cur, mailbox->buf + size); + if (ret < 0) { + mlx4_free_cmd_mailbox(dev, mailbox); + return -EINVAL; + } + size += ret; + } + + ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id); + if (ret == -ENOMEM) + mlx4_err_rule(dev, + "mcg table is full. Fail to register network rule.\n", + rule); + else if (ret) + mlx4_err_rule(dev, "Fail to register network rule.\n", rule); + + mlx4_free_cmd_mailbox(dev, mailbox); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_flow_attach); + +int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) +{ + int err; + + err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id); + if (err) + mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n", + reg_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_flow_detach); + int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer) @@ -895,7 +1206,8 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, } int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - int block_mcast_loopback, enum mlx4_protocol prot) + u8 port, int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id) { switch (dev->caps.steering_mode) { @@ -914,6 +1226,42 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], block_mcast_loopback, prot, MLX4_MC_STEER); + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + struct mlx4_spec_list spec = { {NULL} }; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .promisc_mode = MLX4_FS_PROMISC_NONE, + .priority = MLX4_DOMAIN_NIC, + }; + + rule.allow_loopback = ~block_mcast_loopback; + rule.port = port; + rule.qpn = qp->qpn; + INIT_LIST_HEAD(&rule.list); + + switch (prot) { + case MLX4_PROT_ETH: + spec.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN); + memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + break; + + case MLX4_PROT_IB_IPV6: + spec.id = MLX4_NET_TRANS_RULE_ID_IB; + memcpy(spec.ib.dst_gid, gid, 16); + memset(&spec.ib.dst_gid_msk, 0xff, 16); + break; + default: + return -EINVAL; + } + list_add_tail(&spec.list, &rule.list); + + return mlx4_flow_attach(dev, &rule, reg_id); + } + default: return -EINVAL; } @@ -921,7 +1269,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], EXPORT_SYMBOL_GPL(mlx4_multicast_attach); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - enum mlx4_protocol prot) + enum mlx4_protocol prot, u64 reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_A0: @@ -938,6 +1286,9 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_MC_STEER); + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_flow_detach(dev, reg_id); + default: return -EINVAL; } @@ -1042,6 +1393,10 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int err; + /* No need for mcg_table when fw managed the mcg table*/ + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + return 0; err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms, dev->caps.num_amgms - 1, 0, 0); if (err) @@ -1054,5 +1409,7 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev) void mlx4_cleanup_mcg_table(struct mlx4_dev *dev) { - mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c07e882e8369..0084967be19e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -54,6 +54,17 @@ #define DRV_VERSION "1.1" #define DRV_RELDATE "Dec, 2011" +#define MLX4_FS_UDP_UC_EN (1 << 1) +#define MLX4_FS_TCP_UC_EN (1 << 2) +#define MLX4_FS_NUM_OF_L2_ADDR 8 +#define MLX4_FS_MGM_LOG_ENTRY_SIZE 7 +#define MLX4_FS_NUM_MCG (1 << 17) + +enum { + MLX4_FS_L2_HASH = 0, + MLX4_FS_L2_L3_L4_HASH, +}; + #define MLX4_NUM_UP 8 #define MLX4_NUM_TC 8 #define MLX4_RATELIMIT_UNITS 3 /* 100 Mbps */ @@ -704,6 +715,7 @@ struct mlx4_set_port_rqp_calc_context { struct mlx4_mac_entry { u64 mac; + u64 reg_id; }; struct mlx4_port_info { @@ -777,6 +789,7 @@ struct mlx4_priv { struct mutex bf_mutex; struct io_mapping *bf_mapping; int reserved_mtts; + int fs_hash_mode; }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 1bb00cd22d42..2d6dabe7f55d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -414,6 +414,7 @@ struct mlx4_en_mc_list { struct list_head list; enum mlx4_en_mclist_act action; u8 addr[ETH_ALEN]; + u64 reg_id; }; struct mlx4_en_frag_info { @@ -503,6 +504,7 @@ struct mlx4_en_priv { u64 stats_bitmap; struct list_head mc_list; struct list_head curr_list; + u64 broadcast_id; struct mlx4_en_stat_out_mbox hw_stats; int vids[128]; bool wol; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 58de7237f57a..a51d1b9bf1d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -75,21 +75,54 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) table->total = 0; } -static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) +static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, + u64 mac, int *qpn, u64 *reg_id) { - struct mlx4_qp qp; - u8 gid[16] = {0}; __be64 be_mac; int err; - qp.qpn = *qpn; - - mac &= 0xffffffffffffULL; + mac &= MLX4_MAC_MASK; be_mac = cpu_to_be64(mac << 16); - memcpy(&gid[10], &be_mac, ETH_ALEN); - gid[5] = port; - err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + + qp.qpn = *qpn; + memcpy(&gid[10], &be_mac, ETH_ALEN); + gid[5] = port; + + err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + struct mlx4_spec_list spec_eth = { {NULL} }; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_PROMISC_NONE, + .priority = MLX4_DOMAIN_NIC, + }; + + rule.port = port; + rule.qpn = *qpn; + INIT_LIST_HEAD(&rule.list); + + spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_eth.eth.dst_mac, &be_mac, ETH_ALEN); + memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + list_add_tail(&spec_eth.list, &rule.list); + + err = mlx4_flow_attach(dev, &rule, reg_id); + break; + } + default: + return -EINVAL; + } if (err) mlx4_warn(dev, "Failed Attaching Unicast\n"); @@ -97,19 +130,30 @@ static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) } static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port, - u64 mac, int qpn) + u64 mac, int qpn, u64 reg_id) { - struct mlx4_qp qp; - u8 gid[16] = {0}; - __be64 be_mac; + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + __be64 be_mac; - qp.qpn = qpn; - mac &= 0xffffffffffffULL; - be_mac = cpu_to_be64(mac << 16); - memcpy(&gid[10], &be_mac, ETH_ALEN); - gid[5] = port; + qp.qpn = qpn; + mac &= MLX4_MAC_MASK; + be_mac = cpu_to_be64(mac << 16); + memcpy(&gid[10], &be_mac, ETH_ALEN); + gid[5] = port; - mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); + mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + mlx4_flow_detach(dev, reg_id); + break; + } + default: + mlx4_err(dev, "Invalid steering mode.\n"); + } } static int validate_index(struct mlx4_dev *dev, @@ -144,6 +188,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) struct mlx4_mac_entry *entry; int index = 0; int err = 0; + u64 reg_id; mlx4_dbg(dev, "Registering MAC: 0x%llx for adding\n", (unsigned long long) mac); @@ -167,7 +212,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) goto qp_err; } - err = mlx4_uc_steer_add(dev, port, mac, qpn); + err = mlx4_uc_steer_add(dev, port, mac, qpn, ®_id); if (err) goto steer_err; @@ -177,6 +222,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) goto alloc_err; } entry->mac = mac; + entry->reg_id = reg_id; err = radix_tree_insert(&info->mac_tree, *qpn, entry); if (err) goto insert_err; @@ -186,7 +232,7 @@ insert_err: kfree(entry); alloc_err: - mlx4_uc_steer_release(dev, port, mac, *qpn); + mlx4_uc_steer_release(dev, port, mac, *qpn, reg_id); steer_err: mlx4_qp_release_range(dev, *qpn, 1); @@ -212,7 +258,8 @@ void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn) mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx," " qpn %d\n", port, (unsigned long long) mac, qpn); - mlx4_uc_steer_release(dev, port, entry->mac, qpn); + mlx4_uc_steer_release(dev, port, entry->mac, + qpn, entry->reg_id); mlx4_qp_release_range(dev, qpn, 1); radix_tree_delete(&info->mac_tree, qpn); kfree(entry); @@ -363,11 +410,14 @@ int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) entry = radix_tree_lookup(&info->mac_tree, qpn); if (!entry) return -EINVAL; - mlx4_uc_steer_release(dev, port, entry->mac, qpn); + mlx4_uc_steer_release(dev, port, entry->mac, + qpn, entry->reg_id); mlx4_unregister_mac(dev, port, entry->mac); entry->mac = new_mac; + entry->reg_id = 0; mlx4_register_mac(dev, port, new_mac); - err = mlx4_uc_steer_add(dev, port, entry->mac, &qpn); + err = mlx4_uc_steer_add(dev, port, entry->mac, + &qpn, &entry->reg_id); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c index b83bc928d52a..9ee4725363d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/profile.c +++ b/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -237,13 +237,19 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, init_hca->mtt_base = profile[i].start; break; case MLX4_RES_MCG: - dev->caps.num_mgms = profile[i].num >> 1; - dev->caps.num_amgms = profile[i].num >> 1; init_hca->mc_base = profile[i].start; init_hca->log_mc_entry_sz = ilog2(mlx4_get_mgm_entry_size(dev)); init_hca->log_mc_table_sz = profile[i].log_num; - init_hca->log_mc_hash_sz = profile[i].log_num - 1; + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + dev->caps.num_mgms = profile[i].num; + } else { + init_hca->log_mc_hash_sz = + profile[i].log_num - 1; + dev->caps.num_mgms = profile[i].num >> 1; + dev->caps.num_amgms = profile[i].num >> 1; + } break; default: break; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index a8ca960f4620..5a6f3555d806 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2744,6 +2744,9 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EOPNOTSUPP; return mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, @@ -2757,6 +2760,9 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EOPNOTSUPP; return mlx4_cmd(dev, vhcr->in_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7f5c9ee42f96..e45fc20bd01f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -70,14 +70,17 @@ enum { MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) }; -/* Driver supports 2 diffrent device methods to manage traffic steering: +/* Driver supports 3 diffrent device methods to manage traffic steering: + * -device managed - High level API for ib and eth flow steering. FW is + * managing flow steering tables. * - B0 steering mode - Common low level API for ib and (if supported) eth. * - A0 steering mode - Limited low level API for eth. In case of IB, * B0 mode is in use. */ enum { MLX4_STEERING_MODE_A0, - MLX4_STEERING_MODE_B0 + MLX4_STEERING_MODE_B0, + MLX4_STEERING_MODE_DEVICE_MANAGED }; static inline const char *mlx4_steering_mode_str(int steering_mode) @@ -88,6 +91,10 @@ static inline const char *mlx4_steering_mode_str(int steering_mode) case MLX4_STEERING_MODE_B0: return "B0 steering"; + + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return "Device managed flow steering"; + default: return "Unrecognize steering mode"; } @@ -125,7 +132,8 @@ enum { enum { MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0, MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, - MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2 + MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, + MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3 }; #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) @@ -319,6 +327,7 @@ struct mlx4_caps { int reserved_mcgs; int num_qp_per_mgm; int steering_mode; + int fs_log_max_ucast_qp_range_size; int num_pds; int reserved_pds; int max_xrcds; @@ -647,9 +656,94 @@ int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot); int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - int block_mcast_loopback, enum mlx4_protocol protocol); + u8 port, int block_mcast_loopback, + enum mlx4_protocol protocol, u64 *reg_id); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - enum mlx4_protocol protocol); + enum mlx4_protocol protocol, u64 reg_id); + +enum { + MLX4_DOMAIN_UVERBS = 0x1000, + MLX4_DOMAIN_ETHTOOL = 0x2000, + MLX4_DOMAIN_RFS = 0x3000, + MLX4_DOMAIN_NIC = 0x5000, +}; + +enum mlx4_net_trans_rule_id { + MLX4_NET_TRANS_RULE_ID_ETH = 0, + MLX4_NET_TRANS_RULE_ID_IB, + MLX4_NET_TRANS_RULE_ID_IPV6, + MLX4_NET_TRANS_RULE_ID_IPV4, + MLX4_NET_TRANS_RULE_ID_TCP, + MLX4_NET_TRANS_RULE_ID_UDP, + MLX4_NET_TRANS_RULE_NUM, /* should be last */ +}; + +enum mlx4_net_trans_promisc_mode { + MLX4_FS_PROMISC_NONE = 0, + MLX4_FS_PROMISC_UPLINK, + MLX4_FS_PROMISC_FUNCTION_PORT, + MLX4_FS_PROMISC_ALL_MULTI, +}; + +struct mlx4_spec_eth { + u8 dst_mac[6]; + u8 dst_mac_msk[6]; + u8 src_mac[6]; + u8 src_mac_msk[6]; + u8 ether_type_enable; + __be16 ether_type; + __be16 vlan_id_msk; + __be16 vlan_id; +}; + +struct mlx4_spec_tcp_udp { + __be16 dst_port; + __be16 dst_port_msk; + __be16 src_port; + __be16 src_port_msk; +}; + +struct mlx4_spec_ipv4 { + __be32 dst_ip; + __be32 dst_ip_msk; + __be32 src_ip; + __be32 src_ip_msk; +}; + +struct mlx4_spec_ib { + __be32 r_qpn; + __be32 qpn_msk; + u8 dst_gid[16]; + u8 dst_gid_msk[16]; +}; + +struct mlx4_spec_list { + struct list_head list; + enum mlx4_net_trans_rule_id id; + union { + struct mlx4_spec_eth eth; + struct mlx4_spec_ib ib; + struct mlx4_spec_ipv4 ipv4; + struct mlx4_spec_tcp_udp tcp_udp; + }; +}; + +enum mlx4_net_trans_hw_rule_queue { + MLX4_NET_TRANS_Q_FIFO, + MLX4_NET_TRANS_Q_LIFO, +}; + +struct mlx4_net_trans_rule { + struct list_head list; + enum mlx4_net_trans_hw_rule_queue queue_mode; + bool exclusive; + bool allow_loopback; + enum mlx4_net_trans_promisc_mode promisc_mode; + u8 port; + u16 priority; + u32 qpn; +}; + int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); @@ -692,4 +786,8 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_flow_attach(struct mlx4_dev *dev, + struct mlx4_net_trans_rule *rule, u64 *reg_id); +int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 1b9c6b064ef4fbc6f55485a8b5aab7ce889592c2 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:47 +0000 Subject: net/mlx4_core: Add resource tracking for device managed flow steering rules As with other device resources, the resource tracker is needed for supporting device managed flow steering rules under SRIOV: make sure virtual functions delete only rules created by them, and clean all rules attached by a crashed VF. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + .../net/ethernet/mellanox/mlx4/resource_tracker.c | 132 +++++++++++++++++++-- 2 files changed, 125 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 0084967be19e..d2c436b10fbf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -149,6 +149,7 @@ enum mlx4_resource { RES_VLAN, RES_EQ, RES_COUNTER, + RES_FS_RULE, MLX4_NUM_OF_RESOURCE_TYPE }; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 5a6f3555d806..c3fa91986190 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -190,6 +190,15 @@ struct res_xrcdn { int port; }; +enum res_fs_rule_states { + RES_FS_RULE_BUSY = RES_ANY_BUSY, + RES_FS_RULE_ALLOCATED, +}; + +struct res_fs_rule { + struct res_common com; +}; + static void *res_tracker_lookup(struct rb_root *root, u64 res_id) { struct rb_node *node = root->rb_node; @@ -245,6 +254,7 @@ static const char *ResourceType(enum mlx4_resource rt) case RES_MAC: return "RES_MAC"; case RES_EQ: return "RES_EQ"; case RES_COUNTER: return "RES_COUNTER"; + case RES_FS_RULE: return "RES_FS_RULE"; case RES_XRCD: return "RES_XRCD"; default: return "Unknown resource type !!!"; }; @@ -516,6 +526,20 @@ static struct res_common *alloc_xrcdn_tr(int id) return &ret->com; } +static struct res_common *alloc_fs_rule_tr(u64 id) +{ + struct res_fs_rule *ret; + + ret = kzalloc(sizeof *ret, GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_FS_RULE_ALLOCATED; + + return &ret->com; +} + static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, int extra) { @@ -549,6 +573,9 @@ static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, case RES_XRCD: ret = alloc_xrcdn_tr(id); break; + case RES_FS_RULE: + ret = alloc_fs_rule_tr(id); + break; default: return NULL; } @@ -681,6 +708,16 @@ static int remove_xrcdn_ok(struct res_xrcdn *res) return 0; } +static int remove_fs_rule_ok(struct res_fs_rule *res) +{ + if (res->com.state == RES_FS_RULE_BUSY) + return -EBUSY; + else if (res->com.state != RES_FS_RULE_ALLOCATED) + return -EPERM; + + return 0; +} + static int remove_cq_ok(struct res_cq *res) { if (res->com.state == RES_CQ_BUSY) @@ -722,6 +759,8 @@ static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra) return remove_counter_ok((struct res_counter *)res); case RES_XRCD: return remove_xrcdn_ok((struct res_xrcdn *)res); + case RES_FS_RULE: + return remove_fs_rule_ok((struct res_fs_rule *)res); default: return -EINVAL; } @@ -2744,14 +2783,28 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + int err; + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; - return mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, - vhcr->in_modifier, 0, - MLX4_QP_FLOW_STEERING_ATTACH, - MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_NATIVE); + + err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, + vhcr->in_modifier, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + return err; + + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to add flow steering resources.\n "); + /* detach rule*/ + mlx4_cmd(dev, vhcr->out_param, 0, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + } + return err; } int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, @@ -2760,12 +2813,22 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + int err; + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; - return mlx4_cmd(dev, vhcr->in_param, 0, 0, - MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_NATIVE); + + err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to remove flow steering resources.\n "); + return err; + } + + err = mlx4_cmd(dev, vhcr->in_param, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + return err; } enum { @@ -3177,6 +3240,58 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) spin_unlock_irq(mlx4_tlock(dev)); } +static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct list_head *fs_rule_list = + &tracker->slave_list[slave].res_list[RES_FS_RULE]; + struct res_fs_rule *fs_rule; + struct res_fs_rule *tmp; + int state; + u64 base; + int err; + + err = move_all_busy(dev, slave, RES_FS_RULE); + if (err) + mlx4_warn(dev, "rem_slave_fs_rule: Could not move all mtts to busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (fs_rule->com.owner == slave) { + base = fs_rule->com.res_id; + state = fs_rule->com.from_state; + while (state != 0) { + switch (state) { + case RES_FS_RULE_ALLOCATED: + /* detach rule */ + err = mlx4_cmd(dev, base, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&fs_rule->com.node, + &tracker->res_tree[RES_FS_RULE]); + list_del(&fs_rule->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + kfree(fs_rule); + state = 0; + break; + + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + static void rem_slave_eqs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -3318,5 +3433,6 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) rem_slave_mtts(dev, slave); rem_slave_counters(dev, slave); rem_slave_xrcdns(dev, slave); + rem_slave_fs_rule(dev, slave); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } -- cgit v1.2.3 From 592e49dda8122ab621cdc59cc429bdb968ee6364 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:48 +0000 Subject: net/mlx4: Implement promiscuous mode with device managed flow-steering The device managed flow steering API has three promiscuous modes: 1. Uplink - captures all the packets that arrive to the port. 2. Allmulti - captures all multicast packets arriving to the port. 3. Function port - for future use, this mode is not implemented yet. Use these modes with the flow_attach and flow_detach firmware commands according to the promiscuous state of the netdevice. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 41 ++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mcg.c | 60 ++++++++++++++++++++++++++ include/linux/mlx4/device.h | 7 +++ 3 files changed, 108 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index eb5ed8e39873..b7945a80ad15 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -301,6 +301,16 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* Enable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_add(mdev->dev, + priv->port, + priv->base_qpn, + MLX4_FS_PROMISC_UPLINK); + if (err) + en_err(priv, "Failed enabling promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + break; + case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn, @@ -357,6 +367,15 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* Disable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_UPLINK); + if (err) + en_err(priv, "Failed disabling promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + break; + case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, @@ -399,6 +418,13 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* Add the default qp number as multicast promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_add(mdev->dev, + priv->port, + priv->base_qpn, + MLX4_FS_PROMISC_ALL_MULTI); + break; + case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, @@ -416,6 +442,12 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_ALL_MULTI); + break; + case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, @@ -839,6 +871,15 @@ int mlx4_en_start_port(struct net_device *dev) /* Must redo promiscuous mode setup. */ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_UPLINK); + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_ALL_MULTI); + } /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->mcast_task); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 768a2a4530e8..bc62f536ffae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1295,6 +1295,66 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], } EXPORT_SYMBOL_GPL(mlx4_multicast_detach); +int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, + u32 qpn, enum mlx4_net_trans_promisc_mode mode) +{ + struct mlx4_net_trans_rule rule; + u64 *regid_p; + + switch (mode) { + case MLX4_FS_PROMISC_UPLINK: + case MLX4_FS_PROMISC_FUNCTION_PORT: + regid_p = &dev->regid_promisc_array[port]; + break; + case MLX4_FS_PROMISC_ALL_MULTI: + regid_p = &dev->regid_allmulti_array[port]; + break; + default: + return -1; + } + + if (*regid_p != 0) + return -1; + + rule.promisc_mode = mode; + rule.port = port; + rule.qpn = qpn; + INIT_LIST_HEAD(&rule.list); + mlx4_err(dev, "going promisc on %x\n", port); + + return mlx4_flow_attach(dev, &rule, regid_p); +} +EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add); + +int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, + enum mlx4_net_trans_promisc_mode mode) +{ + int ret; + u64 *regid_p; + + switch (mode) { + case MLX4_FS_PROMISC_UPLINK: + case MLX4_FS_PROMISC_FUNCTION_PORT: + regid_p = &dev->regid_promisc_array[port]; + break; + case MLX4_FS_PROMISC_ALL_MULTI: + regid_p = &dev->regid_allmulti_array[port]; + break; + default: + return -1; + } + + if (*regid_p == 0) + return -1; + + ret = mlx4_flow_detach(dev, *regid_p); + if (ret == 0) + *regid_p = 0; + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove); + int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e45fc20bd01f..6f0d133cc7ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -542,6 +542,8 @@ struct mlx4_dev { u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; int num_vfs; + u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; + u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; }; struct mlx4_init_port_param { @@ -681,6 +683,7 @@ enum mlx4_net_trans_rule_id { enum mlx4_net_trans_promisc_mode { MLX4_FS_PROMISC_NONE = 0, MLX4_FS_PROMISC_UPLINK, + /* For future use. Not implemented yet */ MLX4_FS_PROMISC_FUNCTION_PORT, MLX4_FS_PROMISC_ALL_MULTI, }; @@ -744,6 +747,10 @@ struct mlx4_net_trans_rule { u32 qpn; }; +int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, + enum mlx4_net_trans_promisc_mode mode); +int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, + enum mlx4_net_trans_promisc_mode mode); int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); -- cgit v1.2.3 From 820672812f8284143f933da8ccc60e296230d25d Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:49 +0000 Subject: net/mlx4_en: Manage flow steering rules with ethtool Implement the ethtool APIs for attaching L2/L3/L4 based flow steering rules to the netdevice RX rings. Added set_rxnfc callback and enhanced the existing get_rxnfc callback. Signed-off-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 382 ++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 7 + 2 files changed, 389 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 72901ce2b088..3e72a2076fb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -38,6 +38,10 @@ #include "mlx4_en.h" #include "en_port.h" +#define EN_ETHTOOL_QP_ATTACH (1ull << 63) +#define EN_ETHTOOL_MAC_MASK 0xffffffffffffULL +#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff) +#define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff) static void mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) @@ -599,16 +603,369 @@ static int mlx4_en_set_rxfh_indir(struct net_device *dev, return err; } +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int mlx4_en_validate_flow(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_usrip4_spec *l3_mask; + struct ethtool_tcpip4_spec *l4_mask; + struct ethhdr *eth_mask; + u64 full_mac = ~0ull; + u64 zero_mac = 0; + + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + switch (cmd->fs.flow_type & ~FLOW_EXT) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + if (cmd->fs.m_u.tcp_ip4_spec.tos) + return -EINVAL; + l4_mask = &cmd->fs.m_u.tcp_ip4_spec; + /* don't allow mask which isn't all 0 or 1 */ + if (!all_zeros_or_all_ones(l4_mask->ip4src) || + !all_zeros_or_all_ones(l4_mask->ip4dst) || + !all_zeros_or_all_ones(l4_mask->psrc) || + !all_zeros_or_all_ones(l4_mask->pdst)) + return -EINVAL; + break; + case IP_USER_FLOW: + l3_mask = &cmd->fs.m_u.usr_ip4_spec; + if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto || + cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 || + (!l3_mask->ip4src && !l3_mask->ip4dst) || + !all_zeros_or_all_ones(l3_mask->ip4src) || + !all_zeros_or_all_ones(l3_mask->ip4dst)) + return -EINVAL; + break; + case ETHER_FLOW: + eth_mask = &cmd->fs.m_u.ether_spec; + /* source mac mask must not be set */ + if (memcmp(eth_mask->h_source, &zero_mac, ETH_ALEN)) + return -EINVAL; + + /* dest mac mask must be ff:ff:ff:ff:ff:ff */ + if (memcmp(eth_mask->h_dest, &full_mac, ETH_ALEN)) + return -EINVAL; + + if (!all_zeros_or_all_ones(eth_mask->h_proto)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if ((cmd->fs.flow_type & FLOW_EXT)) { + if (cmd->fs.m_ext.vlan_etype || + !(cmd->fs.m_ext.vlan_tci == 0 || + cmd->fs.m_ext.vlan_tci == cpu_to_be16(0xfff))) + return -EINVAL; + } + + return 0; +} + +static int add_ip_rule(struct mlx4_en_priv *priv, + struct ethtool_rxnfc *cmd, + struct list_head *list_h) +{ + struct mlx4_spec_list *spec_l3; + struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec; + + spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL); + if (!spec_l3) { + en_err(priv, "Fail to alloc ethtool rule.\n"); + return -ENOMEM; + } + + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; + spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src; + if (l3_mask->ip4src) + spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst; + if (l3_mask->ip4dst) + spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK; + list_add_tail(&spec_l3->list, list_h); + + return 0; +} + +static int add_tcp_udp_rule(struct mlx4_en_priv *priv, + struct ethtool_rxnfc *cmd, + struct list_head *list_h, int proto) +{ + struct mlx4_spec_list *spec_l3; + struct mlx4_spec_list *spec_l4; + struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec; + + spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL); + spec_l4 = kzalloc(sizeof *spec_l4, GFP_KERNEL); + if (!spec_l4 || !spec_l3) { + en_err(priv, "Fail to alloc ethtool rule.\n"); + kfree(spec_l3); + kfree(spec_l4); + return -ENOMEM; + } + + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; + + if (proto == TCP_V4_FLOW) { + spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP; + spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst; + spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc; + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst; + } else { + spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP; + spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst; + spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc; + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst; + } + + if (l4_mask->ip4src) + spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK; + if (l4_mask->ip4dst) + spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK; + + if (l4_mask->psrc) + spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK; + if (l4_mask->pdst) + spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK; + + list_add_tail(&spec_l3->list, list_h); + list_add_tail(&spec_l4->list, list_h); + + return 0; +} + +static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev, + struct ethtool_rxnfc *cmd, + struct list_head *rule_list_h) +{ + int err; + u64 mac; + __be64 be_mac; + struct ethhdr *eth_spec; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_spec_list *spec_l2; + __be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16); + + err = mlx4_en_validate_flow(dev, cmd); + if (err) + return err; + + spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL); + if (!spec_l2) + return -ENOMEM; + + mac = priv->mac & EN_ETHTOOL_MAC_MASK; + be_mac = cpu_to_be64(mac << 16); + + spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN); + if ((cmd->fs.flow_type & ~FLOW_EXT) != ETHER_FLOW) + memcpy(spec_l2->eth.dst_mac, &be_mac, ETH_ALEN); + + if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) { + spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci; + spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff); + } + + list_add_tail(&spec_l2->list, rule_list_h); + + switch (cmd->fs.flow_type & ~FLOW_EXT) { + case ETHER_FLOW: + eth_spec = &cmd->fs.h_u.ether_spec; + memcpy(&spec_l2->eth.dst_mac, eth_spec->h_dest, ETH_ALEN); + spec_l2->eth.ether_type = eth_spec->h_proto; + if (eth_spec->h_proto) + spec_l2->eth.ether_type_enable = 1; + break; + case IP_USER_FLOW: + err = add_ip_rule(priv, cmd, rule_list_h); + break; + case TCP_V4_FLOW: + err = add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW); + break; + case UDP_V4_FLOW: + err = add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW); + break; + } + + return err; +} + +static int mlx4_en_flow_replace(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + int err; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ethtool_flow_id *loc_rule; + struct mlx4_spec_list *spec, *tmp_spec; + u32 qpn; + u64 reg_id; + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_PROMISC_NONE, + }; + + rule.port = priv->port; + rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location; + INIT_LIST_HEAD(&rule.list); + + /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */ + if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC) + return -EINVAL; + else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) { + qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1); + } else { + if (cmd->fs.ring_cookie >= priv->rx_ring_num) { + en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist.\n", + cmd->fs.ring_cookie); + return -EINVAL; + } + qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn; + if (!qpn) { + en_warn(priv, "rxnfc: RX ring (%llu) is inactive.\n", + cmd->fs.ring_cookie); + return -EINVAL; + } + } + rule.qpn = qpn; + err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list); + if (err) + goto out_free_list; + + loc_rule = &priv->ethtool_rules[cmd->fs.location]; + if (loc_rule->id) { + err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id); + if (err) { + en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n", + cmd->fs.location, loc_rule->id); + goto out_free_list; + } + loc_rule->id = 0; + memset(&loc_rule->flow_spec, 0, + sizeof(struct ethtool_rx_flow_spec)); + } + err = mlx4_flow_attach(priv->mdev->dev, &rule, ®_id); + if (err) { + en_err(priv, "Fail to attach network rule at location %d.\n", + cmd->fs.location); + goto out_free_list; + } + loc_rule->id = reg_id; + memcpy(&loc_rule->flow_spec, &cmd->fs, + sizeof(struct ethtool_rx_flow_spec)); + +out_free_list: + list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) { + list_del(&spec->list); + kfree(spec); + } + return err; +} + +static int mlx4_en_flow_detach(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct ethtool_flow_id *rule; + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + rule = &priv->ethtool_rules[cmd->fs.location]; + if (!rule->id) { + err = -ENOENT; + goto out; + } + + err = mlx4_flow_detach(priv->mdev->dev, rule->id); + if (err) { + en_err(priv, "Fail to detach network rule at location %d. registration id = 0x%llx\n", + cmd->fs.location, rule->id); + goto out; + } + rule->id = 0; + memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec)); +out: + return err; + +} + +static int mlx4_en_get_flow(struct net_device *dev, struct ethtool_rxnfc *cmd, + int loc) +{ + int err = 0; + struct ethtool_flow_id *rule; + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + rule = &priv->ethtool_rules[loc]; + if (rule->id) + memcpy(&cmd->fs, &rule->flow_spec, + sizeof(struct ethtool_rx_flow_spec)); + else + err = -ENOENT; + + return err; +} + +static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv) +{ + + int i, res = 0; + for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) { + if (priv->ethtool_rules[i].id) + res++; + } + return res; + +} + static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; int err = 0; + int i = 0, priority = 0; + + if ((cmd->cmd == ETHTOOL_GRXCLSRLCNT || + cmd->cmd == ETHTOOL_GRXCLSRULE || + cmd->cmd == ETHTOOL_GRXCLSRLALL) && + mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EINVAL; switch (cmd->cmd) { case ETHTOOL_GRXRINGS: cmd->data = priv->rx_ring_num; break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = mlx4_en_get_num_flows(priv); + break; + case ETHTOOL_GRXCLSRULE: + err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { + err = mlx4_en_get_flow(dev, cmd, i); + if (!err) + rule_locs[priority++] = i; + i++; + } + err = 0; + break; default: err = -EOPNOTSUPP; break; @@ -617,6 +974,30 @@ static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, return err; } +static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EINVAL; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx4_en_flow_replace(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx4_en_flow_detach(dev, cmd); + break; + default: + en_warn(priv, "Unsupported ethtool command. (%d)\n", cmd->cmd); + return -EINVAL; + } + + return err; +} + const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, .get_settings = mlx4_en_get_settings, @@ -637,6 +1018,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_ringparam = mlx4_en_get_ringparam, .set_ringparam = mlx4_en_set_ringparam, .get_rxnfc = mlx4_en_get_rxnfc, + .set_rxnfc = mlx4_en_set_rxnfc, .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, .get_rxfh_indir = mlx4_en_get_rxfh_indir, .set_rxfh_indir = mlx4_en_set_rxfh_indir, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 2d6dabe7f55d..8882e70493fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -75,6 +75,7 @@ #define STAMP_SHIFT 31 #define STAMP_VAL 0x7fffffff #define STATS_DELAY (HZ / 4) +#define MAX_NUM_OF_FS_RULES 256 /* Typical TSO descriptor with 16 gather entries is 352 bytes... */ #define MAX_DESC_SIZE 512 @@ -435,6 +436,11 @@ struct mlx4_en_frag_info { #endif +struct ethtool_flow_id { + struct ethtool_rx_flow_spec flow_spec; + u64 id; +}; + struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; @@ -444,6 +450,7 @@ struct mlx4_en_priv { struct net_device_stats ret_stats; struct mlx4_en_port_state port_state; spinlock_t stats_lock; + struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES]; unsigned long last_moder_packets[MAX_RX_RINGS]; unsigned long last_moder_tx_packets; -- cgit v1.2.3 From cabdc8ee3768ceb6367e88c6fe84a66dd667bdf9 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 5 Jul 2012 04:03:50 +0000 Subject: net/mlx4_en: Add support for drop action through ethtool The drop action is implemented by allocating a QP and keeping it in a reset state such that the HW drops any packets which are steered to that QP. When a drop action is requested, we attach the relevant flow to that QP. Sign-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 9 +++++++- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 30 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 +++ 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 3e72a2076fb5..dd6a77b21149 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -821,7 +821,7 @@ static int mlx4_en_flow_replace(struct net_device *dev, /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */ if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC) - return -EINVAL; + qpn = priv->drop_qp.qpn; else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) { qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1); } else { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index b7945a80ad15..94375a8c6d42 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -796,6 +796,10 @@ int mlx4_en_start_port(struct net_device *dev) goto mac_err; } + err = mlx4_en_create_drop_qp(priv); + if (err) + goto rss_err; + /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ @@ -895,7 +899,8 @@ tx_err: mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]); } - + mlx4_en_destroy_drop_qp(priv); +rss_err: mlx4_en_release_rss_steer(priv); mac_err: mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn); @@ -950,6 +955,8 @@ void mlx4_en_stop_port(struct net_device *dev) /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); + mlx4_en_destroy_drop_qp(priv); + /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d49a7ac3187d..a04cbf767eb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -844,6 +844,36 @@ out: return err; } +int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) +{ + int err; + u32 qpn; + + err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn); + if (err) { + en_err(priv, "Failed reserving drop qpn\n"); + return err; + } + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); + if (err) { + en_err(priv, "Failed allocating drop qp\n"); + mlx4_qp_release_range(priv->mdev->dev, qpn, 1); + return err; + } + + return 0; +} + +void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv) +{ + u32 qpn; + + qpn = priv->drop_qp.qpn; + mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp); + mlx4_qp_free(priv->mdev->dev, &priv->drop_qp); + mlx4_qp_release_range(priv->mdev->dev, qpn, 1); +} + /* Allocate rx qp's and configure them according to rss map */ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 8882e70493fe..a12632150b34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -500,6 +500,7 @@ struct mlx4_en_priv { struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS]; struct mlx4_en_cq *tx_cq; struct mlx4_en_cq rx_cq[MAX_RX_RINGS]; + struct mlx4_qp drop_qp; struct work_struct mcast_task; struct work_struct mac_task; struct work_struct watchdog_task; @@ -586,6 +587,8 @@ void mlx4_en_unmap_buffer(struct mlx4_buf *buf); void mlx4_en_calc_rx_buf(struct net_device *dev); int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); +int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv); +void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv); int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring); void mlx4_en_rx_irq(struct mlx4_cq *mcq); -- cgit v1.2.3 From 752a50cab600c6d46c5a1921c6a6d2fb116c8a4b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:33 +0300 Subject: mlx4_core: Pass an invalid PCI id number to VFs Currently, VFs have 0 in their dev->caps.function field. This is a valid pci id (usually of the PF). Instead, pass an invalid PCI id to the VF via QUERY_FW, so that if the value gets accessed in the VF driver, we'll catch the problem. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 10 +++++++--- include/linux/mlx4/device.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9c83bb8151ea..4281ce09add8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -881,11 +881,12 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev) ((fw_ver & 0xffff0000ull) >> 16) | ((fw_ver & 0x0000ffffull) << 16); + MLX4_GET(lg, outbox, QUERY_FW_PPF_ID); + dev->caps.function = lg; + if (mlx4_is_slave(dev)) goto out; - MLX4_GET(lg, outbox, QUERY_FW_PPF_ID); - dev->caps.function = lg; MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET); if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV || @@ -966,9 +967,12 @@ int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; - /* for slaves, zero out everything except FW version */ + /* for slaves, set pci PPF ID to invalid and zero out everything + * else except FW version */ outbuf[0] = outbuf[1] = 0; memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8); + outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID; + return 0; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6a8f002b8ed3..8eadf0f14cc5 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -534,6 +534,8 @@ struct mlx4_init_port_param { if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) +#define MLX4_INVALID_SLAVE_ID 0xFF + static inline int mlx4_is_master(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_MASTER; -- cgit v1.2.3 From 00f5ce99dc6ee46c3113393cc8fa12173f9bbcd7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:40 +0300 Subject: mlx4: Use port management change event instead of smp_snoop The port management change event can replace smp_snoop. If the capability bit for this event is set in dev-caps, the event is used (by the driver setting the PORT_MNG_CHG_EVENT bit in the async event mask in the MAP_EQ fw command). In this case, when the driver passes incoming SMP PORT_INFO SET mads to the FW, the FW generates port management change events to signal any changes to the driver. If the FW generates these events, smp_snoop shouldn't be invoked in ib_process_mad(), or duplicate events will occur (once from the FW-generated event, and once from smp_snoop). In the case where the FW does not generate port management change events smp_snoop needs to be invoked to create these events. The flow in smp_snoop has been modified to make use of the same procedures as in the fw-generated-event event case to generate the port management events (LID change, Client-rereg, Pkey change, and/or GID change). Port management change event handling required changing the mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument (last argument) had to be changed to unsigned long in order to accomodate passing the EQE pointer. We also needed to move the definition of struct mlx4_eqe from net/mlx4.h to file device.h -- to make it available to the IB driver, to handle port management change events. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 118 ++++++++++++++++++++------- drivers/infiniband/hw/mlx4/main.c | 29 +++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 9 ++ drivers/net/ethernet/mellanox/mlx4/en_main.c | 5 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 22 ++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 1 + drivers/net/ethernet/mellanox/mlx4/intf.c | 5 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 63 +------------- include/linux/mlx4/device.h | 99 +++++++++++++++++++++- include/linux/mlx4/driver.h | 3 +- 10 files changed, 249 insertions(+), 105 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 84786a9fb64f..58c45fb5bd31 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -147,47 +147,49 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) } /* - * Snoop SM MADs for port info and P_Key table sets, so we can - * synthesize LID change and P_Key change events. + * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can + * synthesize LID change, Client-Rereg, GID change, and P_Key change events. */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, - u16 prev_lid) + u16 prev_lid) { - struct ib_event event; + struct ib_port_info *pinfo; + u16 lid; + struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - mad->mad_hdr.method == IB_MGMT_METHOD_SET) { - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { - struct ib_port_info *pinfo = - (struct ib_port_info *) ((struct ib_smp *) mad)->data; - u16 lid = be16_to_cpu(pinfo->lid); + mad->mad_hdr.method == IB_MGMT_METHOD_SET) + switch (mad->mad_hdr.attr_id) { + case IB_SMP_ATTR_PORT_INFO: + pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; + lid = be16_to_cpu(pinfo->lid); - update_sm_ah(to_mdev(ibdev), port_num, + update_sm_ah(dev, port_num, be16_to_cpu(pinfo->sm_lid), pinfo->neighbormtu_mastersmsl & 0xf); - event.device = ibdev; - event.element.port_num = port_num; + if (pinfo->clientrereg_resv_subnetto & 0x80) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_CLIENT_REREGISTER); - if (pinfo->clientrereg_resv_subnetto & 0x80) { - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } + if (prev_lid != lid) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_LID_CHANGE); + break; - if (prev_lid != lid) { - event.event = IB_EVENT_LID_CHANGE; - ib_dispatch_event(&event); - } - } + case IB_SMP_ATTR_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { - event.device = ibdev; - event.event = IB_EVENT_PKEY_CHANGE; - event.element.port_num = port_num; - ib_dispatch_event(&event); + case IB_SMP_ATTR_GUID_INFO: + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_GID_CHANGE); + break; + default: + break; } - } } static void node_desc_override(struct ib_device *dev, @@ -305,7 +307,8 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) + smp_snoop(ibdev, port_num, in_mad, prev_lid); node_desc_override(ibdev, out_mad); } @@ -446,3 +449,62 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) ib_destroy_ah(dev->sm_ah[p]); } } + +void handle_port_mgmt_change_event(struct work_struct *work) +{ + struct ib_event_work *ew = container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *dev = ew->ib_dev; + struct mlx4_eqe *eqe = &(ew->ib_eqe); + u8 port = eqe->event.port_mgmt_change.port; + u32 changed_attr; + + switch (eqe->subtype) { + case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: + changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr); + + /* Update the SM ah - This should be done before handling + the other changed attributes so that MADs can be sent to the SM */ + if (changed_attr & MSTR_SM_CHANGE_MASK) { + u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid); + u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf; + update_sm_ah(dev, port, lid, sl); + } + + /* Check if it is a lid change event */ + if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE); + + /* Generate GUID changed event */ + if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + + if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) + mlx4_ib_dispatch_event(dev, port, + IB_EVENT_CLIENT_REREGISTER); + break; + + case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); + break; + case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + break; + default: + pr_warn("Unsupported subtype 0x%x for " + "Port Management Change event\n", eqe->subtype); + } + + kfree(ew); +} + +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type) +{ + struct ib_event event; + + event.device = &dev->ib_dev; + event.element.port_num = port_num; + event.event = type; + + ib_dispatch_event(&event); +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 5266b49c46ee..4f230c26622d 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -898,7 +898,6 @@ static void update_gids_task(struct work_struct *work) union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; - struct ib_event event; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { @@ -916,10 +915,7 @@ static void update_gids_task(struct work_struct *work) pr_warn("set port command failed\n"); else { memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); - event.device = &gw->dev->ib_dev; - event.element.port_num = gw->port; - event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); } mlx4_free_cmd_mailbox(dev, mailbox); @@ -1383,10 +1379,18 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) } static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, - enum mlx4_dev_event event, int port) + enum mlx4_dev_event event, unsigned long param) { struct ib_event ibev; struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); + struct mlx4_eqe *eqe = NULL; + struct ib_event_work *ew; + int port = 0; + + if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) + eqe = (struct mlx4_eqe *)param; + else + port = (u8)param; if (port > ibdev->num_ports) return; @@ -1405,6 +1409,19 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, ibev.event = IB_EVENT_DEVICE_FATAL; break; + case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: + ew = kmalloc(sizeof *ew, GFP_ATOMIC); + if (!ew) { + pr_err("failed to allocate memory for events work\n"); + break; + } + + INIT_WORK(&ew->work, handle_port_mgmt_change_event); + memcpy(&ew->ib_eqe, eqe, sizeof *eqe); + ew->ib_dev = ibdev; + handle_port_mgmt_change_event(&ew->work); + return; + default: return; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 5f298afaa81f..23bfbf9ee0e0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -224,6 +224,12 @@ struct mlx4_ib_dev { int eq_added; }; +struct ib_event_work { + struct work_struct work; + struct mlx4_ib_dev *ib_dev; + struct mlx4_eqe ib_eqe; +}; + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -381,4 +387,7 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid); +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type); + #endif /* MLX4_IB_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 69ba57270481..a52922ed85c1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -131,7 +131,7 @@ static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) } static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr, - enum mlx4_dev_event event, int port) + enum mlx4_dev_event event, unsigned long port) { struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr; struct mlx4_en_priv *priv; @@ -156,7 +156,8 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr, if (port < 1 || port > dev->caps.num_ports || !mdev->pndev[port]) return; - mlx4_warn(mdev, "Unhandled event %d for port %d\n", event, port); + mlx4_warn(mdev, "Unhandled event %d for port %d\n", event, + (int) port); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index bce98d9c0039..9b15d0219950 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -82,6 +82,15 @@ enum { (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \ (1ull << MLX4_EVENT_TYPE_FATAL_WARNING)) +static u64 get_async_ev_mask(struct mlx4_dev *dev) +{ + u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT); + + return async_ev_mask; +} + static void eq_set_ci(struct mlx4_eq *eq, int req_not) { __raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) | @@ -473,6 +482,11 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; + case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT: + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, + (unsigned long) eqe); + break; + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: case MLX4_EVENT_TYPE_ECC_DETECT: default: @@ -956,7 +970,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) priv->eq_table.have_irq = 1; } - err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); if (err) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", @@ -996,7 +1010,7 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; - mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, + mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); mlx4_free_irqs(dev); @@ -1040,7 +1054,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) mlx4_cmd_use_polling(dev); /* Map the new eq to handle all asyncronous events */ - err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[i].eqn); if (err) { mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); @@ -1054,7 +1068,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) } /* Return to default */ - mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 4281ce09add8..ee9d6b0b4d20 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -109,6 +109,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", [48] = "Counters support", + [59] = "Port management change event support", }; int i; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index b4e9f6f5cc04..116895ac8b35 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -115,7 +115,8 @@ void mlx4_unregister_interface(struct mlx4_interface *intf) } EXPORT_SYMBOL_GPL(mlx4_unregister_interface); -void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port) +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, + unsigned long param) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_device_context *dev_ctx; @@ -125,7 +126,7 @@ void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int por list_for_each_entry(dev_ctx, &priv->ctx_list, list) if (dev_ctx->intf->event) - dev_ctx->intf->event(dev, dev_ctx->context, type, port); + dev_ctx->intf->event(dev, dev_ctx->context, type, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e5d20220762c..4d11d12b9db4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -338,66 +338,6 @@ struct mlx4_srq_context { __be64 db_rec_addr; }; -struct mlx4_eqe { - u8 reserved1; - u8 type; - u8 reserved2; - u8 subtype; - union { - u32 raw[6]; - struct { - __be32 cqn; - } __packed comp; - struct { - u16 reserved1; - __be16 token; - u32 reserved2; - u8 reserved3[3]; - u8 status; - __be64 out_param; - } __packed cmd; - struct { - __be32 qpn; - } __packed qp; - struct { - __be32 srqn; - } __packed srq; - struct { - __be32 cqn; - u32 reserved1; - u8 reserved2[3]; - u8 syndrome; - } __packed cq_err; - struct { - u32 reserved1[2]; - __be32 port; - } __packed port_change; - struct { - #define COMM_CHANNEL_BIT_ARRAY_SIZE 4 - u32 reserved; - u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE]; - } __packed comm_channel_arm; - struct { - u8 port; - u8 reserved[3]; - __be64 mac; - } __packed mac_update; - struct { - u8 port; - } __packed sw_event; - struct { - __be32 slave_id; - } __packed flr_event; - struct { - __be16 current_temperature; - __be16 warning_threshold; - } __packed warming; - } event; - u8 slave_id; - u8 reserved3[2]; - u8 owner; -} __packed; - struct mlx4_eq { struct mlx4_dev *dev; void __iomem *doorbell; @@ -887,7 +827,8 @@ void mlx4_catas_init(void); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); -void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port); +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, + unsigned long param); struct mlx4_dev_cap; struct mlx4_init_hca_param; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8eadf0f14cc5..560b2201519f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -96,7 +96,8 @@ enum { MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, - MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55 + MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, + MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, }; enum { @@ -138,6 +139,7 @@ enum mlx4_event { MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18, MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b, MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, + MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d, MLX4_EVENT_TYPE_NONE = 0xff, }; @@ -235,6 +237,24 @@ enum { MLX4_MAX_FAST_REG_PAGES = 511, }; +enum { + MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, + MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, + MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16, +}; + +/* Port mgmt change event handling */ +enum { + MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK = 1 << 0, + MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK = 1 << 1, + MLX4_EQ_PORT_INFO_LID_CHANGE_MASK = 1 << 2, + MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK = 1 << 3, + MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, +}; + +#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ + MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; @@ -511,6 +531,81 @@ struct mlx4_dev { int num_vfs; }; +struct mlx4_eqe { + u8 reserved1; + u8 type; + u8 reserved2; + u8 subtype; + union { + u32 raw[6]; + struct { + __be32 cqn; + } __packed comp; + struct { + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; + } __packed cmd; + struct { + __be32 qpn; + } __packed qp; + struct { + __be32 srqn; + } __packed srq; + struct { + __be32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; + } __packed cq_err; + struct { + u32 reserved1[2]; + __be32 port; + } __packed port_change; + struct { + #define COMM_CHANNEL_BIT_ARRAY_SIZE 4 + u32 reserved; + u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE]; + } __packed comm_channel_arm; + struct { + u8 port; + u8 reserved[3]; + __be64 mac; + } __packed mac_update; + struct { + __be32 slave_id; + } __packed flr_event; + struct { + __be16 current_temperature; + __be16 warning_threshold; + } __packed warming; + struct { + u8 reserved[3]; + u8 port; + union { + struct { + __be16 mstr_sm_lid; + __be16 port_lid; + __be32 changed_attr; + u8 reserved[3]; + u8 mstr_sm_sl; + __be64 gid_prefix; + } __packed port_info; + struct { + __be32 block_ptr; + __be32 tbl_entries_mask; + } __packed tbl_change_info; + } params; + } __packed port_mgmt_change; + } event; + u8 slave_id; + u8 reserved3[2]; + u8 owner; +} __packed; + struct mlx4_init_port_param { int set_guid0; int set_node_guid; @@ -536,6 +631,8 @@ struct mlx4_init_port_param { #define MLX4_INVALID_SLAVE_ID 0xFF +void handle_port_mgmt_change_event(struct work_struct *work); + static inline int mlx4_is_master(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_MASTER; diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 5f1298b1b5ef..0f509229fb3d 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -42,13 +42,14 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_PORT_UP, MLX4_DEV_EVENT_PORT_DOWN, MLX4_DEV_EVENT_PORT_REINIT, + MLX4_DEV_EVENT_PORT_MGMT_CHANGE, }; struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); void (*event) (struct mlx4_dev *dev, void *context, - enum mlx4_dev_event event, int port); + enum mlx4_dev_event event, unsigned long param); void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); struct list_head list; enum mlx4_protocol protocol; -- cgit v1.2.3 From 2aca1172c2f5b27fbc37297574f716c1c15f4153 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:41 +0300 Subject: net/mlx4_core: Initialize IB port capabilities for all slaves With IB SR-IOV, each slave has its own separate copy of the port capabilities flags. For example, the master can run a subnet manager (which causes the IsSM bit to be set in the master's port capabilities) without affecting the port capabilities seen by the slaves (the IsSM bit will be seen as cleared in the slaves). Also add a static inline mlx4_master_func_num() to enhance readability of the code. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ include/linux/mlx4/device.h | 5 +++++ 2 files changed, 16 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a0313de122de..83afb1541a74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1477,6 +1477,17 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) "with caps = 0\n", port, err); dev->caps.ib_port_def_cap[port] = ib_port_default_caps; + /* initialize per-slave default ib port capabilities */ + if (mlx4_is_master(dev)) { + int i; + for (i = 0; i < dev->num_slaves; i++) { + if (i == mlx4_master_func_num(dev)) + continue; + priv->mfunc.master.slave_state[i].ib_cap_mask[port] = + ib_port_default_caps; + } + } + if (mlx4_is_mfunc(dev)) dev->caps.port_ib_mtu[port] = IB_MTU_2048; else diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 560b2201519f..7fbdc89de495 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -633,6 +633,11 @@ struct mlx4_init_port_param { void handle_port_mgmt_change_event(struct work_struct *work); +static inline int mlx4_master_func_num(struct mlx4_dev *dev) +{ + return dev->caps.function; +} + static inline int mlx4_is_master(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_MASTER; -- cgit v1.2.3 From f457ce471c522cadf697b873e2cf46e458e90bef Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Wed, 11 Jul 2012 15:39:30 +0000 Subject: mlx4_core: Remove double function declarations Spotted four duplicate declarations in icm.h, remove them. Signed-off-by: Dotan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/icm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index b10c07a1dc1a..19e4efc0b342 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -81,13 +81,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, u64 virt, int obj_size, int nobj, int reserved, int use_lowmem, int use_coherent); void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); -void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle); -int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, - int start, int end); -void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, - int start, int end); static inline void mlx4_icm_first(struct mlx4_icm *icm, struct mlx4_icm_iter *iter) -- cgit v1.2.3 From 240a9207aae24916dba7070aa7047c3732102cb8 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Wed, 11 Jul 2012 15:39:32 +0000 Subject: net/mlx4_core: Free ICM table in case of error In mlx4_init_icm_table(), free the allocated table if we failed to allocate memory to its entries. Signed-off-by: Dotan Barak Reviewed-by: Yevgeny Petrilin Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/icm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index a9ade1c3cad5..88b7b3e75ab1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -413,6 +413,8 @@ err: mlx4_free_icm(dev, table->icm[i], use_coherent); } + kfree(table->icm); + return -ENOMEM; } -- cgit v1.2.3 From 396f2feb05d7cc5549c611c05abfb4108cd1c6d6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:42 +0300 Subject: mlx4_core: Implement mechanism for reserved Q_Keys The SR-IOV special QP tunneling mechanism uses proxy special QPs (instead of the real special QPs) for MADs on guests. These proxy QPs send their packets to a "tunnel" QP owned by the master. The master then forwards the MAD (after any required paravirtualization) to the real special QP, which sends out the MAD. For security reasons (i.e., to prevent guests from sending MADs to tunnel QPs belonging to other guests), each proxy-tunnel QP pair is assigned a unique, reserved, Q_Key. These Q_Keys are available only for proxy and tunnel QPs -- if the guest tries to use these Q_Keys with other QPs, it will fail. This patch introduces a mechanism for reserving a block of 64K Q_Keys for proxy/tunneling use. The patch introduces also two new fields into mlx4_dev: base_sqpn and base_tunnel_sqpn. In SR-IOV mode, the QP numbers for the "real," proxy, and tunnel sqps are added to the reserved QPN area (so that they will not change). There are 8 special QPs per port in the HCA, and each of them is assigned both a proxy and a tunnel QP, for each VF and for the PF as well in SR-IOV mode. The QPNs for these QPs are arranged as follows: 1. The real SQP numbers (8) 2. The proxy SQPs (8 * (max number of VFs + max number of PFs) 3. The tunnel SQPs (8 * (max number of VFs + max number of PFs) To support these QPs, two new fields are added to struct mlx4_dev: base_sqp: this is the QP number of the first of the real SQPs base_tunnel_sqp: this is the qp number of the first qp in the tunnel sqp region. (On guests, this is the first tunnel sqp of the 8 which are assigned to that guest). In addition, in SR-IOV mode, sqp_start is the number of the first proxy SQP in the proxy SQP region. (In guests, this is the first proxy SQP of the 8 which are assigned to that guest) Note that in non-SR-IOV mode, there are no proxies and no tunnels. In this case, sqp_start is set to sqp_base -- which minimizes code changes. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 17 +++++++++++++++++ include/linux/mlx4/device.h | 11 +++++++++++ 2 files changed, 28 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 83afb1541a74..81154a16d6b8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -391,6 +391,23 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) return ret; } +int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) +{ + u32 qk = MLX4_RESERVED_QKEY_BASE; + if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || + qpn < dev->caps.sqp_start) + return -EINVAL; + + if (qpn >= dev->caps.base_tunnel_sqpn) + /* tunnel qp */ + qk += qpn - dev->caps.base_tunnel_sqpn; + else + qk += qpn - dev->caps.sqp_start; + *qkey = qk; + return 0; +} +EXPORT_SYMBOL(mlx4_get_parav_qkey); + int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7fbdc89de495..c30a314e095c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -56,6 +56,13 @@ enum { MLX4_MAX_PORTS = 2 }; +/* base qkey for use in sriov tunnel-qp/proxy-qp communication. + * These qkeys must not be allowed for general use. This is a 64k range, + * and to test for violation, we use the mask (protect against future chg). + */ +#define MLX4_RESERVED_QKEY_BASE (0xFFFF0000) +#define MLX4_RESERVED_QKEY_MASK (0xFFFF0000) + enum { MLX4_BOARD_ID_LEN = 64 }; @@ -293,6 +300,8 @@ struct mlx4_caps { int max_qp_init_rdma; int max_qp_dest_rdma; int sqp_start; + u32 base_sqpn; + u32 base_tunnel_sqpn; int num_srqs; int max_srq_wqes; int max_srq_sge; @@ -772,4 +781,6 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 105c320f6ac37af30252577d419e47b39edb5843 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:43 +0300 Subject: mlx4_core: Allow guests to have IB ports Modify mlx4_dev_cap to allow IB support when SR-IOV is active. Modify mlx4_slave_cap to set the "rdma-supported" bit in its flags area, and pass that to the guests (this is done in QUERY_FUNC_CAP and its wrapper). However, we don't activate IB support quite yet -- we leave the error return at the start of mlx4_ib_add in the mlx4_ib driver. In addition, set "protected fmr supported" bit to zero in the QUERY_FUNC_CAP wrapper. Finally, in the QUERY_FUNC_CAP wrapper, we needed to add code which checks for the port type (IB or Ethernet). Previously, this was not an issue, since only Ethernet ports were supported. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 78 ++++++++++++++++++++----------- drivers/net/ethernet/mellanox/mlx4/main.c | 26 ++++------- 2 files changed, 59 insertions(+), 45 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index ee9d6b0b4d20..5549f6b3bb67 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -174,6 +174,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 #define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4 +#define QUERY_FUNC_CAP_FMR_OFFSET 0x8 #define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x10 #define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x14 #define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x18 @@ -183,25 +184,44 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 +#define QUERY_FUNC_CAP_FMR_FLAG 0x80 +#define QUERY_FUNC_CAP_FLAG_RDMA 0x40 +#define QUERY_FUNC_CAP_FLAG_ETH 0x80 + +/* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 +#define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8 #define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc +#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 +#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 + +#define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 + if (vhcr->op_modifier == 1) { field = vhcr->in_modifier; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - field = 0; /* ensure fvl bit is not set */ + field = 0; + /* ensure force vlan and force mac bits are not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); + /* ensure that phy_wqe_gid bit is not set */ + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + } else if (vhcr->op_modifier == 0) { - field = 1 << 7; /* enable only ethernet interface */ + /* enable rdma and ethernet interfaces */ + field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); field = dev->caps.num_ports; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); - size = 0; /* no PF behavious is set for now */ + size = 0; /* no PF behaviour is set for now */ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); + field = 0; /* protected FMR support not available as yet */ + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET); + size = dev->caps.num_qps; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); @@ -254,11 +274,12 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) outbox = mailbox->buf; MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); - if (!(field & (1 << 7))) { - mlx4_err(dev, "The host doesn't support eth interface\n"); + if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { + mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); err = -EPROTONOSUPPORT; goto out; } + func_cap->flags = field; MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); func_cap->num_ports = field; @@ -297,17 +318,27 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) if (err) goto out; - MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); - if (field & (1 << 7)) { - mlx4_err(dev, "VLAN is enforced on this port\n"); - err = -EPROTONOSUPPORT; - goto out; - } + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { + mlx4_err(dev, "VLAN is enforced on this port\n"); + err = -EPROTONOSUPPORT; + goto out; + } - if (field & (1 << 6)) { - mlx4_err(dev, "Force mac is enabled on this port\n"); - err = -EPROTONOSUPPORT; - goto out; + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { + mlx4_err(dev, "Force mac is enabled on this port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + } else if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { + mlx4_err(dev, "phy_wqe_gid is " + "enforced on this ib port\n"); + err = -EPROTONOSUPPORT; + goto out; + } } MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); @@ -701,12 +732,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, u8 port_type; int err; -#define MLX4_PORT_SUPPORT_IB (1 << 0) -#define MLX4_PORT_SUGGEST_TYPE (1 << 3) -#define MLX4_PORT_DEFAULT_SENSE (1 << 4) -#define MLX4_VF_PORT_ETH_ONLY_MASK (0xff & ~MLX4_PORT_SUPPORT_IB & \ - ~MLX4_PORT_SUGGEST_TYPE & \ - ~MLX4_PORT_DEFAULT_SENSE) +#define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, @@ -722,12 +748,10 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_GET(port_type, outbox->buf, QUERY_PORT_SUPPORTED_TYPE_OFFSET); - /* Allow only Eth port, no link sensing allowed */ - port_type &= MLX4_VF_PORT_ETH_ONLY_MASK; - - /* check eth is enabled for this port */ - if (!(port_type & 2)) - mlx4_dbg(dev, "QUERY PORT: eth not supported by host"); + /* No link sensing allowed */ + port_type &= MLX4_VF_PORT_NO_LINK_SENSE_MASK; + /* set port type to currently operating port type */ + port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3); MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 81154a16d6b8..58544b72bacb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -288,29 +288,19 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) /* if only ETH is supported - assign ETH */ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; - /* if only IB is supported, - * assign IB only if SRIOV is off*/ + /* if only IB is supported, assign IB */ else if (dev->caps.supported_type[i] == - MLX4_PORT_TYPE_IB) { - if (dev->flags & MLX4_FLAG_SRIOV) - dev->caps.port_type[i] = - MLX4_PORT_TYPE_NONE; - else - dev->caps.port_type[i] = - MLX4_PORT_TYPE_IB; - /* if IB and ETH are supported, - * first of all check if SRIOV is on */ - } else if (dev->flags & MLX4_FLAG_SRIOV) - dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; + MLX4_PORT_TYPE_IB) + dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; else { - /* In non-SRIOV mode, we set the port type - * according to user selection of port type, - * if usere selected none, take the FW hint */ - if (port_type_array[i-1] == MLX4_PORT_TYPE_NONE) + /* if IB and ETH are supported, we set the port + * type according to user selection of port type; + * if user selected none, take the FW hint */ + if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE) dev->caps.port_type[i] = dev->caps.suggested_type[i] ? MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; else - dev->caps.port_type[i] = port_type_array[i-1]; + dev->caps.port_type[i] = port_type_array[i - 1]; } } /* -- cgit v1.2.3 From 6634961c14d38ef64ec284c07aecb03d3dd03b4a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:44 +0300 Subject: mlx4: Put physical GID and P_Key table sizes in mlx4_phys_caps struct and paravirtualize them To allow easy paravirtualization of P_Key and GID table sizes, keep paravirtualized sizes in mlx4_dev->caps, but save the actual physical sizes from FW in struct: mlx4_dev->phys_cap. In addition, in SR-IOV mode, do the following: 1. Reduce reported P_Key table size by 1. This is done to reserve the highest P_Key index for internal use, for declaring an invalid P_Key in P_Key paravirtualization. We require a P_Key index which always contain an invalid P_Key value for this purpose (i.e., one which cannot be modified by the subnet manager). The way to do this is to reduce the P_Key table size reported to the subnet manager by 1, so that it will not attempt to access the P_Key at index #127. 2. Paravirtualize the GID table size to 1. Thus, each guest sees only a single GID (at its paravirtualized index 0). In addition, since we are paravirtualizing the GID table size to 1, we add paravirtualization of the master GID event here (i.e., we do not do ib_dispatch_event() for the GUID change event on the master, since its (only) GUID never changes). Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 10 ++++--- drivers/net/ethernet/mellanox/mlx4/fw.c | 43 +++++++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 32 ++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 ++- drivers/net/ethernet/mellanox/mlx4/port.c | 11 ++++++-- include/linux/mlx4/device.h | 2 ++ 6 files changed, 92 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 58c45fb5bd31..c27141fef1ab 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -184,8 +184,10 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, break; case IB_SMP_ATTR_GUID_INFO: - mlx4_ib_dispatch_event(dev, port_num, - IB_EVENT_GID_CHANGE); + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_GID_CHANGE); break; default: break; @@ -487,7 +489,9 @@ void handle_port_mgmt_change_event(struct work_struct *work) mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); break; case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: - mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); break; default: pr_warn("Unsupported subtype 0x%x for " diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 5549f6b3bb67..473d63b63b4e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -730,9 +730,12 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, { u64 def_mac; u8 port_type; + u16 short_field; int err; #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 +#define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c +#define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, @@ -755,11 +758,51 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); + + short_field = 1; /* slave max gids */ + MLX4_PUT(outbox->buf, short_field, + QUERY_PORT_CUR_MAX_GID_OFFSET); + + short_field = dev->caps.pkey_table_len[vhcr->in_modifier]; + MLX4_PUT(outbox->buf, short_field, + QUERY_PORT_CUR_MAX_PKEY_OFFSET); } return err; } +int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, + int *gid_tbl_len, int *pkey_tbl_len) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u16 field; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto out; + + outbox = mailbox->buf; + + MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET); + *gid_tbl_len = field; + + MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET); + *pkey_tbl_len = field; + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len); + int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 58544b72bacb..5df3ac40a490 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -215,6 +215,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; + dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; + dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; + /* set gid and pkey table operating lengths by default + * to non-sriov values */ dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; @@ -498,8 +502,13 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } - for (i = 1; i <= dev->caps.num_ports; ++i) + for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_mask[i] = dev->caps.port_type[i]; + if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, + &dev->caps.gid_table_len[i], + &dev->caps.pkey_table_len[i])) + return -ENODEV; + } if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > @@ -536,7 +545,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev, for (port = 1; port <= dev->caps.num_ports; port++) { mlx4_CLOSE_PORT(dev, port); dev->caps.port_type[port] = port_types[port - 1]; - err = mlx4_SET_PORT(dev, port); + err = mlx4_SET_PORT(dev, port, -1); if (err) { mlx4_err(dev, "Failed to set port %d, " "aborting\n", port); @@ -722,7 +731,7 @@ static ssize_t set_port_ib_mtu(struct device *dev, mlx4_unregister_device(mdev); for (port = 1; port <= mdev->caps.num_ports; port++) { mlx4_CLOSE_PORT(mdev, port); - err = mlx4_SET_PORT(mdev, port); + err = mlx4_SET_PORT(mdev, port, -1); if (err) { mlx4_err(mdev, "Failed to set port %d, " "aborting\n", port); @@ -1173,6 +1182,17 @@ err: return -EIO; } +static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) +{ + int i; + + for (i = 1; i <= dev->caps.num_ports; i++) { + dev->caps.gid_table_len[i] = 1; + dev->caps.pkey_table_len[i] = + dev->phys_caps.pkey_phys_table_len[i] - 1; + } +} + static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1212,6 +1232,9 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + if (mlx4_is_master(dev)) + mlx4_parav_master_pf_caps(dev); + profile = default_profile; icm_size = mlx4_make_profile(dev, &profile, &dev_cap, @@ -1500,7 +1523,8 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) else dev->caps.port_ib_mtu[port] = IB_MTU_4096; - err = mlx4_SET_PORT(dev, port); + err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? + dev->caps.pkey_table_len[port] : -1); if (err) { mlx4_err(dev, "Failed to set port %d, aborting\n", port); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 4d11d12b9db4..cde6e511899f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -969,7 +969,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev, void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); -int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port); +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); /* resource tracker functions*/ int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource resource_type, @@ -1012,6 +1012,8 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd); int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps); +int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, + int *gid_tbl_len, int *pkey_tbl_len); int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index a8fb52992c64..90dc47542b8b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -726,14 +726,15 @@ int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave, enum { MLX4_SET_PORT_VL_CAP = 4, /* bits 7:4 */ MLX4_SET_PORT_MTU_CAP = 12, /* bits 15:12 */ + MLX4_CHANGE_PORT_PKEY_TBL_SZ = 20, MLX4_CHANGE_PORT_VL_CAP = 21, MLX4_CHANGE_PORT_MTU_CAP = 22, }; -int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz) { struct mlx4_cmd_mailbox *mailbox; - int err, vl_cap; + int err, vl_cap, pkey_tbl_flag = 0; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) return 0; @@ -746,11 +747,17 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port]; + if (pkey_tbl_sz >= 0 && mlx4_is_master(dev)) { + pkey_tbl_flag = 1; + ((__be16 *) mailbox->buf)[20] = cpu_to_be16(pkey_tbl_sz); + } + /* IB VL CAP enum isn't used by the firmware, just numerical values */ for (vl_cap = 8; vl_cap >= 1; vl_cap >>= 1) { ((__be32 *) mailbox->buf)[0] = cpu_to_be32( (1 << MLX4_CHANGE_PORT_MTU_CAP) | (1 << MLX4_CHANGE_PORT_VL_CAP) | + (pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) | (dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) | (vl_cap << MLX4_SET_PORT_VL_CAP)); err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c30a314e095c..441caf1a497d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -268,6 +268,8 @@ static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) } struct mlx4_phys_caps { + u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; + u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; u32 num_phys_eqs; }; -- cgit v1.2.3 From 447458c01f4c765ab37ac7b380e60d977daae4c5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 10 Jul 2012 20:33:36 +0000 Subject: net/mlx4: off by one in parse_trans_rule() This should be ">=" here instead of ">". MLX4_NET_TRANS_RULE_NUM is 6. We use "spec->id" as an array offset into the __rule_hw_sz[] and __sw_id_hw[] arrays which have 6 elements. Signed-off-by: Dan Carpenter Acked-by: Hadar Hen Zion Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index bc62f536ffae..5bac0dfafbd8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -773,7 +773,7 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, [MLX4_NET_TRANS_RULE_ID_UDP] = sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) }; - if (spec->id > MLX4_NET_TRANS_RULE_NUM) { + if (spec->id >= MLX4_NET_TRANS_RULE_NUM) { mlx4_err(dev, "Invalid network rule id. id = %d\n", spec->id); return -EINVAL; } -- cgit v1.2.3 From 9c64508af2c171f57a198729c59a7a6f6998b3a1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 10 Jul 2012 20:34:07 +0000 Subject: net/mlx4_en: dereferencing freed memory We dereferenced "mclist" after the kfree(). Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 94375a8c6d42..4ce5ca81a010 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -503,9 +503,7 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* remove from list */ list_del(&mclist->list); kfree(mclist); - } - - if (mclist->action == MCLIST_ADD) { + } else if (mclist->action == MCLIST_ADD) { /* attach the address */ memcpy(&mc_list[10], mclist->addr, ETH_ALEN); /* needed for B0 steering support */ -- cgit v1.2.3 From af22d9de45caf8b2a99f2b27a927169c029528b4 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 18 Jul 2012 22:33:49 +0000 Subject: net/mlx4: Move MAC_MASK to a common place Define this macro is one common place instead of duplicating it over the code Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/mcg.c | 1 - drivers/net/ethernet/mellanox/mlx4/port.c | 1 - drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 3 +-- include/linux/mlx4/driver.h | 2 ++ 5 files changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index dd6a77b21149..9d0b88eea02b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -34,12 +34,12 @@ #include #include #include +#include #include "mlx4_en.h" #include "en_port.h" #define EN_ETHTOOL_QP_ATTACH (1ull << 63) -#define EN_ETHTOOL_MAC_MASK 0xffffffffffffULL #define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff) #define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff) @@ -751,7 +751,7 @@ static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev, struct ethhdr *eth_spec; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_spec_list *spec_l2; - __be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16); + __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); err = mlx4_en_validate_flow(dev, cmd); if (err) @@ -761,7 +761,7 @@ static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev, if (!spec_l2) return -ENOMEM; - mac = priv->mac & EN_ETHTOOL_MAC_MASK; + mac = priv->mac & MLX4_MAC_MASK; be_mac = cpu_to_be64(mac << 16); spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 5bac0dfafbd8..4ec3835e1bc2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -41,7 +41,6 @@ #define MGM_QPN_MASK 0x00FFFFFF #define MGM_BLCK_LB_BIT 30 -#define MLX4_MAC_MASK 0xffffffffffffULL static const u8 zero_gid[16]; /* automatically initialized to 0 */ diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index a51d1b9bf1d1..028833ffc56f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -39,7 +39,6 @@ #include "mlx4.h" #define MLX4_MAC_VALID (1ull << 63) -#define MLX4_MAC_MASK 0xffffffffffffULL #define MLX4_VLAN_VALID (1u << 31) #define MLX4_VLAN_MASK 0xfff diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index c3fa91986190..94ceddd17ab2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -41,13 +41,12 @@ #include #include #include +#include #include "mlx4.h" #include "fw.h" #define MLX4_MAC_VALID (1ull << 63) -#define MLX4_MAC_MASK 0x7fffffffffffffffULL -#define ETH_ALEN 6 struct mac_res { struct list_head list; diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 5f1298b1b5ef..8dc485febc6b 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -37,6 +37,8 @@ struct mlx4_dev; +#define MLX4_MAC_MASK 0xffffffffffffULL + enum mlx4_dev_event { MLX4_DEV_EVENT_CATASTROPHIC_ERROR, MLX4_DEV_EVENT_PORT_UP, -- cgit v1.2.3 From d9236c3f10490cd0b3fd4516af12ba62dcbae0b0 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 18 Jul 2012 22:33:51 +0000 Subject: {NET,IB}/mlx4: Add rmap support to mlx4_assign_eq Enable callers of mlx4_assign_eq to supply a pointer to cpu_rmap. If supplied, the assigned IRQ is tracked using rmap infrastructure. Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/eq.c | 12 +++++++++++- include/linux/mlx4/device.h | 4 +++- 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8a3a2037b005..a07b774e7864 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1159,7 +1159,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) sprintf(name, "mlx4-ib-%d-%d@%s", i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(dev, name, &ibdev->eq_table[eq])) { + if (mlx4_assign_eq(dev, name, NULL, + &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 908a460d8db6..0ef615684021 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -91,7 +91,8 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, sprintf(name, "%s-%d", priv->dev->name, cq->ring); /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(mdev->dev, name, &cq->vector)) { + if (mlx4_assign_eq(mdev->dev, name, NULL, + &cq->vector)) { cq->vector = (cq->ring + 1 + priv->port) % mdev->dev->caps.num_comp_vectors; mlx4_warn(mdev, "Failed Assigning an EQ to " diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index bce98d9c0039..cd48337cbfc0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -39,6 +39,7 @@ #include #include +#include #include "mlx4.h" #include "fw.h" @@ -1060,7 +1061,8 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) } EXPORT_SYMBOL(mlx4_test_interrupts); -int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector) +int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, + int *vector) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1074,6 +1076,14 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector) snprintf(priv->eq_table.irq_names + vec * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "%s", name); +#ifdef CONFIG_RFS_ACCEL + if (rmap) { + err = irq_cpu_rmap_add(rmap, + priv->eq_table.eq[vec].irq); + if (err) + mlx4_warn(dev, "Failed adding irq rmap\n"); + } +#endif err = request_irq(priv->eq_table.eq[vec].irq, mlx4_msi_x_interrupt, 0, &priv->eq_table.irq_names[vec<<5], diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6f0d133cc7ad..4d7761f8c3f6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -784,7 +785,8 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); -int mlx4_assign_eq(struct mlx4_dev *dev, char* name , int* vector); +int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, + int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); -- cgit v1.2.3 From 1eb8c695bda92ccaec30e9a3351e37a1896da54f Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 18 Jul 2012 22:33:52 +0000 Subject: net/mlx4_en: Add accelerated RFS support Use RFS infrastructure and flow steering in HW to keep CPU affinity of rx interrupts and application per TCP stream. A flow steering filter is added to the HW whenever the RFS ndo callback is invoked by core networking code. Because the invocation takes place in interrupt context, the actual setup of HW is done using workqueue. Whenever new filter is added, the driver checks for expiry of existing filters. Since there's window in time between the point where the core RFS code invoked the ndo callback, to the point where the HW is configured from the workqueue context, the 2nd, 3rd etc packets from that stream will cause the net core to invoke the callback again and again. To prevent inefficient/double configuration of the HW, the filters are kept in a database which is indexed using hash function to enable fast access. Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 8 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 316 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 16 ++ 4 files changed, 342 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 0ef615684021..aa9c2f6cf3c0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -77,6 +77,12 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, struct mlx4_en_dev *mdev = priv->mdev; int err = 0; char name[25]; + struct cpu_rmap *rmap = +#ifdef CONFIG_RFS_ACCEL + priv->dev->rx_cpu_rmap; +#else + NULL; +#endif cq->dev = mdev->pndev[priv->port]; cq->mcq.set_ci_db = cq->wqres.db.db; @@ -91,7 +97,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, sprintf(name, "%s-%d", priv->dev->name, cq->ring); /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(mdev->dev, name, NULL, + if (mlx4_assign_eq(mdev->dev, name, rmap, &cq->vector)) { cq->vector = (cq->ring + 1 + priv->port) % mdev->dev->caps.num_comp_vectors; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 4ce5ca81a010..8864d8b53737 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -66,6 +68,299 @@ static int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } +#ifdef CONFIG_RFS_ACCEL + +struct mlx4_en_filter { + struct list_head next; + struct work_struct work; + + __be32 src_ip; + __be32 dst_ip; + __be16 src_port; + __be16 dst_port; + + int rxq_index; + struct mlx4_en_priv *priv; + u32 flow_id; /* RFS infrastructure id */ + int id; /* mlx4_en driver id */ + u64 reg_id; /* Flow steering API id */ + u8 activated; /* Used to prevent expiry before filter + * is attached + */ + struct hlist_node filter_chain; +}; + +static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv); + +static void mlx4_en_filter_work(struct work_struct *work) +{ + struct mlx4_en_filter *filter = container_of(work, + struct mlx4_en_filter, + work); + struct mlx4_en_priv *priv = filter->priv; + struct mlx4_spec_list spec_tcp = { + .id = MLX4_NET_TRANS_RULE_ID_TCP, + { + .tcp_udp = { + .dst_port = filter->dst_port, + .dst_port_msk = (__force __be16)-1, + .src_port = filter->src_port, + .src_port_msk = (__force __be16)-1, + }, + }, + }; + struct mlx4_spec_list spec_ip = { + .id = MLX4_NET_TRANS_RULE_ID_IPV4, + { + .ipv4 = { + .dst_ip = filter->dst_ip, + .dst_ip_msk = (__force __be32)-1, + .src_ip = filter->src_ip, + .src_ip_msk = (__force __be32)-1, + }, + }, + }; + struct mlx4_spec_list spec_eth = { + .id = MLX4_NET_TRANS_RULE_ID_ETH, + }; + struct mlx4_net_trans_rule rule = { + .list = LIST_HEAD_INIT(rule.list), + .queue_mode = MLX4_NET_TRANS_Q_LIFO, + .exclusive = 1, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_PROMISC_NONE, + .port = priv->port, + .priority = MLX4_DOMAIN_RFS, + }; + int rc; + __be64 mac; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + list_add_tail(&spec_eth.list, &rule.list); + list_add_tail(&spec_ip.list, &rule.list); + list_add_tail(&spec_tcp.list, &rule.list); + + mac = cpu_to_be64((priv->mac & MLX4_MAC_MASK) << 16); + + rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn; + memcpy(spec_eth.eth.dst_mac, &mac, ETH_ALEN); + memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + + filter->activated = 0; + + if (filter->reg_id) { + rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); + if (rc && rc != -ENOENT) + en_err(priv, "Error detaching flow. rc = %d\n", rc); + } + + rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id); + if (rc) + en_err(priv, "Error attaching flow. err = %d\n", rc); + + mlx4_en_filter_rfs_expire(priv); + + filter->activated = 1; +} + +static inline struct hlist_head * +filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, + __be16 src_port, __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + l ^= (__force unsigned long)(src_ip ^ dst_ip); + + bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT); + + return &priv->filter_hash[bucket_idx]; +} + +static struct mlx4_en_filter * +mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, + __be32 dst_ip, __be16 src_port, __be16 dst_port, + u32 flow_id) +{ + struct mlx4_en_filter *filter = NULL; + + filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC); + if (!filter) + return NULL; + + filter->priv = priv; + filter->rxq_index = rxq_index; + INIT_WORK(&filter->work, mlx4_en_filter_work); + + filter->src_ip = src_ip; + filter->dst_ip = dst_ip; + filter->src_port = src_port; + filter->dst_port = dst_port; + + filter->flow_id = flow_id; + + filter->id = priv->last_filter_id++; + + list_add_tail(&filter->next, &priv->filters); + hlist_add_head(&filter->filter_chain, + filter_hash_bucket(priv, src_ip, dst_ip, src_port, + dst_port)); + + return filter; +} + +static void mlx4_en_filter_free(struct mlx4_en_filter *filter) +{ + struct mlx4_en_priv *priv = filter->priv; + int rc; + + list_del(&filter->next); + + rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); + if (rc && rc != -ENOENT) + en_err(priv, "Error detaching flow. rc = %d\n", rc); + + kfree(filter); +} + +static inline struct mlx4_en_filter * +mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, + __be16 src_port, __be16 dst_port) +{ + struct hlist_node *elem; + struct mlx4_en_filter *filter; + struct mlx4_en_filter *ret = NULL; + + hlist_for_each_entry(filter, elem, + filter_hash_bucket(priv, src_ip, dst_ip, + src_port, dst_port), + filter_chain) { + if (filter->src_ip == src_ip && + filter->dst_ip == dst_ip && + filter->src_port == src_port && + filter->dst_port == dst_port) { + ret = filter; + break; + } + } + + return ret; +} + +static int +mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx4_en_priv *priv = netdev_priv(net_dev); + struct mlx4_en_filter *filter; + const struct iphdr *ip; + const __be16 *ports; + __be32 src_ip; + __be32 dst_ip; + __be16 src_port; + __be16 dst_port; + int nhoff = skb_network_offset(skb); + int ret = 0; + + if (skb->protocol != htons(ETH_P_IP)) + return -EPROTONOSUPPORT; + + ip = (const struct iphdr *)(skb->data + nhoff); + if (ip_is_fragment(ip)) + return -EPROTONOSUPPORT; + + ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); + + src_ip = ip->saddr; + dst_ip = ip->daddr; + src_port = ports[0]; + dst_port = ports[1]; + + if (ip->protocol != IPPROTO_TCP) + return -EPROTONOSUPPORT; + + spin_lock_bh(&priv->filters_lock); + filter = mlx4_en_filter_find(priv, src_ip, dst_ip, src_port, dst_port); + if (filter) { + if (filter->rxq_index == rxq_index) + goto out; + + filter->rxq_index = rxq_index; + } else { + filter = mlx4_en_filter_alloc(priv, rxq_index, + src_ip, dst_ip, + src_port, dst_port, flow_id); + if (!filter) { + ret = -ENOMEM; + goto err; + } + } + + queue_work(priv->mdev->workqueue, &filter->work); + +out: + ret = filter->id; +err: + spin_unlock_bh(&priv->filters_lock); + + return ret; +} + +void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *rx_ring) +{ + struct mlx4_en_filter *filter, *tmp; + LIST_HEAD(del_list); + + spin_lock_bh(&priv->filters_lock); + list_for_each_entry_safe(filter, tmp, &priv->filters, next) { + list_move(&filter->next, &del_list); + hlist_del(&filter->filter_chain); + } + spin_unlock_bh(&priv->filters_lock); + + list_for_each_entry_safe(filter, tmp, &del_list, next) { + cancel_work_sync(&filter->work); + mlx4_en_filter_free(filter); + } +} + +static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) +{ + struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL; + LIST_HEAD(del_list); + int i = 0; + + spin_lock_bh(&priv->filters_lock); + list_for_each_entry_safe(filter, tmp, &priv->filters, next) { + if (i > MLX4_EN_FILTER_EXPIRY_QUOTA) + break; + + if (filter->activated && + !work_pending(&filter->work) && + rps_may_expire_flow(priv->dev, + filter->rxq_index, filter->flow_id, + filter->id)) { + list_move(&filter->next, &del_list); + hlist_del(&filter->filter_chain); + } else + last_filter = filter; + + i++; + } + + if (last_filter && (&last_filter->next != priv->filters.next)) + list_move(&priv->filters, &last_filter->next); + + spin_unlock_bh(&priv->filters_lock); + + list_for_each_entry_safe(filter, tmp, &del_list, next) + mlx4_en_filter_free(filter); +} +#endif + static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1079,6 +1374,11 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); + priv->dev->rx_cpu_rmap = NULL; +#endif + for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring[i].tx_info) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); @@ -1134,6 +1434,15 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) goto err; } +#ifdef CONFIG_RFS_ACCEL + priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num); + if (!priv->dev->rx_cpu_rmap) + goto err; + + INIT_LIST_HEAD(&priv->filters); + spin_lock_init(&priv->filters_lock); +#endif + return 0; err: @@ -1241,6 +1550,9 @@ static const struct net_device_ops mlx4_netdev_ops = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_setup_tc = mlx4_en_setup_tc, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx4_en_filter_rfs, +#endif }; int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, @@ -1358,6 +1670,10 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, NETIF_F_HW_VLAN_FILTER; dev->hw_features |= NETIF_F_LOOPBACK; + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + dev->hw_features |= NETIF_F_NTUPLE; + mdev->pndev[port] = dev; netif_carrier_off(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a04cbf767eb5..796cd58e6229 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -389,6 +389,9 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); vfree(ring->rx_info); ring->rx_info = NULL; +#ifdef CONFIG_RFS_ACCEL + mlx4_en_cleanup_filters(priv, ring); +#endif } void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a12632150b34..af34c98d43e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -43,6 +43,7 @@ #ifdef CONFIG_MLX4_EN_DCB #include #endif +#include #include #include @@ -77,6 +78,9 @@ #define STATS_DELAY (HZ / 4) #define MAX_NUM_OF_FS_RULES 256 +#define MLX4_EN_FILTER_HASH_SHIFT 4 +#define MLX4_EN_FILTER_EXPIRY_QUOTA 60 + /* Typical TSO descriptor with 16 gather entries is 352 bytes... */ #define MAX_DESC_SIZE 512 #define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE) @@ -523,6 +527,13 @@ struct mlx4_en_priv { struct ieee_ets ets; u16 maxrate[IEEE_8021QAZ_MAX_TCS]; #endif +#ifdef CONFIG_RFS_ACCEL + spinlock_t filters_lock; + int last_filter_id; + struct list_head filters; + struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT]; +#endif + }; enum mlx4_en_wol { @@ -602,6 +613,11 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; #endif +#ifdef CONFIG_RFS_ACCEL +void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *rx_ring); +#endif + #define MLX4_EN_NUM_SELF_TEST 5 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); u64 mlx4_en_mac_to_u64(u8 *addr); -- cgit v1.2.3 From 4cce66cdd14aa5006a011505865d932adb49f600 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 16 Jul 2012 07:01:53 +0000 Subject: mlx4_en: map entire pages to increase throughput In its receive path, mlx4_en driver maps each page chunk that it pushes to the hardware and unmaps it when pushing it up the stack. This limits throughput to about 3Gbps on a Power7 8-core machine. One solution is to map the entire allocated page at once. However, this requires that we keep track of every page fragment we give to a descriptor. We also need to work with the discipline that all fragments will be released (in the sense that it will not be reused by the driver anymore) in the order they are allocated to the driver. This requires that we don't reuse any fragments, every single one of them must be reallocated. We do that by releasing all the fragments that are processed and only after finished processing the descriptors, we start the refill. We also must somehow guarantee that we either refill all fragments in a descriptor or none at all, without resorting to giving up a page fragment that we would have already given. Otherwise, we would break the discipline of only releasing the fragments in the order they were allocated. This has passed page allocation fault injections (restricted to the driver by using required-start and required-end) and device hotplug while 16 TCP streams were able to deliver more than 9Gbps. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 237 +++++++++++++++------------ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 +- 2 files changed, 131 insertions(+), 109 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 796cd58e6229..f32e70300770 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -41,41 +41,75 @@ #include "mlx4_en.h" - -static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, - struct page_frag *skb_frags, - struct mlx4_en_rx_alloc *ring_alloc, - int i) +static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, + struct mlx4_en_rx_desc *rx_desc, + struct mlx4_en_rx_alloc *frags, + struct mlx4_en_rx_alloc *ring_alloc) { - struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i]; + struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; + struct mlx4_en_frag_info *frag_info; struct page *page; dma_addr_t dma; + int i; - if (page_alloc->offset == frag_info->last_offset) { - /* Allocate new page */ - page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER); - if (!page) - return -ENOMEM; - - skb_frags[i].page = page_alloc->page; - skb_frags[i].offset = page_alloc->offset; - page_alloc->page = page; - page_alloc->offset = frag_info->frag_align; - } else { - page = page_alloc->page; - get_page(page); + for (i = 0; i < priv->num_frags; i++) { + frag_info = &priv->frag_info[i]; + if (ring_alloc[i].offset == frag_info->last_offset) { + page = alloc_pages(GFP_ATOMIC | __GFP_COMP, + MLX4_EN_ALLOC_ORDER); + if (!page) + goto out; + dma = dma_map_page(priv->ddev, page, 0, + MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(priv->ddev, dma)) { + put_page(page); + goto out; + } + page_alloc[i].page = page; + page_alloc[i].dma = dma; + page_alloc[i].offset = frag_info->frag_align; + } else { + page_alloc[i].page = ring_alloc[i].page; + get_page(ring_alloc[i].page); + page_alloc[i].dma = ring_alloc[i].dma; + page_alloc[i].offset = ring_alloc[i].offset + + frag_info->frag_stride; + } + } - skb_frags[i].page = page; - skb_frags[i].offset = page_alloc->offset; - page_alloc->offset += frag_info->frag_stride; + for (i = 0; i < priv->num_frags; i++) { + frags[i] = ring_alloc[i]; + dma = ring_alloc[i].dma + ring_alloc[i].offset; + ring_alloc[i] = page_alloc[i]; + rx_desc->data[i].addr = cpu_to_be64(dma); } - dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) + - skb_frags[i].offset, frag_info->frag_size, - PCI_DMA_FROMDEVICE); - rx_desc->data[i].addr = cpu_to_be64(dma); + return 0; + + +out: + while (i--) { + frag_info = &priv->frag_info[i]; + if (ring_alloc[i].offset == frag_info->last_offset) + dma_unmap_page(priv->ddev, page_alloc[i].dma, + MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); + put_page(page_alloc[i].page); + } + return -ENOMEM; +} + +static void mlx4_en_free_frag(struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frags, + int i) +{ + struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; + + if (frags[i].offset == frag_info->last_offset) { + dma_unmap_page(priv->ddev, frags[i].dma, MLX4_EN_ALLOC_SIZE, + PCI_DMA_FROMDEVICE); + } + if (frags[i].page) + put_page(frags[i].page); } static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, @@ -91,6 +125,13 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, if (!page_alloc->page) goto out; + page_alloc->dma = dma_map_page(priv->ddev, page_alloc->page, 0, + MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(priv->ddev, page_alloc->dma)) { + put_page(page_alloc->page); + page_alloc->page = NULL; + goto out; + } page_alloc->offset = priv->frag_info[i].frag_align; en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n", i, page_alloc->page); @@ -100,6 +141,8 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, out: while (i--) { page_alloc = &ring->page_alloc[i]; + dma_unmap_page(priv->ddev, page_alloc->dma, + MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); put_page(page_alloc->page); page_alloc->page = NULL; } @@ -117,24 +160,22 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n", i, page_count(page_alloc->page)); + dma_unmap_page(priv->ddev, page_alloc->dma, + MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); put_page(page_alloc->page); page_alloc->page = NULL; } } - static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; - struct skb_frag_struct *skb_frags = ring->rx_info + - (index << priv->log_rx_info); int possible_frags; int i; /* Set size and memtype fields */ for (i = 0; i < priv->num_frags; i++) { - skb_frag_size_set(&skb_frags[i], priv->frag_info[i].frag_size); rx_desc->data[i].byte_count = cpu_to_be32(priv->frag_info[i].frag_size); rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); @@ -151,29 +192,14 @@ static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, } } - static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); - struct page_frag *skb_frags = ring->rx_info + - (index << priv->log_rx_info); - int i; + struct mlx4_en_rx_alloc *frags = ring->rx_info + + (index << priv->log_rx_info); - for (i = 0; i < priv->num_frags; i++) - if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i)) - goto err; - - return 0; - -err: - while (i--) { - dma_addr_t dma = be64_to_cpu(rx_desc->data[i].addr); - pci_unmap_single(priv->mdev->pdev, dma, skb_frags[i].size, - PCI_DMA_FROMDEVICE); - put_page(skb_frags[i].page); - } - return -ENOMEM; + return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc); } static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) @@ -185,20 +211,13 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { - struct page_frag *skb_frags; - struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride); - dma_addr_t dma; + struct mlx4_en_rx_alloc *frags; int nr; - skb_frags = ring->rx_info + (index << priv->log_rx_info); + frags = ring->rx_info + (index << priv->log_rx_info); for (nr = 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); - dma = be64_to_cpu(rx_desc->data[nr].addr); - - en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); - dma_unmap_single(priv->ddev, dma, skb_frags[nr].size, - PCI_DMA_FROMDEVICE); - put_page(skb_frags[nr].page); + mlx4_en_free_frag(priv, frags, nr); } } @@ -268,10 +287,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, u32 size, u16 stride) { struct mlx4_en_dev *mdev = priv->mdev; - int err; + int err = -ENOMEM; int tmp; - ring->prod = 0; ring->cons = 0; ring->size = size; @@ -281,7 +299,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->buf_size = ring->size * ring->stride + TXBB_SIZE; tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * - sizeof(struct skb_frag_struct)); + sizeof(struct mlx4_en_rx_alloc)); ring->rx_info = vmalloc(tmp); if (!ring->rx_info) return -ENOMEM; @@ -338,7 +356,7 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) memset(ring->buf, 0, ring->buf_size); mlx4_en_update_rx_prod_db(ring); - /* Initailize all descriptors */ + /* Initialize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); @@ -404,12 +422,10 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, } -/* Unmap a completed descriptor and free unused pages */ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, - struct page_frag *skb_frags, + struct mlx4_en_rx_alloc *frags, struct sk_buff *skb, - struct mlx4_en_rx_alloc *page_alloc, int length) { struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; @@ -417,26 +433,24 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, int nr; dma_addr_t dma; - /* Collect used fragments while replacing them in the HW descirptors */ + /* Collect used fragments while replacing them in the HW descriptors */ for (nr = 0; nr < priv->num_frags; nr++) { frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; + if (!frags[nr].page) + goto fail; - /* Save page reference in skb */ - __skb_frag_set_page(&skb_frags_rx[nr], skb_frags[nr].page); - skb_frag_size_set(&skb_frags_rx[nr], skb_frags[nr].size); - skb_frags_rx[nr].page_offset = skb_frags[nr].offset; - skb->truesize += frag_info->frag_stride; dma = be64_to_cpu(rx_desc->data[nr].addr); + dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size, + DMA_FROM_DEVICE); - /* Allocate a replacement page */ - if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr)) - goto fail; - - /* Unmap buffer */ - dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]), - PCI_DMA_FROMDEVICE); + /* Save page reference in skb */ + get_page(frags[nr].page); + __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page); + skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size); + skb_frags_rx[nr].page_offset = frags[nr].offset; + skb->truesize += frag_info->frag_stride; } /* Adjust size of last fragment to match actual length */ if (nr > 0) @@ -445,8 +459,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, return nr; fail: - /* Drop all accumulated fragments (which have already been replaced in - * the descriptor) of this packet; remaining fragments are reused... */ while (nr > 0) { nr--; __skb_frag_unref(&skb_frags_rx[nr]); @@ -457,8 +469,7 @@ fail: static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, - struct page_frag *skb_frags, - struct mlx4_en_rx_alloc *page_alloc, + struct mlx4_en_rx_alloc *frags, unsigned int length) { struct sk_buff *skb; @@ -476,23 +487,20 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, /* Get pointer to first fragment so we could copy the headers into the * (linear part of the) skb */ - va = page_address(skb_frags[0].page) + skb_frags[0].offset; + va = page_address(frags[0].page) + frags[0].offset; if (length <= SMALL_PACKET_SIZE) { /* We are copying all relevant data to the skb - temporarily - * synch buffers for the copy */ + * sync buffers for the copy */ dma = be64_to_cpu(rx_desc->data[0].addr); dma_sync_single_for_cpu(priv->ddev, dma, length, DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, va, length); - dma_sync_single_for_device(priv->ddev, dma, length, - DMA_FROM_DEVICE); skb->tail += length; } else { - /* Move relevant fragments to skb */ - used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags, - skb, page_alloc, length); + used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, frags, + skb, length); if (unlikely(!used_frags)) { kfree_skb(skb); return NULL; @@ -529,12 +537,25 @@ out_loopback: dev_kfree_skb_any(skb); } +static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring) +{ + int index = ring->prod & ring->size_mask; + + while ((u32) (ring->prod - ring->cons) < ring->actual_size) { + if (mlx4_en_prepare_rx_desc(priv, ring, index)) + break; + ring->prod++; + index = ring->prod & ring->size_mask; + } +} + int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; - struct page_frag *skb_frags; + struct mlx4_en_rx_alloc *frags; struct mlx4_en_rx_desc *rx_desc; struct sk_buff *skb; int index; @@ -543,6 +564,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud int polled = 0; int ip_summed; struct ethhdr *ethh; + dma_addr_t dma; u64 s_mac; if (!priv->port_up) @@ -558,7 +580,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { - skb_frags = ring->rx_info + (index << priv->log_rx_info); + frags = ring->rx_info + (index << priv->log_rx_info); rx_desc = ring->buf + (index << ring->log_stride); /* @@ -582,8 +604,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Get pointer to first fragment since we haven't skb yet and * cast it to ethhdr struct */ - ethh = (struct ethhdr *)(page_address(skb_frags[0].page) + - skb_frags[0].offset); + dma = be64_to_cpu(rx_desc->data[0].addr); + dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), + DMA_FROM_DEVICE); + ethh = (struct ethhdr *)(page_address(frags[0].page) + + frags[0].offset); s_mac = mlx4_en_mac_to_u64(ethh->h_source); /* If source MAC is equal to our own MAC and not performing @@ -615,10 +640,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (!gro_skb) goto next; - nr = mlx4_en_complete_rx_desc( - priv, rx_desc, - skb_frags, gro_skb, - ring->page_alloc, length); + nr = mlx4_en_complete_rx_desc(priv, + rx_desc, frags, gro_skb, + length); if (!nr) goto next; @@ -654,8 +678,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ring->csum_none++; } - skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags, - ring->page_alloc, length); + skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); if (!skb) { priv->stats.rx_dropped++; goto next; @@ -681,6 +704,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud netif_receive_skb(skb); next: + for (nr = 0; nr < priv->num_frags; nr++) + mlx4_en_free_frag(priv, frags, nr); + ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = &cq->buf[index]; @@ -696,7 +722,7 @@ out: mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = cq->mcq.cons_index; - ring->prod += polled; /* Polled descriptors were realocated in place */ + mlx4_en_refill_rx_buffers(priv, ring); mlx4_en_update_rx_prod_db(ring); return polled; } @@ -785,7 +811,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) priv->num_frags = i; priv->rx_skb_size = eff_mtu; - priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct)); + priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct mlx4_en_rx_alloc)); en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " "num_frags:%d):\n", eff_mtu, priv->num_frags); @@ -987,8 +1013,3 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); } - - - - - diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index af34c98d43e5..5f1ab105debc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -111,7 +111,7 @@ enum { #define MLX4_EN_MAX_TX_SIZE 8192 #define MLX4_EN_MAX_RX_SIZE 8192 -/* Minimum ring size for our page-allocation sceme to work */ +/* Minimum ring size for our page-allocation scheme to work */ #define MLX4_EN_MIN_RX_SIZE (MLX4_EN_ALLOC_SIZE / SMP_CACHE_BYTES) #define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE) @@ -232,6 +232,7 @@ struct mlx4_en_tx_desc { struct mlx4_en_rx_alloc { struct page *page; + dma_addr_t dma; u16 offset; }; -- cgit v1.2.3