summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2021-06-03 00:08:37 +0300
committer David S. Miller <davem@davemloft.net> 2021-06-03 00:08:37 +0300
commit 270d47dc1fc4756a0158778084a236bc83c156d2 (patch)
tree c4596358121dc31d42875ed93b4ab32d8c665f83 /include
parent 53c7bb553056d3a6713ea413576c6d1b0c3f0f61 (diff)
parent b62767e7bab3a397166a2fa36b409e5e2859f100 (diff)
download linux-270d47dc1fc4756a0158778084a236bc83c156d2.tar.xz
Merge branch 'devlink-rate-objects'
Dmytro Linkin says: ==================== devlink: rate objects API Resending without RFC. Currently the kernel provides a way to change the tx rate of a single VF in switchdev mode via the tc-police action. When lots of VFs are configured, management of their rates becomes a non-trivial task and some grouping mechanism is required. Implementing such grouping in tc-police would bring flow-related limitations and unwanted complications, like: - tc-police is a policer and there is a user request for a traffic shaper, so a shared tc-police action is not suitable; - flows require a net device to be placed on, which means "groups" wouldn't have a net device instance themselves. Taking the previous point into account, a solution was reviewed in which the representor has a policer and the driver uses a shaper if the qdisc contains a group of VFs - such an approach is ugly, complicated and misleading; - TC is ingress only, while configuring the "other" side of the wire looks more like a "real" picture where shaping is outside of the steering world, similar to the "ip link" command; According to that, devlink is the most appropriate place. This series introduces a devlink API for managing the tx rate of a single devlink port or of a group by invoking callbacks (see below) of the corresponding driver. Also, a devlink port or a group can be added to a parent group, where the driver is responsible for handling the rates of the group's elements. To achieve all of that, a new rate object is added. It can be one of two types: - leaf - represents a single devlink port; created/destroyed by the driver and bound to the devlink port. As an example, some driver may create a leaf rate object for every devlink port associated with a VF. Since a leaf has a 1-to-1 mapping to its devlink port, in user space it is referred to as pci/<bus_addr>/<port_index>; - node - represents a group of rate objects; created/deleted by request from userspace; initially empty (no rate objects added). 
In userspace it is referred to as pci/<bus_addr>/<node_name>, where the node name can be anything except a decimal number, to avoid collisions with leafs. devlink_ops is extended with the following callbacks: - rate_{leaf|node}_tx_{share|max}_set - rate_node_{new|del} - rate_{leaf|node}_parent_set KAPI provides: - creation/destruction of the leaf rate object associated with a devlink port - destruction of rate nodes to allow a vendor driver to free allocated resources on driver removal or for other reasons when node destruction is required UAPI provides: - dumping all or single rate objects - setting tx_{share|max} of a rate object of any type - creating/deleting a node rate object - setting/unsetting the parent of any rate object Added devlink rate object support for the netdevsim driver Issues/open questions: - Does the user need a DEVLINK_CMD_RATE_DEL_ALL_CHILD command to clean all children of a particular parent node? For example: $ devlink port function rate flush netdevsim/netdevsim10/group - the priv pointer passed to the callbacks is a source of bugs; in the leaf case the driver can embed the rate object into an internal structure and use container_of() on it; in the node case it cannot be done since nodes are created from userspace v1->v2: - fixed kernel-doc for devlink_rate_leaf_{create|destroy}() - s/func/function/ for all devlink port command occurrences v2->v3: - devlink: - added devlink_rate_nodes_destroy() function - netdevsim: - added call of devlink_rate_nodes_destroy() function ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- include/net/devlink.h 48
-rw-r--r-- include/uapi/linux/devlink.h 17
2 files changed, 65 insertions, 0 deletions
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7c984cadfec4..eb045f1b5d1d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -34,6 +34,7 @@ struct devlink_ops;
struct devlink {
struct list_head list;
struct list_head port_list;
+ struct list_head rate_list;
struct list_head sb_list;
struct list_head dpipe_table_list;
struct list_head resource_list;
@@ -133,6 +134,24 @@ struct devlink_port_attrs {
};
};
+struct devlink_rate {
+ struct list_head list;
+ enum devlink_rate_type type;
+ struct devlink *devlink;
+ void *priv;
+ u64 tx_share;
+ u64 tx_max;
+
+ struct devlink_rate *parent;
+ union {
+ struct devlink_port *devlink_port;
+ struct {
+ char *name;
+ refcount_t refcnt;
+ };
+ };
+};
+
struct devlink_port {
struct list_head list;
struct list_head param_list;
@@ -152,6 +171,8 @@ struct devlink_port {
struct delayed_work type_warn_dw;
struct list_head reporter_list;
struct mutex reporters_lock; /* Protects reporter_list */
+
+ struct devlink_rate *devlink_rate;
};
struct devlink_port_new_attrs {
@@ -1453,6 +1474,30 @@ struct devlink_ops {
struct devlink_port *port,
enum devlink_port_fn_state state,
struct netlink_ext_ack *extack);
+
+ /**
+ * Rate control callbacks.
+ */
+ int (*rate_leaf_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+ int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+ int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+ int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+ int (*rate_node_new)(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack);
+ int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack);
+ int (*rate_leaf_parent_set)(struct devlink_rate *child,
+ struct devlink_rate *parent,
+ void *priv_child, void *priv_parent,
+ struct netlink_ext_ack *extack);
+ int (*rate_node_parent_set)(struct devlink_rate *child,
+ struct devlink_rate *parent,
+ void *priv_child, void *priv_parent,
+ struct netlink_ext_ack *extack);
};
static inline void *devlink_priv(struct devlink *devlink)
@@ -1512,6 +1557,9 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
u32 controller, u16 pf, u32 sf,
bool external);
+int devlink_rate_leaf_create(struct devlink_port *port, void *priv);
+void devlink_rate_leaf_destroy(struct devlink_port *devlink_port);
+void devlink_rate_nodes_destroy(struct devlink *devlink);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index f6008b2fa60f..32f53a0069d6 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -126,6 +126,11 @@ enum devlink_command {
DEVLINK_CMD_HEALTH_REPORTER_TEST,
+ DEVLINK_CMD_RATE_GET, /* can dump */
+ DEVLINK_CMD_RATE_SET,
+ DEVLINK_CMD_RATE_NEW,
+ DEVLINK_CMD_RATE_DEL,
+
/* add new commands above here */
__DEVLINK_CMD_MAX,
DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -206,6 +211,11 @@ enum devlink_port_flavour {
*/
};
+enum devlink_rate_type {
+ DEVLINK_RATE_TYPE_LEAF,
+ DEVLINK_RATE_TYPE_NODE,
+};
+
enum devlink_param_cmode {
DEVLINK_PARAM_CMODE_RUNTIME,
DEVLINK_PARAM_CMODE_DRIVERINIT,
@@ -534,6 +544,13 @@ enum devlink_attr {
DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */
DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */
+
+ DEVLINK_ATTR_RATE_TYPE, /* u16 */
+ DEVLINK_ATTR_RATE_TX_SHARE, /* u64 */
+ DEVLINK_ATTR_RATE_TX_MAX, /* u64 */
+ DEVLINK_ATTR_RATE_NODE_NAME, /* string */
+ DEVLINK_ATTR_RATE_PARENT_NODE_NAME, /* string */
+
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,