diff options
Diffstat (limited to 'net/openvswitch')
-rw-r--r-- | net/openvswitch/actions.c | 4 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 237 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 6 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 42 | ||||
-rw-r--r-- | net/openvswitch/flow_netlink.c | 388 | ||||
-rw-r--r-- | net/openvswitch/flow_netlink.h | 13 | ||||
-rw-r--r-- | net/openvswitch/flow_table.c | 226 | ||||
-rw-r--r-- | net/openvswitch/flow_table.h | 8 | ||||
-rw-r--r-- | net/openvswitch/vport-geneve.c | 32 | ||||
-rw-r--r-- | net/openvswitch/vport-gre.c | 14 | ||||
-rw-r--r-- | net/openvswitch/vport-vxlan.c | 110 | ||||
-rw-r--r-- | net/openvswitch/vport-vxlan.h | 11 | ||||
-rw-r--r-- | net/openvswitch/vport.c | 12 | ||||
-rw-r--r-- | net/openvswitch/vport.h | 18 |
14 files changed, 822 insertions, 299 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 770064c83711..b4cffe686126 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -212,7 +212,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = 0; @@ -222,7 +222,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = vlan->vlan_tci; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b07349e82d78..ae5e77cdc0ca 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; static struct genl_family dp_datapath_genl_family; +static const struct nla_policy flow_policy[]; + static const struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP, }; @@ -419,7 +421,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (!dp_ifindex) return -ENODEV; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; @@ -461,10 +463,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 0, upcall_info->cmd); upcall->dp_ifindex = dp_ifindex; - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - err = ovs_nla_put_flow(key, key, user_skb); + err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); BUG_ON(err); - nla_nest_end(user_skb, nla); if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -664,46 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, } } -static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags) { - return NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */ - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */ - + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ - + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ - + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ - + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ + return ovs_identifier_is_ufid(sfid) && + !(ufid_flags & OVS_UFID_F_OMIT_KEY); } -/* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_match(const struct sw_flow *flow, - struct sk_buff *skb) +static bool should_fill_mask(uint32_t ufid_flags) { - struct nlattr *nla; - int err; + return !(ufid_flags & OVS_UFID_F_OMIT_MASK); +} - /* Fill flow key. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); - if (!nla) - return -EMSGSIZE; +static bool should_fill_actions(uint32_t ufid_flags) +{ + return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS); +} - err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); - if (err) - return err; +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, + uint32_t ufid_flags) +{ + size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - nla_nest_end(skb, nla); + /* OVS_FLOW_ATTR_UFID */ + if (sfid && ovs_identifier_is_ufid(sfid)) + len += nla_total_size(sfid->ufid_len); - /* Fill flow mask. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); - if (!nla) - return -EMSGSIZE; + /* OVS_FLOW_ATTR_KEY */ + if (!sfid || should_fill_key(sfid, ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); - err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); - if (err) - return err; + /* OVS_FLOW_ATTR_MASK */ + if (should_fill_mask(ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); - nla_nest_end(skb, nla); - return 0; + /* OVS_FLOW_ATTR_ACTIONS */ + if (should_fill_actions(ufid_flags)) + len += nla_total_size(acts->actions_len); + + return len + + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ @@ -774,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, - u32 seq, u32 flags, u8 cmd) + u32 seq, u32 flags, u8 cmd, u32 ufid_flags) { const int skb_orig_len = skb->len; struct ovs_header *ovs_header; @@ -787,19 +789,34 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, ovs_header->dp_ifindex = dp_ifindex; - err = ovs_flow_cmd_fill_match(flow, skb); + err = ovs_nla_put_identifier(flow, skb); if (err) goto error; + if (should_fill_key(&flow->id, ufid_flags)) { + err = ovs_nla_put_masked_key(flow, skb); + if (err) + goto error; + } + + if (should_fill_mask(ufid_flags)) { + err = ovs_nla_put_mask(flow, skb); + if (err) + goto error; + } + err = ovs_flow_cmd_fill_stats(flow, skb); if (err) goto error; - err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); - if (err) - goto error; + if (should_fill_actions(ufid_flags)) { + err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + if (err) + goto error; + } - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; error: genlmsg_cancel(skb, ovs_header); @@ -808,15 +825,19 @@ error: /* May not be called with RCU read lock. */ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, struct genl_info *info, - bool always) + bool always, + uint32_t ufid_flags) { struct sk_buff *skb; + size_t len; if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; - skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); + len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -827,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, - bool always) + bool always, u32 ufid_flags) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), + &flow->id, info, always, ufid_flags); if (IS_ERR_OR_NULL(skb)) return skb; retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, - cmd); + cmd, ufid_flags); BUG_ON(retval < 0); return skb; } @@ -848,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow *flow, *new_flow; + struct sw_flow *flow = NULL, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; @@ -878,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &new_flow->unmasked_key, &mask); + ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, &mask); + + /* Extract flow identifier. */ + error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], + &key, log); + if (error) + goto err_kfree_flow; /* Validate actions. */ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, @@ -894,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; } - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -906,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -ENODEV; goto err_unlock_ovs; } + /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (ovs_identifier_is_ufid(&new_flow->id)) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); + if (!flow) + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -923,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -941,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -EEXIST; goto err_unlock_ovs; } - /* The unmasked key has to be the same for flow updates. */ - if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - /* Look for any overlapping flow. */ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + /* The flow identifier has to be the same for flow updates. + * Look for any overlapping flow. + */ + if (unlikely(!ovs_flow_cmp(flow, &match))) { + if (ovs_identifier_is_key(&flow->id)) + flow = ovs_flow_tbl_lookup_exact(&dp->table, + &match); + else /* UFID matches but key is different */ + flow = NULL; if (!flow) { error = -ENOENT; goto err_unlock_ovs; @@ -959,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -1015,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; + struct sw_flow_id sfid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; /* Extract key. */ error = -EINVAL; @@ -1025,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto error; } + ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); @@ -1041,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } /* Can allocate before locking if have acts. */ - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -1055,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. */ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; @@ -1071,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. */ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + info, OVS_FLOW_CMD_NEW, false, + ufid_flags); + if (unlikely(IS_ERR(reply))) { error = PTR_ERR(reply); goto err_unlock_ovs; @@ -1114,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct sw_flow_match match; - int err; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); + int err = 0; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (!a[OVS_FLOW_ATTR_KEY]) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); + } else if (!ufid_present) { OVS_NLERR(log, "Flow get message rejected, Key attribute missing."); - return -EINVAL; + err = -EINVAL; } - - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1135,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { err = -ENOENT; goto unlock; } reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, - OVS_FLOW_CMD_NEW, true); + OVS_FLOW_CMD_NEW, true, ufid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1161,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct datapath *dp; struct sw_flow_match match; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int err; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (likely(a[OVS_FLOW_ATTR_KEY])) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); @@ -1182,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { err = -ENOENT; goto unlock; @@ -1197,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_unlock(); reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, - info, false); + &flow->id, info, false, ufid_flags); if (likely(reply)) { if (likely(!IS_ERR(reply))) { rcu_read_lock(); /*To keep RCU checker happy. */ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_DEL); + OVS_FLOW_CMD_DEL, + ufid_flags); rcu_read_unlock(); BUG_ON(err < 0); @@ -1223,9 +1291,18 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *a[__OVS_FLOW_ATTR_MAX]; struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct table_instance *ti; struct datapath *dp; + u32 ufid_flags; + int err; + + err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy); + if (err) + return err; + ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1248,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_FLOW_CMD_NEW) < 0) + OVS_FLOW_CMD_NEW, ufid_flags) < 0) break; cb->args[0] = bucket; @@ -1264,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 }, + [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, }; static const struct genl_ops dp_flow_genl_ops[] = { @@ -1349,7 +1428,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: genlmsg_cancel(skb, ovs_header); @@ -1723,7 +1803,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (err == -EMSGSIZE) goto error; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: err = -EMSGSIZE; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index da2fae0873a5..e2c348b8baca 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -70,7 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); - int len = skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[node]); @@ -472,7 +472,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) */ key->eth.tci = 0; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); else if (eth->h_proto == htons(ETH_P_8021Q)) if (unlikely(parse_vlan(skb, key))) @@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1 > sizeof(key->tun_opts)); - memcpy(GENEVE_OPTS(key, tun_info->options_len), + memcpy(TUN_METADATA_OPTS(key, tun_info->options_len), tun_info->options, tun_info->options_len); key->tun_opts_len = tun_info->options_len; } else { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a8b30f334388..a076e445ccc2 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel { struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; - const struct geneve_opt *options; + const void *options; u8 options_len; }; @@ -61,10 +61,10 @@ struct ovs_tunnel_info { * maximum size. This allows us to get the benefits of variable length * matching for small options. */ -#define GENEVE_OPTS(flow_key, opt_len) \ - ((struct geneve_opt *)((flow_key)->tun_opts + \ - FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ - opt_len)) +#define TUN_METADATA_OFFSET(opt_len) \ + (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len) +#define TUN_METADATA_OPTS(flow_key, opt_len) \ + ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be32 saddr, __be32 daddr, @@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { tun_info->tunnel.tun_id = tun_id; @@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, @@ -197,6 +197,16 @@ struct sw_flow_match { struct sw_flow_mask *mask; }; +#define MAX_UFID_LENGTH 16 /* 128 bits */ + +struct sw_flow_id { + u32 ufid_len; + union { + u32 ufid[MAX_UFID_LENGTH / 4]; + struct sw_flow_key *unmasked_key; + }; +}; + struct sw_flow_actions { struct rcu_head rcu; u32 actions_len; @@ -213,13 +223,15 @@ struct flow_stats { struct sw_flow { struct rcu_head rcu; - struct hlist_node hash_node[2]; - u32 hash; + struct { + struct hlist_node node[2]; + u32 hash; + } flow_table, ufid_table; int stats_last_writer; /* NUMA-node id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; - struct sw_flow_key unmasked_key; + struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one @@ -243,6 +255,16 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) +{ + return sfid->ufid_len; +} + +static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid) +{ + return !ovs_identifier_is_ufid(sfid); +} + void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, const struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d1eecf707613..8b9a612b39d1 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,14 @@ #include <net/mpls.h> #include "flow_netlink.h" +#include "vport-vxlan.h" + +struct ovs_len_tbl { + int len; + const struct ovs_len_tbl *next; +}; + +#define OVS_ATTR_NESTED -1 static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) @@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. + */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } @@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } +static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), - [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), - [OVS_KEY_ATTR_TUNNEL] = -1, - [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), +static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, + [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, + [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, + [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, + .next = ovs_tunnel_key_lens, }, + [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { + expected_len = ovs_key_lens[type].len; + if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; @@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); } - opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, - nla_len(a)); + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), nla_len(a), is_mask); return 0; } +static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, +}; + +static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + unsigned long opt_key_offset; + struct ovs_vxlan_opts opts; + int err; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); + if (err < 0) + return err; + + memset(&opts, 0, sizeof(opts)); + + if (tb[OVS_VXLAN_EXT_GBP]) + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + return 0; +} + static int ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); int err; - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_OAM] = 0, - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, - }; - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { OVS_NLERR(log, "Tunnel attr %d out of range max %d", type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a) && - ovs_tunnel_key_lens[type] != -1) { + if (ovs_tunnel_key_lens[type].len != nla_len(a) && + ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", - type, nla_len(a), ovs_tunnel_key_lens[type]); + type, nla_len(a), ovs_tunnel_key_lens[type].len); return -EINVAL; } @@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); if (err) return err; - tun_flags |= TUNNEL_OPTIONS_PRESENT; + tun_flags |= TUNNEL_GENEVE_OPT; + opts_type = type; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_VXLAN_OPT; + opts_type = type; break; default: OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", @@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } } + return opts_type; +} + +static int vxlan_opt_to_nlattr(struct sk_buff *skb, + const void *tun_opts, int swkey_tun_opts_len) +{ + const struct ovs_vxlan_opts *opts = tun_opts; + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } static int __ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) @@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts && - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - swkey_tun_opts_len, tun_opts)) - return -EMSGSIZE; + if (tun_opts) { + if (output->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_VXLAN_OPT && + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) + return -EMSGSIZE; + } return 0; } static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { struct nlattr *nla; int err; @@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log)) + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return 0; } -static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) +static void nlattr_set(struct nlattr *attr, u8 val, + const struct ovs_len_tbl *tbl) { struct nlattr *nla; int rem; /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - /* We assume that ovs_key_lens[type] == -1 means that type is a - * nested attribute - */ - if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) - nlattr_set(nla, val, false); + if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) + nlattr_set(nla, val, tbl[nla_type(nla)].next); else memset(nla_data(nla), val, nla_len(nla)); } @@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) static void mask_set_nlattr(struct nlattr *attr, u8 val) { - nlattr_set(attr, val, true); + nlattr_set(attr, val, ovs_key_lens); } /** @@ -1095,6 +1180,59 @@ free_newmask: return err; } +static size_t get_ufid_len(const struct nlattr *attr, bool log) +{ + size_t len; + + if (!attr) + return 0; + + len = nla_len(attr); + if (len < 1 || len > MAX_UFID_LENGTH) { + OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", + nla_len(attr), MAX_UFID_LENGTH); + return 0; + } + + return len; +} + +/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, + * or false otherwise. + */ +bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, + bool log) +{ + sfid->ufid_len = get_ufid_len(attr, log); + if (sfid->ufid_len) + memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); + + return sfid->ufid_len; +} + +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log) +{ + struct sw_flow_key *new_key; + + if (ovs_nla_get_ufid(sfid, ufid, log)) + return 0; + + /* If UFID was not provided, use unmasked key. */ + new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); + if (!new_key) + return -ENOMEM; + memcpy(new_key, key, sizeof(*key)); + sfid->unmasked_key = new_key; + + return 0; +} + +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) +{ + return attr ? nla_get_u32(attr) : 0; +} + /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * @key: Receives extracted in_port, priority, tun_key and skb_mark. @@ -1131,12 +1269,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, return metadata_from_nlattrs(&match, &attrs, a, false, log); } -int ovs_nla_put_flow(const struct sw_flow_key *swkey, - const struct sw_flow_key *output, struct sk_buff *skb) +static int __ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, bool is_mask, + struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; struct nlattr *nla, *encap; - bool is_mask = (swkey != output); if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; @@ -1148,10 +1286,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; if ((swkey->tun_key.ipv4_dst || is_mask)) { - const struct geneve_opt *opts = NULL; + const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) - opts = GENEVE_OPTS(output, swkey->tun_opts_len); + opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, swkey->tun_opts_len)) @@ -1346,6 +1484,49 @@ nla_put_failure: return -EMSGSIZE; } +int ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, int attr, bool is_mask, + struct sk_buff *skb) +{ + int err; + struct nlattr *nla; + + nla = nla_nest_start(skb, attr); + if (!nla) + return -EMSGSIZE; + err = __ovs_nla_put_key(swkey, output, is_mask, skb); + if (err) + return err; + nla_nest_end(skb, nla); + + return 0; +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, + flow->id.ufid); + + return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->mask->key, &flow->key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->key, &flow->mask->key, + OVS_FLOW_ATTR_MASK, true, skb); +} + #define MAX_ACTIONS_BUFSIZE (32 * 1024) static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) @@ -1540,6 +1721,34 @@ void ovs_match_init(struct sw_flow_match *match, } } +static int validate_geneve_opts(struct sw_flow_key *key) +{ + struct geneve_opt *option; + int opts_len = key->tun_opts_len; + bool crit_opt = false; + + option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*option)) + return -EINVAL; + + len = sizeof(*option) + option->length * 4; + if (len > opts_len) + return -EINVAL; + + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); + + option = (struct geneve_opt *)((u8 *)option + len); + opts_len -= len; + }; + + key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + + return 0; +} + static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { @@ -1547,36 +1756,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start; + int err, start, opts_type; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); - if (err) - return err; + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + if (opts_type < 0) + return opts_type; if (key.tun_opts_len) { - struct geneve_opt *option = GENEVE_OPTS(&key, - key.tun_opts_len); - int opts_len = key.tun_opts_len; - bool crit_opt = false; - - while (opts_len > 0) { - int len; - - if (opts_len < sizeof(*option)) - return -EINVAL; - - len = sizeof(*option) + option->length * 4; - if (len > opts_len) - return -EINVAL; - - crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); - - option = (struct geneve_opt *)((u8 *)option + len); - opts_len -= len; - }; - - key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + switch (opts_type) { + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + err = validate_geneve_opts(&key); + if (err < 0) + return err; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + break; + } }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); @@ -1597,9 +1793,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, * everything else will go away after flow setup. We can append * it to tun_info and then point there. */ - memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), - key.tun_opts_len); - tun_info->options = (struct geneve_opt *)(tun_info + 1); + memcpy((tun_info + 1), + TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); + tun_info->options = (tun_info + 1); } else { tun_info->options = NULL; } @@ -1622,8 +1818,8 @@ static int validate_set(const struct nlattr *a, return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) + (ovs_key_lens[key_type].len != nla_len(ovs_key) && + ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) return -EINVAL; switch (key_type) { diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 577f12be3459..5c3d75bff310 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -43,16 +43,25 @@ size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); -int ovs_nla_put_flow(const struct sw_flow_key *, - const struct sw_flow_key *, struct sk_buff *); +int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, + int attr, bool is_mask, struct sk_buff *); int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, bool log); +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); + int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); +bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log); +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); + int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5899bf161c61..5e57628e6584 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -139,6 +139,8 @@ static void flow_free(struct sw_flow *flow) { int node; + if (ovs_identifier_is_key(&flow->id)) + kfree(flow->id.unmasked_key); kfree((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) @@ -200,18 +202,28 @@ static struct table_instance *table_instance_alloc(int new_size) int ovs_flow_tbl_init(struct flow_table *table) { - struct table_instance *ti; + struct table_instance *ti, *ufid_ti; ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) return -ENOMEM; + ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!ufid_ti) + goto free_ti; + rcu_assign_pointer(table->ti, ti); + rcu_assign_pointer(table->ufid_ti, ufid_ti); INIT_LIST_HEAD(&table->mask_list); table->last_rehash = jiffies; table->count = 0; + table->ufid_count = 0; return 0; + +free_ti: + __table_instance_destroy(ti); + return -ENOMEM; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) @@ -221,13 +233,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, bool deferred) +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti, + bool deferred) { int i; if (!ti) return; + BUG_ON(!ufid_ti); if (ti->keep_flows) goto skip_flows; @@ -236,18 +251,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred) struct hlist_head *head = flex_array_get(ti->buckets, i); struct hlist_node *n; int ver = ti->node_ver; + int ufid_ver = ufid_ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del_rcu(&flow->hash_node[ver]); + hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { + hlist_del_rcu(&flow->flow_table.node[ver]); + if (ovs_identifier_is_ufid(&flow->id)) + hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); ovs_flow_free(flow, deferred); } } skip_flows: - if (deferred) + if (deferred) { call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - else + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); + } else { __table_instance_destroy(ti); + __table_instance_destroy(ufid_ti); + } } /* No need for locking this function is called from RCU callback or @@ -256,8 +277,9 @@ skip_flows: void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); + struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); - table_instance_destroy(ti, false); + table_instance_destroy(ti, ufid_ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -272,7 +294,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, while (*bucket < ti->n_buckets) { i = 0; head = flex_array_get(ti->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { if (i < *last) { i++; continue; @@ -294,16 +316,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) (hash & (ti->n_buckets - 1))); } -static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +static void table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->flow_table.hash); + hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head); +} + +static void ufid_table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) { struct hlist_head *head; - head = find_bucket(ti, flow->hash); - hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); + head = find_bucket(ti, flow->ufid_table.hash); + hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head); } static void flow_table_copy_flows(struct table_instance *old, - struct table_instance *new) + struct table_instance *new, bool ufid) { int old_ver; int i; @@ -318,15 +350,21 @@ static void flow_table_copy_flows(struct table_instance *old, head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, head, hash_node[old_ver]) - table_instance_insert(new, flow); + if (ufid) + hlist_for_each_entry(flow, head, + ufid_table.node[old_ver]) + ufid_table_instance_insert(new, flow); + else + hlist_for_each_entry(flow, head, + flow_table.node[old_ver]) + table_instance_insert(new, flow); } old->keep_flows = true; } static struct table_instance *table_instance_rehash(struct table_instance *ti, - int n_buckets) + int n_buckets, bool ufid) { struct table_instance *new_ti; @@ -334,32 +372,45 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti, if (!new_ti) return NULL; - flow_table_copy_flows(ti, new_ti); + flow_table_copy_flows(ti, new_ti, ufid); return new_ti; } int ovs_flow_tbl_flush(struct flow_table *flow_table) { - struct table_instance *old_ti; - struct table_instance *new_ti; + struct table_instance *old_ti, *new_ti; + struct table_instance *old_ufid_ti, *new_ufid_ti; - old_ti = ovsl_dereference(flow_table->ti); new_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!new_ti) return -ENOMEM; + new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ufid_ti) + goto err_free_ti; + + old_ti = ovsl_dereference(flow_table->ti); + old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); rcu_assign_pointer(flow_table->ti, new_ti); + rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti); flow_table->last_rehash = jiffies; flow_table->count = 0; + flow_table->ufid_count = 0; - table_instance_destroy(old_ti, true); + table_instance_destroy(old_ti, old_ufid_ti, true); return 0; + +err_free_ti: + __table_instance_destroy(new_ti); + return -ENOMEM; } -static u32 flow_hash(const struct sw_flow_key *key, int key_start, - int key_end) +static u32 flow_hash(const struct sw_flow_key *key, + const struct sw_flow_key_range *range) { + int key_start = range->start; + int key_end = range->end; const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; @@ -395,19 +446,20 @@ static bool cmp_key(const struct sw_flow_key *key1, static bool flow_cmp_masked_key(const struct sw_flow *flow, const struct sw_flow_key *key, - int key_start, int key_end) + const struct sw_flow_key_range *range) { - return cmp_key(&flow->key, key, key_start, key_end); + return cmp_key(&flow->key, key, range->start, range->end); } -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match) +static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + const struct sw_flow_match *match) { struct sw_flow_key *key = match->key; int key_start = flow_key_start(key); int key_end = match->range.end; - return cmp_key(&flow->unmasked_key, key, key_start, key_end); + BUG_ON(ovs_identifier_is_ufid(&flow->id)); + return cmp_key(flow->id.unmasked_key, key, key_start, key_end); } static struct sw_flow *masked_flow_lookup(struct table_instance *ti, @@ -416,18 +468,15 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, { struct sw_flow *flow; struct hlist_head *head; - int key_start = mask->range.start; - int key_end = mask->range.end; u32 hash; struct sw_flow_key masked_key; ovs_flow_mask_key(&masked_key, unmasked, mask); - hash = flow_hash(&masked_key, key_start, key_end); + hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { - if (flow->mask == mask && flow->hash == hash && - flow_cmp_masked_key(flow, &masked_key, - key_start, key_end)) + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { + if (flow->mask == mask && flow->flow_table.hash == hash && + flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; } return NULL; @@ -469,7 +518,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, /* Always called under ovs-mutex. */ list_for_each_entry(mask, &tbl->mask_list, list) { flow = masked_flow_lookup(ti, match->key, mask); - if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + if (flow && ovs_identifier_is_key(&flow->id) && + ovs_flow_cmp_unmasked_key(flow, match)) + return flow; + } + return NULL; +} + +static u32 ufid_hash(const struct sw_flow_id *sfid) +{ + return jhash(sfid->ufid, sfid->ufid_len, 0); +} + +static bool ovs_flow_cmp_ufid(const struct sw_flow *flow, + const struct sw_flow_id *sfid) +{ + if (flow->id.ufid_len != sfid->ufid_len) + return false; + + return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len); +} + +bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return flow_cmp_masked_key(flow, match->key, &match->range); + + return ovs_flow_cmp_unmasked_key(flow, match); +} + +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, + const struct sw_flow_id *ufid) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti); + struct sw_flow *flow; + struct hlist_head *head; + u32 hash; + + hash = ufid_hash(ufid); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { + if (flow->ufid_table.hash == hash && + ovs_flow_cmp_ufid(flow, ufid)) return flow; } return NULL; @@ -486,9 +576,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return num; } -static struct table_instance *table_instance_expand(struct table_instance *ti) +static struct table_instance *table_instance_expand(struct table_instance *ti, + bool ufid) { - return table_instance_rehash(ti, ti->n_buckets * 2); + return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } /* Remove 'mask' from the mask list, if it is not needed any more. */ @@ -513,10 +604,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); + struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[ti->node_ver]); + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); table->count--; + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + table->ufid_count--; + } /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be * accessible as long as the RCU read lock is held. @@ -585,34 +681,64 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, } /* Must be called with OVS mutex held. */ -int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - const struct sw_flow_mask *mask) +static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) { struct table_instance *new_ti = NULL; struct table_instance *ti; - int err; - - err = flow_mask_insert(table, flow, mask); - if (err) - return err; - flow->hash = flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); + flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range); ti = ovsl_dereference(table->ti); table_instance_insert(ti, flow); table->count++; /* Expand table, if necessary, to make room. */ if (table->count > ti->n_buckets) - new_ti = table_instance_expand(ti); + new_ti = table_instance_expand(ti, false); else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) - new_ti = table_instance_rehash(ti, ti->n_buckets); + new_ti = table_instance_rehash(ti, ti->n_buckets, false); if (new_ti) { rcu_assign_pointer(table->ti, new_ti); - table_instance_destroy(ti, true); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); table->last_rehash = jiffies; } +} + +/* Must be called with OVS mutex held. */ +static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti; + + flow->ufid_table.hash = ufid_hash(&flow->id); + ti = ovsl_dereference(table->ufid_ti); + ufid_table_instance_insert(ti, flow); + table->ufid_count++; + + /* Expand table, if necessary, to make room. */ + if (table->ufid_count > ti->n_buckets) { + struct table_instance *new_ti; + + new_ti = table_instance_expand(ti, true); + if (new_ti) { + rcu_assign_pointer(table->ufid_ti, new_ti); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + } + } +} + +/* Must be called with OVS mutex held. */ +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + const struct sw_flow_mask *mask) +{ + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + flow_key_insert(table, flow); + if (ovs_identifier_is_ufid(&flow->id)) + flow_ufid_insert(table, flow); + return 0; } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 309fa6415689..616eda10d955 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -47,9 +47,11 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; + struct table_instance __rcu *ufid_ti; struct list_head mask_list; unsigned long last_rehash; unsigned int count; + unsigned int ufid_count; }; extern struct kmem_cache *flow_stats_cache; @@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, const struct sw_flow_match *match); -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match); +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, + const struct sw_flow_id *); + +bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 484864dd0e68..bf02fd5808c9 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -9,8 +9,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/version.h> - #include <linux/in.h> #include <linux/ip.h> #include <linux/net.h> @@ -90,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) opts_len = geneveh->opt_len * 4; - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (geneveh->oam ? TUNNEL_OAM : 0) | (geneveh->critical ? TUNNEL_CRIT_OPT : 0); @@ -172,7 +170,7 @@ error: static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) { - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct ovs_tunnel_info *tun_info; struct net *net = ovs_dp_get_net(vport->dp); struct geneve_port *geneve_port = geneve_vport(vport); @@ -180,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 sport; struct rtable *rt; struct flowi4 fl; - u8 vni[3]; + u8 vni[3], opts_len, *opts; __be16 df; int err; @@ -191,16 +189,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &tun_info->tunnel; - - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; @@ -211,12 +200,19 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tunnel_id_to_vni(tun_key->tun_id, vni); skb->ignore_df = 1; + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { + opts = (u8 *)tun_info->options; + opts_len = tun_info->options_len; + } else { + opts = NULL; + opts_len = 0; + } + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, - tun_info->options_len, (u8 *)tun_info->options, - false); + tun_key->tun_flags, vni, opts_len, opts, + !!(tun_key->tun_flags & TUNNEL_CSUM), false); if (err < 0) ip_rt_put(rt); return err; diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index d4168c442db5..f17ac9642f4e 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -134,7 +134,7 @@ static int gre_err(struct sk_buff *skb, u32 info, static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct flowi4 fl; struct rtable *rt; int min_headroom; @@ -148,15 +148,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_GRE; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto err_free_skb; @@ -166,7 +158,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + tunnel_hlen + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { int head_delta = SKB_DATA_ALIGN(min_headroom - skb_headroom(skb) + diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index d7c46b301024..ff07d4062d60 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,6 +40,7 @@ #include "datapath.h" #include "vport.h" +#include "vport-vxlan.h" /** * struct vxlan_port - Keeps track of open UDP ports @@ -49,6 +50,7 @@ struct vxlan_port { struct vxlan_sock *vs; char name[IFNAMSIZ]; + u32 exts; /* VXLAN_F_* in <net/vxlan.h> */ }; static struct vport_ops ovs_vxlan_vport_ops; @@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. */ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; + struct vxlan_port *vxlan_port; struct vport *vport = vs->data; struct iphdr *iph; + struct ovs_vxlan_opts opts = { + .gbp = md->gbp, + }; __be64 key; + __be16 flags; + + flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0); + vxlan_port = vxlan_vport(vport); + if (vxlan_port->exts & VXLAN_F_GBP) + flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ iph = ip_hdr(skb); - key = cpu_to_be64(ntohl(vx_vni) >> 8); + key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, TUNNEL_KEY, NULL, 0); + key, flags, &opts, sizeof(opts)); ovs_vport_receive(vport, skb, &tun_info); } @@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; + + if (vxlan_port->exts) { + struct nlattr *exts; + + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan_port->exts & VXLAN_F_GBP && + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } + return 0; } @@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ovs_vport_deferred_free(vport); } +static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, +}; + +static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) +{ + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; + struct vxlan_port *vxlan_port; + int err; + + if (nla_len(attr) < sizeof(struct nlattr)) + return -EINVAL; + + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + if (err < 0) + return err; + + vxlan_port = vxlan_vport(vport); + + if (exts[OVS_VXLAN_EXT_GBP]) + vxlan_port->exts |= VXLAN_F_GBP; + + return 0; +} + static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); + if (a) { + err = vxlan_configure_exts(vport, a); + if (err) { + ovs_vport_free(vport); + goto error; + } + } + + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, + vxlan_port->exts); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -140,17 +203,34 @@ error: return ERR_PTR(err); } +static int vxlan_ext_gbp(struct sk_buff *skb) +{ + const struct ovs_tunnel_info *tun_info; + const struct ovs_vxlan_opts *opts; + + tun_info = OVS_CB(skb)->egress_tun_info; + opts = tun_info->options; + + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && + tun_info->options_len >= sizeof(*opts)) + return opts->gbp; + else + return 0; +} + static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; + struct vxlan_metadata md = {0}; struct rtable *rt; struct flowi4 fl; __be16 src_port; __be16 df; int err; + u32 vxflags; if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; @@ -158,15 +238,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; @@ -178,13 +250,15 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; src_port = udp_flow_src_port(net, skb, 0, 0, true); + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); + md.gbp = vxlan_ext_gbp(skb); + vxflags = vxlan_port->exts | + (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0); - err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, - fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(tun_key->tun_id) << 8), - false); + &md, false, vxflags); if (err < 0) ip_rt_put(rt); return err; diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h new file mode 100644 index 000000000000..4b08233e73d5 --- /dev/null +++ b/net/openvswitch/vport-vxlan.h @@ -0,0 +1,11 @@ +#ifndef VPORT_VXLAN_H +#define VPORT_VXLAN_H 1 + +#include <linux/kernel.h> +#include <linux/types.h> + +struct ovs_vxlan_opts { + __u32 gbp; +}; + +#endif diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 2034c6d9cb5a..ec2954ffc690 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,7 +480,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; - stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + stats->rx_bytes += skb->len + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; @@ -594,14 +595,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, * The process may need to be changed if the corresponding process * in vports ops changed. */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb_mark; - fl.flowi4_proto = ipproto; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 99c8e71d9e6c..f8ae295fb001 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -236,4 +236,22 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, int ovs_vport_ops_register(struct vport_ops *ops); void ovs_vport_ops_unregister(struct vport_ops *ops); +static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, + const struct ovs_key_ipv4_tunnel *key, + u32 mark, + struct flowi4 *fl, + u8 protocol) +{ + struct rtable *rt; + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->ipv4_dst; + fl->saddr = key->ipv4_src; + fl->flowi4_tos = RT_TOS(key->ipv4_tos); + fl->flowi4_mark = mark; + fl->flowi4_proto = protocol; + + rt = ip_route_output_key(net, fl); + return rt; +} #endif /* vport.h */ |