diff options
author | Mahesh Bandewar <maheshb@google.com> | 2016-09-16 22:59:19 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-09-19 08:25:22 +0300 |
commit | 4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 (patch) | |
tree | 3ea819d38ad4fbbae8d4db166f58451c2a78ee20 /drivers/net/ipvlan/ipvlan_main.c | |
parent | e8bffe0cf964f0330595bb376b74921cccdaac88 (diff) | |
download | linux-4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5.tar.xz |
ipvlan: Introduce l3s mode
In a typical IPvlan L3 setup where master is in default-ns and
each slave is into different (slave) ns. In this setup egress
packet processing for traffic originating from slave-ns will
hit all NF_HOOKs in slave-ns as well as default-ns. However same
is not true for ingress processing. All these NF_HOOKs are
hit only in the slave-ns skipping them in the default-ns.
IPvlan in L3 mode is restrictive and if admins want to deploy
iptables rules in default-ns, this asymmetric data path makes it
impossible to do so.
This patch makes use of the l3_rcv() (added as part of l3mdev
enhancements) to perform input route lookup on RX packets without
changing the skb->dev and then uses nf_hook at NF_INET_LOCAL_IN
to change the skb->dev just before handing over skb to L4.
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
CC: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ipvlan/ipvlan_main.c')
-rw-r--r-- | drivers/net/ipvlan/ipvlan_main.c | 87 |
1 files changed, 80 insertions, 7 deletions
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 18b4e8c7f68a..f442eb366863 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,24 +9,87 @@ #include "ipvlan.h" +static u32 ipvl_nf_hook_refcnt = 0; + +static struct nf_hook_ops ipvl_nfops[] __read_mostly = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +}; + +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) { ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; } -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) +static int ipvlan_register_nf_hook(void) +{ + int err = 0; + + if (!ipvl_nf_hook_refcnt) { + err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); + if (!err) + ipvl_nf_hook_refcnt = 1; + } else { + ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(void) +{ + WARN_ON(!ipvl_nf_hook_refcnt); + + ipvl_nf_hook_refcnt--; + if (!ipvl_nf_hook_refcnt) + _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); +} + +static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) { struct ipvl_dev *ipvlan; + struct net_device *mdev = port->dev; + int err = 0; + ASSERT_RTNL(); if (port->mode != nval) { + if (nval == IPVLAN_MODE_L3S) { + /* New mode is L3S */ + err = ipvlan_register_nf_hook(); + if (!err) { + mdev->l3mdev_ops = &ipvl_l3mdev_ops; + mdev->priv_flags |= IFF_L3MDEV_MASTER; + } else + return err; + } else if (port->mode == IPVLAN_MODE_L3S) { + /* Old mode was L3S */ + mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + mdev->l3mdev_ops = NULL; + } list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (nval == IPVLAN_MODE_L3) + if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) ipvlan->dev->flags |= IFF_NOARP; else ipvlan->dev->flags &= ~IFF_NOARP; } port->mode = nval; } + return err; } static int ipvlan_port_create(struct net_device *dev) @@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); dev->priv_flags &= ~IFF_IPVLAN_MASTER; + if (port->mode == IPVLAN_MODE_L3S) { + dev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + dev->l3mdev_ops = NULL; + } netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); __skb_queue_purge(&port->backlog); @@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev) struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; - if (ipvlan->port->mode == IPVLAN_MODE_L3) + if (ipvlan->port->mode == IPVLAN_MODE_L3 || + ipvlan->port->mode == IPVLAN_MODE_L3S) dev->flags |= IFF_NOARP; else dev->flags &= ~IFF_NOARP; @@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev, { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + int err = 0; if (data && data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); - ipvlan_set_port_mode(port, nmode); + err = ipvlan_set_port_mode(port, nmode); } - return 0; + return err; } static size_t ipvlan_nl_getsize(const struct net_device *dev) @@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, unregister_netdevice(dev); return err; } + err = ipvlan_set_port_mode(port, mode); + if (err) { + unregister_netdevice(dev); + return err; + } list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); - ipvlan_set_port_mode(port, mode); - netif_stacked_transfer_operstate(phy_dev, dev); return 0; } |