From 39e393bb4f653d38aea40190e1aa9a49062eed4d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 11 Sep 2014 11:02:39 +0200 Subject: netfilter: nf_tables: add NFTA_MASQ_UNSPEC to nft_masq_attributes To keep this consistent with other nft_*_attributes. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index eeec0ae845ef..66d66dd3ff79 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -806,6 +806,7 @@ enum nft_nat_attributes { * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) */ enum nft_masq_attributes { + NFTA_MASQ_UNSPEC, NFTA_MASQ_FLAGS, __NFTA_MASQ_MAX }; -- cgit v1.2.3 From 0e9871e3f79fd17c691b50a9669220c54ff084a2 Mon Sep 17 00:00:00 2001 From: Anton Danilov Date: Thu, 28 Aug 2014 10:11:27 +0400 Subject: netfilter: ipset: Add skbinfo extension kernel support in the ipset core. Skbinfo extension provides mapping of metainformation with lookup in the ipset tables. This patch defines the flags, the constants, the functions and the structures for the data type independent support of the extension. Note the firewall mark stores in the kernel structures as two 32bit values, but transfered through netlink as one 64bit value. Signed-off-by: Anton Danilov Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 56 ++++++++++++++++++++++++++++- include/uapi/linux/netfilter/ipset/ip_set.h | 12 +++++++ net/netfilter/ipset/ip_set_core.c | 27 +++++++++++++- 3 files changed, 93 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 96afc29184be..b97aac5142ed 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -57,6 +57,8 @@ enum ip_set_extension { IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER), IPSET_EXT_BIT_COMMENT = 2, IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT), + IPSET_EXT_BIT_SKBINFO = 3, + IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO), /* Mark set with an extension which needs to call destroy */ IPSET_EXT_BIT_DESTROY = 7, IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY), @@ -65,12 +67,14 @@ enum ip_set_extension { #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) #define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT) +#define SET_WITH_SKBINFO(s) ((s)->extensions & IPSET_EXT_SKBINFO) #define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD) /* Extension id, in size order */ enum ip_set_ext_id { IPSET_EXT_ID_COUNTER = 0, IPSET_EXT_ID_TIMEOUT, + IPSET_EXT_ID_SKBINFO, IPSET_EXT_ID_COMMENT, IPSET_EXT_ID_MAX, }; @@ -92,6 +96,10 @@ struct ip_set_ext { u64 packets; u64 bytes; u32 timeout; + u32 skbmark; + u32 skbmarkmask; + u32 skbprio; + u16 skbqueue; char *comment; }; @@ -104,6 +112,13 @@ struct ip_set_comment { char *str; }; +struct ip_set_skbinfo { + u32 skbmark; + u32 skbmarkmask; + u32 skbprio; + u16 skbqueue; +}; + struct ip_set; #define ext_timeout(e, s) \ @@ -112,7 +127,8 @@ struct ip_set; (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) #define ext_comment(e, s) \ (struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT]) - +#define ext_skbinfo(e, s) \ +(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO]) typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, @@ -256,6 +272,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) cadt_flags |= IPSET_FLAG_WITH_COUNTERS; if (SET_WITH_COMMENT(set)) cadt_flags |= IPSET_FLAG_WITH_COMMENT; + if (SET_WITH_SKBINFO(set)) + cadt_flags |= IPSET_FLAG_WITH_SKBINFO; if (SET_WITH_FORCEADD(set)) cadt_flags |= IPSET_FLAG_WITH_FORCEADD; @@ -304,6 +322,39 @@ ip_set_update_counter(struct ip_set_counter *counter, } } +static inline void +ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + mext->skbmark = skbinfo->skbmark; + mext->skbmarkmask = skbinfo->skbmarkmask; + mext->skbprio = skbinfo->skbprio; + mext->skbqueue = skbinfo->skbqueue; +} +static inline bool +ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) +{ + return nla_put_net64(skb, IPSET_ATTR_SKBMARK, + cpu_to_be64((u64)skbinfo->skbmark << 32 | + skbinfo->skbmarkmask)) || + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + cpu_to_be32(skbinfo->skbprio)) || + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + cpu_to_be16(skbinfo->skbqueue)); + +} + +static inline void +ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext) +{ + skbinfo->skbmark = ext->skbmark; + skbinfo->skbmarkmask = ext->skbmarkmask; + skbinfo->skbprio = ext->skbprio; + skbinfo->skbqueue = ext->skbqueue; +} + static inline bool ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) { @@ -497,6 +548,9 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, if (SET_WITH_COMMENT(set) && ip_set_put_comment(skb, ext_comment(e, set))) return -EMSGSIZE; + if (SET_WITH_SKBINFO(set) && + ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 78c2f2e79920..ca03119111a2 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -115,6 +115,9 @@ enum { IPSET_ATTR_BYTES, IPSET_ATTR_PACKETS, IPSET_ATTR_COMMENT, + IPSET_ATTR_SKBMARK, + IPSET_ATTR_SKBPRIO, + IPSET_ATTR_SKBQUEUE, __IPSET_ATTR_ADT_MAX, }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) @@ -147,6 +150,7 @@ enum ipset_errno { IPSET_ERR_COUNTER, IPSET_ERR_COMMENT, IPSET_ERR_INVALID_MARKMASK, + IPSET_ERR_SKBINFO, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 4352, @@ -170,6 +174,12 @@ enum ipset_cmd_flags { IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS), IPSET_FLAG_BIT_RETURN_NOMATCH = 7, IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH), + IPSET_FLAG_BIT_MAP_SKBMARK = 8, + IPSET_FLAG_MAP_SKBMARK = (1 << IPSET_FLAG_BIT_MAP_SKBMARK), + IPSET_FLAG_BIT_MAP_SKBPRIO = 9, + IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO), + IPSET_FLAG_BIT_MAP_SKBQUEUE = 10, + IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE), IPSET_FLAG_CMD_MAX = 15, }; @@ -187,6 +197,8 @@ enum ipset_cadt_flags { IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), IPSET_FLAG_BIT_WITH_FORCEADD = 5, IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD), + IPSET_FLAG_BIT_WITH_SKBINFO = 6, + IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO), IPSET_FLAG_CADT_MAX = 15, }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 4ca4e5ca6f57..26c795e6b57f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -337,6 +337,12 @@ const struct ip_set_ext_type ip_set_extensions[] = { .len = sizeof(unsigned long), .align = __alignof__(unsigned long), }, + [IPSET_EXT_ID_SKBINFO] = { + .type = IPSET_EXT_SKBINFO, + .flag = IPSET_FLAG_WITH_SKBINFO, + .len = sizeof(struct ip_set_skbinfo), + .align = __alignof__(struct ip_set_skbinfo), + }, [IPSET_EXT_ID_COMMENT] = { .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, .flag = IPSET_FLAG_WITH_COMMENT, @@ -382,6 +388,7 @@ int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) { + u64 fullmark; if (tb[IPSET_ATTR_TIMEOUT]) { if (!(set->extensions & IPSET_EXT_TIMEOUT)) return -IPSET_ERR_TIMEOUT; @@ -402,7 +409,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_COMMENT; ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); } - + if (tb[IPSET_ATTR_SKBMARK]) { + if (!(set->extensions & IPSET_EXT_SKBINFO)) + return -IPSET_ERR_SKBINFO; + fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); + ext->skbmark = fullmark >> 32; + ext->skbmarkmask = fullmark & 0xffffffff; + } + if (tb[IPSET_ATTR_SKBPRIO]) { + if (!(set->extensions & IPSET_EXT_SKBINFO)) + return -IPSET_ERR_SKBINFO; + ext->skbprio = be32_to_cpu(nla_get_be32( + tb[IPSET_ATTR_SKBPRIO])); + } + if (tb[IPSET_ATTR_SKBQUEUE]) { + if (!(set->extensions & IPSET_EXT_SKBINFO)) + return -IPSET_ERR_SKBINFO; + ext->skbqueue = be16_to_cpu(nla_get_be16( + tb[IPSET_ATTR_SKBQUEUE])); + } return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); -- cgit v1.2.3 From 76cea4109ca89dea218fdc652d2e1535fd9b5fc7 Mon Sep 17 00:00:00 2001 From: Anton Danilov Date: Tue, 2 Sep 2014 14:21:20 +0400 Subject: netfilter: ipset: Add skbinfo extension support to SET target. Signed-off-by: Anton Danilov Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/xt_set.h | 10 +++ net/netfilter/xt_set.c | 155 ++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h index 964d3d42f874..d6a1df1f2947 100644 --- a/include/uapi/linux/netfilter/xt_set.h +++ b/include/uapi/linux/netfilter/xt_set.h @@ -71,4 +71,14 @@ struct xt_set_info_match_v3 { __u32 flags; }; +/* Revision 3 target */ + +struct xt_set_info_target_v3 { + struct xt_set_info add_set; + struct xt_set_info del_set; + struct xt_set_info map_set; + __u32 flags; + __u32 timeout; +}; + #endif /*_XT_SET_H*/ diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index cb70f6ec5695..5732cd64acc0 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -366,6 +366,140 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) #define set_target_v2_checkentry set_target_v1_checkentry #define set_target_v2_destroy set_target_v1_destroy +/* Revision 3 target */ + +static unsigned int +set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, info->flags, info->timeout); + ADT_OPT(del_opt, par->family, info->del_set.dim, + info->del_set.flags, 0, UINT_MAX); + ADT_OPT(map_opt, par->family, info->map_set.dim, + info->map_set.flags, 0, UINT_MAX); + + int ret; + + /* Normalize to fit into jiffies */ + if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && + add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, skb, par, &add_opt); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, skb, par, &del_opt); + if (info->map_set.index != IPSET_INVALID_ID) { + map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK | + IPSET_FLAG_MAP_SKBPRIO | + IPSET_FLAG_MAP_SKBQUEUE); + ret = match_set(info->map_set.index, skb, par, &map_opt, + info->map_set.flags & IPSET_INV_MATCH); + if (!ret) + return XT_CONTINUE; + if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK) + skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask)) + ^ (map_opt.ext.skbmark); + if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO) + skb->priority = map_opt.ext.skbprio; + if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) && + skb->dev && + skb->dev->real_num_tx_queues > map_opt.ext.skbqueue) + skb_set_queue_mapping(skb, map_opt.ext.skbqueue); + } + return XT_CONTINUE; +} + + +static int +set_target_v3_checkentry(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + ip_set_id_t index; + + if (info->add_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(par->net, + info->add_set.index); + if (index == IPSET_INVALID_ID) { + pr_warn("Cannot find add_set index %u as target\n", + info->add_set.index); + return -ENOENT; + } + } + + if (info->del_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(par->net, + info->del_set.index); + if (index == IPSET_INVALID_ID) { + pr_warn("Cannot find del_set index %u as target\n", + info->del_set.index); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, + info->add_set.index); + return -ENOENT; + } + } + + if (info->map_set.index != IPSET_INVALID_ID) { + if (strncmp(par->table, "mangle", 7)) { + pr_warn("--map-set only usable from mangle table\n"); + return -EINVAL; + } + if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | + (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && + !(par->hook_mask & (1 << NF_INET_FORWARD | + 1 << NF_INET_LOCAL_OUT | + 1 << NF_INET_POST_ROUTING))) { + pr_warn("mapping of prio or/and queue is allowed only" + "from OUTPUT/FORWARD/POSTROUTING chains\n"); + return -EINVAL; + } + index = ip_set_nfnl_get_byindex(par->net, + info->map_set.index); + if (index == IPSET_INVALID_ID) { + pr_warn("Cannot find map_set index %u as target\n", + info->map_set.index); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, + info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, + info->del_set.index); + return -ENOENT; + } + } + + if (info->add_set.dim > IPSET_DIM_MAX || + info->del_set.dim > IPSET_DIM_MAX || + info->map_set.dim > IPSET_DIM_MAX) { + pr_warn("Protocol error: SET target dimension " + "is over the limit!\n"); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->del_set.index); + if (info->map_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->map_set.index); + return -ERANGE; + } + + return 0; +} + +static void +set_target_v3_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->del_set.index); + if (info->map_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->map_set.index); +} + + static struct xt_match set_matches[] __read_mostly = { { .name = "set", @@ -493,6 +627,27 @@ static struct xt_target set_targets[] __read_mostly = { .destroy = set_target_v2_destroy, .me = THIS_MODULE }, + /* --map-set support */ + { + .name = "SET", + .revision = 3, + .family = NFPROTO_IPV4, + .target = set_target_v3, + .targetsize = sizeof(struct xt_set_info_target_v3), + .checkentry = set_target_v3_checkentry, + .destroy = set_target_v3_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 3, + .family = NFPROTO_IPV6, + .target = set_target_v3, + .targetsize = sizeof(struct xt_set_info_target_v3), + .checkentry = set_target_v3_checkentry, + .destroy = set_target_v3_destroy, + .me = THIS_MODULE + }, }; static int __init xt_set_init(void) -- cgit v1.2.3 From 6cff339bbd5f9eda7a5e8a521f91a88d046e6d0c Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Tue, 9 Sep 2014 16:40:20 -0700 Subject: ipvs: Add destination address family to netlink interface This is necessary to support heterogeneous pools. For example, if you have an ipv6 addressed network, you'll want to be able to forward ipv4 traffic into it. This patch enforces that destination address family is the same as service family, as none of the forwarding mechanisms support anything else. For the old setsockopt mechanism, we simply set the dest address family to AF_INET as we do with the service. Signed-off-by: Alex Gartrell Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 +++ include/uapi/linux/ip_vs.h | 3 +++ net/netfilter/ipvs/ip_vs_ctl.c | 45 +++++++++++++++++++++++++++++++++++------- 3 files changed, 44 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 624a8a54806d..b7e2b624d58e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -648,6 +648,9 @@ struct ip_vs_dest_user_kern { /* thresholds for active connections */ u32 u_threshold; /* upper threshold */ u32 l_threshold; /* lower threshold */ + + /* Address family of addr */ + u16 af; }; diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index fbcffe8041f7..cabe95d5b461 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -384,6 +384,9 @@ enum { IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */ IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */ + + IPVS_DEST_ATTR_ADDR_FAMILY, /* Address family of address */ + __IPVS_DEST_ATTR_MAX, }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index bd2b208ba56c..594cec7ebc43 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -816,6 +816,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->u_threshold = udest->u_threshold; dest->l_threshold = udest->l_threshold; + dest->af = udest->af; + spin_lock_bh(&dest->dst_lock); __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); @@ -846,8 +848,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, EnterFunction(2); + /* Temporary for consistency */ + if (udest->af != svc->af) + return -EINVAL; + #ifdef CONFIG_IP_VS_IPV6 - if (svc->af == AF_INET6) { + if (udest->af == AF_INET6) { atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && @@ -875,12 +881,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, u64_stats_init(&ip_vs_dest_stats->syncp); } - dest->af = svc->af; + dest->af = udest->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; dest->vport = svc->port; dest->vfwmark = svc->fwmark; - ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr); + ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr); dest->port = udest->port; atomic_set(&dest->activeconns, 0); @@ -928,7 +934,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } - ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ rcu_read_lock(); @@ -949,7 +955,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) if (dest != NULL) { IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " "dest->refcnt=%d, service %u/%s:%u\n", - IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport), + IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport), atomic_read(&dest->refcnt), dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->vaddr), @@ -992,7 +998,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } - ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ rcu_read_lock(); @@ -2244,6 +2250,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, udest->weight = udest_compat->weight; udest->u_threshold = udest_compat->u_threshold; udest->l_threshold = udest_compat->l_threshold; + udest->af = AF_INET; } static int @@ -2480,6 +2487,12 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, if (count >= get->num_dests) break; + /* Cannot expose heterogeneous members via sockopt + * interface + */ + if (dest->af != svc->af) + continue; + entry.addr = dest->addr.ip; entry.port = dest->port; entry.conn_flags = atomic_read(&dest->conn_flags); @@ -2777,6 +2790,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, + [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, }; static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, @@ -3032,7 +3046,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, atomic_read(&dest->inactconns)) || nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, - atomic_read(&dest->persistconns))) + atomic_read(&dest->persistconns)) || + nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) goto nla_put_failure; if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) goto nla_put_failure; @@ -3113,6 +3128,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, { struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; struct nlattr *nla_addr, *nla_port; + struct nlattr *nla_addr_family; /* Parse mandatory identifying destination fields first */ if (nla == NULL || @@ -3121,6 +3137,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; nla_port = attrs[IPVS_DEST_ATTR_PORT]; + nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY]; if (!(nla_addr && nla_port)) return -EINVAL; @@ -3130,6 +3147,11 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); udest->port = nla_get_be16(nla_port); + if (nla_addr_family) + udest->af = nla_get_u16(nla_addr_family); + else + udest->af = 0; + /* If a full entry was requested, check for the additional fields */ if (full_entry) { struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, @@ -3357,6 +3379,15 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) need_full_dest); if (ret) goto out; + + /* Old protocols did not allow the user to specify address + * family, so we set it to zero instead. We also didn't + * allow heterogeneous pools in the old code, so it's safe + * to assume that this will have the same address family as + * the service. + */ + if (udest.af == 0) + udest.af = svc->af; } switch (cmd) { -- cgit v1.2.3 From 84d7fce693884897c6196cc98228a2ad56ae2a9a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Sep 2014 14:30:22 +0200 Subject: netfilter: nf_tables: export rule-set generation ID This patch exposes the ruleset generation ID in three ways: 1) The new command NFT_MSG_GETGEN that exposes the 32-bits ruleset generation ID. This ID is incremented in every commit and it should be large enough to avoid wraparound problems. 2) The less significant 16-bits of the generation ID are exposed through the nfgenmsg->res_id header field. This allows us to quickly catch if the ruleset has change between two consecutive list dumps from different object lists (in this specific case I think the risk of wraparound is unlikely). 3) Userspace subscribers may receive notifications of new rule-set generation after every commit. This also provides an alternative way to monitor the generation ID. If the events are lost, the userspace process hits a overrun error, so it knows that it is working with a stale ruleset anyway. Patrick spotted that rule-set transformations in userspace may take quite some time. In that case, it annotates the 32-bits generation ID before fetching the rule-set, then: 1) it compares it to what we obtain after the transformation to make sure it is not working with a stale rule-set and no wraparound has ocurred. 2) it subscribes to ruleset notifications, so it can watch for new generation ID. This is complementary to the NLM_F_DUMP_INTR approach, which allows us to detect an interference in the middle one single list dumping. There is no way to explicitly check that an interference has occurred between two list dumps from the kernel, since it doesn't know how many lists the userspace client is actually going to dump. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 16 ++++ net/netfilter/nf_tables_api.c | 140 +++++++++++++++++++++++++------ 2 files changed, 130 insertions(+), 26 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 66d66dd3ff79..b72ccfeaf865 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -51,6 +51,8 @@ enum nft_verdicts { * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes) * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes) * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) + * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) + * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -68,6 +70,8 @@ enum nf_tables_msg_types { NFT_MSG_NEWSETELEM, NFT_MSG_GETSETELEM, NFT_MSG_DELSETELEM, + NFT_MSG_NEWGEN, + NFT_MSG_GETGEN, NFT_MSG_MAX, }; @@ -812,4 +816,16 @@ enum nft_masq_attributes { }; #define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) +/** + * enum nft_gen_attributes - nf_tables ruleset generation attributes + * + * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32) + */ +enum nft_gen_attributes { + NFTA_GEN_UNSPEC, + NFTA_GEN_ID, + __NFTA_GEN_MAX +}; +#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) + #endif /* _LINUX_NF_TABLES_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 82374601577e..a476b9962155 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -405,9 +405,9 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; -static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, - int event, u32 flags, int family, - const struct nft_table *table) +static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, u32 flags, + int family, const struct nft_table *table) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; @@ -420,7 +420,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || @@ -448,8 +448,8 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; - err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0, - ctx->afi->family, ctx->table); + err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, + event, 0, ctx->afi->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; @@ -488,7 +488,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_table_info(skb, + if (nf_tables_fill_table_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWTABLE, @@ -540,7 +540,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, if (!skb2) return -ENOMEM; - err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid, + err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, family, table); if (err < 0) @@ -914,9 +914,9 @@ nla_put_failure: return -ENOSPC; } -static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, - int event, u32 flags, int family, - const struct nft_table *table, +static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, u32 flags, + int family, const struct nft_table *table, const struct nft_chain *chain) { struct nlmsghdr *nlh; @@ -930,7 +930,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) goto nla_put_failure; @@ -988,8 +988,8 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; - err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0, - ctx->afi->family, ctx->table, + err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, + event, 0, ctx->afi->family, ctx->table, ctx->chain); if (err < 0) { kfree_skb(skb); @@ -1031,7 +1031,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid, + if (nf_tables_fill_chain_info(skb, net, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, NLM_F_MULTI, @@ -1090,7 +1091,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, if (!skb2) return -ENOMEM; - err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid, + err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, family, table, chain); if (err < 0) @@ -1647,8 +1648,9 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_USERDATA_MAXLEN }, }; -static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, - int event, u32 flags, int family, +static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, + u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, const struct nft_rule *rule) @@ -1668,7 +1670,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) goto nla_put_failure; @@ -1724,8 +1726,8 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; - err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0, - ctx->afi->family, ctx->table, + err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, + event, 0, ctx->afi->family, ctx->table, ctx->chain, rule); if (err < 0) { kfree_skb(skb); @@ -1771,7 +1773,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid, + if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, @@ -1837,7 +1839,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, if (!skb2) return -ENOMEM; - err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid, + err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, family, table, chain, rule); if (err < 0) @@ -2321,7 +2323,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = ctx->afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; @@ -2925,7 +2927,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = ctx.afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) goto nla_put_failure; @@ -3006,7 +3008,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = ctx->afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; @@ -3293,6 +3295,87 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, return err; } +static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -EMSGSIZE; +} + +static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) +{ + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct sk_buff *skb2; + int err; + + if (nlmsg_report(nlh) && + !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb2 == NULL) + goto err; + + err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq); + if (err < 0) { + kfree_skb(skb2); + goto err; + } + + err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, + NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL); +err: + if (err < 0) { + nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, + err); + } + return err; +} + +static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct net *net = sock_net(skb->sk); + struct sk_buff *skb2; + int err; + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); +err: + kfree_skb(skb2); + return err; +} + static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_NEWTABLE] = { .call_batch = nf_tables_newtable, @@ -3369,6 +3452,9 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, + [NFT_MSG_GETGEN] = { + .call = nf_tables_getgen, + }, }; static void nft_chain_commit_update(struct nft_trans *trans) @@ -3526,6 +3612,8 @@ static int nf_tables_commit(struct sk_buff *skb) call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); } + nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); + return 0; } -- cgit v1.2.3