diff options
author | J. Bruce Fields <bfields@redhat.com> | 2012-10-10 02:35:22 +0400 |
---|---|---|
committer | J. Bruce Fields <bfields@redhat.com> | 2012-10-10 02:35:22 +0400 |
commit | f474af7051212b4efc8267583fad9c4ebf33ccff (patch) | |
tree | 1aa46ebc8065a341f247c2a2d9af2f624ad1d4f8 /net/ipv4/tcp_metrics.c | |
parent | 0d22f68f02c10d5d10ec5712917e5828b001a822 (diff) | |
parent | e3dd9a52cb5552c46c2a4ca7ccdfb4dab5c72457 (diff) | |
download | linux-f474af7051212b4efc8267583fad9c4ebf33ccff.tar.xz |
nfs: disintegrate UAPI for nfs
This is to complete part of the Userspace API (UAPI) disintegration for which
the preparatory patches were pulled recently. After these patches, userspace
headers will be segregated into:
include/uapi/linux/.../foo.h
for the userspace interface stuff, and:
include/linux/.../foo.h
for the strictly kernel internal stuff.
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
-rw-r--r-- | net/ipv4/tcp_metrics.c | 354 |
1 files changed, 341 insertions, 13 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0abe67bb4d3a..4c752a6e0bcd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/tcp.h> #include <linux/hash.h> +#include <linux/tcp_metrics.h> #include <net/inet_connection_sock.h> #include <net/net_namespace.h> @@ -17,20 +18,10 @@ #include <net/ipv6.h> #include <net/dst.h> #include <net/tcp.h> +#include <net/genetlink.h> int sysctl_tcp_nometrics_save __read_mostly; -enum tcp_metric_index { - TCP_METRIC_RTT, - TCP_METRIC_RTTVAR, - TCP_METRIC_SSTHRESH, - TCP_METRIC_CWND, - TCP_METRIC_REORDERING, - - /* Always last. */ - TCP_METRIC_MAX, -}; - struct tcp_fastopen_metrics { u16 mss; u16 syn_loss:10; /* Recurring Fast Open SYN losses */ @@ -45,8 +36,10 @@ struct tcp_metrics_block { u32 tcpm_ts; u32 tcpm_ts_stamp; u32 tcpm_lock; - u32 tcpm_vals[TCP_METRIC_MAX]; + u32 tcpm_vals[TCP_METRIC_MAX + 1]; struct tcp_fastopen_metrics tcpm_fastopen; + + struct rcu_head rcu_head; }; static bool tcp_metric_locked(struct tcp_metrics_block *tm, @@ -690,6 +683,325 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } +static struct genl_family tcp_metrics_nl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = TCP_METRICS_GENL_NAME, + .version = TCP_METRICS_GENL_VERSION, + .maxattr = TCP_METRICS_ATTR_MAX, + .netnsok = true, +}; + +static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { + [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, + [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr), }, + /* Following attributes are not received for GET/DEL, + * we keep them for reference + */ +#if 0 + [TCP_METRICS_ATTR_AGE] = { .type = NLA_MSECS, }, + [TCP_METRICS_ATTR_TW_TSVAL] = { .type = NLA_U32, }, + [TCP_METRICS_ATTR_TW_TS_STAMP] = { .type = NLA_S32, }, + [TCP_METRICS_ATTR_VALS] = { .type = NLA_NESTED, }, + [TCP_METRICS_ATTR_FOPEN_MSS] = { .type = NLA_U16, }, + [TCP_METRICS_ATTR_FOPEN_SYN_DROPS] = { .type = NLA_U16, }, + [TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS] = { .type = NLA_MSECS, }, + [TCP_METRICS_ATTR_FOPEN_COOKIE] = { .type = NLA_BINARY, + .len = TCP_FASTOPEN_COOKIE_MAX, }, +#endif +}; + +/* Add attributes, caller cancels its header on failure */ +static int tcp_metrics_fill_info(struct sk_buff *msg, + struct tcp_metrics_block *tm) +{ + struct nlattr *nest; + int i; + + switch (tm->tcpm_addr.family) { + case AF_INET: + if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, + tm->tcpm_addr.addr.a4) < 0) + goto nla_put_failure; + break; + case AF_INET6: + if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, + tm->tcpm_addr.addr.a6) < 0) + goto nla_put_failure; + break; + default: + return -EAFNOSUPPORT; + } + + if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, + jiffies - tm->tcpm_stamp) < 0) + goto nla_put_failure; + if (tm->tcpm_ts_stamp) { + if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, + (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0) + goto nla_put_failure; + if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL, + tm->tcpm_ts) < 0) + goto nla_put_failure; + } + + { + int n = 0; + + nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); + if (!nest) + goto nla_put_failure; + for (i = 0; i < TCP_METRIC_MAX + 1; i++) { + if (!tm->tcpm_vals[i]) + continue; + if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) + goto nla_put_failure; + n++; + } + if (n) + nla_nest_end(msg, nest); + else + nla_nest_cancel(msg, nest); + } + + { + struct tcp_fastopen_metrics tfom_copy[1], *tfom; + unsigned int seq; + + do { + seq = read_seqbegin(&fastopen_seqlock); + tfom_copy[0] = tm->tcpm_fastopen; + } while (read_seqretry(&fastopen_seqlock, seq)); + + tfom = tfom_copy; + if (tfom->mss && + nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_MSS, + tfom->mss) < 0) + goto nla_put_failure; + if (tfom->syn_loss && + (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS, + tfom->syn_loss) < 0 || + nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, + jiffies - tfom->last_syn_loss) < 0)) + goto nla_put_failure; + if (tfom->cookie.len > 0 && + nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE, + tfom->cookie.len, tfom->cookie.val) < 0) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int tcp_metrics_dump_info(struct sk_buff *skb, + struct netlink_callback *cb, + struct tcp_metrics_block *tm) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tcp_metrics_nl_family, NLM_F_MULTI, + TCP_METRICS_CMD_GET); + if (!hdr) + return -EMSGSIZE; + + if (tcp_metrics_fill_info(skb, tm) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int tcp_metrics_nl_dump(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + unsigned int row, s_row = cb->args[0]; + int s_col = cb->args[1], col = s_col; + + for (row = s_row; row < max_rows; row++, s_col = 0) { + struct tcp_metrics_block *tm; + struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; + + rcu_read_lock(); + for (col = 0, tm = rcu_dereference(hb->chain); tm; + tm = rcu_dereference(tm->tcpm_next), col++) { + if (col < s_col) + continue; + if (tcp_metrics_dump_info(skb, cb, tm) < 0) { + rcu_read_unlock(); + goto done; + } + } + rcu_read_unlock(); + } + +done: + cb->args[0] = row; + cb->args[1] = col; + return skb->len; +} + +static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional) +{ + struct nlattr *a; + + a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4]; + if (a) { + addr->family = AF_INET; + addr->addr.a4 = nla_get_be32(a); + *hash = (__force unsigned int) addr->addr.a4; + return 0; + } + a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; + if (a) { + if (nla_len(a) != sizeof(sizeof(struct in6_addr))) + return -EINVAL; + addr->family = AF_INET6; + memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); + *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + return 0; + } + return optional ? 1 : -EAFNOSUPPORT; +} + +static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct sk_buff *msg; + struct net *net = genl_info_net(info); + void *reply; + int ret; + + ret = parse_nl_addr(info, &addr, &hash, 0); + if (ret < 0) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + reply = genlmsg_put_reply(msg, info, &tcp_metrics_nl_family, 0, + info->genlhdr->cmd); + if (!reply) + goto nla_put_failure; + + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + ret = -ESRCH; + rcu_read_lock(); + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, &addr)) { + ret = tcp_metrics_fill_info(msg, tm); + break; + } + } + rcu_read_unlock(); + if (ret < 0) + goto out_free; + + genlmsg_end(msg, reply); + return genlmsg_reply(msg, info); + +nla_put_failure: + ret = -EMSGSIZE; + +out_free: + nlmsg_free(msg); + return ret; +} + +#define deref_locked_genl(p) \ + rcu_dereference_protected(p, lockdep_genl_is_held() && \ + lockdep_is_held(&tcp_metrics_lock)) + +#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) + +static int tcp_metrics_flush_all(struct net *net) +{ + unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; + struct tcp_metrics_block *tm; + unsigned int row; + + for (row = 0; row < max_rows; row++, hb++) { + spin_lock_bh(&tcp_metrics_lock); + tm = deref_locked_genl(hb->chain); + if (tm) + hb->chain = NULL; + spin_unlock_bh(&tcp_metrics_lock); + while (tm) { + struct tcp_metrics_block *next; + + next = deref_genl(tm->tcpm_next); + kfree_rcu(tm, rcu_head); + tm = next; + } + } + return 0; +} + +static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct tcpm_hash_bucket *hb; + struct tcp_metrics_block *tm; + struct tcp_metrics_block __rcu **pp; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net = genl_info_net(info); + int ret; + + ret = parse_nl_addr(info, &addr, &hash, 1); + if (ret < 0) + return ret; + if (ret > 0) + return tcp_metrics_flush_all(net); + + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hb = net->ipv4.tcp_metrics_hash + hash; + pp = &hb->chain; + spin_lock_bh(&tcp_metrics_lock); + for (tm = deref_locked_genl(*pp); tm; + pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { + if (addr_same(&tm->tcpm_addr, &addr)) { + *pp = tm->tcpm_next; + break; + } + } + spin_unlock_bh(&tcp_metrics_lock); + if (!tm) + return -ESRCH; + kfree_rcu(tm, rcu_head); + return 0; +} + +static struct genl_ops tcp_metrics_nl_ops[] = { + { + .cmd = TCP_METRICS_CMD_GET, + .doit = tcp_metrics_nl_cmd_get, + .dumpit = tcp_metrics_nl_dump, + .policy = tcp_metrics_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = TCP_METRICS_CMD_DEL, + .doit = tcp_metrics_nl_cmd_del, + .policy = tcp_metrics_nl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { @@ -753,5 +1065,21 @@ static __net_initdata struct pernet_operations tcp_net_metrics_ops = { void __init tcp_metrics_init(void) { - register_pernet_subsys(&tcp_net_metrics_ops); + int ret; + + ret = register_pernet_subsys(&tcp_net_metrics_ops); + if (ret < 0) + goto cleanup; + ret = genl_register_family_with_ops(&tcp_metrics_nl_family, + tcp_metrics_nl_ops, + ARRAY_SIZE(tcp_metrics_nl_ops)); + if (ret < 0) + goto cleanup_subsys; + return; + +cleanup_subsys: + unregister_pernet_subsys(&tcp_net_metrics_ops); + +cleanup: + return; } |