From 408f22e81ea2fcf96c80e85ee12d20ef5148bf7c Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 16 Jun 2007 14:03:45 -0400 Subject: SCTP: update sctp_getsockopt helpers to allow oversized buffers I noted the other day while looking at a bug that was ostensibly in some perl networking library, that we strictly avoid allowing getsockopt operations to complete if we pass in oversized buffers. This seems to make libraries like Perl::NET malfunction since it seems to allocate oversized buffers for use in several operations. It also seems to be out of line with the way udp, tcp and ip getsockopt routines handle buffer input (since the *optlen pointer in both an input and an output and gets set to the length of the data that we copy into the buffer). This patch brings our getsockopt helpers into line with other protocols, and allows us to accept oversized buffers for our getsockopt operations. Tested by me with good results. Signed-off-by: Neil Horman Acked-by: Sridhar Samudrala Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 108 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6edaaa009d62..c1f239ac12b9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3375,12 +3375,13 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, sctp_assoc_t associd; int retval = 0; - if (len != sizeof(status)) { + if (len < sizeof(status)) { retval = -EINVAL; goto out; } - if (copy_from_user(&status, optval, sizeof(status))) { + len = sizeof(status); + if (copy_from_user(&status, optval, len)) { retval = -EFAULT; goto out; } @@ -3452,12 +3453,13 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, struct sctp_transport *transport; int retval = 0; - if (len != sizeof(pinfo)) { + if (len < sizeof(pinfo)) { retval = -EINVAL; goto out; } - if (copy_from_user(&pinfo, optval, sizeof(pinfo))) { + len = sizeof(pinfo); + if (copy_from_user(&pinfo, optval, len)) { retval = -EFAULT; goto out; } @@ -3523,8 +3525,11 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len != sizeof(struct sctp_event_subscribe)) + if (len < sizeof(struct sctp_event_subscribe)) return -EINVAL; + len = sizeof(struct sctp_event_subscribe); + if (put_user(len, optlen)) + return -EFAULT; if (copy_to_user(optval, &sctp_sk(sk)->subscribe, len)) return -EFAULT; return 0; @@ -3546,9 +3551,12 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) return -EOPNOTSUPP; - if (len != sizeof(int)) + if (len < sizeof(int)) return -EINVAL; - if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len)) + len = sizeof(int); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int))) return -EFAULT; return 0; } @@ -3599,8 +3607,9 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval int retval = 0; struct sctp_association *asoc; - if (len != sizeof(sctp_peeloff_arg_t)) + if (len < sizeof(sctp_peeloff_arg_t)) return -EINVAL; + len = sizeof(sctp_peeloff_arg_t); if (copy_from_user(&peeloff, optval, len)) return -EFAULT; @@ -3628,6 +3637,8 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval /* Return the fd mapped to the new socket. */ peeloff.sd = retval; + if (put_user(len, optlen)) + return -EFAULT; if (copy_to_user(optval, &peeloff, len)) retval = -EFAULT; @@ -3736,9 +3747,9 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); - if (len != sizeof(struct sctp_paddrparams)) + if (len < sizeof(struct sctp_paddrparams)) return -EINVAL; - + len = sizeof(struct sctp_paddrparams); if (copy_from_user(¶ms, optval, len)) return -EFAULT; @@ -3837,9 +3848,11 @@ static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); - if (len != sizeof(struct sctp_assoc_value)) + if (len < sizeof(struct sctp_assoc_value)) return - EINVAL; + len = sizeof(struct sctp_assoc_value); + if (copy_from_user(¶ms, optval, len)) return -EFAULT; @@ -3888,8 +3901,11 @@ static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, */ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len != sizeof(struct sctp_initmsg)) + if (len < sizeof(struct sctp_initmsg)) return -EINVAL; + len = sizeof(struct sctp_initmsg); + if (put_user(len, optlen)) + return -EFAULT; if (copy_to_user(optval, &sctp_sk(sk)->initmsg, len)) return -EFAULT; return 0; @@ -3904,7 +3920,7 @@ static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, struct list_head *pos; int cnt = 0; - if (len != sizeof(sctp_assoc_t)) + if (len < sizeof(sctp_assoc_t)) return -EINVAL; if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) @@ -3940,10 +3956,12 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, struct sctp_sock *sp = sctp_sk(sk); int addrlen; - if (len != sizeof(struct sctp_getaddrs_old)) + if (len < sizeof(struct sctp_getaddrs_old)) return -EINVAL; - if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs_old))) + len = sizeof(struct sctp_getaddrs_old); + + if (copy_from_user(&getaddrs, optval, len)) return -EFAULT; if (getaddrs.addr_num <= 0) return -EINVAL; @@ -3966,7 +3984,9 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, if (cnt >= getaddrs.addr_num) break; } getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &getaddrs, len)) return -EFAULT; return 0; @@ -4037,7 +4057,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, rwlock_t *addr_lock; int cnt = 0; - if (len != sizeof(sctp_assoc_t)) + if (len < sizeof(sctp_assoc_t)) return -EINVAL; if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) @@ -4179,10 +4199,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, void *buf; int bytes_copied = 0; - if (len != sizeof(struct sctp_getaddrs_old)) + if (len < sizeof(struct sctp_getaddrs_old)) return -EINVAL; - if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs_old))) + len = sizeof(struct sctp_getaddrs_old); + if (copy_from_user(&getaddrs, optval, len)) return -EFAULT; if (getaddrs.addr_num <= 0) return -EINVAL; @@ -4254,7 +4275,7 @@ copy_getaddrs: /* copy the leading structure back to user */ getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) + if (copy_to_user(optval, &getaddrs, len)) err = -EFAULT; error: @@ -4282,7 +4303,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, void *addrs; void *buf; - if (len <= sizeof(struct sctp_getaddrs)) + if (len < sizeof(struct sctp_getaddrs)) return -EINVAL; if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) @@ -4379,10 +4400,12 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); - if (len != sizeof(struct sctp_prim)) + if (len < sizeof(struct sctp_prim)) return -EINVAL; - if (copy_from_user(&prim, optval, sizeof(struct sctp_prim))) + len = sizeof(struct sctp_prim); + + if (copy_from_user(&prim, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, prim.ssp_assoc_id); @@ -4398,7 +4421,9 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, (union sctp_addr *)&prim.ssp_addr); - if (copy_to_user(optval, &prim, sizeof(struct sctp_prim))) + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &prim, len)) return -EFAULT; return 0; @@ -4415,10 +4440,15 @@ static int sctp_getsockopt_adaptation_layer(struct sock *sk, int len, { struct sctp_setadaptation adaptation; - if (len != sizeof(struct sctp_setadaptation)) + if (len < sizeof(struct sctp_setadaptation)) return -EINVAL; + len = sizeof(struct sctp_setadaptation); + adaptation.ssb_adaptation_ind = sctp_sk(sk)->adaptation_ind; + + if (put_user(len, optlen)) + return -EFAULT; if (copy_to_user(optval, &adaptation, len)) return -EFAULT; @@ -4452,9 +4482,12 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); - if (len != sizeof(struct sctp_sndrcvinfo)) + if (len < sizeof(struct sctp_sndrcvinfo)) return -EINVAL; - if (copy_from_user(&info, optval, sizeof(struct sctp_sndrcvinfo))) + + len = sizeof(struct sctp_sndrcvinfo); + + if (copy_from_user(&info, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); @@ -4475,7 +4508,9 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, info.sinfo_timetolive = sp->default_timetolive; } - if (copy_to_user(optval, &info, sizeof(struct sctp_sndrcvinfo))) + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &info, len)) return -EFAULT; return 0; @@ -4526,10 +4561,12 @@ static int sctp_getsockopt_rtoinfo(struct sock *sk, int len, struct sctp_rtoinfo rtoinfo; struct sctp_association *asoc; - if (len != sizeof (struct sctp_rtoinfo)) + if (len < sizeof (struct sctp_rtoinfo)) return -EINVAL; - if (copy_from_user(&rtoinfo, optval, sizeof (struct sctp_rtoinfo))) + len = sizeof(struct sctp_rtoinfo); + + if (copy_from_user(&rtoinfo, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id); @@ -4581,11 +4618,12 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, struct list_head *pos; int cnt = 0; - if (len != sizeof (struct sctp_assocparams)) + if (len < sizeof (struct sctp_assocparams)) return -EINVAL; - if (copy_from_user(&assocparams, optval, - sizeof (struct sctp_assocparams))) + len = sizeof(struct sctp_assocparams); + + if (copy_from_user(&assocparams, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id); @@ -4671,9 +4709,11 @@ static int sctp_getsockopt_context(struct sock *sk, int len, struct sctp_sock *sp; struct sctp_association *asoc; - if (len != sizeof(struct sctp_assoc_value)) + if (len < sizeof(struct sctp_assoc_value)) return -EINVAL; + len = sizeof(struct sctp_assoc_value); + if (copy_from_user(¶ms, optval, len)) return -EFAULT; -- cgit v1.2.3 From 186e234358ba29a4094d0c8c0d3ea00f84d32a3e Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 18 Jun 2007 19:59:16 -0400 Subject: SCTP: Fix sctp_getsockopt_get_peer_addrs This is the split out of the patch that we agreed I should split out from my last patch. It changes space_left to be computed in the same way the to variable is. I know we talked about changing space_left to an int, but I think size_t is more appropriate, since we should never have negative space in our buffer, and computing using offsetof means space_left should now never drop below zero. Signed-off-by: Neil Horman Acked-by: Sridhar Samudrala Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c1f239ac12b9..2fc036699d48 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4019,8 +4019,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, return -EINVAL; to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - sizeof(struct sctp_getaddrs) - - offsetof(struct sctp_getaddrs,addrs); + space_left = len - offsetof(struct sctp_getaddrs,addrs); list_for_each(pos, &asoc->peer.transport_addr_list) { from = list_entry(pos, struct sctp_transport, transports); @@ -4327,8 +4326,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, } to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - sizeof(struct sctp_getaddrs) - - offsetof(struct sctp_getaddrs,addrs); + space_left = len - offsetof(struct sctp_getaddrs,addrs); + addrs = kmalloc(space_left, GFP_KERNEL); if (!addrs) return -ENOMEM; -- cgit v1.2.3 From d258131aaea8a3979f82c0313b9a583130b29981 Mon Sep 17 00:00:00 2001 From: Jerome Borsboom Date: Fri, 22 Jun 2007 14:08:17 -0700 Subject: [NETFILTER]: nf_conntrack_sip: add missing message types containing RTP info Signed-off-by: Jerome Borsboom Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_sip.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 7aaa8c91b293..1b5c6c1055f7 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -442,6 +442,9 @@ static int sip_help(struct sk_buff **pskb, /* RTP info only in some SDP pkts */ if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 && + memcmp(dptr, "UPDATE", sizeof("UPDATE") - 1) != 0 && + memcmp(dptr, "SIP/2.0 180", sizeof("SIP/2.0 180") - 1) != 0 && + memcmp(dptr, "SIP/2.0 183", sizeof("SIP/2.0 183") - 1) != 0 && memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) { goto out; } -- cgit v1.2.3 From e2d8e314ad18d4302b3b7ea21ab8b2cb72f2b152 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Fri, 22 Jun 2007 14:10:22 -0700 Subject: [NETFILTER]: nfctnetlink: Don't allow to change helper There is no realistic situation to change helper (Who wants IRC helper to track FTP traffic ?). Moreover, if we want to do that, we need to fix race issue by nfctnetlink and running helper. That will add overhead to packet processing. It wouldn't pay. So this rejects the request to change helper. The requests to add or remove helper are accepted as ever. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3f73327794ab..d0fe3d769828 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -869,8 +869,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) return 0; if (help->helper) - /* we had a helper before ... */ - nf_ct_remove_expectations(ct); + return -EBUSY; /* need to zero data of old helper */ memset(&help->help, 0, sizeof(help->help)); -- cgit v1.2.3 From 6d5b78cdd5a17665674429400b3ed10e3ec60684 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 22 Jun 2007 16:07:04 -0700 Subject: [IPV6] NDISC: Fix thinko to control Router Preference support. Bug reported by Haruhito Watanabe . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d8b36451bada..0358e6066a4e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1062,7 +1062,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || - in6_dev->cnf.accept_ra_rtr_pref) + !in6_dev->cnf.accept_ra_rtr_pref) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif -- cgit v1.2.3 From dbbeb2f9917792b989b6269ebfe24257f9aa1618 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 23 Jun 2007 22:58:34 -0700 Subject: [SKBUFF]: Fix incorrect config #ifdef around skb_copy_secmark secmark doesn't depend on CONFIG_NET_SCHED. Signed-off-by: Patrick McHardy Acked-by: James Morris Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7c6a34e21eee..8d43ae6979e5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -434,8 +434,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->tc_verd = CLR_TC_MUNGED(n->tc_verd); C(iif); #endif - skb_copy_secmark(n, skb); #endif + skb_copy_secmark(n, skb); C(truesize); atomic_set(&n->users, 1); C(head); -- cgit v1.2.3 From 64beb8f3eb3c724add64ca3272915528e10213c1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 23 Jun 2007 22:59:40 -0700 Subject: [TIPC]: Fix infinite loop in netlink handler The tipc netlink config handler uses the nlmsg_pid from the request header as destination for its reply. If the application initialized nlmsg_pid to 0, the reply is looped back to the kernel, causing hangup. Fix: use nlmsg_pid of the skb that triggered the request. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/tipc/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 4cdafa2d1d4d..6a7f7b4c2595 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -60,7 +60,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid); + genlmsg_unicast(rep_buf, NETLINK_CB(skb).pid); } return 0; -- cgit v1.2.3 From ddb61a57bb6df673986e6476407f97d28b02031f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 23 Jun 2007 23:07:50 -0700 Subject: [TCP] tcp_read_sock: Allow recv_actor() return return negative error value. tcp_read_sock() currently assumes that the recv_actor() only returns number of bytes copied. For network splice receive, we may have to return an error in some cases. So allow the actor to return a negative error value. Signed-off-by: Jens Axboe Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cd3c7e95de9e..450f44bb2c8e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1064,7 +1064,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, break; } used = recv_actor(desc, skb, offset, len); - if (used <= len) { + if (used < 0) { + if (!copied) + copied = used; + break; + } else if (used <= len) { seq += used; copied += used; offset += used; @@ -1086,7 +1090,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ - if (copied) + if (copied > 0) tcp_cleanup_rbuf(sk, copied); return copied; } -- cgit v1.2.3 From 515e06c4556bd8388db6b2bb2cd8859126932946 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 23 Jun 2007 23:09:23 -0700 Subject: [NET]: Re-enable irqs before pushing pending DMA requests This moves the local_irq_enable() call in net_rx_action() to before calling the CONFIG_NET_DMA's dma_async_memcpy_issue_pending() rather than after. This shortens the irq disabled window and allows for DMA drivers that need to do their own irq hold. Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 26090621ea6b..ee051bb398a0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2009,6 +2009,7 @@ static void net_rx_action(struct softirq_action *h) } } out: + local_irq_enable(); #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push @@ -2022,7 +2023,6 @@ out: rcu_read_unlock(); } #endif - local_irq_enable(); return; softnet_break: -- cgit v1.2.3 From 5b5a60da281c767196427ce8144deae6ec46b389 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Sat, 23 Jun 2007 23:11:52 -0700 Subject: [NET]: Make skb_seq_read unmap the last fragment Having walked through the entire skbuff, skb_seq_read would leave the last fragment mapped. As a consequence, the unwary caller would leak kmaps, and proceed with preempt_count off by one. The only (kind of non-intuitive) workaround is to use skb_seq_read_abort. This patch makes sure skb_seq_read always unmaps frag_data after having cycled through the skb's paged part. Signed-off-by: Olaf Kirch Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8d43ae6979e5..27cfe5fe4bb9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1706,6 +1706,11 @@ next_skb: st->stepped_offset += frag->size; } + if (st->frag_data) { + kunmap_skb_frag(st->frag_data); + st->frag_data = NULL; + } + if (st->cur_skb->next) { st->cur_skb = st->cur_skb->next; st->frag_idx = 0; -- cgit v1.2.3 From 5131a184a3458d9ac47d9eba032cf4c4d3295afd Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 22 Jun 2007 15:14:46 -0700 Subject: SCTP: lock_sock_nested in sctp_sock_migrate sctp_sock_migrate() grabs the socket lock on a newly allocated socket while holding the socket lock on an old socket. lockdep worries that this might be a recursive lock attempt. task/3026 is trying to acquire lock: (sk_lock-AF_INET){--..}, at: [] sctp_sock_migrate+0x2e3/0x327 [sctp] but task is already holding lock: (sk_lock-AF_INET){--..}, at: [] sctp_accept+0xdf/0x1e3 [sctp] This patch tells lockdep that this locking is safe by using lock_sock_nested(). Signed-off-by: Zach Brown Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2fc036699d48..67861a8f00cb 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6123,8 +6123,11 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * queued to the backlog. This prevents a potential race between * backlog processing on the old socket and new-packet processing * on the new socket. + * + * The caller has just allocated newsk so we can guarantee that other + * paths won't try to lock it and then oldsk. */ - sctp_lock_sock(newsk); + lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); sctp_assoc_migrate(assoc, newsk); /* If the association on the newsk is already closed before accept() -- cgit v1.2.3 From 0db3dc73f7a3a73b0dc725b6a991253f5652c905 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 27 Jun 2007 00:39:42 -0700 Subject: [NETPOLL]: tx lock deadlock fix If sky2 device poll routine is called from netpoll_send_skb, it would deadlock. The netpoll_send_skb held the netif_tx_lock, and the poll routine could acquire it to clean up skb's. Other drivers might use same locking model. The driver is correct, netpoll should not introduce more locking problems than it causes already. So change the code to drop lock before calling poll handler. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/netpoll.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 758dafe284c0..f8e74e511ce6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -250,22 +250,23 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) unsigned long flags; local_irq_save(flags); - if (netif_tx_trylock(dev)) { - /* try until next clock tick */ - for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; - tries > 0; --tries) { + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { + if (netif_tx_trylock(dev)) { if (!netif_queue_stopped(dev)) status = dev->hard_start_xmit(skb, dev); + netif_tx_unlock(dev); if (status == NETDEV_TX_OK) break; - /* tickle device maybe there is some cleanup */ - netpoll_poll(np); - - udelay(USEC_PER_POLL); } - netif_tx_unlock(dev); + + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); + + udelay(USEC_PER_POLL); } local_irq_restore(flags); } -- cgit v1.2.3 From 17200811cf539b9107a99a39bf71ba3567966285 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 28 Jun 2007 22:11:47 -0700 Subject: [NETPOLL] netconsole: fix soft lockup when removing module #1 Until kernel ver. 2.6.21 (including) cancel_rearming_delayed_work() required a work function should always (unconditionally) rearm with delay > 0 - otherwise it would endlessly loop. This patch replaces this function with cancel_delayed_work(). Later kernel versions don't require this, so here it's only for uniformity. #2 After deleting a timer in cancel_[rearming_]delayed_work() there could stay a last skb queued in npinfo->txq causing a memory leak after kfree(npinfo). Initial patch & testing by: Jason Wessel Signed-off-by: Jarek Poplawski Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/netpoll.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index f8e74e511ce6..cf40ff91ac01 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -72,7 +72,8 @@ static void queue_process(struct work_struct *work) netif_tx_unlock(dev); local_irq_restore(flags); - schedule_delayed_work(&npinfo->tx_work, HZ/10); + if (atomic_read(&npinfo->refcnt)) + schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } netif_tx_unlock(dev); @@ -785,9 +786,15 @@ void netpoll_cleanup(struct netpoll *np) if (atomic_dec_and_test(&npinfo->refcnt)) { skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); - cancel_rearming_delayed_work(&npinfo->tx_work); + cancel_delayed_work(&npinfo->tx_work); flush_scheduled_work(); + /* clean after last, unfinished work */ + if (!skb_queue_empty(&npinfo->txq)) { + struct sk_buff *skb; + skb = __skb_dequeue(&npinfo->txq); + kfree_skb(skb); + } kfree(npinfo); } } -- cgit v1.2.3 From 3663c306609a9322a484fba28b3da66142c50ee9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 3 Jul 2007 12:43:12 -0400 Subject: SCTP: Fix thinko in sctp_copy_laddrs() Correctly dereference bytes_copied in sctp_copy_laddrs(). I totally must have spaced when doing this. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 67861a8f00cb..1e788279bb22 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4170,7 +4170,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, to += addrlen; cnt ++; space_left -= addrlen; - bytes_copied += addrlen; + *bytes_copied += addrlen; } return cnt; -- cgit v1.2.3 From f50f95cab735ebe2993e8d1549f0615bad05f3f2 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 3 Jul 2007 12:47:40 -0400 Subject: SCTP: Check to make sure file is valid before setting timeout In-kernel sockets created with sock_create_kern don't usually have a file and file descriptor allocated to them. As a result, when SCTP tries to check the non-blocking flag, we Oops when dereferencing a NULL file pointer. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1e788279bb22..b1917f68723c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -980,6 +980,7 @@ static int __sctp_connect(struct sock* sk, union sctp_addr *sa_addr; void *addr_buf; unsigned short port; + unsigned int f_flags = 0; sp = sctp_sk(sk); ep = sp->ep; @@ -1106,7 +1107,14 @@ static int __sctp_connect(struct sock* sk, af->to_sk_daddr(&to, sk); sk->sk_err = 0; - timeo = sock_sndtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK); + /* in-kernel sockets don't generally have a file allocated to them + * if all they do is call sock_create_kern(). + */ + if (sk->sk_socket->file) + f_flags = sk->sk_socket->file->f_flags; + + timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); + err = sctp_wait_for_connect(asoc, &timeo); /* Don't free association on exit. */ -- cgit v1.2.3 From 1669d857a25d62c6d0a6d9216e01c21287a7c844 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 3 Jul 2007 14:29:23 -0400 Subject: SCTP: Add scope_id validation for link-local binds SCTP currently permits users to bind to link-local addresses, but doesn't verify that the scope id specified at bind matches the interface that the address is configured on. It was report that this can hang a system. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 84cd53635fe8..2c29394fd92e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -844,6 +844,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) dev = dev_get_by_index(addr->v6.sin6_scope_id); if (!dev) return 0; + if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { + dev_put(dev); + return 0; + } dev_put(dev); } af = opt->pf->af; -- cgit v1.2.3 From 2cd052e44329dd2b42eb958f8f346b053de6e2cd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Jul 2007 17:03:09 -0700 Subject: [NET] skbuff: remove export of static symbol skb_clone_fraglist is static so it shouldn't be exported. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 27cfe5fe4bb9..3943c3ad9145 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2211,7 +2211,6 @@ EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); EXPORT_SYMBOL(skb_clone); -EXPORT_SYMBOL(skb_clone_fraglist); EXPORT_SYMBOL(skb_copy); EXPORT_SYMBOL(skb_copy_and_csum_bits); EXPORT_SYMBOL(skb_copy_and_csum_dev); -- cgit v1.2.3 From 25845b5155b55cd77e42655ec24161ba3feffa47 Mon Sep 17 00:00:00 2001 From: Jing Min Zhao Date: Thu, 5 Jul 2007 17:05:01 -0700 Subject: [NETFILTER]: nf_conntrack_h323: add checking of out-of-range on choices' index values Choices' index values may be out of range while still encoded in the fixed length bit-field. This bug may cause access to undefined types (NULL pointers) and thus crashes (Reported by Zhongling Wen). This patch also adds checking of decode flag when decoding SEQUENCEs. Signed-off-by: Jing Min Zhao Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_h323_asn1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index f6fad713d484..6b7eaa019d4c 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -518,7 +518,7 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level) CHECK_BOUND(bs, 2); len = get_len(bs); CHECK_BOUND(bs, len); - if (!base) { + if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; @@ -704,6 +704,8 @@ int decode_choice(bitstr_t * bs, field_t * f, char *base, int level) } else { ext = 0; type = get_bits(bs, f->sz); + if (type >= f->lb) + return H323_ERROR_RANGE; } /* Write Type */ -- cgit v1.2.3 From 94b83419e5b56a87410fd9c9939f0081fc155d65 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 5 Jul 2007 17:06:21 -0700 Subject: [NET]: net/core/netevent.c should #include Every file should include the headers containing the prototypes for its global functions. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- net/core/netevent.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/netevent.c b/net/core/netevent.c index 35d02c38554e..95f81de87502 100644 --- a/net/core/netevent.c +++ b/net/core/netevent.c @@ -15,6 +15,7 @@ #include #include +#include static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); -- cgit v1.2.3 From 25442cafb8cc3d979418caccabc91260707a0947 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 5 Jul 2007 17:42:44 -0700 Subject: [NETPOLL]: Fixups for 'fix soft lockup when removing module' >From my recent patch: > > #1 > > Until kernel ver. 2.6.21 (including) cancel_rearming_delayed_work() > > required a work function should always (unconditionally) rearm with > > delay > 0 - otherwise it would endlessly loop. This patch replaces > > this function with cancel_delayed_work(). Later kernel versions don't > > require this, so here it's only for uniformity. But Oleg Nesterov found: > But 2.6.22 doesn't need this change, why it was merged? > > In fact, I suspect this change adds a race, ... His description was right (thanks), so this patch reverts #1. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/netpoll.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cf40ff91ac01..a0efdd7a6b37 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -72,8 +72,7 @@ static void queue_process(struct work_struct *work) netif_tx_unlock(dev); local_irq_restore(flags); - if (atomic_read(&npinfo->refcnt)) - schedule_delayed_work(&npinfo->tx_work, HZ/10); + schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } netif_tx_unlock(dev); @@ -786,8 +785,7 @@ void netpoll_cleanup(struct netpoll *np) if (atomic_dec_and_test(&npinfo->refcnt)) { skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); - cancel_delayed_work(&npinfo->tx_work); - flush_scheduled_work(); + cancel_rearming_delayed_work(&npinfo->tx_work); /* clean after last, unfinished work */ if (!skb_queue_empty(&npinfo->txq)) { -- cgit v1.2.3 From 1c39858b5dd46004b12c5acd26d8df346bef8a10 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 7 Jul 2007 14:58:39 -0400 Subject: Fix use-after-free oops in Bluetooth HID. When cleaning up HIDP sessions, we currently close the ACL connection before deregistering the input device. Closing the ACL connection schedules a workqueue to remove the associated objects from sysfs, but the input device still refers to them -- and if the workqueue happens to run before the input device removal, the kernel will oops when trying to look up PHYSDEVPATH for the removed input device. Fix this by deregistering the input device before closing the connections. Signed-off-by: David Woodhouse Acked-by: Marcel Holtmann Signed-off-by: Linus Torvalds --- net/bluetooth/hidp/core.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index ceadfcf457c1..450eb0244bbf 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -581,15 +581,6 @@ static int hidp_session(void *arg) hidp_del_timer(session); - fput(session->intr_sock->file); - - wait_event_timeout(*(ctrl_sk->sk_sleep), - (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); - - fput(session->ctrl_sock->file); - - __hidp_unlink_session(session); - if (session->input) { input_unregister_device(session->input); session->input = NULL; @@ -601,6 +592,15 @@ static int hidp_session(void *arg) hid_free_device(session->hid); } + fput(session->intr_sock->file); + + wait_event_timeout(*(ctrl_sk->sk_sleep), + (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); + + fput(session->ctrl_sock->file); + + __hidp_unlink_session(session); + up_write(&hidp_session_sem); kfree(session); -- cgit v1.2.3 From 67c4f7aa9e64d37f32eb44d6d093b7028f1060bb Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 May 2007 23:27:40 +0900 Subject: [PATCH] softmac: use list_for_each_entry Cleanup using list_for_each_entry. Cc: Johannes Berg Cc: Joe Jezak Cc: Daniel Drake Signed-off-by: Akinobu Mita Signed-off-by: John W. Linville --- net/ieee80211/softmac/ieee80211softmac_module.c | 32 +++++++++---------------- 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index c308756c2f9d..6398e6e67493 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -456,18 +456,13 @@ void ieee80211softmac_add_network_locked(struct ieee80211softmac_device *mac, struct ieee80211softmac_network *add_net) { - struct list_head *list_ptr; - struct ieee80211softmac_network *softmac_net = NULL; + struct ieee80211softmac_network *softmac_net; - list_for_each(list_ptr, &mac->network_list) { - softmac_net = list_entry(list_ptr, struct ieee80211softmac_network, list); + list_for_each_entry(softmac_net, &mac->network_list, list) { if(!memcmp(softmac_net->bssid, add_net->bssid, ETH_ALEN)) - break; - else - softmac_net = NULL; + return; } - if(softmac_net == NULL) - list_add(&(add_net->list), &mac->network_list); + list_add(&(add_net->list), &mac->network_list); } /* Add a network to the list, with locking */ @@ -506,16 +501,13 @@ struct ieee80211softmac_network * ieee80211softmac_get_network_by_bssid_locked(struct ieee80211softmac_device *mac, u8 *bssid) { - struct list_head *list_ptr; - struct ieee80211softmac_network *softmac_net = NULL; - list_for_each(list_ptr, &mac->network_list) { - softmac_net = list_entry(list_ptr, struct ieee80211softmac_network, list); + struct ieee80211softmac_network *softmac_net; + + list_for_each_entry(softmac_net, &mac->network_list, list) { if(!memcmp(softmac_net->bssid, bssid, ETH_ALEN)) - break; - else - softmac_net = NULL; + return softmac_net; } - return softmac_net; + return NULL; } /* Get a network from the list by BSSID with locking */ @@ -537,11 +529,9 @@ struct ieee80211softmac_network * ieee80211softmac_get_network_by_essid_locked(struct ieee80211softmac_device *mac, struct ieee80211softmac_essid *essid) { - struct list_head *list_ptr; - struct ieee80211softmac_network *softmac_net = NULL; + struct ieee80211softmac_network *softmac_net; - list_for_each(list_ptr, &mac->network_list) { - softmac_net = list_entry(list_ptr, struct ieee80211softmac_network, list); + list_for_each_entry(softmac_net, &mac->network_list, list) { if (softmac_net->essid.len == essid->len && !memcmp(softmac_net->essid.data, essid->data, essid->len)) return softmac_net; -- cgit v1.2.3 From cf8208d0eabd1d5d2625ec02a175a294c3f30d36 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Jun 2007 21:22:14 +0200 Subject: sendfile: convert nfsd to splice_direct_to_actor() Signed-off-by: Jens Axboe --- fs/nfsd/vfs.c | 47 ++++++++++++++++++++++++++------------- include/linux/sunrpc/svc.h | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/svc.c | 2 +- 4 files changed, 34 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7e6aa245b5d5..15471a9efe0e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -801,26 +801,32 @@ found: } /* - * Grab and keep cached pages assosiated with a file in the svc_rqst - * so that they can be passed to the netowork sendmsg/sendpage routines - * directrly. They will be released after the sending has completed. + * Grab and keep cached pages associated with a file in the svc_rqst + * so that they can be passed to the network sendmsg/sendpage routines + * directly. They will be released after the sending has completed. */ static int -nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size) +nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) { - unsigned long count = desc->count; - struct svc_rqst *rqstp = desc->arg.data; + struct svc_rqst *rqstp = sd->u.data; struct page **pp = rqstp->rq_respages + rqstp->rq_resused; + struct page *page = buf->page; + size_t size; + int ret; + + ret = buf->ops->pin(pipe, buf); + if (unlikely(ret)) + return ret; - if (size > count) - size = count; + size = sd->len; if (rqstp->rq_res.page_len == 0) { get_page(page); put_page(*pp); *pp = page; rqstp->rq_resused++; - rqstp->rq_res.page_base = offset; + rqstp->rq_res.page_base = buf->offset; rqstp->rq_res.page_len = size; } else if (page != pp[-1]) { get_page(page); @@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset } else rqstp->rq_res.page_len += size; - desc->count = count - size; - desc->written += size; return size; } +static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, + struct splice_desc *sd) +{ + return __splice_from_pipe(pipe, sd, nfsd_splice_actor); +} + static __be32 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) @@ -861,10 +871,15 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (ra && ra->p_set) file->f_ra = ra->p_ra; - if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { - rqstp->rq_resused = 1; - host_err = file->f_op->sendfile(file, &offset, *count, - nfsd_read_actor, rqstp); + if (file->f_op->splice_read && rqstp->rq_splice_ok) { + struct splice_desc sd = { + .len = 0, + .total_len = *count, + .pos = offset, + .u.data = rqstp, + }; + + host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); } else { oldfs = get_fs(); set_fs(KERNEL_DS); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 4a7ae8ab6eb8..129d50f2225c 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -253,7 +253,7 @@ struct svc_rqst { * determine what device number * to report (real or virtual) */ - int rq_sendfile_ok; /* turned off in gss privacy + int rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ wait_queue_head_t rq_wait; /* synchronization */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 099a983797da..c094583386fd 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -853,7 +853,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs u32 priv_len, maj_stat; int pad, saved_len, remaining_len, offset; - rqstp->rq_sendfile_ok = 0; + rqstp->rq_splice_ok = 0; priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e673ef993904..55ea6df069de 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -814,7 +814,7 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ - rqstp->rq_sendfile_ok = 1; + rqstp->rq_splice_ok = 1; /* tcp needs a space for the record length... */ if (rqstp->rq_prot == IPPROTO_TCP) svc_putnl(resv, 0); -- cgit v1.2.3 From c2edacf80e155ef54ae4774379d461b60896bc2e Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Mon, 9 Jul 2007 10:42:47 -0700 Subject: bonding / ipv6: no addrconf for slaves separately from master At present, when a device is enslaved to bonding, if ipv6 is active then addrconf will be initated on the slave (because it is closed then opened during the enslavement processing). This causes DAD and RS packets to be sent from the slave. These packets in turn can confuse switches that perform ipv6 snooping, causing them to incorrectly update their forwarding tables (if, e.g., the slave being added is an inactve backup that won't be used right away) and direct traffic away from the active slave to a backup slave (where the incoming packets will be dropped). This patch alters the behavior so that addrconf will only run on the master device itself. I believe this is logically correct, as it prevents slaves from having an IPv6 identity independent from the master. This is consistent with the IPv4 behavior for bonding. This is accomplished by (a) having bonding set IFF_SLAVE sooner in the enslavement processing than currently occurs (before open, not after), and (b) having ipv6 addrconf ignore UP and CHANGE events on slave devices. The eql driver also uses the IFF_SLAVE flag. I inspected eql, and I believe this change is reasonable for its usage of IFF_SLAVE, but I did not test it. Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- drivers/net/bonding/bond_main.c | 11 +++++------ net/ipv6/addrconf.c | 3 +++ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6287ffbda7f7..2bb70e052090 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1390,6 +1390,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_free; } + res = netdev_set_master(slave_dev, bond_dev); + if (res) { + dprintk("Error %d calling netdev_set_master\n", res); + goto err_close; + } /* open the slave since the application closed it */ res = dev_open(slave_dev); if (res) { @@ -1397,12 +1402,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_restore_mac; } - res = netdev_set_master(slave_dev, bond_dev); - if (res) { - dprintk("Error %d calling netdev_set_master\n", res); - goto err_close; - } - new_slave->dev = slave_dev; slave_dev->priv_flags |= IFF_BONDING; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f96ed76d8fa4..79b79f3de24c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2268,6 +2268,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; case NETDEV_UP: case NETDEV_CHANGE: + if (dev->flags & IFF_SLAVE) + break; + if (event == NETDEV_UP) { if (!netif_carrier_ok(dev)) { /* device is not ready yet. */ -- cgit v1.2.3 From 4cf92a3cd9117cb90ad9441172b9f42b67ee9d26 Mon Sep 17 00:00:00 2001 From: Jean Tourrilhes Date: Mon, 9 Jul 2007 20:37:36 -0500 Subject: [PATCH] softmac: Fix ESSID problem Victor Porton reported that the SoftMAC layer had random problem when setting the ESSID : http://bugzilla.kernel.org/show_bug.cgi?id=8686 After investigation, it turned out to be worse, the SoftMAC layer is left in an inconsistent state. The fix is pretty trivial. Signed-off-by: Jean Tourrilhes Acked-by: Michael Buesch Acked-by: Larry Finger Signed-off-by: John W. Linville --- net/ieee80211/softmac/ieee80211softmac_assoc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index cc8110bdd579..afb6c6698b27 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -271,8 +271,11 @@ ieee80211softmac_assoc_work(struct work_struct *work) */ dprintk(KERN_INFO PFX "Associate: Scanning for networks first.\n"); ieee80211softmac_notify(mac->dev, IEEE80211SOFTMAC_EVENT_SCAN_FINISHED, ieee80211softmac_assoc_notify_scan, NULL); - if (ieee80211softmac_start_scan(mac)) + if (ieee80211softmac_start_scan(mac)) { dprintk(KERN_INFO PFX "Associate: failed to initiate scan. Is device up?\n"); + mac->associnfo.associating = 0; + mac->associnfo.associated = 0; + } goto out; } else { mac->associnfo.associating = 0; -- cgit v1.2.3 From b39e625b6e75aa70e26c13f9378756bb5f2af032 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 11 Jun 2007 23:05:07 -0400 Subject: NFSv4: Clean up nfs4_call_async() Use rpc_run_task() instead of doing it ourselves. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 +++++++++----------------- fs/nfs/nfs4state.c | 9 +++++---- net/sunrpc/sunrpc_syms.c | 1 - 3 files changed, 14 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8feaf232f2e4..3cc75445a68d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -295,18 +295,6 @@ static void nfs4_opendata_free(struct nfs4_opendata *p) } } -/* Helper for asynchronous RPC calls */ -static int nfs4_call_async(struct rpc_clnt *clnt, - const struct rpc_call_ops *tk_ops, void *calldata) -{ - struct rpc_task *task; - - if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) - return -ENOMEM; - rpc_execute(task); - return 0; -} - static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) { sigset_t oldset; @@ -1218,6 +1206,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; + struct nfs4_state_owner *sp = state->owner; + struct rpc_task *task; int status = -ENOMEM; calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); @@ -1237,14 +1227,16 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state) calldata->path.mnt = mntget(path->mnt); calldata->path.dentry = dget(path->dentry); - status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); - if (status == 0) - goto out; - - nfs_free_seqid(calldata->arg.seqid); + task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; out_free_calldata: kfree(calldata); out: + nfs4_put_open_state(state); + nfs4_put_state_owner(sp); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a85138ef67ad..5d7ffbfc3483 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -375,10 +375,11 @@ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); - if (oldstate != newstate && nfs4_do_close(path, state) == 0) - return; - nfs4_put_open_state(state); - nfs4_put_state_owner(owner); + if (oldstate == newstate) { + nfs4_put_open_state(state); + nfs4_put_state_owner(owner); + } else + nfs4_do_close(path, state); } /* diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 73075dec83c0..c46d31ca307b 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -30,7 +30,6 @@ EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); EXPORT_SYMBOL(rpciod_down); EXPORT_SYMBOL(rpciod_up); -EXPORT_SYMBOL(rpc_new_task); EXPORT_SYMBOL(rpc_wake_up_status); /* RPC client functions */ -- cgit v1.2.3 From 6529eba08fe7297852391a468d95322913de73fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 16:40:14 -0400 Subject: SUNRPC: Move rpc_task->tk_task list into struct rpc_clnt Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++ include/linux/sunrpc/sched.h | 5 -- net/sunrpc/clnt.c | 5 ++ net/sunrpc/sched.c | 117 +++++++++++++++++++++++++++---------------- 4 files changed, 83 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 66611423c8ee..0801ab5407ce 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -26,6 +26,8 @@ struct rpc_inode; struct rpc_clnt { atomic_t cl_count; /* Number of clones */ atomic_t cl_users; /* number of references */ + struct list_head cl_clients; /* Global list of clients */ + struct list_head cl_tasks; /* List of tasks */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ @@ -122,6 +124,8 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); +void rpc_register_client(struct rpc_clnt *); +void rpc_unregister_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); void rpcb_getport(struct rpc_task *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2047fb202a13..3387b008cdfc 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -110,11 +110,6 @@ struct rpc_task { if (!list_empty(head) && \ ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1)) -/* .. and walking list of all tasks */ -#define alltask_for_each(task, pos, head) \ - list_for_each(pos, head) \ - if ((task=list_entry(pos, struct rpc_task, tk_task)),1) - typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d8fbee40a19c..6631ece14983 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -148,6 +148,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s if (clnt->cl_metrics == NULL) goto out_no_stats; clnt->cl_program = program; + INIT_LIST_HEAD(&clnt->cl_tasks); if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -172,6 +173,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s if (clnt->cl_nodelen > UNX_MAXNODENAME) clnt->cl_nodelen = UNX_MAXNODENAME; memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); + rpc_register_client(clnt); return clnt; out_no_auth: @@ -283,9 +285,11 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_autobind = 0; new->cl_oneshot = 0; new->cl_dead = 0; + INIT_LIST_HEAD(&new->cl_tasks); rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + rpc_register_client(new); return new; out_no_path: rpc_free_iostats(new->cl_metrics); @@ -357,6 +361,7 @@ rpc_destroy_client(struct rpc_clnt *clnt) if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: + rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; xprt_put(clnt->cl_xprt); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 944d75396fb3..6309f3b52c53 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -50,9 +50,10 @@ static void rpc_release_task(struct rpc_task *task); static RPC_WAITQ(delay_queue, "delayq"); /* - * All RPC tasks are linked into this list + * All RPC clients are linked into this list */ -static LIST_HEAD(all_tasks); +static LIST_HEAD(all_clients); +static DECLARE_WAIT_QUEUE_HEAD(client_kill_wait); /* * rpciod-related stuff @@ -277,7 +278,8 @@ static void rpc_set_active(struct rpc_task *task) task->tk_pid = rpc_task_id++; #endif /* Add to global list of all tasks */ - list_add_tail(&task->tk_task, &all_tasks); + if (task->tk_client) + list_add_tail(&task->tk_task, &task->tk_client->cl_tasks); spin_unlock(&rpc_sched_lock); } @@ -818,6 +820,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons if (tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; task->tk_calldata = calldata; + INIT_LIST_HEAD(&task->tk_task); /* Initialize retry counters */ task->tk_garb_retry = 2; @@ -920,11 +923,12 @@ static void rpc_release_task(struct rpc_task *task) #endif dprintk("RPC: %5u release task\n", task->tk_pid); - /* Remove from global task list */ - spin_lock(&rpc_sched_lock); - list_del(&task->tk_task); - spin_unlock(&rpc_sched_lock); - + if (!list_empty(&task->tk_task)) { + /* Remove from client task list */ + spin_lock(&rpc_sched_lock); + list_del(&task->tk_task); + spin_unlock(&rpc_sched_lock); + } BUG_ON (RPC_IS_QUEUED(task)); /* Synchronously delete any running timer */ @@ -966,42 +970,52 @@ EXPORT_SYMBOL(rpc_run_task); * Kill all tasks for the given client. * XXX: kill their descendants as well? */ -void rpc_killall_tasks(struct rpc_clnt *clnt) +static void rpc_killall_tasks_locked(struct list_head *head) { struct rpc_task *rovr; - struct list_head *le; - dprintk("RPC: killing all tasks for client %p\n", clnt); - /* - * Spin lock all_tasks to prevent changes... - */ - spin_lock(&rpc_sched_lock); - alltask_for_each(rovr, le, &all_tasks) { + list_for_each_entry(rovr, head, tk_task) { if (! RPC_IS_ACTIVATED(rovr)) continue; - if (!clnt || rovr->tk_client == clnt) { + if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); rpc_wake_up_task(rovr); } } +} + +void rpc_killall_tasks(struct rpc_clnt *clnt) +{ + dprintk("RPC: killing all tasks for client %p\n", clnt); + /* + * Spin lock all_tasks to prevent changes... + */ + spin_lock(&rpc_sched_lock); + rpc_killall_tasks_locked(&clnt->cl_tasks); spin_unlock(&rpc_sched_lock); } static void rpciod_killall(void) { + struct rpc_clnt *clnt; unsigned long flags; - while (!list_empty(&all_tasks)) { + for(;;) { clear_thread_flag(TIF_SIGPENDING); - rpc_killall_tasks(NULL); + + spin_lock(&rpc_sched_lock); + list_for_each_entry(clnt, &all_clients, cl_clients) + rpc_killall_tasks_locked(&clnt->cl_tasks); + spin_unlock(&rpc_sched_lock); flush_workqueue(rpciod_workqueue); - if (!list_empty(&all_tasks)) { - dprintk("RPC: rpciod_killall: waiting for tasks " + if (!list_empty(&all_clients)) + break; + dprintk("RPC: rpciod_killall: waiting for tasks " "to exit\n"); - yield(); - } + wait_event_timeout(client_kill_wait, + list_empty(&all_clients), 1*HZ); } spin_lock_irqsave(¤t->sighand->siglock, flags); @@ -1009,6 +1023,22 @@ static void rpciod_killall(void) spin_unlock_irqrestore(¤t->sighand->siglock, flags); } +void rpc_register_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_sched_lock); + list_add(&clnt->cl_clients, &all_clients); + spin_unlock(&rpc_sched_lock); +} + +void rpc_unregister_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_sched_lock); + list_del(&clnt->cl_clients); + if (list_empty(&all_clients)) + wake_up(&client_kill_wait); + spin_unlock(&rpc_sched_lock); +} + /* * Start up the rpciod process if it's not already running. */ @@ -1071,32 +1101,33 @@ rpciod_down(void) #ifdef RPC_DEBUG void rpc_show_tasks(void) { - struct list_head *le; + struct rpc_clnt *clnt; struct rpc_task *t; spin_lock(&rpc_sched_lock); - if (list_empty(&all_tasks)) { - spin_unlock(&rpc_sched_lock); - return; - } + if (list_empty(&all_clients)) + goto out; printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " "-rpcwait -action- ---ops--\n"); - alltask_for_each(t, le, &all_tasks) { - const char *rpc_waitq = "none"; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); + list_for_each_entry(clnt, &all_clients, cl_clients) { + list_for_each_entry(t, &clnt->cl_tasks, tk_task) { + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(t)) + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + + printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", + t->tk_pid, + (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), + t->tk_flags, t->tk_status, + t->tk_client, + (t->tk_client ? t->tk_client->cl_prog : 0), + t->tk_rqstp, t->tk_timeout, + rpc_waitq, + t->tk_action, t->tk_ops); + } } +out: spin_unlock(&rpc_sched_lock); } #endif -- cgit v1.2.3 From 4bef61ff7514396419563ca54fd42ef846485b06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Jun 2007 14:17:01 -0400 Subject: SUNRPC: Add a per-rpc_clnt spinlock Use that to protect the rpc_clnt->cl_tasks list instead of using a global lock. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 2 ++ net/sunrpc/sched.c | 47 ++++++++++++++++++++++++++------------------- 3 files changed, 30 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 0801ab5407ce..2f4b520a7419 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -28,6 +28,7 @@ struct rpc_clnt { atomic_t cl_users; /* number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ + spinlock_t cl_lock; /* spinlock */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6631ece14983..424dfdc6862c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -149,6 +149,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s goto out_no_stats; clnt->cl_program = program; INIT_LIST_HEAD(&clnt->cl_tasks); + spin_lock_init(&clnt->cl_lock); if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -286,6 +287,7 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_oneshot = 0; new->cl_dead = 0; INIT_LIST_HEAD(&new->cl_tasks); + spin_lock_init(&new->cl_lock); rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6309f3b52c53..f56ebc5a08f7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -270,17 +270,22 @@ static int rpc_wait_bit_interruptible(void *word) static void rpc_set_active(struct rpc_task *task) { + struct rpc_clnt *clnt; if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) return; - spin_lock(&rpc_sched_lock); #ifdef RPC_DEBUG task->tk_magic = RPC_TASK_MAGIC_ID; + spin_lock(&rpc_sched_lock); task->tk_pid = rpc_task_id++; + spin_unlock(&rpc_sched_lock); #endif /* Add to global list of all tasks */ - if (task->tk_client) - list_add_tail(&task->tk_task, &task->tk_client->cl_tasks); - spin_unlock(&rpc_sched_lock); + clnt = task->tk_client; + if (clnt != NULL) { + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); + } } /* @@ -924,10 +929,11 @@ static void rpc_release_task(struct rpc_task *task) dprintk("RPC: %5u release task\n", task->tk_pid); if (!list_empty(&task->tk_task)) { + struct rpc_clnt *clnt = task->tk_client; /* Remove from client task list */ - spin_lock(&rpc_sched_lock); + spin_lock(&clnt->cl_lock); list_del(&task->tk_task); - spin_unlock(&rpc_sched_lock); + spin_unlock(&clnt->cl_lock); } BUG_ON (RPC_IS_QUEUED(task)); @@ -970,12 +976,19 @@ EXPORT_SYMBOL(rpc_run_task); * Kill all tasks for the given client. * XXX: kill their descendants as well? */ -static void rpc_killall_tasks_locked(struct list_head *head) +void rpc_killall_tasks(struct rpc_clnt *clnt) { struct rpc_task *rovr; - list_for_each_entry(rovr, head, tk_task) { + if (list_empty(&clnt->cl_tasks)) + return; + dprintk("RPC: killing all tasks for client %p\n", clnt); + /* + * Spin lock all_tasks to prevent changes... + */ + spin_lock(&clnt->cl_lock); + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { if (! RPC_IS_ACTIVATED(rovr)) continue; if (!(rovr->tk_flags & RPC_TASK_KILLED)) { @@ -984,17 +997,7 @@ static void rpc_killall_tasks_locked(struct list_head *head) rpc_wake_up_task(rovr); } } -} - -void rpc_killall_tasks(struct rpc_clnt *clnt) -{ - dprintk("RPC: killing all tasks for client %p\n", clnt); - /* - * Spin lock all_tasks to prevent changes... - */ - spin_lock(&rpc_sched_lock); - rpc_killall_tasks_locked(&clnt->cl_tasks); - spin_unlock(&rpc_sched_lock); + spin_unlock(&clnt->cl_lock); } static void rpciod_killall(void) @@ -1007,7 +1010,7 @@ static void rpciod_killall(void) spin_lock(&rpc_sched_lock); list_for_each_entry(clnt, &all_clients, cl_clients) - rpc_killall_tasks_locked(&clnt->cl_tasks); + rpc_killall_tasks(clnt); spin_unlock(&rpc_sched_lock); flush_workqueue(rpciod_workqueue); if (!list_empty(&all_clients)) @@ -1110,6 +1113,9 @@ void rpc_show_tasks(void) printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " "-rpcwait -action- ---ops--\n"); list_for_each_entry(clnt, &all_clients, cl_clients) { + if (list_empty(&clnt->cl_tasks)) + continue; + spin_lock(&clnt->cl_lock); list_for_each_entry(t, &clnt->cl_tasks, tk_task) { const char *rpc_waitq = "none"; @@ -1126,6 +1132,7 @@ void rpc_show_tasks(void) rpc_waitq, t->tk_action, t->tk_ops); } + spin_unlock(&clnt->cl_lock); } out: spin_unlock(&rpc_sched_lock); -- cgit v1.2.3 From c44fe705530ff9ea5e563bf9b65bdd29defe682b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Jun 2007 14:17:01 -0400 Subject: SUNRPC: Clean up tk_pid allocation and make it lockless Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f56ebc5a08f7..0e9fbbd4f987 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -25,7 +25,6 @@ #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED #define RPC_TASK_MAGIC_ID 0xf00baa -static int rpc_task_id; #endif /* @@ -268,17 +267,26 @@ static int rpc_wait_bit_interruptible(void *word) return 0; } +#ifdef RPC_DEBUG +static void rpc_task_set_debuginfo(struct rpc_task *task) +{ + static atomic_t rpc_pid; + + task->tk_magic = RPC_TASK_MAGIC_ID; + task->tk_pid = atomic_inc_return(&rpc_pid); +} +#else +static inline void rpc_task_set_debuginfo(struct rpc_task *task) +{ +} +#endif + static void rpc_set_active(struct rpc_task *task) { struct rpc_clnt *clnt; if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) return; -#ifdef RPC_DEBUG - task->tk_magic = RPC_TASK_MAGIC_ID; - spin_lock(&rpc_sched_lock); - task->tk_pid = rpc_task_id++; - spin_unlock(&rpc_sched_lock); -#endif + rpc_task_set_debuginfo(task); /* Add to global list of all tasks */ clnt = task->tk_client; if (clnt != NULL) { -- cgit v1.2.3 From 34f52e3591f241b825353ba27def956d8487c400 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 16:40:31 -0400 Subject: SUNRPC: Convert rpc_clnt->cl_users to a kref Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 12 +++------- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/clnt.c | 57 ++++++++++++++++++++++----------------------- net/sunrpc/rpc_pipe.c | 2 +- net/sunrpc/sched.c | 6 ++--- 5 files changed, 35 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 96070bff93fc..c252a1c95857 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -161,15 +161,9 @@ nlm_destroy_host(struct nlm_host *host) */ nsm_unmonitor(host); - if ((clnt = host->h_rpcclnt) != NULL) { - if (atomic_read(&clnt->cl_users)) { - printk(KERN_WARNING - "lockd: active RPC handle\n"); - clnt->cl_dead = 1; - } else { - rpc_destroy_client(host->h_rpcclnt); - } - } + clnt = host->h_rpcclnt; + if (clnt != NULL) + rpc_shutdown_client(clnt); kfree(host); } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2f4b520a7419..003d8ea70c19 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -24,8 +24,8 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { + struct kref cl_kref; /* Number of references */ atomic_t cl_count; /* Number of clones */ - atomic_t cl_users; /* number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 424dfdc6862c..254a6e1a5770 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -121,7 +121,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) goto out_err; - atomic_set(&clnt->cl_users, 0); atomic_set(&clnt->cl_count, 1); clnt->cl_parent = clnt; @@ -157,6 +156,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); + kref_init(&clnt->cl_kref); + err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) goto out_no_path; @@ -272,10 +273,10 @@ rpc_clone_client(struct rpc_clnt *clnt) if (!new) goto out_no_clnt; atomic_set(&new->cl_count, 1); - atomic_set(&new->cl_users, 0); new->cl_metrics = rpc_alloc_iostats(clnt); if (new->cl_metrics == NULL) goto out_no_stats; + kref_init(&new->cl_kref); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; @@ -311,40 +312,28 @@ out_no_clnt: int rpc_shutdown_client(struct rpc_clnt *clnt) { - dprintk("RPC: shutting down %s client for %s, tasks=%d\n", - clnt->cl_protname, clnt->cl_server, - atomic_read(&clnt->cl_users)); + dprintk("RPC: shutting down %s client for %s\n", + clnt->cl_protname, clnt->cl_server); - while (atomic_read(&clnt->cl_users) > 0) { + while (!list_empty(&clnt->cl_tasks)) { /* Don't let rpc_release_client destroy us */ clnt->cl_oneshot = 0; clnt->cl_dead = 0; rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, - !atomic_read(&clnt->cl_users), 1*HZ); - } - - if (atomic_read(&clnt->cl_users) < 0) { - printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n", - clnt, atomic_read(&clnt->cl_users)); -#ifdef RPC_DEBUG - rpc_show_tasks(); -#endif - BUG(); + list_empty(&clnt->cl_tasks), 1*HZ); } return rpc_destroy_client(clnt); } /* - * Delete an RPC client + * Free an RPC client */ -int -rpc_destroy_client(struct rpc_clnt *clnt) +static void +rpc_free_client(struct kref *kref) { - if (!atomic_dec_and_test(&clnt->cl_count)) - return 1; - BUG_ON(atomic_read(&clnt->cl_users) != 0); + struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); @@ -368,23 +357,33 @@ out_free: clnt->cl_metrics = NULL; xprt_put(clnt->cl_xprt); kfree(clnt); - return 0; } /* - * Release an RPC client + * Release reference to the RPC client */ void rpc_release_client(struct rpc_clnt *clnt) { - dprintk("RPC: rpc_release_client(%p, %d)\n", - clnt, atomic_read(&clnt->cl_users)); + dprintk("RPC: rpc_release_client(%p)\n", clnt); - if (!atomic_dec_and_test(&clnt->cl_users)) - return; - wake_up(&destroy_wait); + if (list_empty(&clnt->cl_tasks)) + wake_up(&destroy_wait); if (clnt->cl_oneshot || clnt->cl_dead) rpc_destroy_client(clnt); + kref_put(&clnt->cl_kref, rpc_free_client); +} + +/* + * Delete an RPC client + */ +int +rpc_destroy_client(struct rpc_clnt *clnt) +{ + if (!atomic_dec_and_test(&clnt->cl_count)) + return 1; + kref_put(&clnt->cl_kref, rpc_free_client); + return 0; } /** diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5887457dc936..826190dacfce 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -344,7 +344,7 @@ rpc_info_open(struct inode *inode, struct file *file) mutex_lock(&inode->i_mutex); clnt = RPC_I(inode)->private; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); m->private = clnt; } else { single_release(inode, file); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 0e9fbbd4f987..bb12983580a0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -846,7 +846,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_workqueue = rpciod_workqueue; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; if (!clnt->cl_intr) @@ -898,9 +898,7 @@ out: cleanup: /* Check whether to release the client */ if (clnt) { - printk("rpc_new_task: failed, users=%d, oneshot=%d\n", - atomic_read(&clnt->cl_users), clnt->cl_oneshot); - atomic_inc(&clnt->cl_users); /* pretend we were used ... */ + kref_get(&clnt->cl_kref); /* pretend we were used ... */ rpc_release_client(clnt); } goto out; -- cgit v1.2.3 From 848f1fe6be2e290691bb6c13cbb8fd92bd0cfaab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 19:39:12 -0400 Subject: SUNRPC: Kill rpc_clnt->cl_dead Its use is at best racy, and there is only one user (lockd), which has additional locking that makes the whole thing redundant. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 3 +-- net/sunrpc/clnt.c | 18 +++--------------- 2 files changed, 4 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 003d8ea70c19..ab3ef6d629a7 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -45,8 +45,7 @@ struct rpc_clnt { cl_intr : 1,/* interruptible */ cl_discrtry : 1,/* disconnect before retry */ cl_autobind : 1,/* use getport() */ - cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_oneshot : 1;/* dispose after use */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 254a6e1a5770..fb65249538d4 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -286,7 +286,6 @@ rpc_clone_client(struct rpc_clnt *clnt) /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_oneshot = 0; - new->cl_dead = 0; INIT_LIST_HEAD(&new->cl_tasks); spin_lock_init(&new->cl_lock); rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); @@ -305,9 +304,8 @@ out_no_clnt: /* * Properly shut down an RPC client, terminating all outstanding - * requests. Note that we must be certain that cl_oneshot and - * cl_dead are cleared, or else the client would be destroyed - * when the last task releases it. + * requests. Note that we must be certain that cl_oneshot is cleared, + * or else the client would be destroyed when the last task releases it. */ int rpc_shutdown_client(struct rpc_clnt *clnt) @@ -318,7 +316,6 @@ rpc_shutdown_client(struct rpc_clnt *clnt) while (!list_empty(&clnt->cl_tasks)) { /* Don't let rpc_release_client destroy us */ clnt->cl_oneshot = 0; - clnt->cl_dead = 0; rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, list_empty(&clnt->cl_tasks), 1*HZ); @@ -369,7 +366,7 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - if (clnt->cl_oneshot || clnt->cl_dead) + if (clnt->cl_oneshot) rpc_destroy_client(clnt); kref_put(&clnt->cl_kref, rpc_free_client); } @@ -483,10 +480,6 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) sigset_t oldset; int status; - /* If this client is slain all further I/O fails */ - if (clnt->cl_dead) - return -EIO; - BUG_ON(flags & RPC_TASK_ASYNC); task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); @@ -519,11 +512,6 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, sigset_t oldset; int status; - /* If this client is slain all further I/O fails */ - status = -EIO; - if (clnt->cl_dead) - goto out_release; - flags |= RPC_TASK_ASYNC; /* Create/initialize a new RPC task */ -- cgit v1.2.3 From 90c5755ff5111ffdcca10a1e8a823dba29f37b6d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 19:49:36 -0400 Subject: SUNRPC: Kill rpc_clnt->cl_oneshot Replace it with explicit calls to rpc_shutdown_client() or rpc_destroy_client() (for the case of asynchronous calls). Signed-off-by: Trond Myklebust --- fs/lockd/mon.c | 2 +- fs/nfs/mount_clnt.c | 4 ++-- include/linux/sunrpc/clnt.h | 10 ++++------ net/sunrpc/clnt.c | 10 +--------- net/sunrpc/rpcb_clnt.c | 6 ++++-- net/sunrpc/sched.c | 14 ++------------ 6 files changed, 14 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 2102e2d0134d..3353ed8421a7 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) status); else status = 0; + rpc_shutdown_client(clnt); out: return status; } @@ -138,7 +139,6 @@ nsm_create(void) .program = &nsm_program, .version = SM_VERSION, .authflavor = RPC_AUTH_NULL, - .flags = (RPC_CLNT_CREATE_ONESHOT), }; return rpc_create(&args); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index ca5a266a3140..878d7a5cb6d4 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -69,6 +69,7 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; status = rpc_call_sync(mnt_clnt, &msg, 0); + rpc_shutdown_client(mnt_clnt); return status < 0? status : (result.status? -EACCES : 0); } @@ -84,8 +85,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, .program = &mnt_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_INTR), + .flags = RPC_CLNT_CREATE_INTR, }; return rpc_create(&args); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab3ef6d629a7..fe7ea65ed0ae 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -44,8 +44,7 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ cl_discrtry : 1,/* disconnect before retry */ - cl_autobind : 1,/* use getport() */ - cl_oneshot : 1;/* dispose after use */ + cl_autobind : 1;/* use getport() */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -112,10 +111,9 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) #define RPC_CLNT_CREATE_INTR (1UL << 1) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) -#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) -#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) -#define RPC_CLNT_CREATE_NOPING (1UL << 5) -#define RPC_CLNT_CREATE_DISCRTRY (1UL << 6) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) +#define RPC_CLNT_CREATE_NOPING (1UL << 4) +#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fb65249538d4..34662dfa9cc0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -249,8 +249,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_intr = 1; if (args->flags & RPC_CLNT_CREATE_AUTOBIND) clnt->cl_autobind = 1; - if (args->flags & RPC_CLNT_CREATE_ONESHOT) - clnt->cl_oneshot = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; @@ -285,7 +283,6 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_xprt = xprt_get(clnt->cl_xprt); /* Turn off autobind on clones */ new->cl_autobind = 0; - new->cl_oneshot = 0; INIT_LIST_HEAD(&new->cl_tasks); spin_lock_init(&new->cl_lock); rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); @@ -304,8 +301,7 @@ out_no_clnt: /* * Properly shut down an RPC client, terminating all outstanding - * requests. Note that we must be certain that cl_oneshot is cleared, - * or else the client would be destroyed when the last task releases it. + * requests. */ int rpc_shutdown_client(struct rpc_clnt *clnt) @@ -314,8 +310,6 @@ rpc_shutdown_client(struct rpc_clnt *clnt) clnt->cl_protname, clnt->cl_server); while (!list_empty(&clnt->cl_tasks)) { - /* Don't let rpc_release_client destroy us */ - clnt->cl_oneshot = 0; rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, list_empty(&clnt->cl_tasks), 1*HZ); @@ -366,8 +360,6 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - if (clnt->cl_oneshot) - rpc_destroy_client(clnt); kref_put(&clnt->cl_kref, rpc_free_client); } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 6c7aa8a1f0c6..00853a326499 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -184,8 +184,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_NOPING), + .flags = RPC_CLNT_CREATE_NOPING, }; ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); @@ -238,6 +237,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) error = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (error < 0) printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -286,6 +286,7 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, return PTR_ERR(rpcb_clnt); status = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (status >= 0) { if (map.r_port != 0) @@ -379,6 +380,7 @@ void rpcb_getport(struct rpc_task *task) } child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); + rpc_destroy_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index bb12983580a0..d95fe4e40eb4 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -876,9 +876,7 @@ static void rpc_free_task(struct rcu_head *rcu) } /* - * Create a new task for the specified client. We have to - * clean up after an allocation failure, as the client may - * have specified "oneshot". + * Create a new task for the specified client. */ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { @@ -886,7 +884,7 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc task = rpc_alloc_task(); if (!task) - goto cleanup; + goto out; rpc_init_task(task, clnt, flags, tk_ops, calldata); @@ -894,14 +892,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc task->tk_flags |= RPC_TASK_DYNAMIC; out: return task; - -cleanup: - /* Check whether to release the client */ - if (clnt) { - kref_get(&clnt->cl_kref); /* pretend we were used ... */ - rpc_release_client(clnt); - } - goto out; } -- cgit v1.2.3 From 8ad7c892e18ff8e6df422eb48ca0f73268ffd632 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 16:40:32 -0400 Subject: SUNRPC: Make rpc_clone take a reference instead of using cl_count Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 34662dfa9cc0..613c10e4ac31 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -279,7 +279,7 @@ rpc_clone_client(struct rpc_clnt *clnt) if (err != 0) goto out_no_path; new->cl_parent = clnt; - atomic_inc(&clnt->cl_count); + kref_get(&clnt->cl_kref); new->cl_xprt = xprt_get(clnt->cl_xprt); /* Turn off autobind on clones */ new->cl_autobind = 0; @@ -337,7 +337,7 @@ rpc_free_client(struct kref *kref) rpc_put_mount(); } if (clnt->cl_parent != clnt) { - rpc_destroy_client(clnt->cl_parent); + rpc_release_client(clnt->cl_parent); goto out_free; } if (clnt->cl_server != clnt->cl_inline_name) -- cgit v1.2.3 From 4c402b40970382ded616eadd544fd63feb76cc79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 16:40:32 -0400 Subject: SUNRPC: Remove rpc_clnt->cl_count The kref now does most of what cl_count + cl_user used to do. The only remaining role for cl_count is to tell us if we are in a 'shutdown' phase. We can provide that information using a single bit field instead of a full atomic counter. Also rename rpc_destroy_client() to rpc_close_client(), which reflects better what its role is these days. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 +--- net/sunrpc/clnt.c | 19 ++----------------- net/sunrpc/rpcb_clnt.c | 2 +- net/sunrpc/sunrpc_syms.c | 1 - 4 files changed, 4 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index fe7ea65ed0ae..cf03494c36e7 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -25,7 +25,6 @@ struct rpc_inode; */ struct rpc_clnt { struct kref cl_kref; /* Number of references */ - atomic_t cl_count; /* Number of clones */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ @@ -119,8 +118,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); -int rpc_shutdown_client(struct rpc_clnt *); -int rpc_destroy_client(struct rpc_clnt *); +void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); void rpc_register_client(struct rpc_clnt *); void rpc_unregister_client(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 613c10e4ac31..be5524d20822 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -121,7 +121,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) goto out_err; - atomic_set(&clnt->cl_count, 1); clnt->cl_parent = clnt; clnt->cl_server = clnt->cl_inline_name; @@ -270,7 +269,6 @@ rpc_clone_client(struct rpc_clnt *clnt) new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; - atomic_set(&new->cl_count, 1); new->cl_metrics = rpc_alloc_iostats(clnt); if (new->cl_metrics == NULL) goto out_no_stats; @@ -303,8 +301,7 @@ out_no_clnt: * Properly shut down an RPC client, terminating all outstanding * requests. */ -int -rpc_shutdown_client(struct rpc_clnt *clnt) +void rpc_shutdown_client(struct rpc_clnt *clnt) { dprintk("RPC: shutting down %s client for %s\n", clnt->cl_protname, clnt->cl_server); @@ -315,7 +312,7 @@ rpc_shutdown_client(struct rpc_clnt *clnt) list_empty(&clnt->cl_tasks), 1*HZ); } - return rpc_destroy_client(clnt); + rpc_release_client(clnt); } /* @@ -363,18 +360,6 @@ rpc_release_client(struct rpc_clnt *clnt) kref_put(&clnt->cl_kref, rpc_free_client); } -/* - * Delete an RPC client - */ -int -rpc_destroy_client(struct rpc_clnt *clnt) -{ - if (!atomic_dec_and_test(&clnt->cl_count)) - return 1; - kref_put(&clnt->cl_kref, rpc_free_client); - return 0; -} - /** * rpc_bind_new_program - bind a new RPC program to an existing client * @old - old rpc_client diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 00853a326499..cf7db59cdcde 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -380,7 +380,7 @@ void rpcb_getport(struct rpc_task *task) } child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); - rpc_destroy_client(rpcb_clnt); + rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index c46d31ca307b..02e83e15fef5 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -35,7 +35,6 @@ EXPORT_SYMBOL(rpc_wake_up_status); /* RPC client functions */ EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); -EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_killall_tasks); EXPORT_SYMBOL(rpc_call_sync); -- cgit v1.2.3 From d431a555fcf920e1b5c3e3eba52eb5f5e7836771 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 17:07:54 -0400 Subject: SUNRPC: Don't create an rpc_pipefs directory before rpc_clone is initialised Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index be5524d20822..78bbb359281d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -269,6 +269,12 @@ rpc_clone_client(struct rpc_clnt *clnt) new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; + new->cl_parent = clnt; + /* Turn off autobind on clones */ + new->cl_autobind = 0; + INIT_LIST_HEAD(&new->cl_tasks); + spin_lock_init(&new->cl_lock); + rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); new->cl_metrics = rpc_alloc_iostats(clnt); if (new->cl_metrics == NULL) goto out_no_stats; @@ -276,16 +282,10 @@ rpc_clone_client(struct rpc_clnt *clnt) err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; - new->cl_parent = clnt; - kref_get(&clnt->cl_kref); - new->cl_xprt = xprt_get(clnt->cl_xprt); - /* Turn off autobind on clones */ - new->cl_autobind = 0; - INIT_LIST_HEAD(&new->cl_tasks); - spin_lock_init(&new->cl_lock); - rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + xprt_get(clnt->cl_xprt); + kref_get(&clnt->cl_kref); rpc_register_client(new); return new; out_no_path: -- cgit v1.2.3 From ab418d70e1fceda1e2824c45ba3323a1b1413507 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 17:08:36 -0400 Subject: SUNRPC: Optimise rpciod_up() Instead of taking the mutex every time we just need to increment/decrement rpciod_users, we can optmise by using atomic_inc_not_zero and atomic_dec_and_test. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d95fe4e40eb4..f6eed4d4e5dd 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -58,7 +58,7 @@ static DECLARE_WAIT_QUEUE_HEAD(client_kill_wait); * rpciod-related stuff */ static DEFINE_MUTEX(rpciod_mutex); -static unsigned int rpciod_users; +static atomic_t rpciod_users = ATOMIC_INIT(0); struct workqueue_struct *rpciod_workqueue; /* @@ -1047,28 +1047,27 @@ rpciod_up(void) struct workqueue_struct *wq; int error = 0; + if (atomic_inc_not_zero(&rpciod_users)) + return 0; + mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_up: users %u\n", rpciod_users); - rpciod_users++; - if (rpciod_workqueue) - goto out; - /* - * If there's no pid, we should be the first user. - */ - if (rpciod_users > 1) - printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users); + + /* Guard against races with rpciod_down() */ + if (rpciod_workqueue != NULL) + goto out_ok; /* * Create the rpciod thread and wait for it to start. */ + dprintk("RPC: creating workqueue rpciod\n"); error = -ENOMEM; wq = create_workqueue("rpciod"); - if (wq == NULL) { - printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); - rpciod_users--; + if (wq == NULL) goto out; - } + rpciod_workqueue = wq; error = 0; +out_ok: + atomic_inc(&rpciod_users); out: mutex_unlock(&rpciod_mutex); return error; @@ -1077,23 +1076,17 @@ out: void rpciod_down(void) { + if (!atomic_dec_and_test(&rpciod_users)) + return; + mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_down sema %u\n", rpciod_users); - if (rpciod_users) { - if (--rpciod_users) - goto out; - } else - printk(KERN_WARNING "rpciod_down: no users??\n"); + dprintk("RPC: destroying workqueue rpciod\n"); - if (!rpciod_workqueue) { - dprintk("RPC: rpciod_down: Nothing to do!\n"); - goto out; + if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) { + rpciod_killall(); + destroy_workqueue(rpciod_workqueue); + rpciod_workqueue = NULL; } - rpciod_killall(); - - destroy_workqueue(rpciod_workqueue); - rpciod_workqueue = NULL; - out: mutex_unlock(&rpciod_mutex); } -- cgit v1.2.3 From 4ada539ed77c7a2bbcb75cafbbd7bd8d2b9bef7b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 17:26:17 -0400 Subject: SUNRPC: Make create_client() take a reference to the rpciod workqueue Ensures that an rpc_client always has the possibility to send asynchronous RPC calls. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 +++++++ net/sunrpc/sched.c | 31 ------------------------------- 2 files changed, 7 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 78bbb359281d..fe838e996ee3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -111,6 +111,9 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); + err = rpciod_up(); + if (err) + goto out_no_rpciod; err = -EINVAL; if (!xprt) goto out_no_xprt; @@ -191,6 +194,8 @@ out_no_stats: out_err: xprt_put(xprt); out_no_xprt: + rpciod_down(); +out_no_rpciod: return ERR_PTR(err); } @@ -287,6 +292,7 @@ rpc_clone_client(struct rpc_clnt *clnt) xprt_get(clnt->cl_xprt); kref_get(&clnt->cl_kref); rpc_register_client(new); + rpciod_up(); return new; out_no_path: rpc_free_iostats(new->cl_metrics); @@ -344,6 +350,7 @@ out_free: rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; xprt_put(clnt->cl_xprt); + rpciod_down(); kfree(clnt); } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f6eed4d4e5dd..05825154ddd9 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -39,7 +39,6 @@ static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); -static void rpciod_killall(void); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); @@ -52,7 +51,6 @@ static RPC_WAITQ(delay_queue, "delayq"); * All RPC clients are linked into this list */ static LIST_HEAD(all_clients); -static DECLARE_WAIT_QUEUE_HEAD(client_kill_wait); /* * rpciod-related stuff @@ -996,32 +994,6 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) spin_unlock(&clnt->cl_lock); } -static void rpciod_killall(void) -{ - struct rpc_clnt *clnt; - unsigned long flags; - - for(;;) { - clear_thread_flag(TIF_SIGPENDING); - - spin_lock(&rpc_sched_lock); - list_for_each_entry(clnt, &all_clients, cl_clients) - rpc_killall_tasks(clnt); - spin_unlock(&rpc_sched_lock); - flush_workqueue(rpciod_workqueue); - if (!list_empty(&all_clients)) - break; - dprintk("RPC: rpciod_killall: waiting for tasks " - "to exit\n"); - wait_event_timeout(client_kill_wait, - list_empty(&all_clients), 1*HZ); - } - - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); -} - void rpc_register_client(struct rpc_clnt *clnt) { spin_lock(&rpc_sched_lock); @@ -1033,8 +1005,6 @@ void rpc_unregister_client(struct rpc_clnt *clnt) { spin_lock(&rpc_sched_lock); list_del(&clnt->cl_clients); - if (list_empty(&all_clients)) - wake_up(&client_kill_wait); spin_unlock(&rpc_sched_lock); } @@ -1083,7 +1053,6 @@ rpciod_down(void) dprintk("RPC: destroying workqueue rpciod\n"); if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) { - rpciod_killall(); destroy_workqueue(rpciod_workqueue); rpciod_workqueue = NULL; } -- cgit v1.2.3 From f61534dfd38f895b203e2aadaba04f21a992ca8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 17:31:58 -0400 Subject: SUNRPC: Remove redundant calls to rpciod_up()/rpciod_down() Signed-off-by: Trond Myklebust --- fs/lockd/svc.c | 6 ------ fs/nfs/client.c | 15 --------------- fs/nfsd/nfs4callback.c | 12 +++--------- fs/nfsd/nfs4state.c | 1 - include/linux/nfs_fs_sb.h | 1 - net/sunrpc/sunrpc_syms.c | 2 -- 6 files changed, 3 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 126b1bf02c0e..26809325469c 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp) /* Process request with signals blocked, but allow SIGKILL. */ allow_signal(SIGKILL); - /* kick rpciod */ - rpciod_up(); - dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); if (!nlm_timeout) @@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp) /* Exit the RPC thread */ svc_exit_thread(rqstp); - /* release rpciod */ - rpciod_down(); - /* Release module */ unlock_kernel(); module_put_and_exit(0); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 881fa4900923..71d4c4cdac52 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, int nfsversion) { struct nfs_client *clp; - int error; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto error_1; - } - __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); - if (nfsversion == 4) { if (nfs_callback_up() < 0) goto error_2; @@ -154,9 +145,6 @@ error_3: if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); error_2: - rpciod_down(); - __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); -error_1: kfree(clp); error_0: return NULL; @@ -198,9 +186,6 @@ static void nfs_free_client(struct nfs_client *clp) if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); - if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) - rpciod_down(); - kfree(clp->cl_hostname); kfree(clp); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 864090edc28b..6b1b487db1ec 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -429,29 +429,23 @@ nfsd4_probe_callback(struct nfs4_client *clp) goto out_err; } - /* Kick rpciod, put the call on the wire. */ - if (rpciod_up() != 0) - goto out_clnt; - /* the task holds a reference to the nfs4_client struct */ atomic_inc(&clp->cl_count); msg.rpc_cred = nfsd4_lookupcred(clp,0); if (IS_ERR(msg.rpc_cred)) - goto out_rpciod; + goto out_release_clp; status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); put_rpccred(msg.rpc_cred); if (status != 0) { dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); - goto out_rpciod; + goto out_release_clp; } return; -out_rpciod: +out_release_clp: atomic_dec(&clp->cl_count); - rpciod_down(); -out_clnt: rpc_shutdown_client(cb->cb_client); out_err: cb->cb_client = NULL; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3cc8ce422ab1..8c52913d7cb6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp) if (clnt) { clp->cl_callback.cb_client = NULL; rpc_shutdown_client(clnt); - rpciod_down(); } } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 52b4378311c8..144d955dc46a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -16,7 +16,6 @@ struct nfs_client { #define NFS_CS_INITING 1 /* busy initialising */ int cl_nfsversion; /* NFS protocol version */ unsigned long cl_res_state; /* NFS resources state */ -#define NFS_CS_RPCIOD 0 /* - rpciod started */ #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ #define NFS_CS_RENEWD 3 /* - renewd started */ diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 02e83e15fef5..b99b11b11461 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -28,8 +28,6 @@ EXPORT_SYMBOL(rpc_init_task); EXPORT_SYMBOL(rpc_sleep_on); EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); -EXPORT_SYMBOL(rpciod_down); -EXPORT_SYMBOL(rpciod_up); EXPORT_SYMBOL(rpc_wake_up_status); /* RPC client functions */ -- cgit v1.2.3 From 188fef11db219f13f32d055ba59985e7d1a349fe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Jun 2007 14:18:40 -0400 Subject: SUNRPC: Move rpc_register_client and friends into net/sunrpc/clnt.c Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 -- net/sunrpc/clnt.c | 57 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/sched.c | 62 --------------------------------------------- 3 files changed, 57 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index cf03494c36e7..a451351c7eff 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -120,8 +120,6 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); -void rpc_register_client(struct rpc_clnt *); -void rpc_unregister_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); void rpcb_getport(struct rpc_task *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fe838e996ee3..4f39ab1b04d6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -44,6 +44,12 @@ dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ __FUNCTION__, t->tk_status) +/* + * All RPC clients are linked into this list + */ +static LIST_HEAD(all_clients); +static DEFINE_SPINLOCK(rpc_client_lock); + static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); @@ -66,6 +72,19 @@ static void call_connect_status(struct rpc_task *task); static __be32 * call_header(struct rpc_task *task); static __be32 * call_verify(struct rpc_task *task); +static void rpc_register_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_client_lock); + list_add(&clnt->cl_clients, &all_clients); + spin_unlock(&rpc_client_lock); +} + +static void rpc_unregister_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_client_lock); + list_del(&clnt->cl_clients); + spin_unlock(&rpc_client_lock); +} static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) @@ -1410,3 +1429,41 @@ int rpc_ping(struct rpc_clnt *clnt, int flags) put_rpccred(msg.rpc_cred); return err; } + +#ifdef RPC_DEBUG +void rpc_show_tasks(void) +{ + struct rpc_clnt *clnt; + struct rpc_task *t; + + spin_lock(&rpc_client_lock); + if (list_empty(&all_clients)) + goto out; + printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " + "-rpcwait -action- ---ops--\n"); + list_for_each_entry(clnt, &all_clients, cl_clients) { + if (list_empty(&clnt->cl_tasks)) + continue; + spin_lock(&clnt->cl_lock); + list_for_each_entry(t, &clnt->cl_tasks, tk_task) { + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(t)) + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + + printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", + t->tk_pid, + (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), + t->tk_flags, t->tk_status, + t->tk_client, + (t->tk_client ? t->tk_client->cl_prog : 0), + t->tk_rqstp, t->tk_timeout, + rpc_waitq, + t->tk_action, t->tk_ops); + } + spin_unlock(&clnt->cl_lock); + } +out: + spin_unlock(&rpc_client_lock); +} +#endif diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 05825154ddd9..c0f8d25caf57 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -47,11 +47,6 @@ static void rpc_release_task(struct rpc_task *task); */ static RPC_WAITQ(delay_queue, "delayq"); -/* - * All RPC clients are linked into this list - */ -static LIST_HEAD(all_clients); - /* * rpciod-related stuff */ @@ -59,11 +54,6 @@ static DEFINE_MUTEX(rpciod_mutex); static atomic_t rpciod_users = ATOMIC_INIT(0); struct workqueue_struct *rpciod_workqueue; -/* - * Spinlock for other critical sections of code. - */ -static DEFINE_SPINLOCK(rpc_sched_lock); - /* * Disable the timer for a given RPC task. Should be called with * queue->lock and bh_disabled in order to avoid races within @@ -994,20 +984,6 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) spin_unlock(&clnt->cl_lock); } -void rpc_register_client(struct rpc_clnt *clnt) -{ - spin_lock(&rpc_sched_lock); - list_add(&clnt->cl_clients, &all_clients); - spin_unlock(&rpc_sched_lock); -} - -void rpc_unregister_client(struct rpc_clnt *clnt) -{ - spin_lock(&rpc_sched_lock); - list_del(&clnt->cl_clients); - spin_unlock(&rpc_sched_lock); -} - /* * Start up the rpciod process if it's not already running. */ @@ -1059,44 +1035,6 @@ rpciod_down(void) mutex_unlock(&rpciod_mutex); } -#ifdef RPC_DEBUG -void rpc_show_tasks(void) -{ - struct rpc_clnt *clnt; - struct rpc_task *t; - - spin_lock(&rpc_sched_lock); - if (list_empty(&all_clients)) - goto out; - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); - list_for_each_entry(clnt, &all_clients, cl_clients) { - if (list_empty(&clnt->cl_tasks)) - continue; - spin_lock(&clnt->cl_lock); - list_for_each_entry(t, &clnt->cl_tasks, tk_task) { - const char *rpc_waitq = "none"; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); - } - spin_unlock(&clnt->cl_lock); - } -out: - spin_unlock(&rpc_sched_lock); -} -#endif - void rpc_destroy_mempool(void) { -- cgit v1.2.3 From 6e5b70e9d1e712d8dad5514e0ab5240ac4b5fb57 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Jun 2007 10:02:37 -0400 Subject: SUNRPC: clean up rpc_call_async/rpc_call_sync/rpc_run_task Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 115 ++++++++++++++++++++++++++++++++--------------------- net/sunrpc/sched.c | 23 ----------- 2 files changed, 69 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4f39ab1b04d6..76eef19cf995 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -474,73 +474,96 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset) rpc_restore_sigmask(oldset); } -/* - * New rpc_call implementation +static +struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt, + struct rpc_message *msg, + int flags, + const struct rpc_call_ops *ops, + void *data) +{ + struct rpc_task *task, *ret; + sigset_t oldset; + + task = rpc_new_task(clnt, flags, ops, data); + if (task == NULL) { + rpc_release_calldata(ops, data); + return ERR_PTR(-ENOMEM); + } + + /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */ + rpc_task_sigmask(task, &oldset); + if (msg != NULL) { + rpc_call_setup(task, msg, 0); + if (task->tk_status != 0) { + ret = ERR_PTR(task->tk_status); + rpc_put_task(task); + goto out; + } + } + atomic_inc(&task->tk_count); + rpc_execute(task); + ret = task; +out: + rpc_restore_sigmask(&oldset); + return ret; +} + +/** + * rpc_call_sync - Perform a synchronous RPC call + * @clnt: pointer to RPC client + * @msg: RPC call parameters + * @flags: RPC call flags */ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) { struct rpc_task *task; - sigset_t oldset; - int status; + int status; BUG_ON(flags & RPC_TASK_ASYNC); - task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); - if (task == NULL) - return -ENOMEM; - - /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ - rpc_task_sigmask(task, &oldset); - - /* Set up the call info struct and execute the task */ - rpc_call_setup(task, msg, 0); - if (task->tk_status == 0) { - atomic_inc(&task->tk_count); - rpc_execute(task); - } + task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL); + if (IS_ERR(task)) + return PTR_ERR(task); status = task->tk_status; rpc_put_task(task); - rpc_restore_sigmask(&oldset); return status; } -/* - * New rpc_call implementation +/** + * rpc_call_async - Perform an asynchronous RPC call + * @clnt: pointer to RPC client + * @msg: RPC call parameters + * @flags: RPC call flags + * @ops: RPC call ops + * @data: user call data */ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; - sigset_t oldset; - int status; - - flags |= RPC_TASK_ASYNC; - - /* Create/initialize a new RPC task */ - status = -ENOMEM; - if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) - goto out_release; - - /* Mask signals on GSS_AUTH upcalls */ - rpc_task_sigmask(task, &oldset); - - rpc_call_setup(task, msg, 0); - - /* Set up the call info struct and execute the task */ - status = task->tk_status; - if (status == 0) - rpc_execute(task); - else - rpc_put_task(task); - rpc_restore_sigmask(&oldset); - return status; -out_release: - rpc_release_calldata(tk_ops, data); - return status; + task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; } +/** + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it + * @clnt: pointer to RPC client + * @flags: RPC flags + * @ops: RPC call ops + * @data: user call data + */ +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *tk_ops, + void *data) +{ + return rpc_do_run_task(clnt, NULL, flags, tk_ops, data); +} +EXPORT_SYMBOL(rpc_run_task); void rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c0f8d25caf57..2ac43c41c3a9 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -933,29 +933,6 @@ static void rpc_release_task(struct rpc_task *task) rpc_put_task(task); } -/** - * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it - * @clnt: pointer to RPC client - * @flags: RPC flags - * @ops: RPC call ops - * @data: user call data - */ -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, - const struct rpc_call_ops *ops, - void *data) -{ - struct rpc_task *task; - task = rpc_new_task(clnt, flags, ops, data); - if (task == NULL) { - rpc_release_calldata(ops, data); - return ERR_PTR(-ENOMEM); - } - atomic_inc(&task->tk_count); - rpc_execute(task); - return task; -} -EXPORT_SYMBOL(rpc_run_task); - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? -- cgit v1.2.3 From c1384c9c4c184543375b52a0997d06cd98145164 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 18:00:42 -0400 Subject: SUNRPC: fix hang due to eventd deadlock... Brian Behlendorf writes: The root cause of the NFS hang we were observing appears to be a rare deadlock between the kernel provided usermodehelper API and the linux NFS client. The deadlock can arise because both of these services use the generic linux work queues. The usermodehelper API run the specified user application in the context of the work queue. And NFS submits both cleanup and reconnect work to the generic work queue for handling. Normally this is fine but a deadlock can result in the following situation. - NFS client is in a disconnected state - [events/0] runs a usermodehelper app with an NFS dependent operation, this triggers an NFS reconnect. - NFS reconnect happens to be submitted to [events/0] work queue. - Deadlock, the [events/0] work queue will never process the reconnect because it is blocked on the previous NFS dependent operation which will not complete.` The solution is simply to run reconnect requests on rpciod. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 ++-- net/sunrpc/xprtsock.c | 17 +++++++---------- 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5b05b73e4c1d..518acb74a5bb 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -127,7 +127,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); } else - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); } /* @@ -515,7 +515,7 @@ xprt_init_autodisconnect(unsigned long data) if (xprt_connecting(xprt)) xprt_release_write(xprt, NULL); else - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cc33c5880abb..ee6ad3baf680 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -653,8 +653,7 @@ static void xs_destroy(struct rpc_xprt *xprt) dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work(&transport->connect_worker); - flush_scheduled_work(); + cancel_rearming_delayed_work(&transport->connect_worker); xprt_disconnect(xprt); xs_close(xprt); @@ -1001,7 +1000,7 @@ static void xs_tcp_state_change(struct sock *sk) /* Try to schedule an autoclose RPC calls */ set_bit(XPRT_CLOSE_WAIT, &xprt->state); if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); default: xprt_disconnect(xprt); } @@ -1410,18 +1409,16 @@ static void xs_connect(struct rpc_task *task) dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); - schedule_delayed_work(&transport->connect_worker, - xprt->reestablish_timeout); + queue_delayed_work(rpciod_workqueue, + &transport->connect_worker, + xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - schedule_delayed_work(&transport->connect_worker, 0); - - /* flush_scheduled_work can sleep... */ - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); + queue_delayed_work(rpciod_workqueue, + &transport->connect_worker, 0); } } -- cgit v1.2.3 From 4a8c1344dccb848dbcf0edabc8b5c51a8ecf2808 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2007 10:14:14 -0400 Subject: SUNRPC: Add a backpointer from the struct rpc_cred to the rpc_auth Cleans up an issue whereby rpcsec_gss uses the rpc_clnt->cl_auth. If we want to be able to add several rpc_auths to a single rpc_clnt, then this abuse must go. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 4 ++++ net/sunrpc/auth_gss/auth_gss.c | 3 ++- net/sunrpc/auth_null.c | 1 + net/sunrpc/auth_unix.c | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 534cdc7be58d..8ef27afeea73 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -30,8 +30,11 @@ struct auth_cred { /* * Client user credentials */ +struct rpc_auth; +struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ + struct rpc_auth * cr_auth; struct rpc_credops * cr_ops; unsigned long cr_expire; /* when to gc */ atomic_t cr_count; /* ref count */ @@ -60,6 +63,7 @@ struct rpc_cred_cache { unsigned long expire; /* cache expiry interval */ }; +struct rpc_authops; struct rpc_auth { unsigned int au_cslack; /* call cred size estimate */ /* guess at number of u32's auth adds before diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4e4ccc5b6fea..e894e2fc360d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -386,7 +386,7 @@ static inline int gss_refresh_upcall(struct rpc_task *task) { struct rpc_cred *cred = task->tk_msg.rpc_cred; - struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth, + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); @@ -741,6 +741,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) * fail to flag the credential as RPCAUTH_CRED_UPTODATE. */ cred->gc_flags = 0; + cred->gc_base.cr_auth = auth; cred->gc_base.cr_ops = &gss_credops; cred->gc_base.cr_flags = RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 3df9fccab2f8..890bd9b3794b 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -133,6 +133,7 @@ struct rpc_credops null_credops = { static struct rpc_cred null_cred = { + .cr_auth = &null_auth, .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), .cr_flags = RPCAUTH_CRED_UPTODATE, diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4e7733aee36e..82300b83045e 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -92,6 +92,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (i < NFS_NGROUPS) cred->uc_gids[i] = NOGROUP; } + cred->uc_base.cr_auth = &unix_auth; cred->uc_base.cr_ops = &unix_credops; return (struct rpc_cred *) cred; -- cgit v1.2.3 From b185f835e243e654047ae85f42346827d3894171 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2007 10:14:14 -0400 Subject: SUNRPC: Remove the gss_auth spinlock We're just as well off using the inode spinlock instead. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 47 ++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e894e2fc360d..653d712a1ffb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -88,7 +88,6 @@ struct gss_auth { struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - spinlock_t lock; }; static void gss_destroy_ctx(struct gss_cl_ctx *); @@ -290,16 +289,17 @@ __gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) static inline struct gss_upcall_msg * gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) { + struct inode *inode = gss_auth->dentry->d_inode; struct gss_upcall_msg *old; - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); old = __gss_find_upcall(gss_auth, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); list_add(&gss_msg->list, &gss_auth->upcalls); } else gss_msg = old; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); return gss_msg; } @@ -318,10 +318,11 @@ static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { struct gss_auth *gss_auth = gss_msg->auth; + struct inode *inode = gss_auth->dentry->d_inode; - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); } static void @@ -330,16 +331,16 @@ gss_upcall_callback(struct rpc_task *task) struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; + struct inode *inode = gss_msg->auth->dentry->d_inode; - BUG_ON(gss_msg == NULL); if (gss_msg->ctx) gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); else task->tk_status = gss_msg->msg.errno; - spin_lock(&gss_msg->auth->lock); + spin_lock(&inode->i_lock); gss_cred->gc_upcall = NULL; rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); - spin_unlock(&gss_msg->auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); } @@ -391,6 +392,7 @@ gss_refresh_upcall(struct rpc_task *task) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; + struct inode *inode = gss_auth->dentry->d_inode; int err = 0; dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, @@ -400,7 +402,7 @@ gss_refresh_upcall(struct rpc_task *task) err = PTR_ERR(gss_msg); goto out; } - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -411,7 +413,7 @@ gss_refresh_upcall(struct rpc_task *task) rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); } else err = gss_msg->msg.errno; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -422,6 +424,7 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { + struct inode *inode = gss_auth->dentry->d_inode; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); @@ -435,12 +438,12 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) } for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); break; } - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); if (signalled()) { err = -ERESTARTSYS; goto out_intr; @@ -492,6 +495,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct gss_auth *gss_auth; struct rpc_cred *cred; struct gss_upcall_msg *gss_msg; + struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; uid_t uid; int err = -EFBIG; @@ -503,7 +507,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!buf) goto out; - clnt = RPC_I(filp->f_path.dentry->d_inode)->private; + clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; @@ -527,18 +531,18 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (err != -EACCES) goto err_put_ctx; } - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); gss_msg = __gss_find_upcall(gss_auth, uid); if (gss_msg) { if (err == 0 && gss_msg->ctx == NULL) gss_msg->ctx = gss_get_ctx(ctx); gss_msg->msg.errno = err; __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); } else { struct auth_cred acred = { .uid = uid }; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW); if (IS_ERR(cred)) { err = PTR_ERR(cred); @@ -570,7 +574,7 @@ gss_pipe_release(struct inode *inode) clnt = rpci->private; auth = clnt->cl_auth; gss_auth = container_of(auth, struct gss_auth, rpc_auth); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); while (!list_empty(&gss_auth->upcalls)) { struct gss_upcall_msg *gss_msg; @@ -579,11 +583,11 @@ gss_pipe_release(struct inode *inode) gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); } - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); } static void @@ -638,7 +642,6 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) if (gss_auth->service == 0) goto err_put_mech; INIT_LIST_HEAD(&gss_auth->upcalls); - spin_lock_init(&gss_auth->lock); auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; -- cgit v1.2.3 From 3b68aaeaf54065e5c44583a1d33ffb7793953ba4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2007 10:14:15 -0400 Subject: SUNRPC: Always match an upcall message in gss_pipe_downcall() It used to be possible for an rpc.gssd daemon to stuff the RPC credential cache for any rpc client simply by creating RPCSEC_GSS contexts and then doing downcalls. In practice, no daemons ever made use of this feature. Remove this feature now, since it will be impossible to figure out which mechanism a given context actually matches if we enable more than one gss mechanism to use the same upcall pipe. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 58 ++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 653d712a1ffb..e407a352440f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -306,8 +306,6 @@ gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { - if (list_empty(&gss_msg->list)) - return; list_del_init(&gss_msg->list); rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); wake_up_all(&gss_msg->waitqueue); @@ -320,8 +318,11 @@ gss_unhash_msg(struct gss_upcall_msg *gss_msg) struct gss_auth *gss_auth = gss_msg->auth; struct inode *inode = gss_auth->dentry->d_inode; + if (list_empty(&gss_msg->list)) + return; spin_lock(&inode->i_lock); - __gss_unhash_msg(gss_msg); + if (!list_empty(&gss_msg->list)) + __gss_unhash_msg(gss_msg); spin_unlock(&inode->i_lock); } @@ -493,12 +494,11 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) void *buf; struct rpc_clnt *clnt; struct gss_auth *gss_auth; - struct rpc_cred *cred; struct gss_upcall_msg *gss_msg; struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; uid_t uid; - int err = -EFBIG; + ssize_t err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) goto out; @@ -523,43 +523,39 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) ctx = gss_alloc_context(); if (ctx == NULL) goto err; - err = 0; + + err = -ENOENT; + /* Find a matching upcall */ gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); + spin_lock(&inode->i_lock); + gss_msg = __gss_find_upcall(gss_auth, uid); + if (gss_msg == NULL) { + spin_unlock(&inode->i_lock); + goto err_put_ctx; + } + list_del_init(&gss_msg->list); + spin_unlock(&inode->i_lock); + p = gss_fill_context(p, end, ctx, gss_auth->mech); if (IS_ERR(p)) { err = PTR_ERR(p); - if (err != -EACCES) - goto err_put_ctx; + gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN; + goto err_release_msg; } + gss_msg->ctx = gss_get_ctx(ctx); + err = mlen; + +err_release_msg: spin_lock(&inode->i_lock); - gss_msg = __gss_find_upcall(gss_auth, uid); - if (gss_msg) { - if (err == 0 && gss_msg->ctx == NULL) - gss_msg->ctx = gss_get_ctx(ctx); - gss_msg->msg.errno = err; - __gss_unhash_msg(gss_msg); - spin_unlock(&inode->i_lock); - gss_release_msg(gss_msg); - } else { - struct auth_cred acred = { .uid = uid }; - spin_unlock(&inode->i_lock); - cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW); - if (IS_ERR(cred)) { - err = PTR_ERR(cred); - goto err_put_ctx; - } - gss_cred_set_ctx(cred, gss_get_ctx(ctx)); - } - gss_put_ctx(ctx); - kfree(buf); - dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen); - return mlen; + __gss_unhash_msg(gss_msg); + spin_unlock(&inode->i_lock); + gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); err: kfree(buf); out: - dprintk("RPC: gss_pipe_downcall returning %d\n", err); + dprintk("RPC: gss_pipe_downcall returning %Zd\n", err); return err; } -- cgit v1.2.3 From 6e84c7b66a0aa0be16a7728d1e687c57978dac2c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2007 15:31:36 -0400 Subject: SUNRPC: Add a downcall queue to struct rpc_inode Currently, the downcall queue is tied to the struct gss_auth, which means that different RPCSEC_GSS pseudoflavours must use different upcall pipes. Add a list to struct rpc_inode that can be used instead. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 29 ++++++++++------------------- net/sunrpc/rpc_pipe.c | 1 + 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index ad293760f6eb..430cea104817 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -23,6 +23,7 @@ struct rpc_inode { void *private; struct list_head pipe; struct list_head in_upcall; + struct list_head in_downcall; int pipelen; int nreaders; int nwriters; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e407a352440f..50809086fa1b 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -85,7 +85,6 @@ struct gss_auth { struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; - struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; }; @@ -268,10 +267,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) +__gss_find_upcall(struct rpc_inode *rpci, uid_t uid) { struct gss_upcall_msg *pos; - list_for_each_entry(pos, &gss_auth->upcalls, list) { + list_for_each_entry(pos, &rpci->in_downcall, list) { if (pos->uid != uid) continue; atomic_inc(&pos->count); @@ -290,13 +289,14 @@ static inline struct gss_upcall_msg * gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) { struct inode *inode = gss_auth->dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); struct gss_upcall_msg *old; spin_lock(&inode->i_lock); - old = __gss_find_upcall(gss_auth, gss_msg->uid); + old = __gss_find_upcall(rpci, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); - list_add(&gss_msg->list, &gss_auth->upcalls); + list_add(&gss_msg->list, &rpci->in_downcall); } else gss_msg = old; spin_unlock(&inode->i_lock); @@ -493,7 +493,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct rpc_clnt *clnt; - struct gss_auth *gss_auth; struct gss_upcall_msg *gss_msg; struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; @@ -526,9 +525,8 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ - gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); spin_lock(&inode->i_lock); - gss_msg = __gss_find_upcall(gss_auth, uid); + gss_msg = __gss_find_upcall(RPC_I(inode), uid); if (gss_msg == NULL) { spin_unlock(&inode->i_lock); goto err_put_ctx; @@ -536,7 +534,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) list_del_init(&gss_msg->list); spin_unlock(&inode->i_lock); - p = gss_fill_context(p, end, ctx, gss_auth->mech); + p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { err = PTR_ERR(p); gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN; @@ -563,18 +561,12 @@ static void gss_pipe_release(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_clnt *clnt; - struct rpc_auth *auth; - struct gss_auth *gss_auth; + struct gss_upcall_msg *gss_msg; - clnt = rpci->private; - auth = clnt->cl_auth; - gss_auth = container_of(auth, struct gss_auth, rpc_auth); spin_lock(&inode->i_lock); - while (!list_empty(&gss_auth->upcalls)) { - struct gss_upcall_msg *gss_msg; + while (!list_empty(&rpci->in_downcall)) { - gss_msg = list_entry(gss_auth->upcalls.next, + gss_msg = list_entry(rpci->in_downcall.next, struct gss_upcall_msg, list); gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); @@ -637,7 +629,6 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; - INIT_LIST_HEAD(&gss_auth->upcalls); auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 826190dacfce..2320f1e42da4 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -833,6 +833,7 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) rpci->nreaders = 0; rpci->nwriters = 0; INIT_LIST_HEAD(&rpci->in_upcall); + INIT_LIST_HEAD(&rpci->in_downcall); INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); -- cgit v1.2.3 From 34f308960818e514fadd9365cb5b14cca319320b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2007 18:28:02 -0400 Subject: SUNRPC: Enable non-exclusive create in rpc_mkpipe() Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2320f1e42da4..ebcb8053836e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -621,7 +621,7 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) } static struct dentry * -rpc_lookup_create(struct dentry *parent, const char *name, int len) +rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive) { struct inode *dir = parent->d_inode; struct dentry *dentry; @@ -630,7 +630,7 @@ rpc_lookup_create(struct dentry *parent, const char *name, int len) dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; - if (dentry->d_inode) { + if (dentry->d_inode && exclusive) { dput(dentry); dentry = ERR_PTR(-EEXIST); goto out_err; @@ -649,7 +649,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) if ((error = rpc_lookup_parent(path, nd)) != 0) return ERR_PTR(error); - dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); + dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1); if (IS_ERR(dentry)) rpc_release_path(nd); return dentry; @@ -716,10 +716,20 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi struct inode *dir, *inode; struct rpc_inode *rpci; - dentry = rpc_lookup_create(parent, name, strlen(name)); + dentry = rpc_lookup_create(parent, name, strlen(name), 0); if (IS_ERR(dentry)) return dentry; dir = parent->d_inode; + if (dentry->d_inode) { + rpci = RPC_I(dentry->d_inode); + if (rpci->private != private || + rpci->ops != ops || + rpci->flags != flags) { + dput (dentry); + dentry = ERR_PTR(-EBUSY); + } + goto out; + } inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; -- cgit v1.2.3 From 62e1761ceff5dbb78c4b4b9486d8ca9fed11b660 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2007 14:14:46 -0400 Subject: SUNRPC: Clean up rpc_pipefs. Add a dentry_ops with a d_delete() method in order to ensure that dentries are removed as soon as the last reference is gone. Clean up rpc_depopulate() so that it only removes files that were created via rpc_populate(). Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 59 ++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ebcb8053836e..e5fd796e897e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -448,6 +448,15 @@ void rpc_put_mount(void) simple_release_fs(&rpc_mount, &rpc_mount_count); } +static int rpc_delete_dentry(struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations rpc_dentry_operations = { + .d_delete = rpc_delete_dentry, +}; + static int rpc_lookup_parent(char *path, struct nameidata *nd) { @@ -506,7 +515,7 @@ rpc_get_inode(struct super_block *sb, int mode) * FIXME: This probably has races. */ static void -rpc_depopulate(struct dentry *parent) +rpc_depopulate(struct dentry *parent, int start, int eof) { struct inode *dir = parent->d_inode; struct list_head *pos, *next; @@ -518,6 +527,10 @@ repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { dentry = list_entry(pos, struct dentry, d_u.d_child); + if (!dentry->d_inode || + dentry->d_inode->i_ino < start || + dentry->d_inode->i_ino >= eof) + continue; spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { dget_locked(dentry); @@ -533,11 +546,11 @@ repeat: if (n) { do { dentry = dvec[--n]; - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); + if (S_ISREG(dentry->d_inode->i_mode)) simple_unlink(dir, dentry); - } - inode_dir_notify(dir, DN_DELETE); + else if (S_ISDIR(dentry->d_inode->i_mode)) + simple_rmdir(dir, dentry); + d_delete(dentry); dput(dentry); } while (n); goto repeat; @@ -560,6 +573,7 @@ rpc_populate(struct dentry *parent, dentry = d_alloc_name(parent, files[i].name); if (!dentry) goto out_bad; + dentry->d_op = &rpc_dentry_operations; mode = files[i].mode; inode = rpc_get_inode(dir->i_sb, mode); if (!inode) { @@ -607,17 +621,10 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) { int error; - - shrink_dcache_parent(dentry); - if (d_unhashed(dentry)) - return 0; - if ((error = simple_rmdir(dir, dentry)) != 0) - return error; - if (!error) { - inode_dir_notify(dir, DN_DELETE); - d_drop(dentry); - } - return 0; + error = simple_rmdir(dir, dentry); + if (!error) + d_delete(dentry); + return error; } static struct dentry * @@ -630,7 +637,9 @@ rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusiv dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; - if (dentry->d_inode && exclusive) { + if (!dentry->d_inode) + dentry->d_op = &rpc_dentry_operations; + else if (exclusive) { dput(dentry); dentry = ERR_PTR(-EEXIST); goto out_err; @@ -681,7 +690,7 @@ out: rpc_release_path(&nd); return dentry; err_depopulate: - rpc_depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); __rpc_rmdir(dir, dentry); err_dput: dput(dentry); @@ -701,7 +710,7 @@ rpc_rmdir(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - rpc_depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); error = __rpc_rmdir(dir, dentry); dput(dentry); mutex_unlock(&dir->i_mutex); @@ -764,14 +773,10 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (!d_unhashed(dentry)) { - d_drop(dentry); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); - } - inode_dir_notify(dir, DN_DELETE); - } + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + if (!error) + d_delete(dentry); dput(dentry); mutex_unlock(&dir->i_mutex); dput(parent); -- cgit v1.2.3 From 03a1256f06cf1f58e33971fb4a524479e75c200e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2007 14:14:53 -0400 Subject: SUNRPC: Add a field to track the number of kernel users of an rpc_pipe This allows us to correctly deduce when we need to remove the pipe. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 1 + net/sunrpc/rpc_pipe.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 430cea104817..51b977a4ca20 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -27,6 +27,7 @@ struct rpc_inode { int pipelen; int nreaders; int nwriters; + int nkern_readwriters; wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e5fd796e897e..e787b6a43eee 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -737,6 +737,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi dput (dentry); dentry = ERR_PTR(-EBUSY); } + rpci->nkern_readwriters++; goto out; } inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); @@ -749,6 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi rpci->private = private; rpci->flags = flags; rpci->ops = ops; + rpci->nkern_readwriters = 1; inode_dir_notify(dir, DN_CREATE); dget(dentry); out: @@ -773,10 +775,12 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); - if (!error) - d_delete(dentry); + if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + if (!error) + d_delete(dentry); + } dput(dentry); mutex_unlock(&dir->i_mutex); dput(parent); -- cgit v1.2.3 From 3ab9bb7243489f9db3abf3d05521ddfc6b184c0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 15:41:42 -0400 Subject: SUNRPC: Fix a memory leak in the auth credcache code The leak only affects the RPCSEC_GSS caches, since they are the only ones that are dynamically allocated... Rename the existing rpcauth_free_credcache() to rpcauth_clear_credcache() in order to better describe its role, then add a new function rpcauth_destroy_credcache() that actually frees the cache in addition to clearing it out. Also move the call to destroy the credcache in gss_destroy() to come before the rpc upcall pipe is unlinked. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 3 ++- net/sunrpc/auth.c | 18 ++++++++++++++++-- net/sunrpc/auth_gss/auth_gss.c | 3 ++- net/sunrpc/auth_unix.c | 2 +- net/sunrpc/sunrpc_syms.c | 2 +- 5 files changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 8ef27afeea73..3972b8414c88 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -143,7 +143,8 @@ int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); int rpcauth_init_credcache(struct rpc_auth *, unsigned long); -void rpcauth_free_credcache(struct rpc_auth *); +void rpcauth_destroy_credcache(struct rpc_auth *); +void rpcauth_clear_credcache(struct rpc_cred_cache *); static inline struct rpc_cred * get_rpccred(struct rpc_cred *cred) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 9527f2bb1744..f6b6c81cbc3e 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -137,9 +137,8 @@ void rpcauth_destroy_credlist(struct hlist_head *head) * that are not referenced. */ void -rpcauth_free_credcache(struct rpc_auth *auth) +rpcauth_clear_credcache(struct rpc_cred_cache *cache) { - struct rpc_cred_cache *cache = auth->au_credcache; HLIST_HEAD(free); struct hlist_node *pos, *next; struct rpc_cred *cred; @@ -157,6 +156,21 @@ rpcauth_free_credcache(struct rpc_auth *auth) rpcauth_destroy_credlist(&free); } +/* + * Destroy the RPC credential cache + */ +void +rpcauth_destroy_credcache(struct rpc_auth *auth) +{ + struct rpc_cred_cache *cache = auth->au_credcache; + + if (cache) { + auth->au_credcache = NULL; + rpcauth_clear_credcache(cache); + kfree(cache); + } +} + static void rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 50809086fa1b..8b4c02f8befb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -665,12 +665,13 @@ gss_destroy(struct rpc_auth *auth) dprintk("RPC: destroying GSS authenticator %p flavor %d\n", auth, auth->au_flavor); + rpcauth_destroy_credcache(auth); + gss_auth = container_of(auth, struct gss_auth, rpc_auth); rpc_unlink(gss_auth->dentry); gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); - rpcauth_free_credcache(auth); kfree(gss_auth); module_put(THIS_MODULE); } diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 82300b83045e..5622783011a4 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -50,7 +50,7 @@ static void unx_destroy(struct rpc_auth *auth) { dprintk("RPC: destroying UNIX authenticator %p\n", auth); - rpcauth_free_credcache(auth); + rpcauth_clear_credcache(auth->au_credcache); } /* diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index b99b11b11461..3e19e7af6799 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -57,7 +57,7 @@ EXPORT_SYMBOL(rpcauth_unregister); EXPORT_SYMBOL(rpcauth_create); EXPORT_SYMBOL(rpcauth_lookupcred); EXPORT_SYMBOL(rpcauth_lookup_credcache); -EXPORT_SYMBOL(rpcauth_free_credcache); +EXPORT_SYMBOL(rpcauth_destroy_credcache); EXPORT_SYMBOL(rpcauth_init_credcache); EXPORT_SYMBOL(put_rpccred); -- cgit v1.2.3 From 5c9cfc828ae34e19dabbdb9f2861b8c920454047 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Jun 2007 15:24:29 -0400 Subject: SUNRPC: Fix a typo in unx_create() We want to set the unix_cred_cache.nextgc on the first call to unx_create(), which should be when unix_auth.au_count === 1 Signed-off-by: Trond Myklebust --- net/sunrpc/auth_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 5622783011a4..e54782e75d59 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -41,7 +41,7 @@ unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); - if (atomic_inc_return(&unix_auth.au_count) == 0) + if (atomic_inc_return(&unix_auth.au_count) == 1) unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1); return &unix_auth; } -- cgit v1.2.3 From 07a2bf1da4765d987ffd1d8045e92ba032e0ad78 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 15:42:01 -0400 Subject: SUNRPC: Fix a memory leak in gss_create() Fix a memory leak in gss_create() whereby the rpc credcache was not being freed if the rpc_mkpipe() call failed. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 8b4c02f8befb..459dc9b1d1ad 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -636,10 +636,6 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) auth->au_flavor = flavor; atomic_set(&auth->au_count, 1); - err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); - if (err) - goto err_put_mech; - gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(gss_auth->dentry)) { @@ -647,7 +643,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } + err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); + if (err) + goto err_unlink_pipe; + return auth; +err_unlink_pipe: + rpc_unlink(gss_auth->dentry); err_put_mech: gss_mech_put(gss_auth->mech); err_free: -- cgit v1.2.3 From fc1b356f566fe05929ec2a88ce2c7b15f8b6279f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 16:15:46 -0400 Subject: SUNRPC: Fix races in rpcauth_create See the FIXME: auth_flavors[] really needs a lock and module refcounting. Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f6b6c81cbc3e..584f24311a80 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -18,6 +18,7 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +static DEFINE_SPINLOCK(rpc_authflavor_lock); static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { &authnull_ops, /* AUTH_NULL */ &authunix_ops, /* AUTH_UNIX */ @@ -35,26 +36,34 @@ int rpcauth_register(struct rpc_authops *ops) { rpc_authflavor_t flavor; + int ret = -EPERM; if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) return -EINVAL; - if (auth_flavors[flavor] != NULL) - return -EPERM; /* what else? */ - auth_flavors[flavor] = ops; - return 0; + spin_lock(&rpc_authflavor_lock); + if (auth_flavors[flavor] == NULL) { + auth_flavors[flavor] = ops; + ret = 0; + } + spin_unlock(&rpc_authflavor_lock); + return ret; } int rpcauth_unregister(struct rpc_authops *ops) { rpc_authflavor_t flavor; + int ret = -EPERM; if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) return -EINVAL; - if (auth_flavors[flavor] != ops) - return -EPERM; /* what else? */ - auth_flavors[flavor] = NULL; - return 0; + spin_lock(&rpc_authflavor_lock); + if (auth_flavors[flavor] == ops) { + auth_flavors[flavor] = NULL; + ret = 0; + } + spin_unlock(&rpc_authflavor_lock); + return ret; } struct rpc_auth * @@ -68,15 +77,19 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) if (flavor >= RPC_AUTH_MAXFLAVOR) goto out; - /* FIXME - auth_flavors[] really needs an rw lock, - * and module refcounting. */ #ifdef CONFIG_KMOD if ((ops = auth_flavors[flavor]) == NULL) request_module("rpc-auth-%u", flavor); #endif - if ((ops = auth_flavors[flavor]) == NULL) + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); goto out; + } + spin_unlock(&rpc_authflavor_lock); auth = ops->create(clnt, pseudoflavor); + module_put(ops->owner); if (IS_ERR(auth)) return auth; if (clnt->cl_auth) -- cgit v1.2.3 From 64c91a1f1c8bc4295fd6b90df8adf911a7dd64f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 10:17:16 -0400 Subject: SUNRPC: Make rpc_ping() static Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - net/sunrpc/clnt.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a451351c7eff..a0e51e193284 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -136,7 +136,6 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); -int rpc_ping(struct rpc_clnt *clnt, int flags); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 76eef19cf995..4e91f3110938 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -72,6 +72,8 @@ static void call_connect_status(struct rpc_task *task); static __be32 * call_header(struct rpc_task *task); static __be32 * call_verify(struct rpc_task *task); +static int rpc_ping(struct rpc_clnt *clnt, int flags); + static void rpc_register_client(struct rpc_clnt *clnt) { spin_lock(&rpc_client_lock); @@ -1441,7 +1443,7 @@ static struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -int rpc_ping(struct rpc_clnt *clnt, int flags) +static int rpc_ping(struct rpc_clnt *clnt, int flags) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, -- cgit v1.2.3 From 5e1550d6a2c2dd33ff0ca5febefd8e9c65c6ca1e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 10:17:16 -0400 Subject: SUNRPC: Add the helper function 'rpc_call_null()' Does a NULL RPC call and returns a pointer to the resulting rpc_task. The call may be either synchronous or asynchronous. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/clnt.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a0e51e193284..097984b03857 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -130,6 +130,8 @@ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, void *calldata); int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags); +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, + int flags); void rpc_restart_call(struct rpc_task *); void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4e91f3110938..5a28ffac99ea 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1455,6 +1455,16 @@ static int rpc_ping(struct rpc_clnt *clnt, int flags) return err; } +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null, + .rpc_cred = cred, + }; + return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL); +} +EXPORT_SYMBOL(rpc_call_null); + #ifdef RPC_DEBUG void rpc_show_tasks(void) { -- cgit v1.2.3 From de7a8ce38aea529876db3890b61947bc4bc004da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 10:46:47 -0400 Subject: SUNRPC: Rename rpcauth_destroy() to rpcauth_release() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 +- net/sunrpc/auth.c | 4 ++-- net/sunrpc/clnt.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 3972b8414c88..bc77c730325c 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -128,7 +128,7 @@ extern struct rpc_authops authdes_ops; int rpcauth_register(struct rpc_authops *); int rpcauth_unregister(struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); -void rpcauth_destroy(struct rpc_auth *); +void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_bindcred(struct rpc_task *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 584f24311a80..1686dc74c6a9 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -93,7 +93,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) if (IS_ERR(auth)) return auth; if (clnt->cl_auth) - rpcauth_destroy(clnt->cl_auth); + rpcauth_release(clnt->cl_auth); clnt->cl_auth = auth; out: @@ -101,7 +101,7 @@ out: } void -rpcauth_destroy(struct rpc_auth *auth) +rpcauth_release(struct rpc_auth *auth) { if (!atomic_dec_and_test(&auth->au_count)) return; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a28ffac99ea..98df44e453fe 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -353,7 +353,7 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (clnt->cl_auth) { - rpcauth_destroy(clnt->cl_auth); + rpcauth_release(clnt->cl_auth); clnt->cl_auth = NULL; } if (!IS_ERR(clnt->cl_dentry)) { -- cgit v1.2.3 From f1c0a8615090359d57e096157feb9f900cbb233c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 20:17:58 -0400 Subject: SUNRPC: Mark auth and cred operation tables as constant. Also do the same for gss_api operation tables. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 15 ++++++--------- include/linux/sunrpc/gss_api.h | 2 +- net/sunrpc/auth.c | 8 ++++---- net/sunrpc/auth_gss/auth_gss.c | 8 ++++---- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- net/sunrpc/auth_gss/gss_spkm3_mech.c | 2 +- net/sunrpc/auth_null.c | 4 ++-- net/sunrpc/auth_unix.c | 6 +++--- 8 files changed, 22 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index bc77c730325c..e606c2804685 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -35,7 +35,7 @@ struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ struct rpc_auth * cr_auth; - struct rpc_credops * cr_ops; + const struct rpc_credops *cr_ops; unsigned long cr_expire; /* when to gc */ atomic_t cr_count; /* ref count */ unsigned short cr_flags; /* various flags */ @@ -73,7 +73,7 @@ struct rpc_auth { unsigned int au_verfsize; unsigned int au_flags; /* various flags */ - struct rpc_authops * au_ops; /* operations */ + const struct rpc_authops *au_ops; /* operations */ rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in * au_ops->au_flavor in gss @@ -119,14 +119,11 @@ struct rpc_credops { void *, __be32 *, void *); }; -extern struct rpc_authops authunix_ops; -extern struct rpc_authops authnull_ops; -#ifdef CONFIG_SUNRPC_SECURE -extern struct rpc_authops authdes_ops; -#endif +extern const struct rpc_authops authunix_ops; +extern const struct rpc_authops authnull_ops; -int rpcauth_register(struct rpc_authops *); -int rpcauth_unregister(struct rpc_authops *); +int rpcauth_register(const struct rpc_authops *); +int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 5eca9e442051..bbac101ac372 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -77,7 +77,7 @@ struct gss_api_mech { struct module *gm_owner; struct xdr_netobj gm_oid; char *gm_name; - struct gss_api_ops *gm_ops; + const struct gss_api_ops *gm_ops; /* pseudoflavors supported by this mechanism: */ int gm_pf_num; struct pf_desc * gm_pfs; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 1686dc74c6a9..d3f0f944c0b5 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -19,7 +19,7 @@ #endif static DEFINE_SPINLOCK(rpc_authflavor_lock); -static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { +static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { &authnull_ops, /* AUTH_NULL */ &authunix_ops, /* AUTH_UNIX */ NULL, /* others can be loadable modules */ @@ -33,7 +33,7 @@ pseudoflavor_to_flavor(u32 flavor) { } int -rpcauth_register(struct rpc_authops *ops) +rpcauth_register(const struct rpc_authops *ops) { rpc_authflavor_t flavor; int ret = -EPERM; @@ -50,7 +50,7 @@ rpcauth_register(struct rpc_authops *ops) } int -rpcauth_unregister(struct rpc_authops *ops) +rpcauth_unregister(const struct rpc_authops *ops) { rpc_authflavor_t flavor; int ret = -EPERM; @@ -70,7 +70,7 @@ struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { struct rpc_auth *auth; - struct rpc_authops *ops; + const struct rpc_authops *ops; u32 flavor = pseudoflavor_to_flavor(pseudoflavor); auth = ERR_PTR(-EINVAL); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 459dc9b1d1ad..177a9e413c0a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -54,9 +54,9 @@ #include #include -static struct rpc_authops authgss_ops; +static const struct rpc_authops authgss_ops; -static struct rpc_credops gss_credops; +static const struct rpc_credops gss_credops; #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -1193,7 +1193,7 @@ out: return status; } -static struct rpc_authops authgss_ops = { +static const struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, #ifdef RPC_DEBUG @@ -1205,7 +1205,7 @@ static struct rpc_authops authgss_ops = { .crcreate = gss_create_cred }; -static struct rpc_credops gss_credops = { +static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 7b1943217053..71b9daefdff3 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -201,7 +201,7 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { kfree(kctx); } -static struct gss_api_ops gss_kerberos_ops = { +static const struct gss_api_ops gss_kerberos_ops = { .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, .gss_verify_mic = gss_verify_mic_kerberos, diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 7e15aa68ae64..577d590e755f 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -202,7 +202,7 @@ gss_get_mic_spkm3(struct gss_ctx *ctx, return err; } -static struct gss_api_ops gss_spkm3_ops = { +static const struct gss_api_ops gss_spkm3_ops = { .gss_import_sec_context = gss_import_sec_context_spkm3, .gss_get_mic = gss_get_mic_spkm3, .gss_verify_mic = gss_verify_mic_spkm3, diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 890bd9b3794b..fe9b6aaf91eb 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -101,7 +101,7 @@ nul_validate(struct rpc_task *task, __be32 *p) return p; } -struct rpc_authops authnull_ops = { +const struct rpc_authops authnull_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_NULL, #ifdef RPC_DEBUG @@ -122,7 +122,7 @@ struct rpc_auth null_auth = { }; static -struct rpc_credops null_credops = { +const struct rpc_credops null_credops = { .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, .crmatch = nul_match, diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index e54782e75d59..6600c7ad72a9 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -34,7 +34,7 @@ struct unx_cred { static struct rpc_auth unix_auth; static struct rpc_cred_cache unix_cred_cache; -static struct rpc_credops unix_credops; +static const struct rpc_credops unix_credops; static struct rpc_auth * unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) @@ -205,7 +205,7 @@ unx_validate(struct rpc_task *task, __be32 *p) return p; } -struct rpc_authops authunix_ops = { +const struct rpc_authops authunix_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_UNIX, #ifdef RPC_DEBUG @@ -233,7 +233,7 @@ struct rpc_auth unix_auth = { }; static -struct rpc_credops unix_credops = { +const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, .crmatch = unx_match, -- cgit v1.2.3 From 5fe4755e2526a2aa82b7ed8daeb3aed74a236925 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 19:55:31 -0400 Subject: SUNRPC: Clean up rpc credential initialisation Add a helper rpc_cred_init() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 1 + include/linux/sunrpc/auth_gss.h | 5 ----- net/sunrpc/auth.c | 24 ++++++++++++++++++------ net/sunrpc/auth_gss/auth_gss.c | 6 +----- net/sunrpc/auth_unix.c | 10 ++-------- 5 files changed, 22 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index e606c2804685..d5bfc67461fc 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -127,6 +127,7 @@ int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); +void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_bindcred(struct rpc_task *); void rpcauth_holdcred(struct rpc_task *); diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 2db2fbf34947..0bd1d06777b9 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -85,11 +85,6 @@ struct gss_cred { struct gss_upcall_msg *gc_upcall; }; -#define gc_uid gc_base.cr_uid -#define gc_count gc_base.cr_count -#define gc_flags gc_base.cr_flags -#define gc_expire gc_base.cr_expire - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index d3f0f944c0b5..2156327da45b 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -264,13 +264,9 @@ retry: if (!cred) { new = auth->au_ops->crcreate(auth, acred, flags); - if (!IS_ERR(new)) { -#ifdef RPC_DEBUG - new->cr_magic = RPCAUTH_CRED_MAGIC; -#endif + if (!IS_ERR(new)) goto retry; - } else - cred = new; + cred = new; } else if ((cred->cr_flags & RPCAUTH_CRED_NEW) && cred->cr_ops->cr_init != NULL && !(flags & RPCAUTH_LOOKUP_NEW)) { @@ -302,6 +298,22 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) return ret; } +void +rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, + struct rpc_auth *auth, const struct rpc_credops *ops) +{ + INIT_HLIST_NODE(&cred->cr_hash); + atomic_set(&cred->cr_count, 1); + cred->cr_auth = auth; + cred->cr_ops = ops; + cred->cr_expire = jiffies; +#ifdef RPC_DEBUG + cred->cr_magic = RPCAUTH_CRED_MAGIC; +#endif + cred->cr_uid = acred->uid; +} +EXPORT_SYMBOL(rpcauth_init_cred); + struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 177a9e413c0a..766de0a41b22 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -727,15 +727,11 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) goto out_err; - atomic_set(&cred->gc_count, 1); - cred->gc_uid = acred->uid; + rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); /* * Note: in order to force a call to call_refresh(), we deliberately * fail to flag the credential as RPCAUTH_CRED_UPTODATE. */ - cred->gc_flags = 0; - cred->gc_base.cr_auth = auth; - cred->gc_base.cr_ops = &gss_credops; cred->gc_base.cr_flags = RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; return &cred->gc_base; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 6600c7ad72a9..2f1bdb5c86b3 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -20,9 +20,6 @@ struct unx_cred { gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid -#define uc_count uc_base.cr_count -#define uc_flags uc_base.cr_flags -#define uc_expire uc_base.cr_expire #define UNX_CRED_EXPIRE (60 * HZ) @@ -74,8 +71,8 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) return ERR_PTR(-ENOMEM); - atomic_set(&cred->uc_count, 1); - cred->uc_flags = RPCAUTH_CRED_UPTODATE; + rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); + cred->uc_base.cr_flags = RPCAUTH_CRED_UPTODATE; if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { cred->uc_uid = 0; cred->uc_gid = 0; @@ -85,15 +82,12 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; - cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; for (i = 0; i < groups; i++) cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) cred->uc_gids[i] = NOGROUP; } - cred->uc_base.cr_auth = &unix_auth; - cred->uc_base.cr_ops = &unix_credops; return (struct rpc_cred *) cred; } -- cgit v1.2.3 From 696e38df9d1b256e97b077ecde7afb8dd60364fd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Jun 2007 09:48:25 -0400 Subject: SUNRPC: replace casts in auth_unix.c with container_of() Signed-off-by: Trond Myklebust --- net/sunrpc/auth_unix.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 2f1bdb5c86b3..f17dabbab1c7 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -89,12 +89,14 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) cred->uc_gids[i] = NOGROUP; } - return (struct rpc_cred *) cred; + return &cred->uc_base; } static void -unx_destroy_cred(struct rpc_cred *cred) +unx_destroy_cred(struct rpc_cred *rcred) { + struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); + kfree(cred); } @@ -106,7 +108,7 @@ unx_destroy_cred(struct rpc_cred *cred) static int unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) { - struct unx_cred *cred = (struct unx_cred *) rcred; + struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); int i; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { @@ -137,7 +139,7 @@ static __be32 * unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; - struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; + struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); __be32 *base, *hold; int i; -- cgit v1.2.3 From fc432dd90760a629c57026e57f65ff80a1a31d2f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Jun 2007 10:15:15 -0400 Subject: SUNRPC: Enforce atomic updates of rpc_cred->cr_flags Convert to the use of atomic bitops... Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 10 +++++----- net/sunrpc/auth.c | 22 ++++++++++++---------- net/sunrpc/auth_gss/auth_gss.c | 22 +++++++++++----------- net/sunrpc/auth_null.c | 4 ++-- net/sunrpc/auth_unix.c | 4 ++-- 5 files changed, 32 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index d5bfc67461fc..8586503d5ebd 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -36,19 +36,19 @@ struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; - unsigned long cr_expire; /* when to gc */ - atomic_t cr_count; /* ref count */ - unsigned short cr_flags; /* various flags */ #ifdef RPC_DEBUG unsigned long cr_magic; /* 0x0f4aa4f0 */ #endif + unsigned long cr_expire; /* when to gc */ + unsigned long cr_flags; /* various flags */ + atomic_t cr_count; /* ref count */ uid_t cr_uid; /* per-flavor data */ }; -#define RPCAUTH_CRED_NEW 0x0001 -#define RPCAUTH_CRED_UPTODATE 0x0002 +#define RPCAUTH_CRED_NEW 0 +#define RPCAUTH_CRED_UPTODATE 1 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 2156327da45b..4d7c78b05d1e 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -190,8 +190,8 @@ rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist if (atomic_read(&cred->cr_count) != 1) return; if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire)) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) { + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) { __hlist_del(&cred->cr_hash); hlist_add_head(&cred->cr_hash, free); } @@ -267,7 +267,7 @@ retry: if (!IS_ERR(new)) goto retry; cred = new; - } else if ((cred->cr_flags & RPCAUTH_CRED_NEW) + } else if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && cred->cr_ops->cr_init != NULL && !(flags & RPCAUTH_LOOKUP_NEW)) { int res = cred->cr_ops->cr_init(auth, cred); @@ -440,17 +440,19 @@ rpcauth_refreshcred(struct rpc_task *task) void rpcauth_invalcred(struct rpc_task *task) { + struct rpc_cred *cred = task->tk_msg.rpc_cred; + dprintk("RPC: %5u invalidating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred); - spin_lock(&rpc_credcache_lock); - if (task->tk_msg.rpc_cred) - task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - spin_unlock(&rpc_credcache_lock); + task->tk_pid, task->tk_auth->au_ops->au_name, cred); + if (cred) + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } int rpcauth_uptodatecred(struct rpc_task *task) { - return !(task->tk_msg.rpc_cred) || - (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); + struct rpc_cred *cred = task->tk_msg.rpc_cred; + + return cred == NULL || + test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 766de0a41b22..55c47ae0a258 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -114,8 +114,8 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) write_lock(&gss_ctx_lock); old = gss_cred->gc_ctx; gss_cred->gc_ctx = ctx; - cred->cr_flags |= RPCAUTH_CRED_UPTODATE; - cred->cr_flags &= ~RPCAUTH_CRED_NEW; + set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); write_unlock(&gss_ctx_lock); if (old) gss_put_ctx(old); @@ -128,7 +128,7 @@ gss_cred_is_uptodate_ctx(struct rpc_cred *cred) int res = 0; read_lock(&gss_ctx_lock); - if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx) res = 1; read_unlock(&gss_ctx_lock); return res; @@ -732,7 +732,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) * Note: in order to force a call to call_refresh(), we deliberately * fail to flag the credential as RPCAUTH_CRED_UPTODATE. */ - cred->gc_base.cr_flags = RPCAUTH_CRED_NEW; + cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; return &cred->gc_base; @@ -764,7 +764,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) * we don't really care if the credential has expired or not, * since the caller should be prepared to reinitialise it. */ - if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW)) + if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) @@ -820,7 +820,7 @@ gss_marshal(struct rpc_task *task, __be32 *p) mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) { - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } else if (maj_stat != 0) { printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); goto out_put_ctx; @@ -873,7 +873,7 @@ gss_validate(struct rpc_task *task, __be32 *p) maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) goto out_bad; /* We leave it to unwrap to calculate au_rslack. For now we just @@ -927,7 +927,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); status = -EIO; /* XXX? */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) return status; q = xdr_encode_opaque(p, NULL, mic.len); @@ -1026,7 +1026,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) return status; @@ -1113,7 +1113,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) return status; return 0; @@ -1138,7 +1138,7 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) return status; if (ntohl(*(*p)++) != rqstp->rq_seqno) diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index fe9b6aaf91eb..6c905fb11c5d 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -76,7 +76,7 @@ nul_marshal(struct rpc_task *task, __be32 *p) static int nul_refresh(struct rpc_task *task) { - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); return 0; } @@ -136,7 +136,7 @@ struct rpc_cred null_cred = { .cr_auth = &null_auth, .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), - .cr_flags = RPCAUTH_CRED_UPTODATE, + .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, #ifdef RPC_DEBUG .cr_magic = RPCAUTH_CRED_MAGIC, #endif diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index f17dabbab1c7..29d50ffa69d6 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -72,7 +72,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) return ERR_PTR(-ENOMEM); rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); - cred->uc_base.cr_flags = RPCAUTH_CRED_UPTODATE; + cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { cred->uc_uid = 0; cred->uc_gid = 0; @@ -172,7 +172,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) static int unx_refresh(struct rpc_task *task) { - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); return 0; } -- cgit v1.2.3 From e092bdcd939416ef911090890096fe07d0281a5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 19:45:36 -0400 Subject: SUNRPC: cleanup rpc credential cache garbage collection Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 1 + net/sunrpc/auth.c | 121 ++++++++++++++++++++++++++------------------ net/sunrpc/auth_null.c | 1 + 3 files changed, 74 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 8586503d5ebd..4e78f0c5f014 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -34,6 +34,7 @@ struct rpc_auth; struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ + struct list_head cr_lru; /* lru garbage collection */ struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; #ifdef RPC_DEBUG diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 4d7c78b05d1e..00f9649b0901 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -25,6 +25,8 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { NULL, /* others can be loadable modules */ }; +static LIST_HEAD(cred_unused); + static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor >= RPC_AUTH_MAXFLAVOR) @@ -134,13 +136,13 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) * Destroy a list of credentials */ static inline -void rpcauth_destroy_credlist(struct hlist_head *head) +void rpcauth_destroy_credlist(struct list_head *head) { struct rpc_cred *cred; - while (!hlist_empty(head)) { - cred = hlist_entry(head->first, struct rpc_cred, cr_hash); - hlist_del_init(&cred->cr_hash); + while (!list_empty(head)) { + cred = list_entry(head->next, struct rpc_cred, cr_lru); + list_del_init(&cred->cr_lru); put_rpccred(cred); } } @@ -152,17 +154,20 @@ void rpcauth_destroy_credlist(struct hlist_head *head) void rpcauth_clear_credcache(struct rpc_cred_cache *cache) { - HLIST_HEAD(free); - struct hlist_node *pos, *next; + LIST_HEAD(free); + struct hlist_head *head; struct rpc_cred *cred; int i; spin_lock(&rpc_credcache_lock); for (i = 0; i < RPC_CREDCACHE_NR; i++) { - hlist_for_each_safe(pos, next, &cache->hashtable[i]) { - cred = hlist_entry(pos, struct rpc_cred, cr_hash); - __hlist_del(&cred->cr_hash); - hlist_add_head(&cred->cr_hash, &free); + head = &cache->hashtable[i]; + while (!hlist_empty(head)) { + cred = hlist_entry(head->first, struct rpc_cred, cr_hash); + get_rpccred(cred); + list_move_tail(&cred->cr_lru, &free); + smp_wmb(); + hlist_del_init(&cred->cr_hash); } } spin_unlock(&rpc_credcache_lock); @@ -184,38 +189,39 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) } } +/* + * Remove stale credentials. Avoid sleeping inside the loop. + */ static void -rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free) +rpcauth_prune_expired(struct list_head *free) { - if (atomic_read(&cred->cr_count) != 1) - return; - if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire)) - clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) { - __hlist_del(&cred->cr_hash); - hlist_add_head(&cred->cr_hash, free); + struct rpc_cred *cred; + + while (!list_empty(&cred_unused)) { + cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); + if (time_after(jiffies, cred->cr_expire + + cred->cr_auth->au_credcache->expire)) + break; + list_del_init(&cred->cr_lru); + if (atomic_read(&cred->cr_count) != 0) + continue; + get_rpccred(cred); + list_add_tail(&cred->cr_lru, free); + smp_wmb(); + hlist_del_init(&cred->cr_hash); } } /* - * Remove stale credentials. Avoid sleeping inside the loop. + * Run garbage collector. */ static void -rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free) +rpcauth_gc_credcache(struct rpc_cred_cache *cache, struct list_head *free) { - struct rpc_cred_cache *cache = auth->au_credcache; - struct hlist_node *pos, *next; - struct rpc_cred *cred; - int i; - - dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth); - for (i = 0; i < RPC_CREDCACHE_NR; i++) { - hlist_for_each_safe(pos, next, &cache->hashtable[i]) { - cred = hlist_entry(pos, struct rpc_cred, cr_hash); - rpcauth_prune_expired(auth, cred, free); - } - } + if (time_before(jiffies, cache->nextgc)) + return; cache->nextgc = jiffies + cache->expire; + rpcauth_prune_expired(free); } /* @@ -225,39 +231,35 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, int flags) { + LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; - HLIST_HEAD(free); - struct hlist_node *pos, *next; + struct hlist_node *pos; struct rpc_cred *new = NULL, - *cred = NULL; + *cred = NULL, + *entry; int nr = 0; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) nr = acred->uid & RPC_CREDCACHE_MASK; retry: spin_lock(&rpc_credcache_lock); - if (time_before(cache->nextgc, jiffies)) - rpcauth_gc_credcache(auth, &free); - hlist_for_each_safe(pos, next, &cache->hashtable[nr]) { - struct rpc_cred *entry; - entry = hlist_entry(pos, struct rpc_cred, cr_hash); - if (entry->cr_ops->crmatch(acred, entry, flags)) { - hlist_del(&entry->cr_hash); - cred = entry; - break; - } - rpcauth_prune_expired(auth, entry, &free); + hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + if (!entry->cr_ops->crmatch(acred, entry, flags)) + continue; + cred = get_rpccred(entry); + hlist_del(&entry->cr_hash); + break; } if (new) { if (cred) - hlist_add_head(&new->cr_hash, &free); + list_add_tail(&new->cr_lru, &free); else cred = new; } if (cred) { hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]); - get_rpccred(cred); } + rpcauth_gc_credcache(cache, &free); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); @@ -303,6 +305,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, struct rpc_auth *auth, const struct rpc_credops *ops) { INIT_HLIST_NODE(&cred->cr_hash); + INIT_LIST_HEAD(&cred->cr_lru); atomic_set(&cred->cr_count, 1); cred->cr_auth = auth; cred->cr_ops = ops; @@ -353,9 +356,29 @@ rpcauth_holdcred(struct rpc_task *task) void put_rpccred(struct rpc_cred *cred) { - cred->cr_expire = jiffies; + /* Fast path for unhashed credentials */ + if (!hlist_unhashed(&cred->cr_hash)) + goto need_lock; + if (!atomic_dec_and_test(&cred->cr_count)) return; + goto out_destroy; + +need_lock: + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) + return; + if (!list_empty(&cred->cr_lru)) + list_del_init(&cred->cr_lru); + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) + hlist_del(&cred->cr_hash); + else if (!hlist_unhashed(&cred->cr_hash)) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + spin_unlock(&rpc_credcache_lock); + return; + } + spin_unlock(&rpc_credcache_lock); +out_destroy: cred->cr_ops->crdestroy(cred); } diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 6c905fb11c5d..537d0e8589dd 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -133,6 +133,7 @@ const struct rpc_credops null_credops = { static struct rpc_cred null_cred = { + .cr_lru = LIST_HEAD_INIT(null_cred.cr_lru), .cr_auth = &null_auth, .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), -- cgit v1.2.3 From 31be5bf15f3dafffce110eb1afadccbf2e3067b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Jun 2007 15:55:26 -0400 Subject: SUNRPC: Convert the credcache lookup code to use RCU Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 3 ++ net/sunrpc/auth.c | 91 ++++++++++++++++++++++++++---------------- net/sunrpc/auth_gss/auth_gss.c | 22 +++++++--- net/sunrpc/auth_unix.c | 18 +++++++-- 4 files changed, 91 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 4e78f0c5f014..5974e8a493c4 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -16,6 +16,7 @@ #include #include +#include /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 @@ -35,6 +36,7 @@ struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ struct list_head cr_lru; /* lru garbage collection */ + struct rcu_head cr_rcu; struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; #ifdef RPC_DEBUG @@ -50,6 +52,7 @@ struct rpc_cred { }; #define RPCAUTH_CRED_NEW 0 #define RPCAUTH_CRED_UPTODATE 1 +#define RPCAUTH_CRED_HASHED 2 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 00f9649b0901..ad7bde2c437e 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -112,6 +112,14 @@ rpcauth_release(struct rpc_auth *auth) static DEFINE_SPINLOCK(rpc_credcache_lock); +static void +rpcauth_unhash_cred_locked(struct rpc_cred *cred) +{ + hlist_del_rcu(&cred->cr_hash); + smp_mb__before_clear_bit(); + clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); +} + /* * Initialize RPC credential cache */ @@ -166,8 +174,7 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache) cred = hlist_entry(head->first, struct rpc_cred, cr_hash); get_rpccred(cred); list_move_tail(&cred->cr_lru, &free); - smp_wmb(); - hlist_del_init(&cred->cr_hash); + rpcauth_unhash_cred_locked(cred); } } spin_unlock(&rpc_credcache_lock); @@ -207,8 +214,7 @@ rpcauth_prune_expired(struct list_head *free) continue; get_rpccred(cred); list_add_tail(&cred->cr_lru, free); - smp_wmb(); - hlist_del_init(&cred->cr_hash); + rpcauth_unhash_cred_locked(cred); } } @@ -218,10 +224,12 @@ rpcauth_prune_expired(struct list_head *free) static void rpcauth_gc_credcache(struct rpc_cred_cache *cache, struct list_head *free) { - if (time_before(jiffies, cache->nextgc)) + if (list_empty(&cred_unused) || time_before(jiffies, cache->nextgc)) return; + spin_lock(&rpc_credcache_lock); cache->nextgc = jiffies + cache->expire; rpcauth_prune_expired(free); + spin_unlock(&rpc_credcache_lock); } /* @@ -234,42 +242,57 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; struct hlist_node *pos; - struct rpc_cred *new = NULL, - *cred = NULL, - *entry; + struct rpc_cred *cred = NULL, + *entry, *new; int nr = 0; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) nr = acred->uid & RPC_CREDCACHE_MASK; -retry: - spin_lock(&rpc_credcache_lock); - hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; + spin_lock(&rpc_credcache_lock); + if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { + spin_unlock(&rpc_credcache_lock); + continue; + } cred = get_rpccred(entry); - hlist_del(&entry->cr_hash); + spin_unlock(&rpc_credcache_lock); break; } - if (new) { - if (cred) - list_add_tail(&new->cr_lru, &free); - else - cred = new; - } - if (cred) { - hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]); + rcu_read_unlock(); + + if (cred != NULL) { + rpcauth_gc_credcache(cache, &free); + goto found; } - rpcauth_gc_credcache(cache, &free); - spin_unlock(&rpc_credcache_lock); - rpcauth_destroy_credlist(&free); + new = auth->au_ops->crcreate(auth, acred, flags); + if (IS_ERR(new)) { + cred = new; + goto out; + } - if (!cred) { - new = auth->au_ops->crcreate(auth, acred, flags); - if (!IS_ERR(new)) - goto retry; + spin_lock(&rpc_credcache_lock); + hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + if (!entry->cr_ops->crmatch(acred, entry, flags)) + continue; + cred = get_rpccred(entry); + break; + } + if (cred == NULL) { cred = new; - } else if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) + set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); + hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]); + } else + list_add_tail(&new->cr_lru, &free); + rpcauth_prune_expired(&free); + cache->nextgc = jiffies + cache->expire; + spin_unlock(&rpc_credcache_lock); +found: + if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && cred->cr_ops->cr_init != NULL && !(flags & RPCAUTH_LOOKUP_NEW)) { int res = cred->cr_ops->cr_init(auth, cred); @@ -278,8 +301,9 @@ retry: cred = ERR_PTR(res); } } - - return (struct rpc_cred *) cred; + rpcauth_destroy_credlist(&free); +out: + return cred; } struct rpc_cred * @@ -357,21 +381,20 @@ void put_rpccred(struct rpc_cred *cred) { /* Fast path for unhashed credentials */ - if (!hlist_unhashed(&cred->cr_hash)) + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) goto need_lock; if (!atomic_dec_and_test(&cred->cr_count)) return; goto out_destroy; - need_lock: if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) return; if (!list_empty(&cred->cr_lru)) list_del_init(&cred->cr_lru); if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - hlist_del(&cred->cr_hash); - else if (!hlist_unhashed(&cred->cr_hash)) { + rpcauth_unhash_cred_locked(cred); + else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { cred->cr_expire = jiffies; list_add_tail(&cred->cr_lru, &cred_unused); spin_unlock(&rpc_credcache_lock); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 55c47ae0a258..068fa6dfb64e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -694,15 +694,25 @@ gss_destroy_ctx(struct gss_cl_ctx *ctx) } static void -gss_destroy_cred(struct rpc_cred *rc) +gss_free_cred(struct gss_cred *gss_cred) { - struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base); + dprintk("RPC: gss_free_cred %p\n", gss_cred); + if (gss_cred->gc_ctx) + gss_put_ctx(gss_cred->gc_ctx); + kfree(gss_cred); +} - dprintk("RPC: gss_destroy_cred \n"); +static void +gss_free_cred_callback(struct rcu_head *head) +{ + struct gss_cred *gss_cred = container_of(head, struct gss_cred, gc_base.cr_rcu); + gss_free_cred(gss_cred); +} - if (cred->gc_ctx) - gss_put_ctx(cred->gc_ctx); - kfree(cred); +static void +gss_destroy_cred(struct rpc_cred *cred) +{ + call_rcu(&cred->cr_rcu, gss_free_cred_callback); } /* diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 29d50ffa69d6..f7ff6ad3259e 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -93,11 +93,23 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) } static void -unx_destroy_cred(struct rpc_cred *rcred) +unx_free_cred(struct unx_cred *unx_cred) { - struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); + dprintk("RPC: unx_free_cred %p\n", unx_cred); + kfree(unx_cred); +} + +static void +unx_free_cred_callback(struct rcu_head *head) +{ + struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu); + unx_free_cred(unx_cred); +} - kfree(cred); +static void +unx_destroy_cred(struct rpc_cred *cred) +{ + call_rcu(&cred->cr_rcu, unx_free_cred_callback); } /* -- cgit v1.2.3 From 9499b4341b56935f61af9e7e354e7d11e70f5258 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Jun 2007 15:57:57 -0400 Subject: SUNRPC: Give credential cache a local spinlock Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 3 +++ net/sunrpc/auth.c | 46 +++++++++++++++++++++++++++++++-------------- net/sunrpc/auth_unix.c | 5 +++++ net/sunrpc/sunrpc_syms.c | 1 + 4 files changed, 41 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 5974e8a493c4..e5a3b5141ed2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -63,6 +63,7 @@ struct rpc_cred { #define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1) struct rpc_cred_cache { struct hlist_head hashtable[RPC_CREDCACHE_NR]; + spinlock_t lock; unsigned long nextgc; /* next garbage collection */ unsigned long expire; /* cache expiry interval */ }; @@ -126,6 +127,8 @@ struct rpc_credops { extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; +void __init rpc_init_authunix(void); + int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index ad7bde2c437e..cf1198d10ee9 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -120,6 +120,18 @@ rpcauth_unhash_cred_locked(struct rpc_cred *cred) clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); } +static void +rpcauth_unhash_cred(struct rpc_cred *cred) +{ + spinlock_t *cache_lock; + + cache_lock = &cred->cr_auth->au_credcache->lock; + spin_lock(cache_lock); + if (atomic_read(&cred->cr_count) == 0) + rpcauth_unhash_cred_locked(cred); + spin_unlock(cache_lock); +} + /* * Initialize RPC credential cache */ @@ -134,6 +146,7 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) return -ENOMEM; for (i = 0; i < RPC_CREDCACHE_NR; i++) INIT_HLIST_HEAD(&new->hashtable[i]); + spin_lock_init(&new->lock); new->expire = expire; new->nextgc = jiffies + (expire >> 1); auth->au_credcache = new; @@ -168,6 +181,7 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache) int i; spin_lock(&rpc_credcache_lock); + spin_lock(&cache->lock); for (i = 0; i < RPC_CREDCACHE_NR; i++) { head = &cache->hashtable[i]; while (!hlist_empty(head)) { @@ -177,6 +191,7 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache) rpcauth_unhash_cred_locked(cred); } } + spin_unlock(&cache->lock); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); } @@ -202,6 +217,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) static void rpcauth_prune_expired(struct list_head *free) { + spinlock_t *cache_lock; struct rpc_cred *cred; while (!list_empty(&cred_unused)) { @@ -212,9 +228,14 @@ rpcauth_prune_expired(struct list_head *free) list_del_init(&cred->cr_lru); if (atomic_read(&cred->cr_count) != 0) continue; - get_rpccred(cred); - list_add_tail(&cred->cr_lru, free); - rpcauth_unhash_cred_locked(cred); + cache_lock = &cred->cr_auth->au_credcache->lock; + spin_lock(cache_lock); + if (atomic_read(&cred->cr_count) == 0) { + get_rpccred(cred); + list_add_tail(&cred->cr_lru, free); + rpcauth_unhash_cred_locked(cred); + } + spin_unlock(cache_lock); } } @@ -253,21 +274,19 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; - spin_lock(&rpc_credcache_lock); + spin_lock(&cache->lock); if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { - spin_unlock(&rpc_credcache_lock); + spin_unlock(&cache->lock); continue; } cred = get_rpccred(entry); - spin_unlock(&rpc_credcache_lock); + spin_unlock(&cache->lock); break; } rcu_read_unlock(); - if (cred != NULL) { - rpcauth_gc_credcache(cache, &free); + if (cred != NULL) goto found; - } new = auth->au_ops->crcreate(auth, acred, flags); if (IS_ERR(new)) { @@ -275,7 +294,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, goto out; } - spin_lock(&rpc_credcache_lock); + spin_lock(&cache->lock); hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; @@ -288,9 +307,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]); } else list_add_tail(&new->cr_lru, &free); - rpcauth_prune_expired(&free); - cache->nextgc = jiffies + cache->expire; - spin_unlock(&rpc_credcache_lock); + spin_unlock(&cache->lock); found: if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && cred->cr_ops->cr_init != NULL @@ -301,6 +318,7 @@ found: cred = ERR_PTR(res); } } + rpcauth_gc_credcache(cache, &free); rpcauth_destroy_credlist(&free); out: return cred; @@ -393,7 +411,7 @@ need_lock: if (!list_empty(&cred->cr_lru)) list_del_init(&cred->cr_lru); if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - rpcauth_unhash_cred_locked(cred); + rpcauth_unhash_cred(cred); else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { cred->cr_expire = jiffies; list_add_tail(&cred->cr_lru, &cred_unused); diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index f7ff6ad3259e..205878a3caa5 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -213,6 +213,11 @@ unx_validate(struct rpc_task *task, __be32 *p) return p; } +void __init rpc_init_authunix(void) +{ + spin_lock_init(&unix_cred_cache.lock); +} + const struct rpc_authops authunix_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_UNIX, diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 3e19e7af6799..018065fca84b 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -152,6 +152,7 @@ init_sunrpc(void) cache_register(&ip_map_cache); cache_register(&unix_gid_cache); init_socket_xprt(); + rpc_init_authunix(); out: return err; } -- cgit v1.2.3 From f5c2187cfef628784d8a09b6d0f77888246d0c0f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Jun 2007 17:11:20 -0400 Subject: SUNRPC: Convert the credential garbage collector into a shrinker callback Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 6 ++-- net/sunrpc/auth.c | 63 ++++++++++++++++++++++++++++++------------ net/sunrpc/auth_gss/auth_gss.c | 3 +- net/sunrpc/auth_unix.c | 6 +--- net/sunrpc/sunrpc_syms.c | 3 +- 5 files changed, 52 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index e5a3b5141ed2..7a69ca3bebaf 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -64,8 +64,6 @@ struct rpc_cred { struct rpc_cred_cache { struct hlist_head hashtable[RPC_CREDCACHE_NR]; spinlock_t lock; - unsigned long nextgc; /* next garbage collection */ - unsigned long expire; /* cache expiry interval */ }; struct rpc_authops; @@ -128,6 +126,8 @@ extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; void __init rpc_init_authunix(void); +void __init rpcauth_init_module(void); +void __exit rpcauth_remove_module(void); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); @@ -147,7 +147,7 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); -int rpcauth_init_credcache(struct rpc_auth *, unsigned long); +int rpcauth_init_credcache(struct rpc_auth *); void rpcauth_destroy_credcache(struct rpc_auth *); void rpcauth_clear_credcache(struct rpc_cred_cache *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index cf1198d10ee9..81f4c776c558 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -26,6 +26,7 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { }; static LIST_HEAD(cred_unused); +static unsigned long number_cred_unused; static u32 pseudoflavor_to_flavor(u32 flavor) { @@ -136,7 +137,7 @@ rpcauth_unhash_cred(struct rpc_cred *cred) * Initialize RPC credential cache */ int -rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) +rpcauth_init_credcache(struct rpc_auth *auth) { struct rpc_cred_cache *new; int i; @@ -147,8 +148,6 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) for (i = 0; i < RPC_CREDCACHE_NR; i++) INIT_HLIST_HEAD(&new->hashtable[i]); spin_lock_init(&new->lock); - new->expire = expire; - new->nextgc = jiffies + (expire >> 1); auth->au_credcache = new; return 0; } @@ -187,7 +186,11 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache) while (!hlist_empty(head)) { cred = hlist_entry(head->first, struct rpc_cred, cr_hash); get_rpccred(cred); - list_move_tail(&cred->cr_lru, &free); + if (!list_empty(&cred->cr_lru)) { + list_del(&cred->cr_lru); + number_cred_unused--; + } + list_add_tail(&cred->cr_lru, &free); rpcauth_unhash_cred_locked(cred); } } @@ -214,18 +217,16 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) /* * Remove stale credentials. Avoid sleeping inside the loop. */ -static void -rpcauth_prune_expired(struct list_head *free) +static int +rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { spinlock_t *cache_lock; struct rpc_cred *cred; while (!list_empty(&cred_unused)) { cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); - if (time_after(jiffies, cred->cr_expire + - cred->cr_auth->au_credcache->expire)) - break; list_del_init(&cred->cr_lru); + number_cred_unused--; if (atomic_read(&cred->cr_count) != 0) continue; cache_lock = &cred->cr_auth->au_credcache->lock; @@ -234,23 +235,32 @@ rpcauth_prune_expired(struct list_head *free) get_rpccred(cred); list_add_tail(&cred->cr_lru, free); rpcauth_unhash_cred_locked(cred); + nr_to_scan--; } spin_unlock(cache_lock); + if (nr_to_scan == 0) + break; } + return nr_to_scan; } /* - * Run garbage collector. + * Run memory cache shrinker. */ -static void -rpcauth_gc_credcache(struct rpc_cred_cache *cache, struct list_head *free) +static int +rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) { - if (list_empty(&cred_unused) || time_before(jiffies, cache->nextgc)) - return; + LIST_HEAD(free); + int res; + + if (list_empty(&cred_unused)) + return 0; spin_lock(&rpc_credcache_lock); - cache->nextgc = jiffies + cache->expire; - rpcauth_prune_expired(free); + nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan); + res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure; spin_unlock(&rpc_credcache_lock); + rpcauth_destroy_credlist(&free); + return res; } /* @@ -318,7 +328,6 @@ found: cred = ERR_PTR(res); } } - rpcauth_gc_credcache(cache, &free); rpcauth_destroy_credlist(&free); out: return cred; @@ -408,13 +417,16 @@ put_rpccred(struct rpc_cred *cred) need_lock: if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) return; - if (!list_empty(&cred->cr_lru)) + if (!list_empty(&cred->cr_lru)) { + number_cred_unused--; list_del_init(&cred->cr_lru); + } if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) rpcauth_unhash_cred(cred); else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { cred->cr_expire = jiffies; list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; spin_unlock(&rpc_credcache_lock); return; } @@ -520,3 +532,18 @@ rpcauth_uptodatecred(struct rpc_task *task) return cred == NULL || test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; } + + +static struct shrinker *rpc_cred_shrinker; + +void __init rpcauth_init_module(void) +{ + rpc_init_authunix(); + rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker); +} + +void __exit rpcauth_remove_module(void) +{ + if (rpc_cred_shrinker != NULL) + remove_shrinker(rpc_cred_shrinker); +} diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 068fa6dfb64e..8653a92144ae 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -64,7 +64,6 @@ static const struct rpc_credops gss_credops; #define NFS_NGROUPS 16 -#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */ #define GSS_CRED_SLACK 1024 /* XXX: unused */ /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ @@ -643,7 +642,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } - err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); + err = rpcauth_init_credcache(auth); if (err) goto err_unlink_pipe; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 205878a3caa5..d9c50d810d15 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -21,8 +21,6 @@ struct unx_cred { }; #define uc_uid uc_base.cr_uid -#define UNX_CRED_EXPIRE (60 * HZ) - #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) #ifdef RPC_DEBUG @@ -38,8 +36,7 @@ unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); - if (atomic_inc_return(&unix_auth.au_count) == 1) - unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1); + atomic_inc(&unix_auth.au_count); return &unix_auth; } @@ -232,7 +229,6 @@ const struct rpc_authops authunix_ops = { static struct rpc_cred_cache unix_cred_cache = { - .expire = UNX_CRED_EXPIRE, }; static diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 018065fca84b..384c4ad5ab86 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -152,7 +152,7 @@ init_sunrpc(void) cache_register(&ip_map_cache); cache_register(&unix_gid_cache); init_socket_xprt(); - rpc_init_authunix(); + rpcauth_init_module(); out: return err; } @@ -160,6 +160,7 @@ out: static void __exit cleanup_sunrpc(void) { + rpcauth_remove_module(); cleanup_socket_xprt(); unregister_rpc_pipefs(); rpc_destroy_mempool(); -- cgit v1.2.3 From 5d28dc82074f1e64b22c9424b161abc1f5d6bcdb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jun 2007 19:18:38 -0400 Subject: SUNRPC: Convert gss_ctx_lock to an RCU lock Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth_gss.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 53 ++++++++++++++++++++++++++++------------- 2 files changed, 37 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 0bd1d06777b9..67658e17a375 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -75,6 +75,7 @@ struct gss_cl_ctx { struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; + struct rcu_head gc_rcu; }; struct gss_upcall_msg; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 8653a92144ae..15da6f82db36 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -78,8 +78,6 @@ static const struct rpc_credops gss_credops; /* dump the buffer in `emacs-hexl' style */ #define isprint(c) ((c > 0x1f) && (c < 0x7f)) -static DEFINE_RWLOCK(gss_ctx_lock); - struct gss_auth { struct rpc_auth rpc_auth; struct gss_api_mech *mech; @@ -88,7 +86,7 @@ struct gss_auth { struct dentry *dentry; }; -static void gss_destroy_ctx(struct gss_cl_ctx *); +static void gss_free_ctx(struct gss_cl_ctx *); static struct rpc_pipe_ops gss_upcall_ops; static inline struct gss_cl_ctx * @@ -102,20 +100,24 @@ static inline void gss_put_ctx(struct gss_cl_ctx *ctx) { if (atomic_dec_and_test(&ctx->count)) - gss_destroy_ctx(ctx); + gss_free_ctx(ctx); } +/* gss_cred_set_ctx: + * called by gss_upcall_callback and gss_create_upcall in order + * to set the gss context. The actual exchange of an old context + * and a new one is protected by the inode->i_lock. + */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *old; - write_lock(&gss_ctx_lock); + old = gss_cred->gc_ctx; - gss_cred->gc_ctx = ctx; + rcu_assign_pointer(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); - write_unlock(&gss_ctx_lock); if (old) gss_put_ctx(old); } @@ -126,10 +128,10 @@ gss_cred_is_uptodate_ctx(struct rpc_cred *cred) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); int res = 0; - read_lock(&gss_ctx_lock); + rcu_read_lock(); if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx) res = 1; - read_unlock(&gss_ctx_lock); + rcu_read_unlock(); return res; } @@ -168,10 +170,10 @@ gss_cred_get_ctx(struct rpc_cred *cred) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = NULL; - read_lock(&gss_ctx_lock); + rcu_read_lock(); if (gss_cred->gc_ctx) ctx = gss_get_ctx(gss_cred->gc_ctx); - read_unlock(&gss_ctx_lock); + rcu_read_unlock(); return ctx; } @@ -333,11 +335,11 @@ gss_upcall_callback(struct rpc_task *task) struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; struct inode *inode = gss_msg->auth->dentry->d_inode; + spin_lock(&inode->i_lock); if (gss_msg->ctx) gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); else task->tk_status = gss_msg->msg.errno; - spin_lock(&inode->i_lock); gss_cred->gc_upcall = NULL; rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); spin_unlock(&inode->i_lock); @@ -440,7 +442,6 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { - spin_unlock(&inode->i_lock); break; } spin_unlock(&inode->i_lock); @@ -454,6 +455,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx)); else err = gss_msg->msg.errno; + spin_unlock(&inode->i_lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); @@ -681,9 +683,9 @@ gss_destroy(struct rpc_auth *auth) * to create a new cred or context, so they check that things have been * allocated before freeing them. */ static void -gss_destroy_ctx(struct gss_cl_ctx *ctx) +gss_do_free_ctx(struct gss_cl_ctx *ctx) { - dprintk("RPC: gss_destroy_ctx\n"); + dprintk("RPC: gss_free_ctx\n"); if (ctx->gc_gss_ctx) gss_delete_sec_context(&ctx->gc_gss_ctx); @@ -692,12 +694,23 @@ gss_destroy_ctx(struct gss_cl_ctx *ctx) kfree(ctx); } +static void +gss_free_ctx_callback(struct rcu_head *head) +{ + struct gss_cl_ctx *ctx = container_of(head, struct gss_cl_ctx, gc_rcu); + gss_do_free_ctx(ctx); +} + +static void +gss_free_ctx(struct gss_cl_ctx *ctx) +{ + call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); +} + static void gss_free_cred(struct gss_cred *gss_cred) { dprintk("RPC: gss_free_cred %p\n", gss_cred); - if (gss_cred->gc_ctx) - gss_put_ctx(gss_cred->gc_ctx); kfree(gss_cred); } @@ -711,7 +724,13 @@ gss_free_cred_callback(struct rcu_head *head) static void gss_destroy_cred(struct rpc_cred *cred) { + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_cl_ctx *ctx = gss_cred->gc_ctx; + + rcu_assign_pointer(gss_cred->gc_ctx, NULL); call_rcu(&cred->cr_rcu, gss_free_cred_callback); + if (ctx) + gss_put_ctx(ctx); } /* -- cgit v1.2.3 From 1dd17ec693bf4a08b666c2ef76b68ca08ce3c93d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jun 2007 16:57:41 -0400 Subject: SUNRPC: Allow rpc_auth to run clean up before the rpc_client is destroyed RPCSEC_GSS needs to be able to send NULL RPC calls to the server in order to free up any remaining GSS contexts. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 98df44e453fe..28a789419f64 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -352,10 +352,6 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (clnt->cl_auth) { - rpcauth_release(clnt->cl_auth); - clnt->cl_auth = NULL; - } if (!IS_ERR(clnt->cl_dentry)) { rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); @@ -375,6 +371,30 @@ out_free: kfree(clnt); } +/* + * Free an RPC client + */ +static void +rpc_free_auth(struct kref *kref) +{ + struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); + + if (clnt->cl_auth == NULL) { + rpc_free_client(kref); + return; + } + + /* + * Note: RPCSEC_GSS may need to send NULL RPC calls in order to + * release remaining GSS contexts. This mechanism ensures + * that it can do so safely. + */ + kref_init(kref); + rpcauth_release(clnt->cl_auth); + clnt->cl_auth = NULL; + kref_put(kref, rpc_free_client); +} + /* * Release reference to the RPC client */ @@ -385,7 +405,7 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - kref_put(&clnt->cl_kref, rpc_free_client); + kref_put(&clnt->cl_kref, rpc_free_auth); } /** -- cgit v1.2.3 From 1be27f36601973815171db684c711d30557cf50c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Jun 2007 14:29:04 -0400 Subject: SUNRPC: Remove the tk_auth macro... We should almost always be deferencing the rpc_auth struct by means of the credential's cr_auth field instead of the rpc_clnt->cl_auth anyway. Fix up that historical mistake, and remove the macro that propagated it. Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 6 +++--- fs/nfs/nfs3xdr.c | 8 ++++---- fs/nfs/nfs4xdr.c | 10 +++++----- include/linux/sunrpc/sched.h | 1 - net/sunrpc/auth.c | 25 +++++++++++++------------ net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/auth_unix.c | 2 +- net/sunrpc/clnt.c | 2 +- 8 files changed, 29 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index cd3ca7b5d3db..7fcc78f2aa71 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args) static int nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 offset = (u32)args->offset; u32 count = args->count; @@ -380,7 +380,7 @@ static int nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) { struct rpc_task *task = req->rq_task; - struct rpc_auth *auth = task->tk_auth; + struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res) static int nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b51df8eb9f01..b4647a22f349 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg static int nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args) static int nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; u32 count = args->count; @@ -643,7 +643,7 @@ static int nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, struct nfs3_getaclargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); @@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) static int nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; p = xdr_encode_fhandle(p, args->fh); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1fcca516e6ee..859b13633258 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1071,7 +1071,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; uint32_t attrs[2] = { FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, FATTR4_WORD1_MOUNTED_ON_FILEID, @@ -1117,7 +1117,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; __be32 *p; @@ -1735,7 +1735,7 @@ out: */ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1795,7 +1795,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, struct nfs_getaclargs *args) { struct xdr_stream xdr; - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct compound_hdr hdr = { .nops = 2, }; @@ -2030,7 +2030,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs struct compound_hdr hdr = { .nops = 3, }; - struct rpc_auth *auth = req->rq_task->tk_auth; + struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; int replen; int status; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 3387b008cdfc..8ea077db0099 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -98,7 +98,6 @@ struct rpc_task { unsigned short tk_pid; /* debugging aid */ #endif }; -#define tk_auth tk_client->cl_auth #define tk_xprt tk_client->cl_xprt /* support walking a list of tasks on a wait queue */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 81f4c776c558..74baf87ccff9 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -371,7 +371,7 @@ EXPORT_SYMBOL(rpcauth_init_cred); struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) { - struct rpc_auth *auth = task->tk_auth; + struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, @@ -381,7 +381,7 @@ rpcauth_bindcred(struct rpc_task *task) int flags = 0; dprintk("RPC: %5u looking up %s cred\n", - task->tk_pid, task->tk_auth->au_ops->au_name); + task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); get_group_info(acred.group_info); if (task->tk_flags & RPC_TASK_ROOTCREDS) flags |= RPCAUTH_LOOKUP_ROOTCREDS; @@ -397,11 +397,12 @@ rpcauth_bindcred(struct rpc_task *task) void rpcauth_holdcred(struct rpc_task *task) { - dprintk("RPC: %5u holding %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, - task->tk_msg.rpc_cred); - if (task->tk_msg.rpc_cred) - get_rpccred(task->tk_msg.rpc_cred); + struct rpc_cred *cred = task->tk_msg.rpc_cred; + if (cred != NULL) { + get_rpccred(cred); + dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, + cred->cr_auth->au_ops->au_name, cred); + } } void @@ -441,7 +442,7 @@ rpcauth_unbindcred(struct rpc_task *task) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u releasing %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); put_rpccred(cred); task->tk_msg.rpc_cred = NULL; @@ -453,7 +454,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u marshaling %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); return cred->cr_ops->crmarshal(task, p); } @@ -464,7 +465,7 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u validating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); return cred->cr_ops->crvalidate(task, p); } @@ -505,7 +506,7 @@ rpcauth_refreshcred(struct rpc_task *task) int err; dprintk("RPC: %5u refreshing %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); if (err < 0) @@ -519,7 +520,7 @@ rpcauth_invalcred(struct rpc_task *task) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u invalidating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); if (cred) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 15da6f82db36..debcda86467c 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -906,7 +906,7 @@ gss_validate(struct rpc_task *task, __be32 *p) goto out_bad; /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ - task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2; + cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; gss_put_ctx(ctx); dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n", task->tk_pid); @@ -1206,7 +1206,7 @@ gss_unwrap_resp(struct rpc_task *task, break; } /* take into account extra slack for integrity and privacy cases: */ - task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp) + cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) + (savedlen - head->iov_len); out_decode: status = decode(rqstp, p, obj); diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index d9c50d810d15..5ed91e5bcee4 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -204,7 +204,7 @@ unx_validate(struct rpc_task *task, __be32 *p) printk("RPC: giant verf size: %u\n", size); return NULL; } - task->tk_auth->au_rslack = (size >> 2) + 2; + task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2; p += (size >> 2); return p; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 28a789419f64..50af8bbe7f20 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -787,7 +787,7 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_auth->au_cslack; + unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; -- cgit v1.2.3 From 0285ed1f12298e5304f0f2642e2cf31a5f302e61 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Jun 2007 14:29:12 -0400 Subject: SUNRPC: Ensure that the struct gss_auth lifetime exceeds the credential's Add a refcount in order to ensure that the gss_auth doesn't disappear from underneath us while we're freeing up GSS contexts. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index debcda86467c..982aba697e4d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -79,6 +79,7 @@ static const struct rpc_credops gss_credops; #define isprint(c) ((c > 0x1f) && (c < 0x7f)) struct gss_auth { + struct kref kref; struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; @@ -636,6 +637,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) auth->au_ops = &authgss_ops; auth->au_flavor = flavor; atomic_set(&auth->au_count, 1); + kref_init(&gss_auth->kref); gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); @@ -660,6 +662,25 @@ out_dec: return ERR_PTR(err); } +static void +gss_free(struct gss_auth *gss_auth) +{ + rpc_unlink(gss_auth->dentry); + gss_auth->dentry = NULL; + gss_mech_put(gss_auth->mech); + + kfree(gss_auth); + module_put(THIS_MODULE); +} + +static void +gss_free_callback(struct kref *kref) +{ + struct gss_auth *gss_auth = container_of(kref, struct gss_auth, kref); + + gss_free(gss_auth); +} + static void gss_destroy(struct rpc_auth *auth) { @@ -671,12 +692,7 @@ gss_destroy(struct rpc_auth *auth) rpcauth_destroy_credcache(auth); gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->dentry); - gss_auth->dentry = NULL; - gss_mech_put(gss_auth->mech); - - kfree(gss_auth); - module_put(THIS_MODULE); + kref_put(&gss_auth->kref, gss_free_callback); } /* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure @@ -725,12 +741,14 @@ static void gss_destroy_cred(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = gss_cred->gc_ctx; rcu_assign_pointer(gss_cred->gc_ctx, NULL); call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); + kref_put(&gss_auth->kref, gss_free_callback); } /* @@ -762,6 +780,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) */ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; + kref_get(&gss_auth->kref); return &cred->gc_base; out_err: -- cgit v1.2.3 From 0df7fb74fbb709591301871a38aac7735a1d6583 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jun 2007 17:04:57 -0400 Subject: SUNRPC: Ensure RPCSEC_GSS destroys the security context when freeing a cred Do so by set the gc_proc field to RPC_GSS_PROC_DESTROY, and then sending a NULL RPC call. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 60 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 982aba697e4d..17d460f85f0e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -57,6 +57,7 @@ static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; +static const struct rpc_credops gss_nullops; #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -695,7 +696,39 @@ gss_destroy(struct rpc_auth *auth) kref_put(&gss_auth->kref, gss_free_callback); } -/* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure +/* + * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call + * to the server with the GSS control procedure field set to + * RPC_GSS_PROC_DESTROY. This should normally cause the server to release + * all RPCSEC_GSS state associated with that context. + */ +static int +gss_destroying_context(struct rpc_cred *cred) +{ + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); + struct rpc_task *task; + + if (gss_cred->gc_ctx == NULL || + gss_cred->gc_ctx->gc_proc == RPC_GSS_PROC_DESTROY) + return 0; + + gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY; + cred->cr_ops = &gss_nullops; + + /* Take a reference to ensure the cred will be destroyed either + * by the RPC call or by the put_rpccred() below */ + get_rpccred(cred); + + task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC); + if (!IS_ERR(task)) + rpc_put_task(task); + + put_rpccred(cred); + return 1; +} + +/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure * to create a new cred or context, so they check that things have been * allocated before freeing them. */ static void @@ -744,6 +777,8 @@ gss_destroy_cred(struct rpc_cred *cred) struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = gss_cred->gc_ctx; + if (gss_destroying_context(cred)) + return; rcu_assign_pointer(gss_cred->gc_ctx, NULL); call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) @@ -892,6 +927,13 @@ gss_refresh(struct rpc_task *task) return 0; } +/* Dummy refresh routine: used only when destroying the context */ +static int +gss_refresh_null(struct rpc_task *task) +{ + return -EACCES; +} + static __be32 * gss_validate(struct rpc_task *task, __be32 *p) { @@ -921,8 +963,11 @@ gss_validate(struct rpc_task *task, __be32 *p) maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); - if (maj_stat) + if (maj_stat) { + dprintk("RPC: %5u gss_validate: gss_verify_mic returned" + "error 0x%08x\n", task->tk_pid, maj_stat); goto out_bad; + } /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; @@ -1260,6 +1305,17 @@ static const struct rpc_credops gss_credops = { .crunwrap_resp = gss_unwrap_resp, }; +static const struct rpc_credops gss_nullops = { + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh_null, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, +}; + static struct rpc_pipe_ops gss_upcall_ops = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, -- cgit v1.2.3 From 8a702bbb7ddaa2e78c17dbaaf48e3cd5943676f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Jun 2007 18:30:26 -0400 Subject: SUNRPC: Suppress some noisy and unnecessary printk() calls in call_verify() Convert them into dprintk() calls. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 50af8bbe7f20..e1553cf2a68f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1315,9 +1315,9 @@ call_verify(struct rpc_task *task) * - if it isn't pointer subtraction in the NFS client may give * undefined results */ - printk(KERN_WARNING - "call_verify: XDR representation not a multiple of" - " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len); + dprintk("RPC: %5u %s: XDR representation not a multiple of" + " 4 bytes: 0x%x\n", task->tk_pid, __FUNCTION__, + task->tk_rqstp->rq_rcv_buf.len); goto out_eio; } if ((len -= 3) < 0) @@ -1325,7 +1325,8 @@ call_verify(struct rpc_task *task) p += 1; /* skip XID */ if ((n = ntohl(*p++)) != RPC_REPLY) { - printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); + dprintk("RPC: %5u %s: not an RPC reply: %x\n", + task->tk_pid, __FUNCTION__, n); goto out_garbage; } if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { @@ -1376,7 +1377,8 @@ call_verify(struct rpc_task *task) "authentication.\n", task->tk_client->cl_server); break; default: - printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n); + dprintk("RPC: %5u %s: unknown auth error: %x\n", + task->tk_pid, __FUNCTION__, n); error = -EIO; } dprintk("RPC: %5u %s: call rejected %d\n", @@ -1384,7 +1386,8 @@ call_verify(struct rpc_task *task) goto out_err; } if (!(p = rpcauth_checkverf(task, p))) { - printk(KERN_WARNING "call_verify: auth check failed\n"); + dprintk("RPC: %5u %s: auth check failed\n", + task->tk_pid, __FUNCTION__); goto out_garbage; /* bad verifier, retry */ } len = p - (__be32 *)iov->iov_base - 1; @@ -1423,7 +1426,8 @@ call_verify(struct rpc_task *task) task->tk_pid, __FUNCTION__); break; /* retry */ default: - printk(KERN_WARNING "call_verify: server accept status: %x\n", n); + dprintk("RPC: %5u %s: server accept status: %x\n", + task->tk_pid, __FUNCTION__, n); /* Also retry */ } @@ -1437,14 +1441,16 @@ out_garbage: out_retry: return ERR_PTR(-EAGAIN); } - printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__); out_eio: error = -EIO; out_err: rpc_exit(task, error); + dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid, + __FUNCTION__, error); return ERR_PTR(error); out_overflow: - printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); + dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid, + __FUNCTION__); goto out_garbage; } -- cgit v1.2.3 From f7fb558e503dc80e100acaf116f7261cdd97f0ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:07 -0400 Subject: SUNRPC: Allow rpcbind requests to be interrupted by a signal. This allows NFS mount requests and RPC re-binding to be interruptible if the server isn't responding. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index cf7db59cdcde..9a20f380ab09 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -184,7 +184,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, + .flags = (RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_INTR), }; ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); -- cgit v1.2.3 From cce63cd6374e6f1b4ea897ece1454feb13993d7c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:12 -0400 Subject: SUNRPC: Rename rpcb_getport_external routine In preparation for handling NFS mount option parsing in the kernel, rename rpcb_getport_external as rpcb_get_port_sync, and make it available always (instead of only when CONFIG_ROOT_NFS is enabled). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- include/linux/sunrpc/clnt.h | 7 ++----- net/sunrpc/rpcb_clnt.c | 21 +++++++++++---------- 3 files changed, 14 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 49d1008ce1d7..f0db4703b1c9 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto) printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", program, version, NIPQUAD(servaddr)); set_sockaddr(&sin, servaddr, 0); - return rpcb_getport_external(&sin, program, version, proto); + return rpcb_getport_sync(&sin, program, version, proto); } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 097984b03857..b28d919c7758 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -120,8 +120,10 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); + int rpcb_register(u32, u32, int, unsigned short, int *); void rpcb_getport(struct rpc_task *); +int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); @@ -141,10 +143,5 @@ void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -/* - * Helper function for NFSroot support - */ -int rpcb_getport_external(struct sockaddr_in *, __u32, __u32, int); - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 9a20f380ab09..fc881a675eb9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -12,6 +12,8 @@ * Copyright (C) 1996, Olaf Kirch */ +#include + #include #include #include @@ -247,21 +249,20 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return error; } -#ifdef CONFIG_ROOT_NFS /** - * rpcb_getport_external - obtain the port for an RPC service on a given host + * rpcb_getport_sync - obtain the port for an RPC service on a given host * @sin: address of remote peer * @prog: RPC program number to bind * @vers: RPC version number to bind * @prot: transport protocol to use to make this request * * Called from outside the RPC client in a synchronous task context. + * Uses default timeout parameters specified by underlying transport. * - * For now, this supports only version 2 queries, but is used only by - * mount_clnt for NFS_ROOT. + * XXX: Needs to support IPv6, and rpcbind versions 3 and 4 */ -int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, - __u32 vers, int prot) +int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, + __u32 vers, int prot) { struct rpcbind_args map = { .r_prog = prog, @@ -278,10 +279,10 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, char hostname[40]; int status; - dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", - NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); + dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n", + __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); + sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -296,7 +297,7 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, } return status; } -#endif +EXPORT_SYMBOL_GPL(rpcb_getport_sync); /** * rpcb_getport - obtain the port for a given RPC service on a given host -- cgit v1.2.3 From 45160d6275814e0c86206e6981f0b92c61a50a21 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:17 -0400 Subject: SUNRPC: Rename rpcb_getport to be consistent with new rpcb_getport_sync name Clean up, for consistency. Rename rpcb_getport as rpcb_getport_async, to match the naming scheme of rpcb_getport_sync. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/rpcb_clnt.c | 37 +++++++++++++++++++------------------ net/sunrpc/xprtsock.c | 4 ++-- 3 files changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b28d919c7758..c1b37972b0d5 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -122,8 +122,8 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); -void rpcb_getport(struct rpc_task *); int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int); +void rpcb_getport_async(struct rpc_task *); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index fc881a675eb9..d1740dbab991 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -300,13 +300,13 @@ int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, EXPORT_SYMBOL_GPL(rpcb_getport_sync); /** - * rpcb_getport - obtain the port for a given RPC service on a given host + * rpcb_getport_async - obtain the port for a given RPC service on a given host * @task: task that is waiting for portmapper request * * This one can be called for an ongoing RPC request, and can be used in * an async (rpciod) context. */ -void rpcb_getport(struct rpc_task *task) +void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; int bind_version; @@ -317,17 +317,17 @@ void rpcb_getport(struct rpc_task *task) struct sockaddr addr; int status; - dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n", - task->tk_pid, clnt->cl_server, - clnt->cl_prog, clnt->cl_vers, xprt->prot); + dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", + task->tk_pid, __FUNCTION__, + clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); if (xprt_test_and_set_binding(xprt)) { status = -EACCES; /* tell caller to check again */ - dprintk("RPC: %5u rpcb_getport waiting for another binder\n", - task->tk_pid); + dprintk("RPC: %5u %s: waiting for another binder\n", + task->tk_pid, __FUNCTION__); goto bailout_nowake; } @@ -338,27 +338,28 @@ void rpcb_getport(struct rpc_task *task) /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; - dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid); + dprintk("RPC: %5u %s: already bound\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; status = -EACCES; /* tell caller to try again later */ - dprintk("RPC: %5u rpcb_getport no more getport versions " - "available\n", task->tk_pid); + dprintk("RPC: %5u %s: no more getport versions available\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; - dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n", - task->tk_pid, bind_version); + dprintk("RPC: %5u %s: trying rpcbind version %u\n", + task->tk_pid, __FUNCTION__, bind_version); map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; - dprintk("RPC: %5u rpcb_getport no memory available\n", - task->tk_pid); + dprintk("RPC: %5u %s: no memory available\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } map->r_prog = clnt->cl_prog; @@ -376,8 +377,8 @@ void rpcb_getport(struct rpc_task *task) rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n", - task->tk_pid, PTR_ERR(rpcb_clnt)); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); goto bailout; } @@ -385,8 +386,8 @@ void rpcb_getport(struct rpc_task *task) rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; - dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", - task->tk_pid); + dprintk("RPC: %5u %s: rpc_run_task failed\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } rpc_put_task(child); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ee6ad3baf680..a8f7c5fd752c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1473,7 +1473,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, - .rpcbind = rpcb_getport, + .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1490,7 +1490,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, - .rpcbind = rpcb_getport, + .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, -- cgit v1.2.3 From 43780b87fa799ae65df11d89d4539d8d6a7c67eb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:22 -0400 Subject: SUNRPC: Add a convenient default for the hostname when calling rpc_create() A couple of callers just use a stringified IP address for the rpc client's hostname. Move the logic for constructing this into rpc_create(), so it can be shared. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 13 ++++--------- fs/nfsd/nfs4callback.c | 6 ------ net/sunrpc/clnt.c | 13 +++++++++++++ 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 878d7a5cb6d4..2892ec843066 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -28,8 +28,7 @@ #define MOUNT_UMNT 3 */ -static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *, - int, int); +static struct rpc_clnt * mnt_create(struct sockaddr_in *, int, int); static struct rpc_program mnt_program; struct mnt_fhstatus { @@ -52,14 +51,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, .rpc_argp = path, .rpc_resp = &result, }; - char hostname[32]; int status; dprintk("NFS: nfs_mount(%08x:%s)\n", (unsigned)ntohl(addr->sin_addr.s_addr), path); - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); - mnt_clnt = mnt_create(hostname, addr, version, protocol); + mnt_clnt = mnt_create(addr, version, protocol); if (IS_ERR(mnt_clnt)) return PTR_ERR(mnt_clnt); @@ -73,15 +70,13 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, return status < 0? status : (result.status? -EACCES : 0); } -static struct rpc_clnt * -mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, - int protocol) +static struct rpc_clnt *mnt_create(struct sockaddr_in *srvaddr, int version, + int protocol) { struct rpc_create_args args = { .protocol = protocol, .address = (struct sockaddr *)srvaddr, .addrsize = sizeof(*srvaddr), - .servername = hostname, .program = &mnt_program, .version = version, .authflavor = RPC_AUTH_UNIX, diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 6b1b487db1ec..5443c52b57aa 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; - char clientname[16]; int status; if (atomic_read(&cb->cb_set)) @@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) memset(program->stats, 0, sizeof(cb->cb_stat)); program->stats->program = program; - /* Just here to make some printk's more useful: */ - snprintf(clientname, sizeof(clientname), - "%u.%u.%u.%u", NIPQUAD(addr.sin_addr)); - args.servername = clientname; - /* Create RPC client */ cb->cb_client = rpc_create(&args); if (IS_ERR(cb->cb_client)) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e1553cf2a68f..0d9b5275fac3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -234,12 +234,25 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; + char servername[20]; xprt = xprt_create_transport(args->protocol, args->address, args->addrsize, args->timeout); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; + /* + * If the caller chooses not to specify a hostname, whip + * up a string representation of the passed-in address. + */ + if (args->servername == NULL) { + struct sockaddr_in *addr = + (struct sockaddr_in *) &args->address; + snprintf(servername, sizeof(servername), NIPQUAD_FMT, + NIPQUAD(addr->sin_addr.s_addr)); + args->servername = servername; + } + /* * By default, kernel RPC client connects from a reserved port. * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, -- cgit v1.2.3 From 96802a095171f5b35cf0e1e0d4be943e6696a253 Mon Sep 17 00:00:00 2001 From: Frank van Maarseveen Date: Sun, 8 Jul 2007 13:08:54 +0200 Subject: SUNRPC: cleanup transport creation argument passing Cleanup argument passing to functions for creating an RPC transport. Signed-off-by: Frank van Maarseveen Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 15 +++++++++++---- net/sunrpc/clnt.c | 9 +++++++-- net/sunrpc/xprt.c | 15 ++++++--------- net/sunrpc/xprtsock.c | 36 ++++++++++++++++-------------------- 4 files changed, 40 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 34f7590506fa..ea828b09e4ad 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -17,6 +17,8 @@ #include #include +#ifdef __KERNEL__ + extern unsigned int xprt_udp_slot_table_entries; extern unsigned int xprt_tcp_slot_table_entries; @@ -194,7 +196,12 @@ struct rpc_xprt { char * address_strings[RPC_DISPLAY_MAX]; }; -#ifdef __KERNEL__ +struct rpc_xprtsock_create { + int proto; /* IPPROTO_UDP or IPPROTO_TCP */ + struct sockaddr * dstaddr; /* remote peer address */ + size_t addrlen; + struct rpc_timeout * timeout; /* optional timeout parameters */ +}; /* * Transport operations used by ULPs @@ -204,7 +211,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ -struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms); +struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); @@ -242,8 +249,8 @@ void xprt_disconnect(struct rpc_xprt *xprt); /* * Socket transport setup operations */ -struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); -struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); +struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args); +struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args); int init_socket_xprt(void); void cleanup_socket_xprt(void); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0d9b5275fac3..25bbf2d7f603 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -234,10 +234,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; + struct rpc_xprtsock_create xprtargs = { + .proto = args->protocol, + .dstaddr = args->address, + .addrlen = args->addrsize, + .timeout = args->timeout + }; char servername[20]; - xprt = xprt_create_transport(args->protocol, args->address, - args->addrsize, args->timeout); + xprt = xprt_create_transport(&xprtargs); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 518acb74a5bb..c8c2edccad7e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -886,27 +886,24 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i /** * xprt_create_transport - create an RPC transport - * @proto: requested transport protocol - * @ap: remote peer address - * @size: length of address - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) +struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct rpc_rqst *req; - switch (proto) { + switch (args->proto) { case IPPROTO_UDP: - xprt = xs_setup_udp(ap, size, to); + xprt = xs_setup_udp(args); break; case IPPROTO_TCP: - xprt = xs_setup_tcp(ap, size, to); + xprt = xs_setup_tcp(args); break; default: printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", - proto); + args->proto); return ERR_PTR(-EIO); } if (IS_ERR(xprt)) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a8f7c5fd752c..6b7cea57651c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1502,12 +1502,12 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size) +static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; - if (addrlen > sizeof(xprt->addr)) { + if (args->addrlen > sizeof(xprt->addr)) { dprintk("RPC: xs_setup_xprt: address too large\n"); return ERR_PTR(-EBADF); } @@ -1529,8 +1529,8 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns return ERR_PTR(-ENOMEM); } - memcpy(&xprt->addr, addr, addrlen); - xprt->addrlen = addrlen; + memcpy(&xprt->addr, args->dstaddr, args->addrlen); + xprt->addrlen = args->addrlen; new->port = xs_get_random_port(); return xprt; @@ -1538,22 +1538,20 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns /** * xs_setup_udp - Set up transport to use a UDP socket - * @addr: address of remote server - * @addrlen: length of address in bytes - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) +struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct sock_xprt *transport; - xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) + if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) xprt_set_bound(xprt); xprt->prot = IPPROTO_UDP; @@ -1569,8 +1567,8 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_ xprt->ops = &xs_udp_ops; - if (to) - xprt->timeout = *to; + if (args->timeout) + xprt->timeout = *args->timeout; else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); @@ -1583,22 +1581,20 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_ /** * xs_setup_tcp - Set up transport to use a TCP socket - * @addr: address of remote server - * @addrlen: length of address in bytes - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) +struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct sock_xprt *transport; - xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) + if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) xprt_set_bound(xprt); xprt->prot = IPPROTO_TCP; @@ -1613,8 +1609,8 @@ struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_ xprt->ops = &xs_tcp_ops; - if (to) - xprt->timeout = *to; + if (args->timeout) + xprt->timeout = *args->timeout; else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); -- cgit v1.2.3 From a97476926ec061f90b77da478620ea6dc71a3237 Mon Sep 17 00:00:00 2001 From: Frank van Maarseveen Date: Mon, 9 Jul 2007 22:21:39 +0200 Subject: SUNRPC server: record the destination address of a request Save the destination address of an incoming request over TCP like is done already for UDP. It is necessary later for callbacks by the server. Signed-off-by: Frank van Maarseveen Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svcsock.h | 1 + net/sunrpc/svcsock.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'net') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index e21dd93ac4b7..a53e0fa855d2 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -59,6 +59,7 @@ struct svc_sock { /* cache of various info for TCP sockets */ void *sk_info_authunix; + struct sockaddr_storage sk_local; /* local address */ struct sockaddr_storage sk_remote; /* remote peer's address */ int sk_remotelen; /* length of address */ }; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5baf48de2558..64b9b8c743c4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -644,6 +644,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; + struct sockaddr *sin; int len; len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, @@ -654,6 +655,19 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen); rqstp->rq_addrlen = svsk->sk_remotelen; + /* Destination address in request is needed for binding the + * source address in RPC callbacks later. + */ + sin = (struct sockaddr *)&svsk->sk_local; + switch (sin->sa_family) { + case AF_INET: + rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; + break; + case AF_INET6: + rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; + break; + } + dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", svsk, iov[0].iov_base, iov[0].iov_len, len); @@ -1064,6 +1078,12 @@ svc_tcp_accept(struct svc_sock *svsk) goto failed; memcpy(&newsvsk->sk_remote, sin, slen); newsvsk->sk_remotelen = slen; + err = kernel_getsockname(newsock, sin, &slen); + if (unlikely(err < 0)) { + dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); + slen = offsetof(struct sockaddr, sa_data); + } + memcpy(&newsvsk->sk_local, sin, slen); svc_sock_received(newsvsk); -- cgit v1.2.3 From d3bc9a1deb8964d774af8535814cb91bf8f6def0 Mon Sep 17 00:00:00 2001 From: Frank van Maarseveen Date: Mon, 9 Jul 2007 22:23:35 +0200 Subject: SUNRPC client: add interface for binding to a local address In addition to binding to a local privileged port the NFS client should allow binding to a specific local address. This is used by the server for callbacks. The patch adds the necessary interface. Signed-off-by: Frank van Maarseveen Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 1 + net/sunrpc/xprtsock.c | 24 ++++++++++++++++-------- 4 files changed, 19 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c1b37972b0d5..c0d9d14983b3 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -98,6 +98,7 @@ struct rpc_create_args { int protocol; struct sockaddr *address; size_t addrsize; + struct sockaddr *saddress; struct rpc_timeout *timeout; char *servername; struct rpc_program *program; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ea828b09e4ad..d11cedd14f0f 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -198,6 +198,7 @@ struct rpc_xprt { struct rpc_xprtsock_create { int proto; /* IPPROTO_UDP or IPPROTO_TCP */ + struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; struct rpc_timeout * timeout; /* optional timeout parameters */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 25bbf2d7f603..5d3fe7b22488 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -236,6 +236,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) struct rpc_clnt *clnt; struct rpc_xprtsock_create xprtargs = { .proto = args->protocol, + .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, .timeout = args->timeout diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6b7cea57651c..4ae7eed7f617 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -235,6 +235,7 @@ struct sock_xprt { * Connection of transports */ struct delayed_work connect_worker; + struct sockaddr_storage addr; unsigned short port; /* @@ -1145,31 +1146,36 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) sap->sin_port = htons(port); } -static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock) +static int xs_bind(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, }; + struct sockaddr_in *sa; int err; unsigned short port = transport->port; + if (!transport->xprt.resvport) + port = 0; + sa = (struct sockaddr_in *)&transport->addr; + myaddr.sin_addr = sa->sin_addr; do { myaddr.sin_port = htons(port); err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); + if (!transport->xprt.resvport) + break; if (err == 0) { transport->port = port; - dprintk("RPC: xs_bindresvport bound to port %u\n", - port); - return 0; + break; } if (port <= xprt_min_resvport) port = xprt_max_resvport; else port--; } while (err == -EADDRINUSE && port != transport->port); - - dprintk("RPC: can't bind to reserved port (%d).\n", -err); + dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", + NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); return err; } @@ -1228,7 +1234,7 @@ static void xs_udp_connect_worker(struct work_struct *work) } xs_reclassify_socket(sock); - if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1315,7 +1321,7 @@ static void xs_tcp_connect_worker(struct work_struct *work) } xs_reclassify_socket(sock); - if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1531,6 +1537,8 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; + if (args->srcaddr) + memcpy(&new->addr, args->srcaddr, args->addrlen); new->port = xs_get_random_port(); return xprt; -- cgit v1.2.3 From d8558f99fbc5ef5d4ae76b893784005056450f82 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 10 Jul 2007 15:19:26 -0400 Subject: sunrpc: drop BKL around wrap and unwrap We don't need the BKL when wrapping and unwrapping; and experiments by Avishay Traeger have found that permitting multiple encryption and decryption operations to proceed in parallel can provide significant performance improvements. Signed-off-by: J. Bruce Fields Cc: Avishay Traeger Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 13 +++++++++++-- net/sunrpc/auth_gss/auth_gss.c | 10 ++++++++++ net/sunrpc/clnt.c | 4 ---- 3 files changed, 21 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 74baf87ccff9..aa55d0a03e6f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -475,13 +476,17 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; + int ret; dprintk("RPC: %5u using %s cred %p to wrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); if (cred->cr_ops->crwrap_req) return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); /* By default, we encode the arguments normally. */ - return encode(rqstp, data, obj); + lock_kernel(); + ret = encode(rqstp, data, obj); + unlock_kernel(); + return ret; } int @@ -489,6 +494,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; + int ret; dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); @@ -496,7 +502,10 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, return cred->cr_ops->crunwrap_resp(task, decode, rqstp, data, obj); /* By default, we decode the arguments normally. */ - return decode(rqstp, data, obj); + lock_kernel(); + ret = decode(rqstp, data, obj); + unlock_kernel(); + return ret; } int diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 17d460f85f0e..baf4096d52d4 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -999,7 +999,9 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; *p++ = htonl(rqstp->rq_seqno); + lock_kernel(); status = encode(rqstp, p, obj); + unlock_kernel(); if (status) return status; @@ -1093,7 +1095,9 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; *p++ = htonl(rqstp->rq_seqno); + lock_kernel(); status = encode(rqstp, p, obj); + unlock_kernel(); if (status) return status; @@ -1152,12 +1156,16 @@ gss_wrap_req(struct rpc_task *task, /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. */ + lock_kernel(); status = encode(rqstp, p, obj); + unlock_kernel(); goto out; } switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: + lock_kernel(); status = encode(rqstp, p, obj); + unlock_kernel(); break; case RPC_GSS_SVC_INTEGRITY: status = gss_wrap_req_integ(cred, ctx, encode, @@ -1273,7 +1281,9 @@ gss_unwrap_resp(struct rpc_task *task, cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) + (savedlen - head->iov_len); out_decode: + lock_kernel(); status = decode(rqstp, p, obj); + unlock_kernel(); out: gss_put_ctx(ctx); dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5d3fe7b22488..52429b1ffcc1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -904,10 +904,8 @@ call_encode(struct rpc_task *task) if (encode == NULL) return; - lock_kernel(); task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - unlock_kernel(); if (task->tk_status == -ENOMEM) { /* XXX: Is this sane? */ rpc_delay(task, 3*HZ); @@ -1238,10 +1236,8 @@ call_decode(struct rpc_task *task) task->tk_action = rpc_exit_task; if (decode) { - lock_kernel(); task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, task->tk_msg.rpc_resp); - unlock_kernel(); } dprintk("RPC: %5u call_decode result %d\n", task->tk_pid, task->tk_status); -- cgit v1.2.3 From ef222013fc8c1a2fcba5c7ab169be8ffcb778ec4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 11 Jul 2007 06:42:04 +0200 Subject: [Bluetooth] Add hci_recv_fragment() helper function Most drivers must handle fragmented HCI data packets and events. This patch adds a generic function for their reassembly to the Bluetooth core layer and thus allows to shrink the complexity of the drivers. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 ++ net/bluetooth/hci_core.c | 97 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c0fc39620f36..7c78744ec0fd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -109,6 +109,7 @@ struct hci_dev { struct sk_buff_head cmd_q; struct sk_buff *sent_cmd; + struct sk_buff *reassembly[3]; struct semaphore req_lock; wait_queue_head_t req_wait_q; @@ -437,6 +438,8 @@ static inline int hci_recv_frame(struct sk_buff *skb) return 0; } +int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count); + int hci_register_sysfs(struct hci_dev *hdev); void hci_unregister_sysfs(struct hci_dev *hdev); void hci_conn_add_sysfs(struct hci_conn *conn); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index aa4b56a8c3ea..9c71cffbc6b0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -826,7 +826,7 @@ EXPORT_SYMBOL(hci_free_dev); int hci_register_dev(struct hci_dev *hdev) { struct list_head *head = &hci_dev_list, *p; - int id = 0; + int i, id = 0; BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, hdev->type, hdev->owner); @@ -865,6 +865,9 @@ int hci_register_dev(struct hci_dev *hdev) skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); + for (i = 0; i < 3; i++) + hdev->reassembly[i] = NULL; + init_waitqueue_head(&hdev->req_wait_q); init_MUTEX(&hdev->req_lock); @@ -889,6 +892,8 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ int hci_unregister_dev(struct hci_dev *hdev) { + int i; + BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); hci_unregister_sysfs(hdev); @@ -899,9 +904,13 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); + for (i = 0; i < 3; i++) + kfree_skb(hdev->reassembly[i]); + hci_notify(hdev, HCI_DEV_UNREG); __hci_dev_put(hdev); + return 0; } EXPORT_SYMBOL(hci_unregister_dev); @@ -922,6 +931,90 @@ int hci_resume_dev(struct hci_dev *hdev) } EXPORT_SYMBOL(hci_resume_dev); +/* Receive packet type fragment */ +#define __reassembly(hdev, type) ((hdev)->reassembly[(type) - 2]) + +int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count) +{ + if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) + return -EILSEQ; + + while (count) { + struct sk_buff *skb = __reassembly(hdev, type); + struct { int expect; } *scb; + int len = 0; + + if (!skb) { + /* Start of the frame */ + + switch (type) { + case HCI_EVENT_PKT: + if (count >= HCI_EVENT_HDR_SIZE) { + struct hci_event_hdr *h = data; + len = HCI_EVENT_HDR_SIZE + h->plen; + } else + return -EILSEQ; + break; + + case HCI_ACLDATA_PKT: + if (count >= HCI_ACL_HDR_SIZE) { + struct hci_acl_hdr *h = data; + len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen); + } else + return -EILSEQ; + break; + + case HCI_SCODATA_PKT: + if (count >= HCI_SCO_HDR_SIZE) { + struct hci_sco_hdr *h = data; + len = HCI_SCO_HDR_SIZE + h->dlen; + } else + return -EILSEQ; + break; + } + + skb = bt_skb_alloc(len, GFP_ATOMIC); + if (!skb) { + BT_ERR("%s no memory for packet", hdev->name); + return -ENOMEM; + } + + skb->dev = (void *) hdev; + bt_cb(skb)->pkt_type = type; + + __reassembly(hdev, type) = skb; + + scb = (void *) skb->cb; + scb->expect = len; + } else { + /* Continuation */ + + scb = (void *) skb->cb; + len = scb->expect; + } + + len = min(len, count); + + memcpy(skb_put(skb, len), data, len); + + scb->expect -= len; + + if (scb->expect == 0) { + /* Complete frame */ + + __reassembly(hdev, type) = NULL; + + bt_cb(skb)->pkt_type = type; + hci_recv_frame(skb); + } + + count -= len; data += len; + } + + return 0; +} +EXPORT_SYMBOL(hci_recv_fragment); + /* ---- Interface to upper protocols ---- */ /* Register/Unregister protocols. @@ -1029,7 +1122,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p skb = bt_skb_alloc(len, GFP_ATOMIC); if (!skb) { - BT_ERR("%s Can't allocate memory for HCI command", hdev->name); + BT_ERR("%s no memory for command", hdev->name); return -ENOMEM; } -- cgit v1.2.3 From 84950cf0ba02fd6a5defe2511bc41f9aa2237632 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Wed, 11 Jul 2007 09:18:15 +0200 Subject: [Bluetooth] Hangup TTY before releasing rfcomm_dev The core problem is that RFCOMM socket layer ioctl can release rfcomm_dev struct while RFCOMM TTY layer is still actively using it. Calling tty_vhangup() is needed for a synchronous hangup before rfcomm_dev is freed. Addresses the oops at http://bugzilla.kernel.org/show_bug.cgi?id=7509 Acked-by: Alan Cox Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/tty.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index b2b1cceb102a..ba469b038ea0 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -383,6 +383,10 @@ static int rfcomm_release_dev(void __user *arg) if (req.flags & (1 << RFCOMM_HANGUP_NOW)) rfcomm_dlc_close(dev->dlc, 0); + /* Shut down TTY synchronously before freeing rfcomm_dev */ + if (dev->tty) + tty_vhangup(dev->tty); + rfcomm_dev_del(dev); rfcomm_dev_put(dev); return 0; -- cgit v1.2.3 From e06e7c615877026544ad7f8b309d1a3706410383 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jun 2007 17:22:39 -0700 Subject: [IPV4]: The scheduled removal of multipath cached routing support. With help from Chris Wedgwood. Signed-off-by: David S. Miller --- Documentation/feature-removal-schedule.txt | 19 -- include/linux/Kbuild | 1 - include/linux/ip_mp_alg.h | 22 -- include/linux/rtnetlink.h | 2 +- include/net/dst.h | 1 - include/net/ip_fib.h | 16 -- include/net/ip_mp_alg.h | 96 --------- include/net/route.h | 1 - net/ipv4/Kconfig | 42 ---- net/ipv4/Makefile | 5 - net/ipv4/fib_frontend.c | 4 - net/ipv4/fib_semantics.c | 16 -- net/ipv4/multipath.c | 55 ----- net/ipv4/multipath_drr.c | 249 ---------------------- net/ipv4/multipath_random.c | 114 ---------- net/ipv4/multipath_rr.c | 95 --------- net/ipv4/multipath_wrandom.c | 329 ----------------------------- net/ipv4/route.c | 259 +---------------------- 18 files changed, 12 insertions(+), 1314 deletions(-) delete mode 100644 include/linux/ip_mp_alg.h delete mode 100644 include/net/ip_mp_alg.h delete mode 100644 net/ipv4/multipath.c delete mode 100644 net/ipv4/multipath_drr.c delete mode 100644 net/ipv4/multipath_random.c delete mode 100644 net/ipv4/multipath_rr.c delete mode 100644 net/ipv4/multipath_wrandom.c (limited to 'net') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 3a159dac04f5..484250dcdbe0 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -262,25 +262,6 @@ Who: Richard Purdie --------------------------- -What: Multipath cached routing support in ipv4 -When: in 2.6.23 -Why: Code was merged, then submitter immediately disappeared leaving - us with no maintainer and lots of bugs. The code should not have - been merged in the first place, and many aspects of it's - implementation are blocking more critical core networking - development. It's marked EXPERIMENTAL and no distribution - enables it because it cause obscure crashes due to unfixable bugs - (interfaces don't return errors so memory allocation can't be - handled, calling contexts of these interfaces make handling - errors impossible too because they get called after we've - totally commited to creating a route object, for example). - This problem has existed for years and no forward progress - has ever been made, and nobody steps up to try and salvage - this code, so we're going to finally just get rid of it. -Who: David S. Miller - ---------------------------- - What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer) When: December 2007 Why: These functions are a leftover from 2.4 times. They have several diff --git a/include/linux/Kbuild b/include/linux/Kbuild index afae306b177c..d94451682761 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -91,7 +91,6 @@ header-y += in6.h header-y += in_route.h header-y += ioctl.h header-y += ipmi_msgdefs.h -header-y += ip_mp_alg.h header-y += ipsec.h header-y += ipx.h header-y += irda.h diff --git a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h deleted file mode 100644 index e234e2008f5d..000000000000 --- a/include/linux/ip_mp_alg.h +++ /dev/null @@ -1,22 +0,0 @@ -/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#ifndef _LINUX_IP_MP_ALG_H -#define _LINUX_IP_MP_ALG_H - -enum ip_mp_alg { - IP_MP_ALG_NONE, - IP_MP_ALG_RR, - IP_MP_ALG_DRR, - IP_MP_ALG_RANDOM, - IP_MP_ALG_WRANDOM, - __IP_MP_ALG_MAX -}; - -#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1) - -#endif /* _LINUX_IP_MP_ALG_H */ - diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1fae30af91f3..612785848532 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -261,7 +261,7 @@ enum rtattr_type_t RTA_FLOW, RTA_CACHEINFO, RTA_SESSION, - RTA_MP_ALGO, + RTA_MP_ALGO, /* no longer used */ RTA_TABLE, __RTA_MAX }; diff --git a/include/net/dst.h b/include/net/dst.h index 82270f9332db..e9ff4a4caef9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -47,7 +47,6 @@ struct dst_entry #define DST_NOXFRM 2 #define DST_NOPOLICY 4 #define DST_NOHASH 8 -#define DST_BALANCED 0x10 unsigned long expires; unsigned short header_len; /* more space at head required */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 69252cbe05b0..8cadc77c7df4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -39,7 +39,6 @@ struct fib_config { int fc_mx_len; int fc_mp_len; u32 fc_flow; - u32 fc_mp_alg; u32 fc_nlflags; struct nl_info fc_nlinfo; }; @@ -85,9 +84,6 @@ struct fib_info { int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - u32 fib_mp_alg; #endif struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -103,10 +99,6 @@ struct fib_result { unsigned char nh_sel; unsigned char type; unsigned char scope; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - __be32 network; - __be32 netmask; -#endif struct fib_info *fi; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rule *r; @@ -145,14 +137,6 @@ struct fib_result_nl { #define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) #define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED -#define FIB_RES_NETWORK(res) ((res).network) -#define FIB_RES_NETMASK(res) ((res).netmask) -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ -#define FIB_RES_NETWORK(res) (0) -#define FIB_RES_NETMASK(res) (0) -#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ - struct fib_table { struct hlist_node tb_hlist; u32 tb_id; diff --git a/include/net/ip_mp_alg.h b/include/net/ip_mp_alg.h deleted file mode 100644 index 25b56571e54b..000000000000 --- a/include/net/ip_mp_alg.h +++ /dev/null @@ -1,96 +0,0 @@ -/* ip_mp_alg.h: IPV4 multipath algorithm support. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#ifndef _NET_IP_MP_ALG_H -#define _NET_IP_MP_ALG_H - -#include -#include -#include - -struct fib_nh; - -struct ip_mp_alg_ops { - void (*mp_alg_select_route)(const struct flowi *flp, - struct rtable *rth, struct rtable **rp); - void (*mp_alg_flush)(void); - void (*mp_alg_set_nhinfo)(__be32 network, __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh); - void (*mp_alg_remove)(struct rtable *rth); -}; - -extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg); -extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg); - -extern struct ip_mp_alg_ops *ip_mp_alg_table[]; - -static inline int multipath_select_route(const struct flowi *flp, - struct rtable *rth, - struct rtable **rp) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - /* mp_alg_select_route _MUST_ be implemented */ - if (ops && (rth->u.dst.flags & DST_BALANCED)) { - ops->mp_alg_select_route(flp, rth, rp); - return 1; - } -#endif - return 0; -} - -static inline void multipath_flush(void) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - int i; - - for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) { - struct ip_mp_alg_ops *ops = ip_mp_alg_table[i]; - - if (ops && ops->mp_alg_flush) - ops->mp_alg_flush(); - } -#endif -} - -static inline void multipath_set_nhinfo(struct rtable *rth, - __be32 network, __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - if (ops && ops->mp_alg_set_nhinfo) - ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh); -#endif -} - -static inline void multipath_remove(struct rtable *rth) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - if (ops && ops->mp_alg_remove && - (rth->u.dst.flags & DST_BALANCED)) - ops->mp_alg_remove(rth); -#endif -} - -static inline int multipath_comparekeys(const struct flowi *flp1, - const struct flowi *flp2) -{ - return flp1->fl4_dst == flp2->fl4_dst && - flp1->fl4_src == flp2->fl4_src && - flp1->oif == flp2->oif && - flp1->mark == flp2->mark && - !((flp1->fl4_tos ^ flp2->fl4_tos) & - (IPTOS_RT_MASK | RTO_ONLINK)); -} - -#endif /* _NET_IP_MP_ALG_H */ diff --git a/include/net/route.h b/include/net/route.h index 749e4dfe5ff3..188b8936bb27 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -62,7 +62,6 @@ struct rtable unsigned rt_flags; __u16 rt_type; - __u16 rt_multipath_alg; __be32 rt_dst; /* Path destination */ __be32 rt_src; /* Path source */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 010fbb2d45e9..fb7909774254 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -116,48 +116,6 @@ config IP_ROUTE_MULTIPATH equal "cost" and chooses one of them in a non-deterministic fashion if a matching packet arrives. -config IP_ROUTE_MULTIPATH_CACHED - bool "IP: equal cost multipath with caching support (EXPERIMENTAL)" - depends on IP_ROUTE_MULTIPATH - help - Normally, equal cost multipath routing is not supported by the - routing cache. If you say Y here, alternative routes are cached - and on cache lookup a route is chosen in a configurable fashion. - - If unsure, say N. - -config IP_ROUTE_MULTIPATH_RR - tristate "MULTIPATH: round robin algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen according to Round Robin - -config IP_ROUTE_MULTIPATH_RANDOM - tristate "MULTIPATH: random algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen in a random fashion. Actually, - there is no weight for a route. The advantage of this policy - is that it is implemented stateless and therefore introduces only - a very small delay. - -config IP_ROUTE_MULTIPATH_WRANDOM - tristate "MULTIPATH: weighted random algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen in a weighted random fashion. - The per route weights are the weights visible via ip route 2. As the - corresponding state management introduces some overhead routing delay - is increased. - -config IP_ROUTE_MULTIPATH_DRR - tristate "MULTIPATH: interface round robin algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Connections are distributed in a round robin fashion over the - available interfaces. This policy makes sense if the connections - should be primarily distributed on interfaces and not on routes. - config IP_ROUTE_VERBOSE bool "IP: verbose route monitoring" depends on IP_ADVANCED_ROUTER diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4ff6c151d7f3..fbf1674e0c2a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -29,14 +29,9 @@ obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 311d633f7f39..2eb909be8041 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -453,7 +453,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, - [RTA_MP_ALGO] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -515,9 +514,6 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, case RTA_FLOW: cfg->fc_flow = nla_get_u32(attr); break; - case RTA_MP_ALGO: - cfg->fc_mp_alg = nla_get_u32(attr); - break; case RTA_TABLE: cfg->fc_table = nla_get_u32(attr); break; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index bb94550d95c3..c434119deb52 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include @@ -697,13 +696,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto err_inval; } #endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (cfg->fc_mp_alg) { - if (cfg->fc_mp_alg < IP_MP_ALG_NONE || - cfg->fc_mp_alg > IP_MP_ALG_MAX) - goto err_inval; - } -#endif err = -ENOBUFS; if (fib_info_cnt >= fib_hash_size) { @@ -791,10 +783,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #endif } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = cfg->fc_mp_alg; -#endif - if (fib_props[cfg->fc_type].error) { if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; @@ -940,10 +928,6 @@ out_fill_res: res->type = fa->fa_type; res->scope = fa->fa_scope; res->fi = fa->fa_info; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - res->netmask = mask; - res->network = zone & inet_make_mask(prefixlen); -#endif atomic_inc(&res->fi->fib_clntref); return 0; } diff --git a/net/ipv4/multipath.c b/net/ipv4/multipath.c deleted file mode 100644 index 4e9ca7c76407..000000000000 --- a/net/ipv4/multipath.c +++ /dev/null @@ -1,55 +0,0 @@ -/* multipath.c: IPV4 multipath algorithm support. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#include -#include -#include -#include - -#include - -static DEFINE_SPINLOCK(alg_table_lock); -struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1]; - -int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) -{ - struct ip_mp_alg_ops **slot; - int err; - - if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX || - !ops->mp_alg_select_route) - return -EINVAL; - - spin_lock(&alg_table_lock); - slot = &ip_mp_alg_table[n]; - if (*slot != NULL) { - err = -EBUSY; - } else { - *slot = ops; - err = 0; - } - spin_unlock(&alg_table_lock); - - return err; -} -EXPORT_SYMBOL(multipath_alg_register); - -void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) -{ - struct ip_mp_alg_ops **slot; - - if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX) - return; - - spin_lock(&alg_table_lock); - slot = &ip_mp_alg_table[n]; - if (*slot == ops) - *slot = NULL; - spin_unlock(&alg_table_lock); - - synchronize_net(); -} -EXPORT_SYMBOL(multipath_alg_unregister); diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c deleted file mode 100644 index b03c5ca2c823..000000000000 --- a/net/ipv4/multipath_drr.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Device round robin policy for multipath. - * - * - * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $ - * - * Authors: Einar Lueck - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct multipath_device { - int ifi; /* interface index of device */ - atomic_t usecount; - int allocated; -}; - -#define MULTIPATH_MAX_DEVICECANDIDATES 10 - -static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; -static DEFINE_SPINLOCK(state_lock); - -static int inline __multipath_findslot(void) -{ - int i; - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { - if (state[i].allocated == 0) - return i; - } - return -1; -} - -static int inline __multipath_finddev(int ifindex) -{ - int i; - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { - if (state[i].allocated != 0 && - state[i].ifi == ifindex) - return i; - } - return -1; -} - -static int drr_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - int devidx; - - switch (event) { - case NETDEV_UNREGISTER: - case NETDEV_DOWN: - spin_lock_bh(&state_lock); - - devidx = __multipath_finddev(dev->ifindex); - if (devidx != -1) { - state[devidx].allocated = 0; - state[devidx].ifi = 0; - atomic_set(&state[devidx].usecount, 0); - } - - spin_unlock_bh(&state_lock); - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block drr_dev_notifier = { - .notifier_call = drr_dev_event, -}; - - -static void drr_safe_inc(atomic_t *usecount) -{ - int n; - - atomic_inc(usecount); - - n = atomic_read(usecount); - if (n <= 0) { - int i; - - spin_lock_bh(&state_lock); - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) - atomic_set(&state[i].usecount, 0); - - spin_unlock_bh(&state_lock); - } -} - -static void drr_select_route(const struct flowi *flp, - struct rtable *first, struct rtable **rp) -{ - struct rtable *nh, *result, *cur_min; - int min_usecount = -1; - int devidx = -1; - int cur_min_devidx = -1; - - /* 1. make sure all alt. nexthops have the same GC related data */ - /* 2. determine the new candidate to be returned */ - result = NULL; - cur_min = NULL; - for (nh = rcu_dereference(first); nh; - nh = rcu_dereference(nh->u.dst.rt_next)) { - if ((nh->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&nh->fl, flp)) { - int nh_ifidx = nh->u.dst.dev->ifindex; - - nh->u.dst.lastuse = jiffies; - nh->u.dst.__use++; - if (result != NULL) - continue; - - /* search for the output interface */ - - /* this is not SMP safe, only add/remove are - * SMP safe as wrong usecount updates have no big - * impact - */ - devidx = __multipath_finddev(nh_ifidx); - if (devidx == -1) { - /* add the interface to the array - * SMP safe - */ - spin_lock_bh(&state_lock); - - /* due to SMP: search again */ - devidx = __multipath_finddev(nh_ifidx); - if (devidx == -1) { - /* add entry for device */ - devidx = __multipath_findslot(); - if (devidx == -1) { - /* unlikely but possible */ - continue; - } - - state[devidx].allocated = 1; - state[devidx].ifi = nh_ifidx; - atomic_set(&state[devidx].usecount, 0); - min_usecount = 0; - } - - spin_unlock_bh(&state_lock); - } - - if (min_usecount == 0) { - /* if the device has not been used it is - * the primary target - */ - drr_safe_inc(&state[devidx].usecount); - result = nh; - } else { - int count = - atomic_read(&state[devidx].usecount); - - if (min_usecount == -1 || - count < min_usecount) { - cur_min = nh; - cur_min_devidx = devidx; - min_usecount = count; - } - } - } - } - - if (!result) { - if (cur_min) { - drr_safe_inc(&state[cur_min_devidx].usecount); - result = cur_min; - } else { - result = first; - } - } - - *rp = result; -} - -static struct ip_mp_alg_ops drr_ops = { - .mp_alg_select_route = drr_select_route, -}; - -static int __init drr_init(void) -{ - int err = register_netdevice_notifier(&drr_dev_notifier); - - if (err) - return err; - - err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR); - if (err) - goto fail; - - return 0; - -fail: - unregister_netdevice_notifier(&drr_dev_notifier); - return err; -} - -static void __exit drr_exit(void) -{ - unregister_netdevice_notifier(&drr_dev_notifier); - multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR); -} - -module_init(drr_init); -module_exit(drr_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c deleted file mode 100644 index c312785d14d0..000000000000 --- a/net/ipv4/multipath_random.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Random policy for multipath. - * - * - * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $ - * - * Authors: Einar Lueck - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MULTIPATH_MAX_CANDIDATES 40 - -static void random_select_route(const struct flowi *flp, - struct rtable *first, - struct rtable **rp) -{ - struct rtable *rt; - struct rtable *decision; - unsigned char candidate_count = 0; - - /* count all candidate */ - for (rt = rcu_dereference(first); rt; - rt = rcu_dereference(rt->u.dst.rt_next)) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) - ++candidate_count; - } - - /* choose a random candidate */ - decision = first; - if (candidate_count > 1) { - unsigned char i = 0; - unsigned char candidate_no = (unsigned char) - (random32() % candidate_count); - - /* find chosen candidate and adjust GC data for all candidates - * to ensure they stay in cache - */ - for (rt = first; rt; rt = rt->u.dst.rt_next) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) { - rt->u.dst.lastuse = jiffies; - - if (i == candidate_no) - decision = rt; - - if (i >= candidate_count) - break; - - i++; - } - } - } - - decision->u.dst.__use++; - *rp = decision; -} - -static struct ip_mp_alg_ops random_ops = { - .mp_alg_select_route = random_select_route, -}; - -static int __init random_init(void) -{ - return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM); -} - -static void __exit random_exit(void) -{ - multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM); -} - -module_init(random_init); -module_exit(random_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c deleted file mode 100644 index 0ad22524f450..000000000000 --- a/net/ipv4/multipath_rr.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Round robin policy for multipath. - * - * - * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $ - * - * Authors: Einar Lueck - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void rr_select_route(const struct flowi *flp, - struct rtable *first, struct rtable **rp) -{ - struct rtable *nh, *result, *min_use_cand = NULL; - int min_use = -1; - - /* 1. make sure all alt. nexthops have the same GC related data - * 2. determine the new candidate to be returned - */ - result = NULL; - for (nh = rcu_dereference(first); nh; - nh = rcu_dereference(nh->u.dst.rt_next)) { - if ((nh->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&nh->fl, flp)) { - nh->u.dst.lastuse = jiffies; - - if (min_use == -1 || nh->u.dst.__use < min_use) { - min_use = nh->u.dst.__use; - min_use_cand = nh; - } - } - } - result = min_use_cand; - if (!result) - result = first; - - result->u.dst.__use++; - *rp = result; -} - -static struct ip_mp_alg_ops rr_ops = { - .mp_alg_select_route = rr_select_route, -}; - -static int __init rr_init(void) -{ - return multipath_alg_register(&rr_ops, IP_MP_ALG_RR); -} - -static void __exit rr_exit(void) -{ - multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR); -} - -module_init(rr_init); -module_exit(rr_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c deleted file mode 100644 index 57c503694539..000000000000 --- a/net/ipv4/multipath_wrandom.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Weighted random policy for multipath. - * - * - * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ - * - * Authors: Einar Lueck - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MULTIPATH_STATE_SIZE 15 - -struct multipath_candidate { - struct multipath_candidate *next; - int power; - struct rtable *rt; -}; - -struct multipath_dest { - struct list_head list; - - const struct fib_nh *nh_info; - __be32 netmask; - __be32 network; - unsigned char prefixlen; - - struct rcu_head rcu; -}; - -struct multipath_bucket { - struct list_head head; - spinlock_t lock; -}; - -struct multipath_route { - struct list_head list; - - int oif; - __be32 gw; - struct list_head dests; - - struct rcu_head rcu; -}; - -/* state: primarily weight per route information */ -static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; - -static unsigned char __multipath_lookup_weight(const struct flowi *fl, - const struct rtable *rt) -{ - const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; - struct multipath_route *r; - struct multipath_route *target_route = NULL; - struct multipath_dest *d; - int weight = 1; - - /* lookup the weight information for a certain route */ - rcu_read_lock(); - - /* find state entry for gateway or add one if necessary */ - list_for_each_entry_rcu(r, &state[state_idx].head, list) { - if (r->gw == rt->rt_gateway && - r->oif == rt->idev->dev->ifindex) { - target_route = r; - break; - } - } - - if (!target_route) { - /* this should not happen... but we are prepared */ - printk( KERN_CRIT"%s: missing state for gateway: %u and " \ - "device %d\n", __FUNCTION__, rt->rt_gateway, - rt->idev->dev->ifindex); - goto out; - } - - /* find state entry for destination */ - list_for_each_entry_rcu(d, &target_route->dests, list) { - __be32 targetnetwork = fl->fl4_dst & - inet_make_mask(d->prefixlen); - - if ((targetnetwork & d->netmask) == d->network) { - weight = d->nh_info->nh_weight; - goto out; - } - } - -out: - rcu_read_unlock(); - return weight; -} - -static void wrandom_init_state(void) -{ - int i; - - for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { - INIT_LIST_HEAD(&state[i].head); - spin_lock_init(&state[i].lock); - } -} - -static void wrandom_select_route(const struct flowi *flp, - struct rtable *first, - struct rtable **rp) -{ - struct rtable *rt; - struct rtable *decision; - struct multipath_candidate *first_mpc = NULL; - struct multipath_candidate *mpc, *last_mpc = NULL; - int power = 0; - int last_power; - int selector; - const size_t size_mpc = sizeof(struct multipath_candidate); - - /* collect all candidates and identify their weights */ - for (rt = rcu_dereference(first); rt; - rt = rcu_dereference(rt->u.dst.rt_next)) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) { - struct multipath_candidate* mpc = - (struct multipath_candidate*) - kmalloc(size_mpc, GFP_ATOMIC); - - if (!mpc) - return; - - power += __multipath_lookup_weight(flp, rt) * 10000; - - mpc->power = power; - mpc->rt = rt; - mpc->next = NULL; - - if (!first_mpc) - first_mpc = mpc; - else - last_mpc->next = mpc; - - last_mpc = mpc; - } - } - - /* choose a weighted random candidate */ - decision = first; - selector = random32() % power; - last_power = 0; - - /* select candidate, adjust GC data and cleanup local state */ - decision = first; - last_mpc = NULL; - for (mpc = first_mpc; mpc; mpc = mpc->next) { - mpc->rt->u.dst.lastuse = jiffies; - if (last_power <= selector && selector < mpc->power) - decision = mpc->rt; - - last_power = mpc->power; - kfree(last_mpc); - last_mpc = mpc; - } - - /* concurrent __multipath_flush may lead to !last_mpc */ - kfree(last_mpc); - - decision->u.dst.__use++; - *rp = decision; -} - -static void wrandom_set_nhinfo(__be32 network, - __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh) -{ - const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; - struct multipath_route *r, *target_route = NULL; - struct multipath_dest *d, *target_dest = NULL; - - /* store the weight information for a certain route */ - spin_lock_bh(&state[state_idx].lock); - - /* find state entry for gateway or add one if necessary */ - list_for_each_entry_rcu(r, &state[state_idx].head, list) { - if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { - target_route = r; - break; - } - } - - if (!target_route) { - const size_t size_rt = sizeof(struct multipath_route); - target_route = (struct multipath_route *) - kmalloc(size_rt, GFP_ATOMIC); - - target_route->gw = nh->nh_gw; - target_route->oif = nh->nh_oif; - memset(&target_route->rcu, 0, sizeof(struct rcu_head)); - INIT_LIST_HEAD(&target_route->dests); - - list_add_rcu(&target_route->list, &state[state_idx].head); - } - - /* find state entry for destination or add one if necessary */ - list_for_each_entry_rcu(d, &target_route->dests, list) { - if (d->nh_info == nh) { - target_dest = d; - break; - } - } - - if (!target_dest) { - const size_t size_dst = sizeof(struct multipath_dest); - target_dest = (struct multipath_dest*) - kmalloc(size_dst, GFP_ATOMIC); - - target_dest->nh_info = nh; - target_dest->network = network; - target_dest->netmask = netmask; - target_dest->prefixlen = prefixlen; - memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); - - list_add_rcu(&target_dest->list, &target_route->dests); - } - /* else: we already stored this info for another destination => - * we are finished - */ - - spin_unlock_bh(&state[state_idx].lock); -} - -static void __multipath_free(struct rcu_head *head) -{ - struct multipath_route *rt = container_of(head, struct multipath_route, - rcu); - kfree(rt); -} - -static void __multipath_free_dst(struct rcu_head *head) -{ - struct multipath_dest *dst = container_of(head, - struct multipath_dest, - rcu); - kfree(dst); -} - -static void wrandom_flush(void) -{ - int i; - - /* defere delete to all entries */ - for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { - struct multipath_route *r; - - spin_lock_bh(&state[i].lock); - list_for_each_entry_rcu(r, &state[i].head, list) { - struct multipath_dest *d; - list_for_each_entry_rcu(d, &r->dests, list) { - list_del_rcu(&d->list); - call_rcu(&d->rcu, - __multipath_free_dst); - } - list_del_rcu(&r->list); - call_rcu(&r->rcu, - __multipath_free); - } - - spin_unlock_bh(&state[i].lock); - } -} - -static struct ip_mp_alg_ops wrandom_ops = { - .mp_alg_select_route = wrandom_select_route, - .mp_alg_flush = wrandom_flush, - .mp_alg_set_nhinfo = wrandom_set_nhinfo, -}; - -static int __init wrandom_init(void) -{ - wrandom_init_state(); - - return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); -} - -static void __exit wrandom_exit(void) -{ - multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); -} - -module_init(wrandom_init); -module_exit(wrandom_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 29ca63e81ced..85285021518b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -101,7 +101,6 @@ #include #include #include -#include #include #include #ifdef CONFIG_SYSCTL @@ -495,13 +494,11 @@ static const struct file_operations rt_cpu_seq_fops = { static __inline__ void rt_free(struct rtable *rt) { - multipath_remove(rt); call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); } static __inline__ void rt_drop(struct rtable *rt) { - multipath_remove(rt); ip_rt_put(rt); call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); } @@ -574,52 +571,6 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) (fl1->iif ^ fl2->iif)) == 0; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED -static struct rtable **rt_remove_balanced_route(struct rtable **chain_head, - struct rtable *expentry, - int *removed_count) -{ - int passedexpired = 0; - struct rtable **nextstep = NULL; - struct rtable **rthp = chain_head; - struct rtable *rth; - - if (removed_count) - *removed_count = 0; - - while ((rth = *rthp) != NULL) { - if (rth == expentry) - passedexpired = 1; - - if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 && - compare_keys(&(*rthp)->fl, &expentry->fl)) { - if (*rthp == expentry) { - *rthp = rth->u.dst.rt_next; - continue; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - if (removed_count) - ++(*removed_count); - } - } else { - if (!((*rthp)->u.dst.flags & DST_BALANCED) && - passedexpired && !nextstep) - nextstep = &rth->u.dst.rt_next; - - rthp = &rth->u.dst.rt_next; - } - } - - rt_free(expentry); - if (removed_count) - ++(*removed_count); - - return nextstep; -} -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - - /* This runs via a timer and thus is always in BH context. */ static void rt_check_expire(unsigned long dummy) { @@ -658,22 +609,8 @@ static void rt_check_expire(unsigned long dummy) } /* Cleanup aged off entries. */ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - /* remove all related balanced entries if necessary */ - if (rth->u.dst.flags & DST_BALANCED) { - rthp = rt_remove_balanced_route( - &rt_hash_table[i].chain, - rth, NULL); - if (!rthp) - break; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ *rthp = rth->u.dst.rt_next; rt_free(rth); -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ } spin_unlock(rt_hash_lock_addr(i)); @@ -721,9 +658,6 @@ void rt_cache_flush(int delay) if (delay < 0) delay = ip_rt_min_delay; - /* flush existing multipath state*/ - multipath_flush(); - spin_lock_bh(&rt_flush_lock); if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { @@ -842,30 +776,9 @@ static int rt_garbage_collect(void) rthp = &rth->u.dst.rt_next; continue; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - /* remove all related balanced entries - * if necessary - */ - if (rth->u.dst.flags & DST_BALANCED) { - int r; - - rthp = rt_remove_balanced_route( - &rt_hash_table[k].chain, - rth, - &r); - goal -= r; - if (!rthp) - break; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - goal--; - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ *rthp = rth->u.dst.rt_next; rt_free(rth); goal--; -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ } spin_unlock_bh(rt_hash_lock_addr(k)); if (goal <= 0) @@ -939,12 +852,7 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (!(rth->u.dst.flags & DST_BALANCED) && - compare_keys(&rth->fl, &rt->fl)) { -#else if (compare_keys(&rth->fl, &rt->fl)) { -#endif /* Put it first */ *rthp = rth->u.dst.rt_next; /* @@ -1774,10 +1682,6 @@ static inline int __mkroute_input(struct sk_buff *skb, atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (res->fi->fib_nhs > 1) - rth->u.dst.flags |= DST_BALANCED; -#endif if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) @@ -1812,11 +1716,11 @@ static inline int __mkroute_input(struct sk_buff *skb, return err; } -static inline int ip_mkroute_input_def(struct sk_buff *skb, - struct fib_result* res, - const struct flowi *fl, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) +static inline int ip_mkroute_input(struct sk_buff *skb, + struct fib_result* res, + const struct flowi *fl, + struct in_device *in_dev, + __be32 daddr, __be32 saddr, u32 tos) { struct rtable* rth = NULL; int err; @@ -1837,63 +1741,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } -static inline int ip_mkroute_input(struct sk_buff *skb, - struct fib_result* res, - const struct flowi *fl, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct rtable* rth = NULL, *rtres; - unsigned char hop, hopcount; - int err = -EINVAL; - unsigned int hash; - - if (res->fi) - hopcount = res->fi->fib_nhs; - else - hopcount = 1; - - /* distinguish between multipath and singlepath */ - if (hopcount < 2) - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, - saddr, tos); - - /* add all alternatives to the routing cache */ - for (hop = 0; hop < hopcount; hop++) { - res->nh_sel = hop; - - /* put reference to previous result */ - if (hop) - ip_rt_put(rtres); - - /* create a routing cache entry */ - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, - &rth); - if (err) - return err; - - /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif); - err = rt_intern_hash(hash, rth, &rtres); - if (err) - return err; - - /* forward hop information to multipath impl. */ - multipath_set_nhinfo(rth, - FIB_RES_NETWORK(*res), - FIB_RES_NETMASK(*res), - res->prefixlen, - &FIB_RES_NH(*res)); - } - skb->dst = &rtres->u.dst; - return err; -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ -} - - /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@ -2211,13 +2058,6 @@ static inline int __mkroute_output(struct rtable **result, atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (res->fi) { - rth->rt_multipath_alg = res->fi->fib_mp_alg; - if (res->fi->fib_nhs > 1) - rth->u.dst.flags |= DST_BALANCED; - } -#endif if (IN_DEV_CONF_GET(in_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) @@ -2277,12 +2117,12 @@ static inline int __mkroute_output(struct rtable **result, return err; } -static inline int ip_mkroute_output_def(struct rtable **rp, - struct fib_result* res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) +static inline int ip_mkroute_output(struct rtable **rp, + struct fib_result* res, + const struct flowi *fl, + const struct flowi *oldflp, + struct net_device *dev_out, + unsigned flags) { struct rtable *rth = NULL; int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); @@ -2295,68 +2135,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp, return err; } -static inline int ip_mkroute_output(struct rtable** rp, - struct fib_result* res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - unsigned char hop; - unsigned hash; - int err = -EINVAL; - struct rtable *rth = NULL; - - if (res->fi && res->fi->fib_nhs > 1) { - unsigned char hopcount = res->fi->fib_nhs; - - for (hop = 0; hop < hopcount; hop++) { - struct net_device *dev2nexthop; - - res->nh_sel = hop; - - /* hold a work reference to the output device */ - dev2nexthop = FIB_RES_DEV(*res); - dev_hold(dev2nexthop); - - /* put reference to previous result */ - if (hop) - ip_rt_put(*rp); - - err = __mkroute_output(&rth, res, fl, oldflp, - dev2nexthop, flags); - - if (err != 0) - goto cleanup; - - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, - oldflp->oif); - err = rt_intern_hash(hash, rth, rp); - - /* forward hop information to multipath impl. */ - multipath_set_nhinfo(rth, - FIB_RES_NETWORK(*res), - FIB_RES_NETMASK(*res), - res->prefixlen, - &FIB_RES_NH(*res)); - cleanup: - /* release work reference to output device */ - dev_put(dev2nexthop); - - if (err != 0) - return err; - } - return err; - } else { - return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, - flags); - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags); -#endif -} - /* * Major route resolver routine. */ @@ -2570,17 +2348,6 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { - - /* check for multipath routes and choose one if - * necessary - */ - if (multipath_select_route(flp, rth, rp)) { - dst_hold(&(*rp)->u.dst); - RT_CACHE_STAT_INC(out_hit); - rcu_read_unlock_bh(); - return 0; - } - rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); rth->u.dst.__use++; @@ -2728,10 +2495,6 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, #ifdef CONFIG_NET_CLS_ROUTE if (rt->u.dst.tclassid) NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rt->rt_multipath_alg != IP_MP_ALG_NONE) - NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); #endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); -- cgit v1.2.3 From f3ec75f627c746cfe460482d38a33b06a84d038f Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Sun, 10 Jun 2007 17:24:20 -0700 Subject: [TIPC]: Improved support for Ethernet traffic filtering This patch simplifies TIPC's Ethernet receive routine to take advantage of information already present in each incoming sk_buff indicating whether the packet was explicitly sent to the interface, has been broadcast to all interfaces, or was picked up because the interface is in promiscous mode. This new approach also fixes the problem of TIPC accepting unwanted traffic through UML's multicast-based Ethernet interfaces (which deliver traffic in a promiscuous manner even if the interface is not configured to be promiscuous). Signed-off-by: Allan Stephens Signed-off-by: Jon Paul Maloy Signed-off-by: David S. Miller --- net/tipc/eth_media.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 77d2d9ce8962..711ca4b1f051 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -1,8 +1,8 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -87,6 +87,9 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, /** * recv_msg - handle incoming TIPC message from an Ethernet interface * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using Ethernet multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). * Routine truncates any Ethernet padding/CRC appended to the message, * and ensures message size matches actual length */ @@ -98,9 +101,7 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, u32 size; if (likely(eb_ptr->bearer)) { - if (likely(!dev->promiscuity) || - !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) || - !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) { + if (likely(buf->pkt_type <= PACKET_BROADCAST)) { size = msg_size((struct tipc_msg *)buf->data); skb_trim(buf, size); if (likely(buf->len == size)) { -- cgit v1.2.3 From 5eee6a6dc945acc5bf4da12956b2f698bbb102b9 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Sun, 10 Jun 2007 17:24:55 -0700 Subject: [TIPC]: Use standard socket "not implemented" routines This patch modifies TIPC's socket API to utilize existing generic routines to indicate unsupported operations, rather than adding similar TIPC-specific routines. Signed-off-by: Allan Stephens Signed-off-by: Jon Paul Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 55 ++++++++++++++----------------------------------------- 1 file changed, 14 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 45832fb75ea4..ac7f2aacc77b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,8 +1,8 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1599,33 +1599,6 @@ static int getsockopt(struct socket *sock, return res; } -/** - * Placeholders for non-implemented functionality - * - * Returns error code (POSIX-compliant where defined) - */ - -static int ioctl(struct socket *s, u32 cmd, unsigned long arg) -{ - return -EINVAL; -} - -static int no_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - return -EINVAL; -} -static ssize_t no_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - return -EINVAL; -} - -static int no_skpair(struct socket *s1, struct socket *s2) -{ - return -EOPNOTSUPP; -} - /** * Protocol switches for the various types of TIPC sockets */ @@ -1636,19 +1609,19 @@ static struct proto_ops msg_ops = { .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_msg, .recvmsg = recv_msg, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage }; static struct proto_ops packet_ops = { @@ -1657,19 +1630,19 @@ static struct proto_ops packet_ops = { .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_packet, .recvmsg = recv_msg, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage }; static struct proto_ops stream_ops = { @@ -1678,19 +1651,19 @@ static struct proto_ops stream_ops = { .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_stream, .recvmsg = recv_stream, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage }; static struct net_proto_family tipc_family_ops = { -- cgit v1.2.3 From 05646c91109bfd129361d57dc5d98464ab6f6578 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Sun, 10 Jun 2007 17:25:24 -0700 Subject: [TIPC]: Optimize stream send routine to avoid fragmentation This patch enhances TIPC's stream socket send routine so that it avoids transmitting data in chunks that require fragmentation and reassembly, thereby improving performance at both the sending and receiving ends of the connection. The "maximum packet size" hint that records MTU info allows the socket to decide how big a chunk it should send; in the event that the hint has become stale, fragmentation may still occur, but the data will be passed correctly and the hint will be updated in time for the following send. Note: The 66060 byte pseudo-MTU used for intra-node connections requires the send routine to perform an additional check to ensure it does not exceed TIPC"s limit of 66000 bytes of user data per chunk. Signed-off-by: Allan Stephens Signed-off-by: Jon Paul Maloy Signed-off-by: David S. Miller --- include/net/tipc/tipc_port.h | 6 ++++-- net/tipc/link.c | 16 ++++++++-------- net/tipc/port.c | 10 +++++----- net/tipc/port.h | 6 ++---- net/tipc/socket.c | 25 +++++++++++++++++-------- 5 files changed, 36 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h index 333bba6dc522..cfc4ba46de8f 100644 --- a/include/net/tipc/tipc_port.h +++ b/include/net/tipc/tipc_port.h @@ -1,8 +1,8 @@ /* * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports * - * Copyright (c) 1994-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 1994-2007, Ericsson AB + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,7 @@ * @conn_unacked: number of unacknowledged messages received from peer port * @published: non-zero if port has one or more associated names * @congested: non-zero if cannot send because of link or port congestion + * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages */ @@ -68,6 +69,7 @@ struct tipc_port { u32 conn_unacked; int published; u32 congested; + u32 max_pkt; u32 ref; struct tipc_msg phdr; }; diff --git a/net/tipc/link.c b/net/tipc/link.c index 2124f32ef29f..5adfdfd49d61 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,8 +1,8 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 1996-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1260,7 +1260,7 @@ again: * (Must not hold any locks while building message.) */ - res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt, + res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, !sender->user_port, &buf); read_lock_bh(&tipc_net_lock); @@ -1271,7 +1271,7 @@ again: if (likely(l_ptr)) { if (likely(buf)) { res = link_send_buf_fast(l_ptr, buf, - &sender->max_pkt); + &sender->publ.max_pkt); if (unlikely(res < 0)) buf_discard(buf); exit: @@ -1299,12 +1299,12 @@ exit: * then re-try fast path or fragment the message */ - sender->max_pkt = link_max_pkt(l_ptr); + sender->publ.max_pkt = link_max_pkt(l_ptr); tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); - if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) + if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt) goto again; return link_send_sections_long(sender, msg_sect, @@ -1357,7 +1357,7 @@ static int link_send_sections_long(struct port *sender, again: fragm_no = 1; - max_pkt = sender->max_pkt - INT_H_SIZE; + max_pkt = sender->publ.max_pkt - INT_H_SIZE; /* leave room for tunnel header in case of link changeover */ fragm_sz = max_pkt - INT_H_SIZE; /* leave room for fragmentation header in each fragment */ @@ -1463,7 +1463,7 @@ error: goto reject; } if (link_max_pkt(l_ptr) < max_pkt) { - sender->max_pkt = link_max_pkt(l_ptr); + sender->publ.max_pkt = link_max_pkt(l_ptr); tipc_node_unlock(node); for (; buf_chain; buf_chain = buf) { buf = buf_chain->next; diff --git a/net/tipc/port.c b/net/tipc/port.c index bcd5da00737b..5d2b9ce84d0a 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -1,8 +1,8 @@ /* * net/tipc/port.c: TIPC port code * - * Copyright (c) 1992-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 1992-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -239,6 +239,8 @@ u32 tipc_createport_raw(void *usr_handle, } tipc_port_lock(ref); + p_ptr->publ.usr_handle = usr_handle; + p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0); @@ -248,11 +250,9 @@ u32 tipc_createport_raw(void *usr_handle, msg_set_importance(msg,importance); p_ptr->last_in_seqno = 41; p_ptr->sent = 1; - p_ptr->publ.usr_handle = usr_handle; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->congested_link = NULL; - p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->dispatcher = dispatcher; p_ptr->wakeup = wakeup; p_ptr->user_port = NULL; @@ -1243,7 +1243,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer) res = TIPC_OK; exit: tipc_port_unlock(p_ptr); - p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); return res; } diff --git a/net/tipc/port.h b/net/tipc/port.h index 7ef4d64b32f7..e5f8c16429bd 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -1,8 +1,8 @@ /* * net/tipc/port.h: Include file for TIPC port code * - * Copyright (c) 1994-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 1994-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -81,7 +81,6 @@ struct user_port { * @acked: * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime - * @max_pkt: maximum packet size "hint" used when building messages sent by port * @probing_state: * @probing_interval: * @last_in_seqno: @@ -102,7 +101,6 @@ struct port { u32 acked; struct list_head publications; u32 pub_count; - u32 max_pkt; u32 probing_state; u32 probing_interval; u32 last_in_seqno; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ac7f2aacc77b..4a8f37f48764 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -607,23 +607,24 @@ exit: static int send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { + struct tipc_port *tport; struct msghdr my_msg; struct iovec my_iov; struct iovec *curr_iov; int curr_iovlen; char __user *curr_start; + u32 hdr_size; int curr_left; int bytes_to_send; int bytes_sent; int res; - if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE)) - return send_packet(iocb, sock, m, total_len); - - /* Can only send large data streams if already connected */ + /* Handle special cases where there is no connection */ if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) + if (sock->state == SS_UNCONNECTED) + return send_packet(iocb, sock, m, total_len); + else if (sock->state == SS_DISCONNECTING) return -EPIPE; else return -ENOTCONN; @@ -648,17 +649,25 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, my_msg.msg_name = NULL; bytes_sent = 0; + tport = tipc_sk(sock->sk)->p; + hdr_size = msg_hdr_sz(&tport->phdr); + while (curr_iovlen--) { curr_start = curr_iov->iov_base; curr_left = curr_iov->iov_len; while (curr_left) { - bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE) - ? curr_left : TIPC_MAX_USER_MSG_SIZE; + bytes_to_send = tport->max_pkt - hdr_size; + if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) + bytes_to_send = TIPC_MAX_USER_MSG_SIZE; + if (curr_left < bytes_to_send) + bytes_to_send = curr_left; my_iov.iov_base = curr_start; my_iov.iov_len = bytes_to_send; if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) { - return bytes_sent ? bytes_sent : res; + if (bytes_sent != 0) + res = bytes_sent; + return res; } curr_left -= bytes_to_send; curr_start += bytes_to_send; -- cgit v1.2.3 From c716a81ab946c68a8d84022ee32eb14674e72650 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 10 Jun 2007 17:31:24 -0700 Subject: [NET_SCHED]: Cleanup readability of qdisc restart Over the years this code has gotten hairier. Resulting in many long discussions over long summer days and patches that get it wrong. This patch helps tame that code so normal people will understand it. Thanks to Thomas Graf, Peter J. waskiewicz Jr, and Patrick McHardy for their valuable reviews. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 199 ++++++++++++++++++++++++++---------------------- 1 file changed, 109 insertions(+), 90 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f4d34480a093..9461e8ae0529 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -34,6 +34,9 @@ #include #include +#define SCHED_TX_DROP -2 +#define SCHED_TX_QUEUE -3 + /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with @@ -59,7 +62,74 @@ void qdisc_unlock_tree(struct net_device *dev) spin_unlock_bh(&dev->queue_lock); } +static inline int qdisc_qlen(struct Qdisc *q) +{ + BUG_ON((int) q->q.qlen < 0); + return q->q.qlen; +} + +static inline int handle_dev_cpu_collision(struct net_device *dev) +{ + if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + if (net_ratelimit()) + printk(KERN_WARNING + "Dead loop on netdevice %s, fix it urgently!\n", + dev->name); + return SCHED_TX_DROP; + } + __get_cpu_var(netdev_rx_stat).cpu_collision++; + return SCHED_TX_QUEUE; +} + +static inline int +do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) +{ + + if (unlikely(skb->next)) + dev->gso_skb = skb; + else + q->ops->requeue(skb, q); + /* XXX: Could netif_schedule fail? Or is the fact we are + * requeueing imply the hardware path is closed + * and even if we fail, some interupt will wake us + */ + netif_schedule(dev); + return 0; +} + +static inline struct sk_buff * +try_get_tx_pkt(struct net_device *dev, struct Qdisc *q) +{ + struct sk_buff *skb = dev->gso_skb; + + if (skb) + dev->gso_skb = NULL; + else + skb = q->dequeue(q); + + return skb; +} + +static inline int +tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) +{ + int ret = handle_dev_cpu_collision(dev); + + if (ret == SCHED_TX_DROP) { + kfree_skb(skb); + return qdisc_qlen(q); + } + + return do_dev_requeue(skb, dev, q); +} + + /* + NOTE: Called under dev->queue_lock with locally disabled BH. + + __LINK_STATE_QDISC_RUNNING guarantees only one CPU + can enter this region at a time. + dev->queue_lock serializes queue accesses for this device AND dev->qdisc pointer itself. @@ -67,116 +137,65 @@ void qdisc_unlock_tree(struct net_device *dev) dev->queue_lock and netif_tx_lock are mutually exclusive, if one is grabbed, another must be free. - */ + Multiple CPUs may contend for the two locks. -/* Kick device. + Note, that this procedure can be called by a watchdog timer + Returns to the caller: Returns: 0 - queue is empty or throttled. >0 - queue is not empty. - NOTE: Called under dev->queue_lock with locally disabled BH. */ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; + unsigned lockless = (dev->features & NETIF_F_LLTX); struct sk_buff *skb; + int ret; - /* Dequeue packet */ - if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { - unsigned nolock = (dev->features & NETIF_F_LLTX); - - dev->gso_skb = NULL; + skb = try_get_tx_pkt(dev, q); + if (skb == NULL) + return 0; - /* - * When the driver has LLTX set it does its own locking - * in start_xmit. No need to add additional overhead by - * locking again. These checks are worth it because - * even uncongested locks can be quite expensive. - * The driver can do trylock like here too, in case - * of lock congestion it should return -1 and the packet - * will be requeued. - */ - if (!nolock) { - if (!netif_tx_trylock(dev)) { - collision: - /* So, someone grabbed the driver. */ - - /* It may be transient configuration error, - when hard_start_xmit() recurses. We detect - it by checking xmit owner and drop the - packet when deadloop is detected. - */ - if (dev->xmit_lock_owner == smp_processor_id()) { - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); - goto out; - } - __get_cpu_var(netdev_rx_stat).cpu_collision++; - goto requeue; - } - } + /* we have a packet to send */ + if (!lockless) { + if (!netif_tx_trylock(dev)) + return tx_islocked(skb, dev, q); + } + /* all clear .. */ + spin_unlock(&dev->queue_lock); - { - /* And release queue */ - spin_unlock(&dev->queue_lock); - - if (!netif_queue_stopped(dev)) { - int ret; - - ret = dev_hard_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) { - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto out; - } - if (ret == NETDEV_TX_LOCKED && nolock) { - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto collision; - } - } + ret = NETDEV_TX_BUSY; + if (!netif_queue_stopped(dev)) + /* churn baby churn .. */ + ret = dev_hard_start_xmit(skb, dev); - /* NETDEV_TX_BUSY - we need to requeue */ - /* Release the driver */ - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - } + if (!lockless) + netif_tx_unlock(dev); - /* Device kicked us out :( - This is possible in three cases: - - 0. driver is locked - 1. fastroute is enabled - 2. device cannot determine busy state - before start of transmission (f.e. dialout) - 3. device is buggy (ppp) - */ - -requeue: - if (unlikely(q == &noop_qdisc)) - kfree_skb(skb); - else if (skb->next) - dev->gso_skb = skb; - else - q->ops->requeue(skb, q); - netif_schedule(dev); - } - return 0; + spin_lock(&dev->queue_lock); -out: - BUG_ON((int) q->q.qlen < 0); - return q->q.qlen; + /* we need to refresh q because it may be invalid since + * we dropped dev->queue_lock earlier ... + * So dont try to be clever grasshopper + */ + q = dev->qdisc; + /* most likely result, packet went ok */ + if (ret == NETDEV_TX_OK) + return qdisc_qlen(q); + /* only for lockless drivers .. */ + if (ret == NETDEV_TX_LOCKED && lockless) + return tx_islocked(skb, dev, q); + + if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) + printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen); + + return do_dev_requeue(skb, dev, q); } + void __qdisc_run(struct net_device *dev) { do { -- cgit v1.2.3 From a7ab4b501f9b8a9dc4d5cee542db67b6ccd1088b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 10 Jun 2007 17:33:08 -0700 Subject: [TCPv4]: Improve BH latency in /proc/net/tcp Currently the code for /proc/net/tcp disable BH while iterating over the entire established hash table. Even though we call cond_resched_softirq for each entry, we still won't process softirq's as regularly as we would otherwise do which results in poor performance when the system is loaded near capacity. This anomaly comes from the 2.4 code where this was all in a single function and the local_bh_disable might have made sense as a small optimisation. The cost of each local_bh_disable is so small when compared against the increased latency in keeping it disabled over a large but mostly empty TCP established hash table that we should just move it to the individual read_lock/read_unlock calls as we do in inet_diag. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 354721d67f69..3f5f7423b95c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2045,10 +2045,7 @@ static void *established_get_first(struct seq_file *seq) struct hlist_node *node; struct inet_timewait_sock *tw; - /* We can reschedule _before_ having picked the target: */ - cond_resched_softirq(); - - read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family) { continue; @@ -2065,7 +2062,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2092,14 +2089,11 @@ get_tw: cur = tw; goto out; } - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; - /* We can reschedule between buckets: */ - cond_resched_softirq(); - if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else { cur = NULL; @@ -2144,7 +2138,6 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) if (!rc) { inet_listen_unlock(&tcp_hashinfo); - local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); } @@ -2177,7 +2170,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) rc = listening_get_next(seq, v); if (!rc) { inet_listen_unlock(&tcp_hashinfo); - local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); } @@ -2209,8 +2201,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); - local_bh_enable(); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); break; } } -- cgit v1.2.3 From 8de0a15483b357d0f0b821330ec84d1660cadc4e Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Wed, 11 Jul 2007 09:23:41 +0200 Subject: [Bluetooth] Keep rfcomm_dev on the list until it is freed This patch changes the RFCOMM TTY release process so that the TTY is kept on the list until it is really freed. A new device flag is used to keep track of released TTYs. Signed-off-by: Ville Tervo Signed-off-by: Marcel Holtmann --- include/net/bluetooth/rfcomm.h | 1 + net/bluetooth/rfcomm/tty.c | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 3c563f02907c..25aa575db807 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -323,6 +323,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc #define RFCOMM_RELEASE_ONHUP 1 #define RFCOMM_HANGUP_NOW 2 #define RFCOMM_TTY_ATTACHED 3 +#define RFCOMM_TTY_RELEASED 4 struct rfcomm_dev_req { s16 dev_id; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index ba469b038ea0..23ba61a13bdd 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -95,6 +95,10 @@ static void rfcomm_dev_destruct(struct rfcomm_dev *dev) BT_DBG("dev %p dlc %p", dev, dlc); + write_lock_bh(&rfcomm_dev_lock); + list_del_init(&dev->list); + write_unlock_bh(&rfcomm_dev_lock); + rfcomm_dlc_lock(dlc); /* Detach DLC if it's owned by this dev */ if (dlc->owner == dev) @@ -156,8 +160,13 @@ static inline struct rfcomm_dev *rfcomm_dev_get(int id) read_lock(&rfcomm_dev_lock); dev = __rfcomm_dev_get(id); - if (dev) - rfcomm_dev_hold(dev); + + if (dev) { + if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) + dev = NULL; + else + rfcomm_dev_hold(dev); + } read_unlock(&rfcomm_dev_lock); @@ -265,6 +274,12 @@ out: dev->tty_dev = tty_register_device(rfcomm_tty_driver, dev->id, NULL); + if (IS_ERR(dev->tty_dev)) { + list_del(&dev->list); + kfree(dev); + return PTR_ERR(dev->tty_dev); + } + return dev->id; } @@ -272,10 +287,7 @@ static void rfcomm_dev_del(struct rfcomm_dev *dev) { BT_DBG("dev %p", dev); - write_lock_bh(&rfcomm_dev_lock); - list_del_init(&dev->list); - write_unlock_bh(&rfcomm_dev_lock); - + set_bit(RFCOMM_TTY_RELEASED, &dev->flags); rfcomm_dev_put(dev); } @@ -329,7 +341,7 @@ static int rfcomm_create_dev(struct sock *sk, void __user *arg) if (copy_from_user(&req, arg, sizeof(req))) return -EFAULT; - BT_DBG("sk %p dev_id %id flags 0x%x", sk, req.dev_id, req.flags); + BT_DBG("sk %p dev_id %d flags 0x%x", sk, req.dev_id, req.flags); if (req.flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN)) return -EPERM; @@ -370,7 +382,7 @@ static int rfcomm_release_dev(void __user *arg) if (copy_from_user(&req, arg, sizeof(req))) return -EFAULT; - BT_DBG("dev_id %id flags 0x%x", req.dev_id, req.flags); + BT_DBG("dev_id %d flags 0x%x", req.dev_id, req.flags); if (!(dev = rfcomm_dev_get(req.dev_id))) return -ENODEV; @@ -419,6 +431,8 @@ static int rfcomm_get_dev_list(void __user *arg) list_for_each(p, &rfcomm_dev_list) { struct rfcomm_dev *dev = list_entry(p, struct rfcomm_dev, list); + if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) + continue; (di + n)->id = dev->id; (di + n)->flags = dev->flags; (di + n)->state = dev->dlc->state; -- cgit v1.2.3 From b3d88ad49a0623d09efcf998beb26288c8029f75 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 10 Jun 2007 17:57:33 -0700 Subject: [MAC80211]: Add support for SIOCGIWRATE ioctl At present, transmission rate information for mac80211 is available only if verbose debugging is turned on, and then only in the logs. This patch implements the SIOCGIWRATE ioctl, which adds the current transmission rate to the output of iwconfig. Signed-off-by: Larry Finger Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- net/mac80211/ieee80211_ioctl.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 352f03bd8a3a..66e8a976b311 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -838,6 +838,29 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev, } +static int ieee80211_ioctl_giwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->type == IEEE80211_IF_TYPE_STA) + sta = sta_info_get(local, sdata->u.sta.bssid); + else + return -EOPNOTSUPP; + if (!sta) + return -ENODEV; + if (sta->txrate < local->oper_hw_mode->num_rates) + rate->value = local->oper_hw_mode->rates[sta->txrate].rate * 100000; + else + rate->value = 0; + sta_info_put(sta); + return 0; +} + static int ieee80211_ioctl_siwrts(struct net_device *dev, struct iw_request_info *info, struct iw_param *rts, char *extra) @@ -1779,7 +1802,7 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* SIOCSIWRATE */ - (iw_handler) NULL, /* SIOCGIWRATE */ + (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */ (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */ (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */ -- cgit v1.2.3 From 0157f60c0caea24fa8347f4c0ed53297c412fce1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:03:36 -0700 Subject: [RTNETLINK]: Split up rtnl_setlink Split up rtnl_setlink into a function performing validation and a function performing the actual changes. This allows to share the modifcation logic with rtnl_newlink, which is introduced by the next patch. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 105 ++++++++++++++++++++++++++++----------------------- 1 file changed, 57 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02e8bf084277..25ca219154e0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -561,44 +561,11 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, }; -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, + struct nlattr **tb, char *ifname) { - struct ifinfomsg *ifm; - struct net_device *dev; - int err, send_addr_notify = 0, modified = 0; - struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - goto errout; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - - err = -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); - else - goto errout; - - if (dev == NULL) { - err = -ENODEV; - goto errout; - } - - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - goto errout_dev; - - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - goto errout_dev; + int modified = 0, send_addr_notify = 0; + int err; if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; @@ -606,12 +573,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev->set_config) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } u_map = nla_data(tb[IFLA_MAP]); @@ -624,7 +591,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = dev->set_config(dev, &k_map); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -635,19 +602,19 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev->set_mac_address) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; - goto errout_dev; + goto errout; } sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), @@ -655,7 +622,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = dev->set_mac_address(dev, sa); kfree(sa); if (err) - goto errout_dev; + goto errout; send_addr_notify = 1; modified = 1; } @@ -663,7 +630,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (tb[IFLA_MTU]) { err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -675,7 +642,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (ifm->ifi_index > 0 && ifname[0]) { err = dev_change_name(dev, ifname); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -684,7 +651,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) send_addr_notify = 1; } - if (ifm->ifi_flags || ifm->ifi_change) { unsigned int flags = ifm->ifi_flags; @@ -712,7 +678,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = 0; -errout_dev: +errout: if (err < 0 && modified && net_ratelimit()) printk(KERN_WARNING "A link change request failed with " "some changes comitted already. Interface %s may " @@ -721,7 +687,50 @@ errout_dev: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct ifinfomsg *ifm; + struct net_device *dev; + int err; + struct nlattr *tb[IFLA_MAX+1]; + char ifname[IFNAMSIZ]; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + goto errout; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + err = -EINVAL; + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = dev_get_by_name(ifname); + else + goto errout; + + if (dev == NULL) { + err = -ENODEV; + goto errout; + } + + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + goto errout_dev; + + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + goto errout_dev; + + err = do_setlink(dev, ifm, tb, ifname); +errout_dev: dev_put(dev); errout: return err; -- cgit v1.2.3 From 38f7b870d4a6a5d3ec21557e849620cb7d032965 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:03:51 -0700 Subject: [RTNETLINK]: Link creation API Add rtnetlink API for creating, changing and deleting software devices. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_link.h | 13 ++ include/linux/netdevice.h | 3 + include/net/rtnetlink.h | 58 ++++++++ net/core/rtnetlink.c | 342 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 409 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 604c2434f71c..3144babd2357 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -76,6 +76,8 @@ enum #define IFLA_WEIGHT IFLA_WEIGHT IFLA_OPERSTATE, IFLA_LINKMODE, + IFLA_LINKINFO, +#define IFLA_LINKINFO IFLA_LINKINFO __IFLA_MAX }; @@ -140,4 +142,15 @@ struct ifla_cacheinfo __u32 retrans_time; }; +enum +{ + IFLA_INFO_UNSPEC, + IFLA_INFO_KIND, + IFLA_INFO_DATA, + IFLA_INFO_XSTATS, + __IFLA_INFO_MAX, +}; + +#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) + #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94cc77cd3aa3..e7913ee5581c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -540,6 +540,9 @@ struct net_device struct device dev; /* space for optional statistics and wireless sysfs groups */ struct attribute_group *sysfs_groups[3]; + + /* rtnetlink link ops */ + const struct rtnl_link_ops *rtnl_link_ops; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 3b3d4745618d..3861c05cdf0f 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -22,4 +22,62 @@ static inline int rtnl_msg_family(struct nlmsghdr *nlh) return AF_UNSPEC; } +/** + * struct rtnl_link_ops - rtnetlink link operations + * + * @list: Used internally + * @kind: Identifier + * @maxtype: Highest device specific netlink attribute number + * @policy: Netlink policy for device specific attribute validation + * @validate: Optional validation function for netlink/changelink parameters + * @priv_size: sizeof net_device private space + * @setup: net_device setup function + * @newlink: Function for configuring and registering a new device + * @changelink: Function for changing parameters of an existing device + * @dellink: Function to remove a device + * @get_size: Function to calculate required room for dumping device + * specific netlink attributes + * @fill_info: Function to dump device specific netlink attributes + * @get_xstats_size: Function to calculate required room for dumping devic + * specific statistics + * @fill_xstats: Function to dump device specific statistics + */ +struct rtnl_link_ops { + struct list_head list; + + const char *kind; + + size_t priv_size; + void (*setup)(struct net_device *dev); + + int maxtype; + const struct nla_policy *policy; + int (*validate)(struct nlattr *tb[], + struct nlattr *data[]); + + int (*newlink)(struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]); + int (*changelink)(struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]); + void (*dellink)(struct net_device *dev); + + size_t (*get_size)(const struct net_device *dev); + int (*fill_info)(struct sk_buff *skb, + const struct net_device *dev); + + size_t (*get_xstats_size)(const struct net_device *dev); + int (*fill_xstats)(struct sk_buff *skb, + const struct net_device *dev); +}; + +extern int __rtnl_link_register(struct rtnl_link_ops *ops); +extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); + +extern int rtnl_link_register(struct rtnl_link_ops *ops); +extern void rtnl_link_unregister(struct rtnl_link_ops *ops); + +#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) + #endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 25ca219154e0..06c0c5afabf0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -243,6 +243,143 @@ void rtnl_unregister_all(int protocol) EXPORT_SYMBOL_GPL(rtnl_unregister_all); +static LIST_HEAD(link_ops); + +/** + * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * The caller must hold the rtnl_mutex. This function should be used + * by drivers that create devices during module initialization. It + * must be called before registering the devices. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_link_register(struct rtnl_link_ops *ops) +{ + list_add_tail(&ops->list, &link_ops); + return 0; +} + +EXPORT_SYMBOL_GPL(__rtnl_link_register); + +/** + * rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_link_register(struct rtnl_link_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_link_register(ops); + rtnl_unlock(); + return err; +} + +EXPORT_SYMBOL_GPL(rtnl_link_register); + +/** + * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + * + * The caller must hold the rtnl_mutex. This function should be used + * by drivers that unregister devices during module unloading. It must + * be called after unregistering the devices. + */ +void __rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + list_del(&ops->list); +} + +EXPORT_SYMBOL_GPL(__rtnl_link_unregister); + +/** + * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + */ +void rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + rtnl_lock(); + __rtnl_link_unregister(ops); + rtnl_unlock(); +} + +EXPORT_SYMBOL_GPL(rtnl_link_unregister); + +static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +{ + const struct rtnl_link_ops *ops; + + list_for_each_entry(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) + return ops; + } + return NULL; +} + +static size_t rtnl_link_get_size(const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + size_t size; + + if (!ops) + return 0; + + size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ + nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ + + if (ops->get_size) + /* IFLA_INFO_DATA + nested data */ + size += nlmsg_total_size(sizeof(struct nlattr)) + + ops->get_size(dev); + + if (ops->get_xstats_size) + size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ + + return size; +} + +static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + struct nlattr *linkinfo, *data; + int err = -EMSGSIZE; + + linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + if (linkinfo == NULL) + goto out; + + if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) + goto err_cancel_link; + if (ops->fill_xstats) { + err = ops->fill_xstats(skb, dev); + if (err < 0) + goto err_cancel_link; + } + if (ops->fill_info) { + data = nla_nest_start(skb, IFLA_INFO_DATA); + if (data == NULL) + goto err_cancel_link; + err = ops->fill_info(skb, dev); + if (err < 0) + goto err_cancel_data; + nla_nest_end(skb, data); + } + + nla_nest_end(skb, linkinfo); + return 0; + +err_cancel_data: + nla_nest_cancel(skb, data); +err_cancel_link: + nla_nest_cancel(skb, linkinfo); +out: + return err; +} + static const int rtm_min[RTM_NR_FAMILIES] = { [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), @@ -437,7 +574,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; -static inline size_t if_nlmsg_size(void) +static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -452,7 +589,8 @@ static inline size_t if_nlmsg_size(void) + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1); /* IFLA_LINKMODE */ + + nla_total_size(1) /* IFLA_LINKMODE */ + + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, @@ -522,6 +660,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } } + if (dev->rtnl_link_ops) { + if (rtnl_link_fill(skb, dev) < 0) + goto nla_put_failure; + } + return nlmsg_end(skb, nlh); nla_put_failure: @@ -553,6 +696,8 @@ cont: static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, + [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, + [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, @@ -561,10 +706,15 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, }; +static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { + [IFLA_INFO_KIND] = { .type = NLA_STRING }, + [IFLA_INFO_DATA] = { .type = NLA_NESTED }, +}; + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, - struct nlattr **tb, char *ifname) + struct nlattr **tb, char *ifname, int modified) { - int modified = 0, send_addr_notify = 0; + int send_addr_notify = 0; int err; if (tb[IFLA_MAP]) { @@ -729,13 +879,189 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) goto errout_dev; - err = do_setlink(dev, ifm, tb, ifname); + err = do_setlink(dev, ifm, tb, ifname, 0); errout_dev: dev_put(dev); errout: return err; } +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + int err; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = __dev_get_by_name(ifname); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + ops = dev->rtnl_link_ops; + if (!ops) + return -EOPNOTSUPP; + + ops->dellink(dev); + return 0; +} + +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char kind[MODULE_NAME_LEN]; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + struct nlattr *linkinfo[IFLA_INFO_MAX+1]; + int err; + +replay: + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(ifm->ifi_index); + else if (ifname[0]) + dev = __dev_get_by_name(ifname); + else + dev = NULL; + + if (tb[IFLA_LINKINFO]) { + err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], ifla_info_policy); + if (err < 0) + return err; + } else + memset(linkinfo, 0, sizeof(linkinfo)); + + if (linkinfo[IFLA_INFO_KIND]) { + nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind); + } else { + kind[0] = '\0'; + ops = NULL; + } + + if (1) { + struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; + + if (ops) { + if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { + err = nla_parse_nested(attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy); + if (err < 0) + return err; + data = attr; + } + if (ops->validate) { + err = ops->validate(tb, data); + if (err < 0) + return err; + } + } + + if (dev) { + int modified = 0; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (linkinfo[IFLA_INFO_DATA]) { + if (!ops || ops != dev->rtnl_link_ops || + !ops->changelink) + return -EOPNOTSUPP; + + err = ops->changelink(dev, tb, data); + if (err < 0) + return err; + modified = 1; + } + + return do_setlink(dev, ifm, tb, ifname, modified); + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENODEV; + + if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) + return -EOPNOTSUPP; + if (tb[IFLA_ADDRESS] || tb[IFLA_BROADCAST] || tb[IFLA_MAP] || + tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) + return -EOPNOTSUPP; + + if (!ops) { +#ifdef CONFIG_KMOD + if (kind[0]) { + __rtnl_unlock(); + request_module("rtnl-link-%s", kind); + rtnl_lock(); + ops = rtnl_link_ops_get(kind); + if (ops) + goto replay; + } +#endif + return -EOPNOTSUPP; + } + + if (!ifname[0]) + snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + return -ENOMEM; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_WEIGHT]) + dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + err = ops->newlink(dev, tb, data); +err_free: + if (err < 0) + free_netdev(dev); + return err; + } +} + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct ifinfomsg *ifm; @@ -756,7 +1082,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else return -EINVAL; - nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; @@ -806,7 +1132,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (skb == NULL) goto errout; @@ -961,6 +1287,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL); rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all); rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); -- cgit v1.2.3 From c17d8874f9959070552fddf1b4e1d73c0c144c0f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:05:22 -0700 Subject: [VLAN]: Convert name-based configuration functions to struct netdevice * Move the device lookup and checks to the ioctl handler under the RTNL and change all name-based interfaces to take a struct net_device * instead. This allows to use them from a netlink interface, which identifies devices based on ifindex not name. It also avoids races between the ioctl interface and the (upcoming) netlink interface since now all changes happen under the RTNL. As a nice side effect this greatly simplifies error handling in the helper functions and fixes a number of incorrect error codes like -EINVAL for device not found. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 153 +++++++++++++++++++++------------------------------ net/8021q/vlan.h | 13 +++-- net/8021q/vlan_dev.c | 142 +++++++++++++---------------------------------- 3 files changed, 109 insertions(+), 199 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index de78c9dd713b..3678f0719934 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -278,43 +278,16 @@ static int unregister_vlan_dev(struct net_device *real_dev, return ret; } -static int unregister_vlan_device(const char *vlan_IF_name) +static int unregister_vlan_device(struct net_device *dev) { - struct net_device *dev = NULL; int ret; + ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, + VLAN_DEV_INFO(dev)->vlan_id); + unregister_netdevice(dev); - dev = dev_get_by_name(vlan_IF_name); - ret = -EINVAL; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - rtnl_lock(); - - ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, - VLAN_DEV_INFO(dev)->vlan_id); - - dev_put(dev); - unregister_netdevice(dev); - - rtnl_unlock(); - - if (ret == 1) - ret = 0; - } else { - printk(VLAN_ERR - "%s: ERROR: Tried to remove a non-vlan device " - "with VLAN code, name: %s priv_flags: %hX\n", - __FUNCTION__, dev->name, dev->priv_flags); - dev_put(dev); - ret = -EPERM; - } - } else { -#ifdef VLAN_DEBUG - printk(VLAN_DBG "%s: WARNING: Could not find dev.\n", __FUNCTION__); -#endif - ret = -EINVAL; - } - + if (ret == 1) + ret = 0; return ret; } @@ -378,12 +351,11 @@ static struct lock_class_key vlan_netdev_xmit_lock_key; * Returns the device that was created, or NULL if there was * an error of some kind. */ -static struct net_device *register_vlan_device(const char *eth_IF_name, +static struct net_device *register_vlan_device(struct net_device *real_dev, unsigned short VLAN_ID) { struct vlan_group *grp; struct net_device *new_dev; - struct net_device *real_dev; /* the ethernet device */ char name[IFNAMSIZ]; int i; @@ -395,46 +367,36 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, if (VLAN_ID >= VLAN_VID_MASK) goto out_ret_null; - /* find the device relating to eth_IF_name. */ - real_dev = dev_get_by_name(eth_IF_name); - if (!real_dev) - goto out_ret_null; - if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { printk(VLAN_DBG "%s: VLANs not supported on %s.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + goto out_ret_null; } if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !real_dev->vlan_rx_register) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + goto out_ret_null; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + goto out_ret_null; } - /* From this point on, all the data structures must remain - * consistent. - */ - rtnl_lock(); - /* The real device must be up and operating in order to * assosciate a VLAN device with it. */ if (!(real_dev->flags & IFF_UP)) - goto out_unlock; + goto out_ret_null; if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) { /* was already registered. */ printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__); - goto out_unlock; + goto out_ret_null; } /* Gotta set up the fields for the device. */ @@ -471,7 +433,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, vlan_setup); if (new_dev == NULL) - goto out_unlock; + goto out_ret_null; #ifdef VLAN_DEBUG printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); @@ -577,9 +539,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, if (real_dev->features & NETIF_F_HW_VLAN_FILTER) real_dev->vlan_rx_add_vid(real_dev, VLAN_ID); - rtnl_unlock(); - - + /* Account for reference in struct vlan_dev_info */ + dev_hold(real_dev); #ifdef VLAN_DEBUG printk(VLAN_DBG "Allocated new device successfully, returning.\n"); #endif @@ -590,17 +551,11 @@ out_free_arrays: out_free_unregister: unregister_netdev(new_dev); - goto out_unlock; + goto out_ret_null; out_free_newdev: free_netdev(new_dev); -out_unlock: - rtnl_unlock(); - -out_put_dev: - dev_put(real_dev); - out_ret_null: return NULL; } @@ -693,9 +648,10 @@ out: */ static int vlan_ioctl_handler(void __user *arg) { - int err = 0; + int err; unsigned short vid = 0; struct vlan_ioctl_args args; + struct net_device *dev = NULL; if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args))) return -EFAULT; @@ -708,35 +664,61 @@ static int vlan_ioctl_handler(void __user *arg) printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd); #endif + rtnl_lock(); + switch (args.cmd) { case SET_VLAN_INGRESS_PRIORITY_CMD: + case SET_VLAN_EGRESS_PRIORITY_CMD: + case SET_VLAN_FLAG_CMD: + case ADD_VLAN_CMD: + case DEL_VLAN_CMD: + case GET_VLAN_REALDEV_NAME_CMD: + case GET_VLAN_VID_CMD: + err = -ENODEV; + dev = __dev_get_by_name(args.device1); + if (!dev) + goto out; + + err = -EINVAL; + if (args.cmd != ADD_VLAN_CMD && + !(dev->priv_flags & IFF_802_1Q_VLAN)) + goto out; + } + + switch (args.cmd) { + case SET_VLAN_INGRESS_PRIORITY_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_ingress_priority(args.device1, - args.u.skb_priority, - args.vlan_qos); + break; + vlan_dev_set_ingress_priority(dev, + args.u.skb_priority, + args.vlan_qos); break; case SET_VLAN_EGRESS_PRIORITY_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_egress_priority(args.device1, + break; + err = vlan_dev_set_egress_priority(dev, args.u.skb_priority, args.vlan_qos); break; case SET_VLAN_FLAG_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_vlan_flag(args.device1, + break; + err = vlan_dev_set_vlan_flag(dev, args.u.flag, args.vlan_qos); break; case SET_VLAN_NAME_TYPE_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { + if ((args.u.name_type >= 0) && + (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) { vlan_name_type = args.u.name_type; err = 0; } else { @@ -745,13 +727,10 @@ static int vlan_ioctl_handler(void __user *arg) break; case ADD_VLAN_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* we have been given the name of the Ethernet Device we want to - * talk to: args.dev1 We also have the - * VLAN ID: args.u.VID - */ - if (register_vlan_device(args.device1, args.u.VID)) { + break; + if (register_vlan_device(dev, args.u.VID)) { err = 0; } else { err = -EINVAL; @@ -759,12 +738,10 @@ static int vlan_ioctl_handler(void __user *arg) break; case DEL_VLAN_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* Here, the args.dev1 is the actual VLAN we want - * to get rid of. - */ - err = unregister_vlan_device(args.device1); + break; + err = unregister_vlan_device(dev); break; case GET_VLAN_INGRESS_PRIORITY_CMD: @@ -788,9 +765,7 @@ static int vlan_ioctl_handler(void __user *arg) err = -EINVAL; break; case GET_VLAN_REALDEV_NAME_CMD: - err = vlan_dev_get_realdev_name(args.device1, args.u.device2); - if (err) - goto out; + vlan_dev_get_realdev_name(dev, args.u.device2); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) { err = -EFAULT; @@ -798,9 +773,7 @@ static int vlan_ioctl_handler(void __user *arg) break; case GET_VLAN_VID_CMD: - err = vlan_dev_get_vid(args.device1, &vid); - if (err) - goto out; + vlan_dev_get_vid(dev, &vid); args.u.VID = vid; if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) { @@ -812,9 +785,11 @@ static int vlan_ioctl_handler(void __user *arg) /* pass on to underlying device instead?? */ printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n", __FUNCTION__, args.cmd); - return -EINVAL; + err = -EINVAL; + break; } out: + rtnl_unlock(); return err; } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 1976cdba8f72..b83739017e9d 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -62,11 +62,14 @@ int vlan_dev_set_mac_address(struct net_device *dev, void* addr); int vlan_dev_open(struct net_device* dev); int vlan_dev_stop(struct net_device* dev); int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd); -int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); -int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); -int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val); -int vlan_dev_get_realdev_name(const char* dev_name, char* result); -int vlan_dev_get_vid(const char* dev_name, unsigned short* result); +void vlan_dev_set_ingress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio); +int vlan_dev_set_egress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio); +int vlan_dev_set_vlan_flag(const struct net_device *dev, + u32 flag, short flag_val); +void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); +void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); void vlan_dev_set_multicast_list(struct net_device *vlan_dev); #endif /* !(__BEN_VLAN_802_1Q_INC__) */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ec46084f44b4..05a23601f670 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -534,136 +534,68 @@ int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) return 0; } -int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) +void vlan_dev_set_ingress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio) { - struct net_device *dev = dev_get_by_name(dev_name); - - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - /* see if a priority mapping exists.. */ - VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio; - dev_put(dev); - return 0; - } - - dev_put(dev); - } - return -EINVAL; + VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio; } -int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) +int vlan_dev_set_egress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio) { - struct net_device *dev = dev_get_by_name(dev_name); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - /* See if a priority mapping exists.. */ - mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; - while (mp) { - if (mp->priority == skb_prio) { - mp->vlan_qos = ((vlan_prio << 13) & 0xE000); - dev_put(dev); - return 0; - } - mp = mp->next; - } - - /* Create a new mapping then. */ - mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; - np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL); - if (np) { - np->next = mp; - np->priority = skb_prio; - np->vlan_qos = ((vlan_prio << 13) & 0xE000); - VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np; - dev_put(dev); - return 0; - } else { - dev_put(dev); - return -ENOBUFS; - } + /* See if a priority mapping exists.. */ + mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; + while (mp) { + if (mp->priority == skb_prio) { + mp->vlan_qos = ((vlan_prio << 13) & 0xE000); + return 0; } - dev_put(dev); + mp = mp->next; } - return -EINVAL; + + /* Create a new mapping then. */ + mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; + np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL); + if (!np) + return -ENOBUFS; + + np->next = mp; + np->priority = skb_prio; + np->vlan_qos = ((vlan_prio << 13) & 0xE000); + VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np; + return 0; } /* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */ -int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val) +int vlan_dev_set_vlan_flag(const struct net_device *dev, + u32 flag, short flag_val) { - struct net_device *dev = dev_get_by_name(dev_name); - - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - /* verify flag is supported */ - if (flag == 1) { - if (flag_val) { - VLAN_DEV_INFO(dev)->flags |= 1; - } else { - VLAN_DEV_INFO(dev)->flags &= ~1; - } - dev_put(dev); - return 0; - } else { - printk(KERN_ERR "%s: flag %i is not valid.\n", - __FUNCTION__, (int)(flag)); - dev_put(dev); - return -EINVAL; - } + /* verify flag is supported */ + if (flag == 1) { + if (flag_val) { + VLAN_DEV_INFO(dev)->flags |= 1; } else { - printk(KERN_ERR - "%s: %s is not a vlan device, priv_flags: %hX.\n", - __FUNCTION__, dev->name, dev->priv_flags); - dev_put(dev); + VLAN_DEV_INFO(dev)->flags &= ~1; } - } else { - printk(KERN_ERR "%s: Could not find device: %s\n", - __FUNCTION__, dev_name); + return 0; } - + printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag); return -EINVAL; } - -int vlan_dev_get_realdev_name(const char *dev_name, char* result) +void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) { - struct net_device *dev = dev_get_by_name(dev_name); - int rv = 0; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23); - rv = 0; - } else { - rv = -EINVAL; - } - dev_put(dev); - } else { - rv = -ENODEV; - } - return rv; + strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23); } -int vlan_dev_get_vid(const char *dev_name, unsigned short* result) +void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) { - struct net_device *dev = dev_get_by_name(dev_name); - int rv = 0; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - *result = VLAN_DEV_INFO(dev)->vlan_id; - rv = 0; - } else { - rv = -EINVAL; - } - dev_put(dev); - } else { - rv = -ENODEV; - } - return rv; + *result = VLAN_DEV_INFO(dev)->vlan_id; } - int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p) { struct sockaddr *addr = (struct sockaddr *)(addr_struct_p); -- cgit v1.2.3 From 2f4284a406cb25d1e41454cbf9ec4545b5ed70a1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:05:41 -0700 Subject: [VLAN]: Move some device intialization code to dev->init callback Move some device initialization code to new dev->init callback to make it shareable with netlink. Additionally this fixes a minor bug, dev->iflink is set after registration, which causes an incorrect value in the initial netlink message. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 92 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3678f0719934..dc95f7cbf291 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -291,6 +291,48 @@ static int unregister_vlan_device(struct net_device *dev) return ret; } +/* + * vlan network devices have devices nesting below it, and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key vlan_netdev_xmit_lock_key; + +static int vlan_dev_init(struct net_device *dev) +{ + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; + + /* IFF_BROADCAST|IFF_MULTICAST; ??? */ + dev->flags = real_dev->flags & ~IFF_UP; + dev->iflink = real_dev->ifindex; + dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | + (1<<__LINK_STATE_DORMANT))) | + (1<<__LINK_STATE_PRESENT); + + /* TODO: maybe just assign it to be ETHERNET? */ + dev->type = real_dev->type; + + memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len); + memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); + dev->addr_len = real_dev->addr_len; + + if (real_dev->features & NETIF_F_HW_VLAN_TX) { + dev->hard_header = real_dev->hard_header; + dev->hard_header_len = real_dev->hard_header_len; + dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; + dev->rebuild_header = real_dev->rebuild_header; + } else { + dev->hard_header = vlan_dev_hard_header; + dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; + dev->hard_start_xmit = vlan_dev_hard_start_xmit; + dev->rebuild_header = vlan_dev_rebuild_header; + } + dev->hard_header_parse = real_dev->hard_header_parse; + + lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key); + return 0; +} + static void vlan_setup(struct net_device *new_dev) { SET_MODULE_OWNER(new_dev); @@ -311,6 +353,7 @@ static void vlan_setup(struct net_device *new_dev) /* set up method calls */ new_dev->change_mtu = vlan_dev_change_mtu; + new_dev->init = vlan_dev_init; new_dev->open = vlan_dev_open; new_dev->stop = vlan_dev_stop; new_dev->set_mac_address = vlan_dev_set_mac_address; @@ -339,14 +382,6 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev } } -/* - * vlan network devices have devices nesting below it, and are a special - * "super class" of normal network devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key vlan_netdev_xmit_lock_key; - - /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns the device that was created, or NULL if there was * an error of some kind. @@ -435,49 +470,17 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, if (new_dev == NULL) goto out_ret_null; -#ifdef VLAN_DEBUG - printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); -#endif - /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - new_dev->flags = real_dev->flags; - new_dev->flags &= ~IFF_UP; - - new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | - (1<<__LINK_STATE_DORMANT))) | - (1<<__LINK_STATE_PRESENT); - /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. */ new_dev->mtu = real_dev->mtu; - /* TODO: maybe just assign it to be ETHERNET? */ - new_dev->type = real_dev->type; - - new_dev->hard_header_len = real_dev->hard_header_len; - if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) { - /* Regular ethernet + 4 bytes (18 total). */ - new_dev->hard_header_len += VLAN_HLEN; - } - +#ifdef VLAN_DEBUG + printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); VLAN_MEM_DBG("new_dev->priv malloc, addr: %p size: %i\n", new_dev->priv, sizeof(struct vlan_dev_info)); - - memcpy(new_dev->broadcast, real_dev->broadcast, real_dev->addr_len); - memcpy(new_dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); - new_dev->addr_len = real_dev->addr_len; - - if (real_dev->features & NETIF_F_HW_VLAN_TX) { - new_dev->hard_header = real_dev->hard_header; - new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; - new_dev->rebuild_header = real_dev->rebuild_header; - } else { - new_dev->hard_header = vlan_dev_hard_header; - new_dev->hard_start_xmit = vlan_dev_hard_start_xmit; - new_dev->rebuild_header = vlan_dev_rebuild_header; - } - new_dev->hard_header_parse = real_dev->hard_header_parse; +#endif VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ VLAN_DEV_INFO(new_dev)->real_dev = real_dev; @@ -492,9 +495,6 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, if (register_netdevice(new_dev)) goto out_free_newdev; - lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key); - - new_dev->iflink = real_dev->ifindex; vlan_transfer_operstate(real_dev, new_dev); linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ -- cgit v1.2.3 From 42429aaee5eb44f4a48fdb056d77d0c06ef5aebc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:05:59 -0700 Subject: [VLAN]: Move vlan_group allocation to seperate function Move group allocation to a seperate function to clean up the code a bit and allocate groups before registering the device. Device registration is globally visible and causes netlink events, so we shouldn't fail afterwards. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 78 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index dc95f7cbf291..1b9dc5ecd448 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -197,6 +197,34 @@ static void vlan_group_free(struct vlan_group *grp) kfree(grp); } +static struct vlan_group *vlan_group_alloc(int ifindex) +{ + struct vlan_group *grp; + unsigned int size; + unsigned int i; + + grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); + if (!grp) + return NULL; + + size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; + + for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { + grp->vlan_devices_arrays[i] = kzalloc(size, GFP_KERNEL); + if (!grp->vlan_devices_arrays[i]) + goto err; + } + + grp->real_dev_ifindex = ifindex; + hlist_add_head_rcu(&grp->hlist, + &vlan_group_hash[vlan_grp_hashfn(ifindex)]); + return grp; + +err: + vlan_group_free(grp); + return NULL; +} + static void vlan_rcu_free(struct rcu_head *rcu) { vlan_group_free(container_of(rcu, struct vlan_group, rcu)); @@ -389,10 +417,9 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev static struct net_device *register_vlan_device(struct net_device *real_dev, unsigned short VLAN_ID) { - struct vlan_group *grp; + struct vlan_group *grp, *ngrp = NULL; struct net_device *new_dev; char name[IFNAMSIZ]; - int i; #ifdef VLAN_DEBUG printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", @@ -491,9 +518,15 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, printk(VLAN_DBG "About to go find the group for idx: %i\n", real_dev->ifindex); #endif + grp = __vlan_find_group(real_dev->ifindex); + if (!grp) { + ngrp = grp = vlan_group_alloc(real_dev->ifindex); + if (!grp) + goto out_free_newdev; + } if (register_netdevice(new_dev)) - goto out_free_newdev; + goto out_free_group; vlan_transfer_operstate(real_dev, new_dev); linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ @@ -501,34 +534,8 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, /* So, got the sucker initialized, now lets place * it into our local structure. */ - grp = __vlan_find_group(real_dev->ifindex); - - /* Note, we are running under the RTNL semaphore - * so it cannot "appear" on us. - */ - if (!grp) { /* need to add a new group */ - grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); - if (!grp) - goto out_free_unregister; - - for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { - grp->vlan_devices_arrays[i] = kzalloc( - sizeof(struct net_device *)*VLAN_GROUP_ARRAY_PART_LEN, - GFP_KERNEL); - - if (!grp->vlan_devices_arrays[i]) - goto out_free_arrays; - } - - /* printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); */ - grp->real_dev_ifindex = real_dev->ifindex; - - hlist_add_head_rcu(&grp->hlist, - &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]); - - if (real_dev->features & NETIF_F_HW_VLAN_RX) - real_dev->vlan_rx_register(real_dev, grp); - } + if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX) + real_dev->vlan_rx_register(real_dev, ngrp); vlan_group_set_device(grp, VLAN_ID, new_dev); @@ -546,12 +553,9 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, #endif return new_dev; -out_free_arrays: - vlan_group_free(grp); - -out_free_unregister: - unregister_netdev(new_dev); - goto out_ret_null; +out_free_group: + if (ngrp) + vlan_group_free(ngrp); out_free_newdev: free_netdev(new_dev); -- cgit v1.2.3 From c1d3ee9925ca714a5ed3f8fce01a7027137f4e3f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:06:14 -0700 Subject: [VLAN]: Split up device checks Move the checks of the underlying device to a seperate function. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 56 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 1b9dc5ecd448..1e33dbb85d14 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -410,57 +410,65 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev } } -/* Attach a VLAN device to a mac address (ie Ethernet Card). - * Returns the device that was created, or NULL if there was - * an error of some kind. - */ -static struct net_device *register_vlan_device(struct net_device *real_dev, - unsigned short VLAN_ID) +static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) { - struct vlan_group *grp, *ngrp = NULL; - struct net_device *new_dev; - char name[IFNAMSIZ]; - -#ifdef VLAN_DEBUG - printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", - __FUNCTION__, eth_IF_name, VLAN_ID); -#endif - - if (VLAN_ID >= VLAN_VID_MASK) - goto out_ret_null; - if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { printk(VLAN_DBG "%s: VLANs not supported on %s.\n", __FUNCTION__, real_dev->name); - goto out_ret_null; + return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !real_dev->vlan_rx_register) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); - goto out_ret_null; + return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); - goto out_ret_null; + return -EOPNOTSUPP; } /* The real device must be up and operating in order to * assosciate a VLAN device with it. */ if (!(real_dev->flags & IFF_UP)) - goto out_ret_null; + return -ENETDOWN; - if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) { + if (__find_vlan_dev(real_dev, vlan_id) != NULL) { /* was already registered. */ printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__); - goto out_ret_null; + return -EEXIST; } + return 0; +} + +/* Attach a VLAN device to a mac address (ie Ethernet Card). + * Returns the device that was created, or NULL if there was + * an error of some kind. + */ +static struct net_device *register_vlan_device(struct net_device *real_dev, + unsigned short VLAN_ID) +{ + struct vlan_group *grp, *ngrp = NULL; + struct net_device *new_dev; + char name[IFNAMSIZ]; + +#ifdef VLAN_DEBUG + printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", + __FUNCTION__, eth_IF_name, VLAN_ID); +#endif + + if (VLAN_ID >= VLAN_VID_MASK) + goto out_ret_null; + + if (vlan_check_real_dev(real_dev, VLAN_ID) < 0) + goto out_ret_null; + /* Gotta set up the fields for the device. */ #ifdef VLAN_DEBUG printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n", -- cgit v1.2.3 From e89fe42cd03c8fd3686df82d8390a235717a66de Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:06:29 -0700 Subject: [VLAN]: Move device registation to seperate function Move device registration and configuration of the underlying device to a seperate function. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 83 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 1e33dbb85d14..e68b503f1012 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -447,6 +447,51 @@ static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_ return 0; } +static int register_vlan_dev(struct net_device *dev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct net_device *real_dev = vlan->real_dev; + unsigned short vlan_id = vlan->vlan_id; + struct vlan_group *grp, *ngrp = NULL; + int err; + + grp = __vlan_find_group(real_dev->ifindex); + if (!grp) { + ngrp = grp = vlan_group_alloc(real_dev->ifindex); + if (!grp) + return -ENOBUFS; + } + + err = register_netdevice(dev); + if (err < 0) + goto out_free_group; + + /* Account for reference in struct vlan_dev_info */ + dev_hold(real_dev); + + vlan_transfer_operstate(real_dev, dev); + linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ + + /* So, got the sucker initialized, now lets place + * it into our local structure. + */ + vlan_group_set_device(grp, vlan_id, dev); + if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX) + real_dev->vlan_rx_register(real_dev, ngrp); + if (real_dev->features & NETIF_F_HW_VLAN_FILTER) + real_dev->vlan_rx_add_vid(real_dev, vlan_id); + + if (vlan_proc_add_dev(dev) < 0) + printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n", + dev->name); + return 0; + +out_free_group: + if (ngrp) + vlan_group_free(ngrp); + return err; +} + /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns the device that was created, or NULL if there was * an error of some kind. @@ -454,7 +499,6 @@ static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_ static struct net_device *register_vlan_device(struct net_device *real_dev, unsigned short VLAN_ID) { - struct vlan_group *grp, *ngrp = NULL; struct net_device *new_dev; char name[IFNAMSIZ]; @@ -522,37 +566,8 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, VLAN_DEV_INFO(new_dev)->dent = NULL; VLAN_DEV_INFO(new_dev)->flags = 1; -#ifdef VLAN_DEBUG - printk(VLAN_DBG "About to go find the group for idx: %i\n", - real_dev->ifindex); -#endif - grp = __vlan_find_group(real_dev->ifindex); - if (!grp) { - ngrp = grp = vlan_group_alloc(real_dev->ifindex); - if (!grp) - goto out_free_newdev; - } - - if (register_netdevice(new_dev)) - goto out_free_group; - - vlan_transfer_operstate(real_dev, new_dev); - linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ - - /* So, got the sucker initialized, now lets place - * it into our local structure. - */ - if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX) - real_dev->vlan_rx_register(real_dev, ngrp); - - vlan_group_set_device(grp, VLAN_ID, new_dev); - - if (vlan_proc_add_dev(new_dev)<0)/* create it's proc entry */ - printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n", - new_dev->name); - - if (real_dev->features & NETIF_F_HW_VLAN_FILTER) - real_dev->vlan_rx_add_vid(real_dev, VLAN_ID); + if (register_vlan_dev(new_dev) < 0) + goto out_free_newdev; /* Account for reference in struct vlan_dev_info */ dev_hold(real_dev); @@ -561,10 +576,6 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, #endif return new_dev; -out_free_group: - if (ngrp) - vlan_group_free(ngrp); - out_free_newdev: free_netdev(new_dev); -- cgit v1.2.3 From 2ae0bf69b716d07126f0a9c17fcc2d76da172cb6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:06:43 -0700 Subject: [VLAN]: Return proper error codes in register_vlan_device The returned device is unused, return proper error codes instead and avoid having the ioctl handler guess the error. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e68b503f1012..5801993182b4 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -493,14 +493,14 @@ out_free_group: } /* Attach a VLAN device to a mac address (ie Ethernet Card). - * Returns the device that was created, or NULL if there was - * an error of some kind. + * Returns 0 if the device was created or a negative error code otherwise. */ -static struct net_device *register_vlan_device(struct net_device *real_dev, - unsigned short VLAN_ID) +static int register_vlan_device(struct net_device *real_dev, + unsigned short VLAN_ID) { struct net_device *new_dev; char name[IFNAMSIZ]; + int err; #ifdef VLAN_DEBUG printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", @@ -508,10 +508,11 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, #endif if (VLAN_ID >= VLAN_VID_MASK) - goto out_ret_null; + return -ERANGE; - if (vlan_check_real_dev(real_dev, VLAN_ID) < 0) - goto out_ret_null; + err = vlan_check_real_dev(real_dev, VLAN_ID); + if (err < 0) + return err; /* Gotta set up the fields for the device. */ #ifdef VLAN_DEBUG @@ -547,7 +548,7 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, vlan_setup); if (new_dev == NULL) - goto out_ret_null; + return -ENOBUFS; /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. @@ -566,7 +567,8 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, VLAN_DEV_INFO(new_dev)->dent = NULL; VLAN_DEV_INFO(new_dev)->flags = 1; - if (register_vlan_dev(new_dev) < 0) + err = register_vlan_dev(new_dev); + if (err < 0) goto out_free_newdev; /* Account for reference in struct vlan_dev_info */ @@ -574,13 +576,11 @@ static struct net_device *register_vlan_device(struct net_device *real_dev, #ifdef VLAN_DEBUG printk(VLAN_DBG "Allocated new device successfully, returning.\n"); #endif - return new_dev; + return 0; out_free_newdev: free_netdev(new_dev); - -out_ret_null: - return NULL; + return err; } static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) @@ -753,11 +753,7 @@ static int vlan_ioctl_handler(void __user *arg) err = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - if (register_vlan_device(dev, args.u.VID)) { - err = 0; - } else { - err = -EINVAL; - } + err = register_vlan_device(dev, args.u.VID); break; case DEL_VLAN_CMD: -- cgit v1.2.3 From 734423cf38021966a5d3bd5f5c6aaecaf32fb4ac Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:07 -0700 Subject: [VLAN]: Use 32 bit value for skb->priority mapping skb->priority has only 32 bits and even VLAN uses 32 bit values in its API. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 4 ++-- net/8021q/vlanproc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 81e9bc93569b..aeddb49193f9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -99,7 +99,7 @@ static inline void vlan_group_set_device(struct vlan_group *vg, int vlan_id, } struct vlan_priority_tci_mapping { - unsigned long priority; + u32 priority; unsigned short vlan_qos; /* This should be shifted when first set, so we only do it * at provisioning time. * ((skb->priority << 13) & 0xE000) @@ -112,7 +112,7 @@ struct vlan_dev_info { /** This will be the mapping that correlates skb->priority to * 3 bits of VLAN QOS tags... */ - unsigned long ingress_priority_map[8]; + u32 ingress_priority_map[8]; struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */ unsigned short vlan_id; /* The VLAN Identifier for this interface. */ diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index d216a64421cd..8693b21b3caa 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -342,7 +342,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "Device: %s", dev_info->real_dev->name); /* now show all PRIORITY mappings relating to this VLAN */ seq_printf(seq, - "\nINGRESS priority mappings: 0:%lu 1:%lu 2:%lu 3:%lu 4:%lu 5:%lu 6:%lu 7:%lu\n", + "\nINGRESS priority mappings: 0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n", dev_info->ingress_priority_map[0], dev_info->ingress_priority_map[1], dev_info->ingress_priority_map[2], @@ -357,7 +357,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) const struct vlan_priority_tci_mapping *mp = dev_info->egress_priority_map[i]; while (mp) { - seq_printf(seq, "%lu:%hu ", + seq_printf(seq, "%u:%hu ", mp->priority, ((mp->vlan_qos >> 13) & 0x7)); mp = mp->next; } -- cgit v1.2.3 From b020cb488586f982f40eb257a32e92a4de710d65 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:22 -0700 Subject: [VLAN]: Keep track of number of QoS mappings Keep track of the number of configured ingress/egress QoS mappings to avoid iteration while calculating the netlink attribute size. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 +++ net/8021q/vlan_dev.c | 27 +++++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index aeddb49193f9..b46d4225f74e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -112,7 +112,10 @@ struct vlan_dev_info { /** This will be the mapping that correlates skb->priority to * 3 bits of VLAN QOS tags... */ + unsigned int nr_ingress_mappings; u32 ingress_priority_map[8]; + + unsigned int nr_egress_mappings; struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */ unsigned short vlan_id; /* The VLAN Identifier for this interface. */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 05a23601f670..4f6ede752a37 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -537,35 +537,50 @@ int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, short vlan_prio) { - VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio; + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + + if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio) + vlan->nr_ingress_mappings--; + else if (!vlan->ingress_priority_map[vlan_prio & 0x7] && skb_prio) + vlan->nr_ingress_mappings++; + + vlan->ingress_priority_map[vlan_prio & 0x7] = skb_prio; } int vlan_dev_set_egress_priority(const struct net_device *dev, u32 skb_prio, short vlan_prio) { + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; + u32 vlan_qos = (vlan_prio << 13) & 0xE000; /* See if a priority mapping exists.. */ - mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; + mp = vlan->egress_priority_map[skb_prio & 0xF]; while (mp) { if (mp->priority == skb_prio) { - mp->vlan_qos = ((vlan_prio << 13) & 0xE000); + if (mp->vlan_qos && !vlan_qos) + vlan->nr_egress_mappings--; + else if (!mp->vlan_qos && vlan_qos) + vlan->nr_egress_mappings++; + mp->vlan_qos = vlan_qos; return 0; } mp = mp->next; } /* Create a new mapping then. */ - mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; + mp = vlan->egress_priority_map[skb_prio & 0xF]; np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL); if (!np) return -ENOBUFS; np->next = mp; np->priority = skb_prio; - np->vlan_qos = ((vlan_prio << 13) & 0xE000); - VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np; + np->vlan_qos = vlan_qos; + vlan->egress_priority_map[skb_prio & 0xF] = np; + if (vlan_qos) + vlan->nr_egress_mappings++; return 0; } -- cgit v1.2.3 From a4bf3af4ac46802436d352ef409cee4fe80445b3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:37 -0700 Subject: [VLAN]: Introduce symbolic constants for flag values Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 4 ++++ net/8021q/vlan.c | 2 +- net/8021q/vlan_dev.c | 13 +++++++------ 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b46d4225f74e..c7912876a210 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -398,6 +398,10 @@ enum vlan_ioctl_cmds { GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */ }; +enum vlan_flags { + VLAN_FLAG_REORDER_HDR = 0x1, +}; + enum vlan_name_types { VLAN_NAME_TYPE_PLUS_VID, /* Name will look like: vlan0005 */ VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like: eth1.0005 */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 5801993182b4..f12f914edf02 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -565,7 +565,7 @@ static int register_vlan_device(struct net_device *real_dev, VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ VLAN_DEV_INFO(new_dev)->real_dev = real_dev; VLAN_DEV_INFO(new_dev)->dent = NULL; - VLAN_DEV_INFO(new_dev)->flags = 1; + VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR; err = register_vlan_dev(new_dev); if (err < 0) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4f6ede752a37..95afe387b952 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -73,7 +73,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb) static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) { - if (VLAN_DEV_INFO(skb->dev)->flags & 1) { + if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { if (skb_shared(skb) || skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); kfree_skb(skb); @@ -350,7 +350,8 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, * header shuffling in the hard_start_xmit. Users can turn off this * REORDER behaviour with the vconfig tool. */ - build_vlan_header = ((VLAN_DEV_INFO(dev)->flags & 1) == 0); + if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR)) + build_vlan_header = 1; if (build_vlan_header) { vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); @@ -584,16 +585,16 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, return 0; } -/* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */ +/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */ int vlan_dev_set_vlan_flag(const struct net_device *dev, u32 flag, short flag_val) { /* verify flag is supported */ - if (flag == 1) { + if (flag == VLAN_FLAG_REORDER_HDR) { if (flag_val) { - VLAN_DEV_INFO(dev)->flags |= 1; + VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR; } else { - VLAN_DEV_INFO(dev)->flags &= ~1; + VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; } return 0; } -- cgit v1.2.3 From 07b5b17e157b7018d0ca40ca0d1581a23096fb45 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:54 -0700 Subject: [VLAN]: Use rtnl_link API Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_link.h | 34 +++++++ net/8021q/Makefile | 2 +- net/8021q/vlan.c | 29 ++++-- net/8021q/vlan.h | 10 ++ net/8021q/vlan_netlink.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 300 insertions(+), 11 deletions(-) create mode 100644 net/8021q/vlan_netlink.c (limited to 'net') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 3144babd2357..422084d18ce1 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -153,4 +153,38 @@ enum #define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) +/* VLAN section */ + +enum +{ + IFLA_VLAN_UNSPEC, + IFLA_VLAN_ID, + IFLA_VLAN_FLAGS, + IFLA_VLAN_EGRESS_QOS, + IFLA_VLAN_INGRESS_QOS, + __IFLA_VLAN_MAX, +}; + +#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) + +struct ifla_vlan_flags { + __u32 flags; + __u32 mask; +}; + +enum +{ + IFLA_VLAN_QOS_UNSPEC, + IFLA_VLAN_QOS_MAPPING, + __IFLA_VLAN_QOS_MAX +}; + +#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) + +struct ifla_vlan_qos_mapping +{ + __u32 from; + __u32 to; +}; + #endif /* _LINUX_IF_LINK_H */ diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 97feb44dbdce..10ca7f486c3a 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o -8021q-objs := vlan.o vlan_dev.o +8021q-objs := vlan.o vlan_dev.o vlan_netlink.o ifeq ($(CONFIG_PROC_FS),y) 8021q-objs += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index f12f914edf02..e7583eea6fda 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -97,15 +97,22 @@ static int __init vlan_proto_init(void) /* Register us to receive netdevice events */ err = register_netdevice_notifier(&vlan_notifier_block); - if (err < 0) { - dev_remove_pack(&vlan_packet_type); - vlan_proc_cleanup(); - return err; - } + if (err < 0) + goto err1; - vlan_ioctl_set(vlan_ioctl_handler); + err = vlan_netlink_init(); + if (err < 0) + goto err2; + vlan_ioctl_set(vlan_ioctl_handler); return 0; + +err2: + unregister_netdevice_notifier(&vlan_notifier_block); +err1: + vlan_proc_cleanup(); + dev_remove_pack(&vlan_packet_type); + return err; } /* Cleanup all vlan devices @@ -136,6 +143,7 @@ static void __exit vlan_cleanup_module(void) { int i; + vlan_netlink_fini(); vlan_ioctl_set(NULL); /* Un-register us from receiving netdevice events */ @@ -306,7 +314,7 @@ static int unregister_vlan_dev(struct net_device *real_dev, return ret; } -static int unregister_vlan_device(struct net_device *dev) +int unregister_vlan_device(struct net_device *dev) { int ret; @@ -361,7 +369,7 @@ static int vlan_dev_init(struct net_device *dev) return 0; } -static void vlan_setup(struct net_device *new_dev) +void vlan_setup(struct net_device *new_dev) { SET_MODULE_OWNER(new_dev); @@ -410,7 +418,7 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev } } -static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) +int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) { if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { printk(VLAN_DBG "%s: VLANs not supported on %s.\n", @@ -447,7 +455,7 @@ static int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_ return 0; } -static int register_vlan_dev(struct net_device *dev) +int register_vlan_dev(struct net_device *dev) { struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); struct net_device *real_dev = vlan->real_dev; @@ -567,6 +575,7 @@ static int register_vlan_device(struct net_device *real_dev, VLAN_DEV_INFO(new_dev)->dent = NULL; VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR; + new_dev->rtnl_link_ops = &vlan_link_ops; err = register_vlan_dev(new_dev); if (err < 0) goto out_free_newdev; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index b83739017e9d..fe6bb0f7d275 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -72,4 +72,14 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); void vlan_dev_set_multicast_list(struct net_device *vlan_dev); +int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); +void vlan_setup(struct net_device *dev); +int register_vlan_dev(struct net_device *dev); +int unregister_vlan_device(struct net_device *dev); + +int vlan_netlink_init(void); +void vlan_netlink_fini(void); + +extern struct rtnl_link_ops vlan_link_ops; + #endif /* !(__BEN_VLAN_802_1Q_INC__) */ diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c new file mode 100644 index 000000000000..844c7e43d0fa --- /dev/null +++ b/net/8021q/vlan_netlink.c @@ -0,0 +1,236 @@ +/* + * VLAN netlink control interface + * + * Copyright (c) 2007 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include "vlan.h" + + +static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = { + [IFLA_VLAN_ID] = { .type = NLA_U16 }, + [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) }, + [IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED }, + [IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = { + [IFLA_VLAN_QOS_MAPPING] = { .len = sizeof(struct ifla_vlan_qos_mapping) }, +}; + + +static inline int vlan_validate_qos_map(struct nlattr *attr) +{ + if (!attr) + return 0; + return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy); +} + +static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + struct ifla_vlan_flags *flags; + u16 id; + int err; + + if (!data) + return -EINVAL; + + if (data[IFLA_VLAN_ID]) { + id = nla_get_u16(data[IFLA_VLAN_ID]); + if (id >= VLAN_VID_MASK) + return -ERANGE; + } + if (data[IFLA_VLAN_FLAGS]) { + flags = nla_data(data[IFLA_VLAN_FLAGS]); + if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR) + return -EINVAL; + } + + err = vlan_validate_qos_map(data[IFLA_VLAN_INGRESS_QOS]); + if (err < 0) + return err; + err = vlan_validate_qos_map(data[IFLA_VLAN_EGRESS_QOS]); + if (err < 0) + return err; + return 0; +} + +static int vlan_changelink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct ifla_vlan_flags *flags; + struct ifla_vlan_qos_mapping *m; + struct nlattr *attr; + int rem; + + if (data[IFLA_VLAN_FLAGS]) { + flags = nla_data(data[IFLA_VLAN_FLAGS]); + vlan->flags = (vlan->flags & ~flags->mask) | + (flags->flags & flags->mask); + } + if (data[IFLA_VLAN_INGRESS_QOS]) { + nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { + m = nla_data(attr); + vlan_dev_set_ingress_priority(dev, m->to, m->from); + } + } + if (data[IFLA_VLAN_EGRESS_QOS]) { + nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { + m = nla_data(attr); + vlan_dev_set_egress_priority(dev, m->from, m->to); + } + } + return 0; +} + +static int vlan_newlink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct net_device *real_dev; + int err; + + if (!data[IFLA_VLAN_ID]) + return -EINVAL; + + if (!tb[IFLA_LINK]) + return -EINVAL; + real_dev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + + vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); + vlan->real_dev = real_dev; + vlan->flags = VLAN_FLAG_REORDER_HDR; + + err = vlan_check_real_dev(real_dev, vlan->vlan_id); + if (err < 0) + return err; + + if (!tb[IFLA_MTU]) + dev->mtu = real_dev->mtu; + else if (dev->mtu > real_dev->mtu) + return -EINVAL; + + err = vlan_changelink(dev, tb, data); + if (err < 0) + return err; + + return register_vlan_dev(dev); +} + +static void vlan_dellink(struct net_device *dev) +{ + unregister_vlan_device(dev); +} + +static inline size_t vlan_qos_map_size(unsigned int n) +{ + if (n == 0) + return 0; + /* IFLA_VLAN_{EGRESS,INGRESS}_QOS + n * IFLA_VLAN_QOS_MAPPING */ + return nla_total_size(sizeof(struct nlattr)) + + nla_total_size(sizeof(struct ifla_vlan_qos_mapping)) * n; +} + +static size_t vlan_get_size(const struct net_device *dev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + + return nla_total_size(2) + /* IFLA_VLAN_ID */ + vlan_qos_map_size(vlan->nr_ingress_mappings) + + vlan_qos_map_size(vlan->nr_egress_mappings); +} + +static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct vlan_priority_tci_mapping *pm; + struct ifla_vlan_flags f; + struct ifla_vlan_qos_mapping m; + struct nlattr *nest; + unsigned int i; + + NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id); + if (vlan->flags) { + f.flags = vlan->flags; + f.mask = ~0; + NLA_PUT(skb, IFLA_VLAN_FLAGS, sizeof(f), &f); + } + if (vlan->nr_ingress_mappings) { + nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS); + if (nest == NULL) + goto nla_put_failure; + + for (i = 0; i < ARRAY_SIZE(vlan->ingress_priority_map); i++) { + if (!vlan->ingress_priority_map[i]) + continue; + + m.from = i; + m.to = vlan->ingress_priority_map[i]; + NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING, + sizeof(m), &m); + } + nla_nest_end(skb, nest); + } + + if (vlan->nr_egress_mappings) { + nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS); + if (nest == NULL) + goto nla_put_failure; + + for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { + for (pm = vlan->egress_priority_map[i]; pm; + pm = pm->next) { + if (!pm->vlan_qos) + continue; + + m.from = pm->priority; + m.to = (pm->vlan_qos >> 13) & 0x7; + NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING, + sizeof(m), &m); + } + } + nla_nest_end(skb, nest); + } + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +struct rtnl_link_ops vlan_link_ops __read_mostly = { + .kind = "vlan", + .maxtype = IFLA_VLAN_MAX, + .policy = vlan_policy, + .priv_size = sizeof(struct vlan_dev_info), + .setup = vlan_setup, + .validate = vlan_validate, + .newlink = vlan_newlink, + .changelink = vlan_changelink, + .dellink = vlan_dellink, + .get_size = vlan_get_size, + .fill_info = vlan_fill_info, +}; + +int __init vlan_netlink_init(void) +{ + return rtnl_link_register(&vlan_link_ops); +} + +void __exit vlan_netlink_fini(void) +{ + rtnl_link_unregister(&vlan_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("vlan"); -- cgit v1.2.3 From b2f41ff4139c0df8d06f352acc962a62fc07a0c3 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Mon, 28 May 2007 12:23:29 -0300 Subject: ccid3: Update copyrights Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 4 ++-- net/dccp/ccids/lib/loss_interval.c | 4 ++-- net/dccp/ccids/lib/loss_interval.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index ec7fa4d67f08..2d203ae41aad 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1,8 +1,8 @@ /* * net/dccp/ccids/ccid3.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-7 Ian McDonald * * An implementation of the DCCP protocol * diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 372d7e75cdd8..3829afc31176 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -1,8 +1,8 @@ /* * net/dccp/ccids/lib/loss_interval.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-7 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index eb257014dd74..1e48fe323006 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -3,8 +3,8 @@ /* * net/dccp/ccids/lib/loss_interval.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-7 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify it -- cgit v1.2.3 From e961811fcde4202ae5c3c9ce81dcfc244e8959bb Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Mon, 28 May 2007 16:32:26 -0300 Subject: Fix dccp_sum_coverage When compiling with EXTRA_CFLAGS=-W notice that we have signed/unsigned issue in dccp.h. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ian McDonald --- net/dccp/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index d8ad27bfe01a..e2d74cd7eeeb 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -184,7 +184,7 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); /* * Checksumming routines */ -static inline int dccp_csum_coverage(const struct sk_buff *skb) +static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb) { const struct dccp_hdr* dh = dccp_hdr(skb); @@ -195,7 +195,7 @@ static inline int dccp_csum_coverage(const struct sk_buff *skb) static inline void dccp_csum_outgoing(struct sk_buff *skb) { - int cov = dccp_csum_coverage(skb); + unsigned int cov = dccp_csum_coverage(skb); if (cov >= skb->len) dccp_hdr(skb)->dccph_cscov = 0; -- cgit v1.2.3 From 6bc7efe8efa627077f8f65d01dbb762fc9356a2f Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Mon, 28 May 2007 16:37:45 -0300 Subject: loss_interval: Fix timeval initialisation When compiling with EXTRA_CFLAGS=-W noticed that tstamp is not initialised correctly in dccp_li_calc_first_li. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ian McDonald --- net/dccp/ccids/ccid3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 2d203ae41aad..9686a8de2e91 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -829,7 +829,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 x_recv, p; suseconds_t rtt, delta; - struct timeval tstamp = { 0, }; + struct timeval tstamp = { 0, 0 }; int interval = 0; int win_count = 0; int step = 0; -- cgit v1.2.3 From d83258a3da1d3c7ae7b75549c8bf7ed689562c62 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 May 2007 18:04:14 -0300 Subject: Remove accesses to ccid3_hc_rx_sock in ccid3_hc_rx_{update,calc_first}_li This is a preparatory patch for moving these loss interval functions from net/dccp/ccids/ccid3.c to net/dccp/ccids/lib/loss_interval.c. Based on a patch by Ian McDonald. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 62 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 9686a8de2e91..fb500d3851c3 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -823,9 +823,12 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) * * returns estimated loss interval in usecs */ -static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) +static u32 ccid3_hc_rx_calc_first_li(struct sock *sk, + struct list_head *hist_list, + struct timeval *last_feedback, + u16 s, u32 bytes_recv, + u32 previous_x_recv) { - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 x_recv, p; suseconds_t rtt, delta; @@ -835,8 +838,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) int step = 0; u64 fval; - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { + list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) { if (dccp_rx_hist_entry_data_packet(entry)) { tail = entry; @@ -895,19 +897,20 @@ found: } dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); + delta = timeval_delta(&tstamp, last_feedback); DCCP_BUG_ON(delta <= 0); - x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); + x_recv = scaled_div32(bytes_recv, delta); if (x_recv == 0) { /* would also trigger divide-by-zero */ DCCP_WARN("X_recv==0\n"); - if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { + if (previous_x_recv == 0) { DCCP_BUG("stored value of X_recv is zero"); return ~0; } + x_recv = previous_x_recv; } - fval = scaled_div(hcrx->ccid3hcrx_s, rtt); + fval = scaled_div(s, rtt); fval = scaled_div32(fval, x_recv); p = tfrc_calc_x_reverse_lookup(fval); @@ -920,26 +923,36 @@ found: return 1000000 / p; } -static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) +static void ccid3_hc_rx_update_li(struct sock *sk, + struct list_head *li_hist_list, + struct list_head *hist_list, + struct timeval *last_feedback, + u16 s, u32 bytes_recv, + u32 previous_x_recv, + u64 seq_loss, u8 win_loss) { - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_li_hist_entry *head; u64 seq_temp; - if (list_empty(&hcrx->ccid3hcrx_li_hist)) { + if (list_empty(li_hist_list)) { if (!dccp_li_hist_interval_new(ccid3_li_hist, - &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) + li_hist_list, seq_loss, + win_loss)) return; - head = list_entry(hcrx->ccid3hcrx_li_hist.next, - struct dccp_li_hist_entry, dccplih_node); - head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); + head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, + dccplih_node); + head->dccplih_interval = + ccid3_hc_rx_calc_first_li(sk, hist_list, + last_feedback, s, + bytes_recv, + previous_x_recv); } else { struct dccp_li_hist_entry *entry; struct list_head *tail; - head = list_entry(hcrx->ccid3hcrx_li_hist.next, - struct dccp_li_hist_entry, dccplih_node); + head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, + dccplih_node); /* FIXME win count check removed as was wrong */ /* should make this check with receive history */ /* and compare there as per section 10.2 of RFC4342 */ @@ -954,9 +967,9 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) return; } - list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); + list_add(&entry->dccplih_node, li_hist_list); - tail = hcrx->ccid3hcrx_li_hist.prev; + tail = li_hist_list->prev; list_del(tail); kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); @@ -992,8 +1005,15 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) > TFRC_RECV_NUM_LATE_LOSS) { loss = 1; - ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, - hcrx->ccid3hcrx_ccval_nonloss); + ccid3_hc_rx_update_li(sk, + &hcrx->ccid3hcrx_li_hist, + &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_tstamp_last_feedback, + hcrx->ccid3hcrx_s, + hcrx->ccid3hcrx_bytes_recv, + hcrx->ccid3hcrx_x_recv, + hcrx->ccid3hcrx_seqno_nonloss, + hcrx->ccid3hcrx_ccval_nonloss); tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; dccp_inc_seqno(&tmp_seqno); hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; -- cgit v1.2.3 From 878ac60023c4ba11a7fbf0b1dfe07b8472c0d6ce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jun 2007 12:24:46 -0300 Subject: [CCID3]: Pass ccid3_li_hist to ccid3_hc_rx_update_li Now ccid3_hc_rx_update_li is ready to be moved to net/dccp/ccids/lib/loss_interval, it uses the same interface as the other functions there. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index fb500d3851c3..52a71a900eb1 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -924,6 +924,7 @@ found: } static void ccid3_hc_rx_update_li(struct sock *sk, + struct dccp_li_hist *li_hist, struct list_head *li_hist_list, struct list_head *hist_list, struct timeval *last_feedback, @@ -935,9 +936,8 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_temp; if (list_empty(li_hist_list)) { - if (!dccp_li_hist_interval_new(ccid3_li_hist, - li_hist_list, seq_loss, - win_loss)) + if (!dccp_li_hist_interval_new(li_hist, li_hist_list, + seq_loss, win_loss)) return; head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, @@ -960,7 +960,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, /* new loss event detected */ /* calculate last interval length */ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); - entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC); + entry = dccp_li_hist_entry_new(li_hist, GFP_ATOMIC); if (entry == NULL) { DCCP_BUG("out of memory - can not allocate entry"); @@ -971,7 +971,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, tail = li_hist_list->prev; list_del(tail); - kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); + kmem_cache_free(li_hist->dccplih_slab, tail); /* Create the newest interval */ entry->dccplih_seqno = seq_loss; @@ -1005,7 +1005,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) > TFRC_RECV_NUM_LATE_LOSS) { loss = 1; - ccid3_hc_rx_update_li(sk, + ccid3_hc_rx_update_li(sk, ccid3_li_hist, &hcrx->ccid3hcrx_li_hist, &hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_tstamp_last_feedback, -- cgit v1.2.3 From cc0a910b942d11069d35f52b2c0ed0e229e2fb46 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jun 2007 17:41:28 -0300 Subject: [DCCP] loss_interval: Move ccid3_hc_rx_update_li to loss_interval Renaming it to dccp_li_update_li. Also based on previous work by Ian McDonald. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 179 ++----------------------------------- net/dccp/ccids/lib/loss_interval.c | 160 +++++++++++++++++++++++++++++++++ net/dccp/ccids/lib/loss_interval.h | 7 ++ 3 files changed, 176 insertions(+), 170 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 52a71a900eb1..9d2e2c193cc6 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -819,167 +819,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) return 0; } -/* calculate first loss interval - * - * returns estimated loss interval in usecs */ - -static u32 ccid3_hc_rx_calc_first_li(struct sock *sk, - struct list_head *hist_list, - struct timeval *last_feedback, - u16 s, u32 bytes_recv, - u32 previous_x_recv) -{ - struct dccp_rx_hist_entry *entry, *next, *tail = NULL; - u32 x_recv, p; - suseconds_t rtt, delta; - struct timeval tstamp = { 0, 0 }; - int interval = 0; - int win_count = 0; - int step = 0; - u64 fval; - - list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) { - if (dccp_rx_hist_entry_data_packet(entry)) { - tail = entry; - - switch (step) { - case 0: - tstamp = entry->dccphrx_tstamp; - win_count = entry->dccphrx_ccval; - step = 1; - break; - case 1: - interval = win_count - entry->dccphrx_ccval; - if (interval < 0) - interval += TFRC_WIN_COUNT_LIMIT; - if (interval > 4) - goto found; - break; - } - } - } - - if (unlikely(step == 0)) { - DCCP_WARN("%s(%p), packet history has no data packets!\n", - dccp_role(sk), sk); - return ~0; - } - - if (unlikely(interval == 0)) { - DCCP_WARN("%s(%p), Could not find a win_count interval > 0." - "Defaulting to 1\n", dccp_role(sk), sk); - interval = 1; - } -found: - if (!tail) { - DCCP_CRIT("tail is null\n"); - return ~0; - } - - delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); - DCCP_BUG_ON(delta < 0); - - rtt = delta * 4 / interval; - ccid3_pr_debug("%s(%p), approximated RTT to %dus\n", - dccp_role(sk), sk, (int)rtt); - - /* - * Determine the length of the first loss interval via inverse lookup. - * Assume that X_recv can be computed by the throughput equation - * s - * X_recv = -------- - * R * fval - * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. - */ - if (rtt == 0) { /* would result in divide-by-zero */ - DCCP_WARN("RTT==0\n"); - return ~0; - } - - dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, last_feedback); - DCCP_BUG_ON(delta <= 0); - - x_recv = scaled_div32(bytes_recv, delta); - if (x_recv == 0) { /* would also trigger divide-by-zero */ - DCCP_WARN("X_recv==0\n"); - if (previous_x_recv == 0) { - DCCP_BUG("stored value of X_recv is zero"); - return ~0; - } - x_recv = previous_x_recv; - } - - fval = scaled_div(s, rtt); - fval = scaled_div32(fval, x_recv); - p = tfrc_calc_x_reverse_lookup(fval); - - ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " - "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); - - if (p == 0) - return ~0; - else - return 1000000 / p; -} - -static void ccid3_hc_rx_update_li(struct sock *sk, - struct dccp_li_hist *li_hist, - struct list_head *li_hist_list, - struct list_head *hist_list, - struct timeval *last_feedback, - u16 s, u32 bytes_recv, - u32 previous_x_recv, - u64 seq_loss, u8 win_loss) -{ - struct dccp_li_hist_entry *head; - u64 seq_temp; - - if (list_empty(li_hist_list)) { - if (!dccp_li_hist_interval_new(li_hist, li_hist_list, - seq_loss, win_loss)) - return; - - head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, - dccplih_node); - head->dccplih_interval = - ccid3_hc_rx_calc_first_li(sk, hist_list, - last_feedback, s, - bytes_recv, - previous_x_recv); - } else { - struct dccp_li_hist_entry *entry; - struct list_head *tail; - - head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, - dccplih_node); - /* FIXME win count check removed as was wrong */ - /* should make this check with receive history */ - /* and compare there as per section 10.2 of RFC4342 */ - - /* new loss event detected */ - /* calculate last interval length */ - seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); - entry = dccp_li_hist_entry_new(li_hist, GFP_ATOMIC); - - if (entry == NULL) { - DCCP_BUG("out of memory - can not allocate entry"); - return; - } - - list_add(&entry->dccplih_node, li_hist_list); - - tail = li_hist_list->prev; - list_del(tail); - kmem_cache_free(li_hist->dccplih_slab, tail); - - /* Create the newest interval */ - entry->dccplih_seqno = seq_loss; - entry->dccplih_interval = seq_temp; - entry->dccplih_win_count = win_loss; - } -} - static int ccid3_hc_rx_detect_loss(struct sock *sk, struct dccp_rx_hist_entry *packet) { @@ -1005,15 +844,15 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) > TFRC_RECV_NUM_LATE_LOSS) { loss = 1; - ccid3_hc_rx_update_li(sk, ccid3_li_hist, - &hcrx->ccid3hcrx_li_hist, - &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_tstamp_last_feedback, - hcrx->ccid3hcrx_s, - hcrx->ccid3hcrx_bytes_recv, - hcrx->ccid3hcrx_x_recv, - hcrx->ccid3hcrx_seqno_nonloss, - hcrx->ccid3hcrx_ccval_nonloss); + dccp_li_update_li(sk, ccid3_li_hist, + &hcrx->ccid3hcrx_li_hist, + &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_tstamp_last_feedback, + hcrx->ccid3hcrx_s, + hcrx->ccid3hcrx_bytes_recv, + hcrx->ccid3hcrx_x_recv, + hcrx->ccid3hcrx_seqno_nonloss, + hcrx->ccid3hcrx_ccval_nonloss); tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; dccp_inc_seqno(&tmp_seqno); hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 3829afc31176..ee59fde6653f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -15,6 +15,8 @@ #include #include "../../dccp.h" #include "loss_interval.h" +#include "packet_history.h" +#include "tfrc.h" struct dccp_li_hist *dccp_li_hist_new(const char *name) { @@ -141,3 +143,161 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist, } EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); + +/* calculate first loss interval + * + * returns estimated loss interval in usecs */ +static u32 dccp_li_calc_first_li(struct sock *sk, + struct list_head *hist_list, + struct timeval *last_feedback, + u16 s, u32 bytes_recv, + u32 previous_x_recv) +{ + struct dccp_rx_hist_entry *entry, *next, *tail = NULL; + u32 x_recv, p; + suseconds_t rtt, delta; + struct timeval tstamp = { 0, 0 }; + int interval = 0; + int win_count = 0; + int step = 0; + u64 fval; + + list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) { + if (dccp_rx_hist_entry_data_packet(entry)) { + tail = entry; + + switch (step) { + case 0: + tstamp = entry->dccphrx_tstamp; + win_count = entry->dccphrx_ccval; + step = 1; + break; + case 1: + interval = win_count - entry->dccphrx_ccval; + if (interval < 0) + interval += TFRC_WIN_COUNT_LIMIT; + if (interval > 4) + goto found; + break; + } + } + } + + if (unlikely(step == 0)) { + DCCP_WARN("%s(%p), packet history has no data packets!\n", + dccp_role(sk), sk); + return ~0; + } + + if (unlikely(interval == 0)) { + DCCP_WARN("%s(%p), Could not find a win_count interval > 0." + "Defaulting to 1\n", dccp_role(sk), sk); + interval = 1; + } +found: + if (!tail) { + DCCP_CRIT("tail is null\n"); + return ~0; + } + + delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); + + rtt = delta * 4 / interval; + dccp_pr_debug("%s(%p), approximated RTT to %dus\n", + dccp_role(sk), sk, (int)rtt); + + /* + * Determine the length of the first loss interval via inverse lookup. + * Assume that X_recv can be computed by the throughput equation + * s + * X_recv = -------- + * R * fval + * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. + */ + if (rtt == 0) { /* would result in divide-by-zero */ + DCCP_WARN("RTT==0\n"); + return ~0; + } + + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, last_feedback); + DCCP_BUG_ON(delta <= 0); + + x_recv = scaled_div32(bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN("X_recv==0\n"); + if (previous_x_recv == 0) { + DCCP_BUG("stored value of X_recv is zero"); + return ~0; + } + x_recv = previous_x_recv; + } + + fval = scaled_div(s, rtt); + fval = scaled_div32(fval, x_recv); + p = tfrc_calc_x_reverse_lookup(fval); + + dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied " + "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); + + if (p == 0) + return ~0; + else + return 1000000 / p; +} + +void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, + struct list_head *li_hist_list, + struct list_head *hist_list, + struct timeval *last_feedback, u16 s, u32 bytes_recv, + u32 previous_x_recv, u64 seq_loss, u8 win_loss) +{ + struct dccp_li_hist_entry *head; + u64 seq_temp; + + if (list_empty(li_hist_list)) { + if (!dccp_li_hist_interval_new(li_hist, li_hist_list, + seq_loss, win_loss)) + return; + + head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, + dccplih_node); + head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list, + last_feedback, + s, bytes_recv, + previous_x_recv); + } else { + struct dccp_li_hist_entry *entry; + struct list_head *tail; + + head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, + dccplih_node); + /* FIXME win count check removed as was wrong */ + /* should make this check with receive history */ + /* and compare there as per section 10.2 of RFC4342 */ + + /* new loss event detected */ + /* calculate last interval length */ + seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); + entry = dccp_li_hist_entry_new(li_hist, GFP_ATOMIC); + + if (entry == NULL) { + DCCP_BUG("out of memory - can not allocate entry"); + return; + } + + list_add(&entry->dccplih_node, li_hist_list); + + tail = li_hist_list->prev; + list_del(tail); + kmem_cache_free(li_hist->dccplih_slab, tail); + + /* Create the newest interval */ + entry->dccplih_seqno = seq_loss; + entry->dccplih_interval = seq_temp; + entry->dccplih_win_count = win_loss; + } +} + +EXPORT_SYMBOL_GPL(dccp_li_update_li); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 1e48fe323006..17f173a30055 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -54,4 +54,11 @@ extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, struct list_head *list, const u64 seq_loss, const u8 win_loss); + +extern void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, + struct list_head *li_hist_list, + struct list_head *hist_list, + struct timeval *last_feedback, u16 s, + u32 bytes_recv, u32 previous_x_recv, + u64 seq_loss, u8 win_loss); #endif /* _DCCP_LI_HIST_ */ -- cgit v1.2.3 From 8c281780c6f867460c84bd78d9c3885c10f00ae1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 May 2007 18:21:53 -0300 Subject: loss_interval: unexport dccp_li_hist_interval_new Now its only used inside the loss_interval code. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/lib/loss_interval.c | 7 +++---- net/dccp/ccids/lib/loss_interval.h | 3 --- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index ee59fde6653f..8ac68c60a1f1 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -120,8 +120,9 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); -int dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, const u64 seq_loss, const u8 win_loss) +static int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, + const u64 seq_loss, const u8 win_loss) { struct dccp_li_hist_entry *entry; int i; @@ -142,8 +143,6 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist, return 1; } -EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); - /* calculate first loss interval * * returns estimated loss interval in usecs */ diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 17f173a30055..653328d04ef0 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist, extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, const u64 seq_loss, const u8 win_loss); - extern void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, struct list_head *li_hist_list, struct list_head *hist_list, -- cgit v1.2.3 From c70b729e662a1b3ee2ef5370c1e4c9bc3ddc239f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 May 2007 18:25:12 -0300 Subject: loss_interval: Make dccp_li_hist_entry_{new,delete} private Not used outside the loss_interval code anymore. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/lib/loss_interval.c | 14 ++++++++++++++ net/dccp/ccids/lib/loss_interval.h | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 8ac68c60a1f1..28eac9be6634 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -62,6 +62,20 @@ void dccp_li_hist_delete(struct dccp_li_hist *hist) EXPORT_SYMBOL_GPL(dccp_li_hist_delete); +static inline struct dccp_li_hist_entry * + dccp_li_hist_entry_new(struct dccp_li_hist *hist, + const gfp_t prio) +{ + return kmem_cache_alloc(hist->dccplih_slab, prio); +} + +static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, + struct dccp_li_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccplih_slab, entry); +} + void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) { struct dccp_li_hist_entry *entry, *next; diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 653328d04ef0..8d3c9bfa4915 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -33,20 +33,6 @@ struct dccp_li_hist_entry { u32 dccplih_interval; }; -static inline struct dccp_li_hist_entry * - dccp_li_hist_entry_new(struct dccp_li_hist *hist, - const gfp_t prio) -{ - return kmem_cache_alloc(hist->dccplih_slab, prio); -} - -static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, - struct dccp_li_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(hist->dccplih_slab, entry); -} - extern void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list); -- cgit v1.2.3 From cc4d6a3a34ce3976d7d01d044f3093cddc2921c2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 May 2007 18:53:08 -0300 Subject: loss_interval: Nuke dccp_li_hist It had just a slab cache, so, for the sake of simplicity just make dccp_trfc_lib module init routine create the slab cache, no need for users of the lib to create a private loss_interval object. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 18 ++------ net/dccp/ccids/lib/loss_interval.c | 92 +++++++++++++------------------------- net/dccp/ccids/lib/loss_interval.h | 12 +---- 3 files changed, 36 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 9d2e2c193cc6..407f10c742ae 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -49,7 +49,6 @@ static int ccid3_debug; static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; -static struct dccp_li_hist *ccid3_li_hist; /* * Transmitter Half-Connection Routines @@ -844,7 +843,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) > TFRC_RECV_NUM_LATE_LOSS) { loss = 1; - dccp_li_update_li(sk, ccid3_li_hist, + dccp_li_update_li(sk, &hcrx->ccid3hcrx_li_hist, &hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_tstamp_last_feedback, @@ -1011,7 +1010,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); /* Empty loss interval history */ - dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist); + dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist); } static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) @@ -1095,19 +1094,12 @@ static __init int ccid3_module_init(void) if (ccid3_tx_hist == NULL) goto out_free_rx; - ccid3_li_hist = dccp_li_hist_new("ccid3"); - if (ccid3_li_hist == NULL) - goto out_free_tx; - rc = ccid_register(&ccid3); if (rc != 0) - goto out_free_loss_interval_history; + goto out_free_tx; out: return rc; -out_free_loss_interval_history: - dccp_li_hist_delete(ccid3_li_hist); - ccid3_li_hist = NULL; out_free_tx: dccp_tx_hist_delete(ccid3_tx_hist); ccid3_tx_hist = NULL; @@ -1130,10 +1122,6 @@ static __exit void ccid3_module_exit(void) dccp_rx_hist_delete(ccid3_rx_hist); ccid3_rx_hist = NULL; } - if (ccid3_li_hist != NULL) { - dccp_li_hist_delete(ccid3_li_hist); - ccid3_li_hist = NULL; - } } module_exit(ccid3_module_exit); diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 28eac9be6634..e6b1f0c913ec 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -18,71 +18,26 @@ #include "packet_history.h" #include "tfrc.h" -struct dccp_li_hist *dccp_li_hist_new(const char *name) -{ - struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); - static const char dccp_li_hist_mask[] = "li_hist_%s"; - char *slab_name; - - if (hist == NULL) - goto out; - - slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1, - GFP_ATOMIC); - if (slab_name == NULL) - goto out_free_hist; - - sprintf(slab_name, dccp_li_hist_mask, name); - hist->dccplih_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_li_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (hist->dccplih_slab == NULL) - goto out_free_slab_name; -out: - return hist; -out_free_slab_name: - kfree(slab_name); -out_free_hist: - kfree(hist); - hist = NULL; - goto out; -} - -EXPORT_SYMBOL_GPL(dccp_li_hist_new); - -void dccp_li_hist_delete(struct dccp_li_hist *hist) -{ - const char* name = kmem_cache_name(hist->dccplih_slab); - - kmem_cache_destroy(hist->dccplih_slab); - kfree(name); - kfree(hist); -} - -EXPORT_SYMBOL_GPL(dccp_li_hist_delete); +struct kmem_cache *dccp_li_cachep __read_mostly; -static inline struct dccp_li_hist_entry * - dccp_li_hist_entry_new(struct dccp_li_hist *hist, - const gfp_t prio) +static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) { - return kmem_cache_alloc(hist->dccplih_slab, prio); + return kmem_cache_alloc(dccp_li_cachep, prio); } -static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, - struct dccp_li_hist_entry *entry) +static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry) { if (entry != NULL) - kmem_cache_free(hist->dccplih_slab, entry); + kmem_cache_free(dccp_li_cachep, entry); } -void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) +void dccp_li_hist_purge(struct list_head *list) { struct dccp_li_hist_entry *entry, *next; list_for_each_entry_safe(entry, next, list, dccplih_node) { list_del_init(&entry->dccplih_node); - kmem_cache_free(hist->dccplih_slab, entry); + kmem_cache_free(dccp_li_cachep, entry); } } @@ -134,17 +89,16 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); -static int dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, +static int dccp_li_hist_interval_new(struct list_head *list, const u64 seq_loss, const u8 win_loss) { struct dccp_li_hist_entry *entry; int i; for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { - entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC); + entry = dccp_li_hist_entry_new(GFP_ATOMIC); if (entry == NULL) { - dccp_li_hist_purge(hist, list); + dccp_li_hist_purge(list); DCCP_BUG("loss interval list entry is NULL"); return 0; } @@ -260,7 +214,7 @@ found: return 1000000 / p; } -void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, +void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, struct timeval *last_feedback, u16 s, u32 bytes_recv, @@ -270,8 +224,8 @@ void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, u64 seq_temp; if (list_empty(li_hist_list)) { - if (!dccp_li_hist_interval_new(li_hist, li_hist_list, - seq_loss, win_loss)) + if (!dccp_li_hist_interval_new(li_hist_list, seq_loss, + win_loss)) return; head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, @@ -293,7 +247,7 @@ void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, /* new loss event detected */ /* calculate last interval length */ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); - entry = dccp_li_hist_entry_new(li_hist, GFP_ATOMIC); + entry = dccp_li_hist_entry_new(GFP_ATOMIC); if (entry == NULL) { DCCP_BUG("out of memory - can not allocate entry"); @@ -304,7 +258,7 @@ void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, tail = li_hist_list->prev; list_del(tail); - kmem_cache_free(li_hist->dccplih_slab, tail); + kmem_cache_free(dccp_li_cachep, tail); /* Create the newest interval */ entry->dccplih_seqno = seq_loss; @@ -314,3 +268,19 @@ void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, } EXPORT_SYMBOL_GPL(dccp_li_update_li); + +static __init int dccp_li_init(void) +{ + dccp_li_cachep = kmem_cache_create("dccp_li_hist", + sizeof(struct dccp_li_hist_entry), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + return dccp_li_cachep == NULL ? -ENOBUFS : 0; +} + +static __exit void dccp_li_exit(void) +{ + kmem_cache_destroy(dccp_li_cachep); +} + +module_init(dccp_li_init); +module_exit(dccp_li_exit); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 8d3c9bfa4915..f35c11100fc6 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -19,13 +19,6 @@ #define DCCP_LI_HIST_IVAL_F_LENGTH 8 -struct dccp_li_hist { - struct kmem_cache *dccplih_slab; -}; - -extern struct dccp_li_hist *dccp_li_hist_new(const char *name); -extern void dccp_li_hist_delete(struct dccp_li_hist *hist); - struct dccp_li_hist_entry { struct list_head dccplih_node; u64 dccplih_seqno:48, @@ -33,12 +26,11 @@ struct dccp_li_hist_entry { u32 dccplih_interval; }; -extern void dccp_li_hist_purge(struct dccp_li_hist *hist, - struct list_head *list); +extern void dccp_li_hist_purge(struct list_head *list); extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -extern void dccp_li_update_li(struct sock *sk, struct dccp_li_hist *li_hist, +extern void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, struct timeval *last_feedback, u16 s, -- cgit v1.2.3 From dd36a9aba44e4ddbac011de2cb14a70444487303 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 May 2007 18:56:44 -0300 Subject: loss_interval: make struct dccp_li_hist_entry private net/dccp/ccids/lib/loss_interval.c is the only place where this struct is used. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/lib/loss_interval.c | 9 +++++++++ net/dccp/ccids/lib/loss_interval.h | 10 ---------- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index e6b1f0c913ec..01c1edb3e349 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -18,6 +18,15 @@ #include "packet_history.h" #include "tfrc.h" +#define DCCP_LI_HIST_IVAL_F_LENGTH 8 + +struct dccp_li_hist_entry { + struct list_head dccplih_node; + u64 dccplih_seqno:48, + dccplih_win_count:4; + u32 dccplih_interval; +}; + struct kmem_cache *dccp_li_cachep __read_mostly; static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index f35c11100fc6..906c806d6d9d 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -14,18 +14,8 @@ */ #include -#include #include -#define DCCP_LI_HIST_IVAL_F_LENGTH 8 - -struct dccp_li_hist_entry { - struct list_head dccplih_node; - u64 dccplih_seqno:48, - dccplih_win_count:4; - u32 dccplih_interval; -}; - extern void dccp_li_hist_purge(struct list_head *list); extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -- cgit v1.2.3 From 8132da4d412ad51c34bad11133a8f0941e2a1972 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 16 Jun 2007 13:34:02 -0300 Subject: [CCID3]: Sending time: update to ktime_t This updates the computation of t_nom and t_last_win_count to use the newer gettimeofday interface. Committer note: used ktime_to_timeval to set the 'now' variable to t_ld in ccid3hctx_no_feedback_timer Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 33 +++++++++++++-------------------- net/dccp/ccids/ccid3.h | 5 +++-- 2 files changed, 16 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 407f10c742ae..94b3a1a29537 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -193,25 +193,20 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) * The algorithm is not applicable if RTT < 4 microseconds. */ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, - struct timeval *now) + ktime_t now) { - suseconds_t delta; u32 quarter_rtts; if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ return; - delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count); - DCCP_BUG_ON(delta < 0); - - quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4); + quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); + quarter_rtts /= hctx->ccid3hctx_rtt / 4; if (quarter_rtts > 0) { - hctx->ccid3hctx_t_last_win_count = *now; + hctx->ccid3hctx_t_last_win_count = now; hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ - - ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count); } } @@ -311,8 +306,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; - suseconds_t delay; + ktime_t now = ktime_get_real(); + s64 delay; BUG_ON(hctx == NULL); @@ -324,8 +319,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) if (unlikely(skb->len == 0)) return -EBADMSG; - dccp_timestamp(sk, &now); - switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, @@ -348,7 +341,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = now; + hctx->ccid3hctx_t_ld = ktime_to_timeval(now); } else { /* Sender does not have RTT sample: X = MSS/second */ hctx->ccid3hctx_x = dp->dccps_mss_cache; @@ -360,7 +353,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); + delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now); ccid3_pr_debug("delay=%ld\n", (long)delay); /* * Scheduling of packet transmissions [RFC 3448, 4.6] @@ -370,10 +363,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0) - return delay / 1000L; + if (delay - (s64)hctx->ccid3hctx_delta >= 0) + return (u32)delay / 1000L; - ccid3_hc_tx_update_win_count(hctx, &now); + ccid3_hc_tx_update_win_count(hctx, now); break; case TFRC_SSTATE_TERM: DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); @@ -386,8 +379,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_idle = 0; /* set the nominal send time for the next following packet */ - timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); - + hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); return 0; } diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 8d31b389c19c..51d4b804e334 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -36,6 +36,7 @@ #ifndef _DCCP_CCID3_H_ #define _DCCP_CCID3_H_ +#include #include #include #include @@ -108,10 +109,10 @@ struct ccid3_hc_tx_sock { enum ccid3_hc_tx_states ccid3hctx_state:8; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; - struct timeval ccid3hctx_t_last_win_count; + ktime_t ccid3hctx_t_last_win_count; struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; - struct timeval ccid3hctx_t_nom; + ktime_t ccid3hctx_t_nom; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; -- cgit v1.2.3 From 49d66a70cf9fd94057aacd6055334299ab3a5eac Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 16 Jun 2007 13:48:50 -0300 Subject: [CCID3]: Fix a bug in the send time processing ccid3_hc_tx_send_packet currently returns 0 when the time difference between current time and t_nom is less than 1000 microseconds. In this case the packet is sent immediately; but, unlike other packets that can be emitted on first attempt, it will not have its window counter updated and its options set as required. This is a bug. Fix: Require the time difference to be at least 1000 microseconds. The algorithm then converges: time differences > 1000 microseconds trigger the timer in dccp_write_xmit; after timer expiry this function is tried again; when the time difference is less than 1000, the packet will have its options added and window counter updated as required. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 94b3a1a29537..e91c2b9dc27b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -363,7 +363,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay - (s64)hctx->ccid3hctx_delta >= 0) + if (delay - (s64)hctx->ccid3hctx_delta >= 1000) return (u32)delay / 1000L; ccid3_hc_tx_update_win_count(hctx, now); -- cgit v1.2.3 From 6c1361a6f285bf3df4b502651c0dd38d0eedc044 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Sun, 24 Jun 2007 19:56:09 -0700 Subject: [NET]: qdisc_restart - readability changes plus one bug fix. New changes : - Incorporated Peter Waskiewicz's comments. - Re-added back one warning message (on driver returning wrong value). Previous changes : - Converted to use switch/case code which looks neater. - "if (ret == NETDEV_TX_LOCKED && lockless)" is buggy, and the lockless check should be removed, since driver will return NETDEV_TX_LOCKED only if lockless is true and driver has to do the locking. In the original code as well as the latest code, this code can result in a bug where if LLTX is not set for a driver (lockless == 0) but the driver is written wrongly to do a trylock (despite LLTX being set), the driver returns LOCKED. But since lockless is zero, the packet is requeue'd instead of calling collision code which will issue warning and free up the skb. Instead this skb will be retried with this driver next time, and the same result will ensue. Removing this check will catch these driver bugs instead of hiding the problem. I am keeping this change to readability section since : a. it is confusing to check two things as it is; and b. it is difficult to keep this check in the changed 'switch' code. - Changed some names, like try_get_tx_pkt to dev_dequeue_skb (as that is the work being done and easier to understand) and do_dev_requeue to dev_requeue_skb, merged handle_dev_cpu_collision and tx_islocked to dev_handle_collision (handle_dev_cpu_collision is a small routine with only one caller, so there is no need to have two separate routines which also results in getting rid of two macros, etc. - Removed an XXX comment as it should never fail (I suspect this was related to batch skb WIP, Jamal ?). Converted some functions to original coding style of having the return values and the function name on same line, eg prio2list. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 167 +++++++++++++++++++++++++----------------------- 1 file changed, 86 insertions(+), 81 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 9461e8ae0529..983c32caf713 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -34,9 +34,6 @@ #include #include -#define SCHED_TX_DROP -2 -#define SCHED_TX_QUEUE -3 - /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with @@ -68,41 +65,24 @@ static inline int qdisc_qlen(struct Qdisc *q) return q->q.qlen; } -static inline int handle_dev_cpu_collision(struct net_device *dev) -{ - if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { - if (net_ratelimit()) - printk(KERN_WARNING - "Dead loop on netdevice %s, fix it urgently!\n", - dev->name); - return SCHED_TX_DROP; - } - __get_cpu_var(netdev_rx_stat).cpu_collision++; - return SCHED_TX_QUEUE; -} - -static inline int -do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) +static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, + struct Qdisc *q) { - if (unlikely(skb->next)) dev->gso_skb = skb; else q->ops->requeue(skb, q); - /* XXX: Could netif_schedule fail? Or is the fact we are - * requeueing imply the hardware path is closed - * and even if we fail, some interupt will wake us - */ + netif_schedule(dev); return 0; } -static inline struct sk_buff * -try_get_tx_pkt(struct net_device *dev, struct Qdisc *q) +static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, + struct Qdisc *q) { - struct sk_buff *skb = dev->gso_skb; + struct sk_buff *skb; - if (skb) + if ((skb = dev->gso_skb)) dev->gso_skb = NULL; else skb = q->dequeue(q); @@ -110,92 +90,117 @@ try_get_tx_pkt(struct net_device *dev, struct Qdisc *q) return skb; } -static inline int -tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) +static inline int handle_dev_cpu_collision(struct sk_buff *skb, + struct net_device *dev, + struct Qdisc *q) { - int ret = handle_dev_cpu_collision(dev); + int ret; - if (ret == SCHED_TX_DROP) { + if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + /* + * Same CPU holding the lock. It may be a transient + * configuration error, when hard_start_xmit() recurses. We + * detect it by checking xmit owner and drop the packet when + * deadloop is detected. Return OK to try the next skb. + */ kfree_skb(skb); - return qdisc_qlen(q); + if (net_ratelimit()) + printk(KERN_WARNING "Dead loop on netdevice %s, " + "fix it urgently!\n", dev->name); + ret = qdisc_qlen(q); + } else { + /* + * Another cpu is holding lock, requeue & delay xmits for + * some time. + */ + __get_cpu_var(netdev_rx_stat).cpu_collision++; + ret = dev_requeue_skb(skb, dev, q); } - return do_dev_requeue(skb, dev, q); + return ret; } - /* - NOTE: Called under dev->queue_lock with locally disabled BH. - - __LINK_STATE_QDISC_RUNNING guarantees only one CPU - can enter this region at a time. - - dev->queue_lock serializes queue accesses for this device - AND dev->qdisc pointer itself. - - netif_tx_lock serializes accesses to device driver. - - dev->queue_lock and netif_tx_lock are mutually exclusive, - if one is grabbed, another must be free. - - Multiple CPUs may contend for the two locks. - - Note, that this procedure can be called by a watchdog timer - - Returns to the caller: - Returns: 0 - queue is empty or throttled. - >0 - queue is not empty. - -*/ - + * NOTE: Called under dev->queue_lock with locally disabled BH. + * + * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this + * device at a time. dev->queue_lock serializes queue accesses for + * this device AND dev->qdisc pointer itself. + * + * netif_tx_lock serializes accesses to device driver. + * + * dev->queue_lock and netif_tx_lock are mutually exclusive, + * if one is grabbed, another must be free. + * + * Note, that this procedure can be called by a watchdog timer + * + * Returns to the caller: + * 0 - queue is empty or throttled. + * >0 - queue is not empty. + * + */ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; - unsigned lockless = (dev->features & NETIF_F_LLTX); struct sk_buff *skb; + unsigned lockless; int ret; - skb = try_get_tx_pkt(dev, q); - if (skb == NULL) + /* Dequeue packet */ + if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) return 0; - /* we have a packet to send */ - if (!lockless) { - if (!netif_tx_trylock(dev)) - return tx_islocked(skb, dev, q); + /* + * When the driver has LLTX set, it does its own locking in + * start_xmit. These checks are worth it because even uncongested + * locks can be quite expensive. The driver can do a trylock, as + * is being done here; in case of lock contention it should return + * NETDEV_TX_LOCKED and the packet will be requeued. + */ + lockless = (dev->features & NETIF_F_LLTX); + + if (!lockless && !netif_tx_trylock(dev)) { + /* Another CPU grabbed the driver tx lock */ + return handle_dev_cpu_collision(skb, dev, q); } - /* all clear .. */ + + /* And release queue */ spin_unlock(&dev->queue_lock); ret = NETDEV_TX_BUSY; if (!netif_queue_stopped(dev)) - /* churn baby churn .. */ ret = dev_hard_start_xmit(skb, dev); if (!lockless) netif_tx_unlock(dev); spin_lock(&dev->queue_lock); - - /* we need to refresh q because it may be invalid since - * we dropped dev->queue_lock earlier ... - * So dont try to be clever grasshopper - */ q = dev->qdisc; - /* most likely result, packet went ok */ - if (ret == NETDEV_TX_OK) - return qdisc_qlen(q); - /* only for lockless drivers .. */ - if (ret == NETDEV_TX_LOCKED && lockless) - return tx_islocked(skb, dev, q); - if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) - printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen); + switch (ret) { + case NETDEV_TX_OK: + /* Driver sent out skb successfully */ + ret = qdisc_qlen(q); + break; + + case NETDEV_TX_LOCKED: + /* Driver try lock failed */ + ret = handle_dev_cpu_collision(skb, dev, q); + break; + + default: + /* Driver returned NETDEV_TX_BUSY - requeue skb */ + if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) + printk(KERN_WARNING "BUG %s code %d qlen %d\n", + dev->name, ret, q->q.qlen); + + ret = dev_requeue_skb(skb, dev, q); + break; + } - return do_dev_requeue(skb, dev, q); + return ret; } - void __qdisc_run(struct net_device *dev) { do { -- cgit v1.2.3 From e50c41b53d7aa48152dd9c633b04fc7abd536f1f Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Sun, 24 Jun 2007 19:57:27 -0700 Subject: [NET]: qdisc_restart - couple of optimizations. Changes : - netif_queue_stopped need not be called inside qdisc_restart as it has been called already in qdisc_run() before the first skb is sent, and in __qdisc_run() after each intermediate skb is sent (note : we are the only sender, so the queue cannot get stopped while the tx lock was got in the ~LLTX case). - BUG_ON((int) q->q.qlen < 0) was a relic from old times when -1 meant more packets are available, and __qdisc_run used to loop when qdisc_restart() returned -1. During those days, it was necessary to make sure that qlen is never less than zero, since __qdisc_run would get into an infinite loop if no packets are on the queue and this bug in qdisc was there (and worse - no more skbs could ever get queue'd as we hold the queue lock too). With Herbert's recent change to return values, this check is not required. Hopefully Herbert can validate this change. If at all this is required, it should be added to skb_dequeue (in failure case), and not to qdisc_qlen. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 983c32caf713..2488dbb17b60 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -61,7 +61,6 @@ void qdisc_unlock_tree(struct net_device *dev) static inline int qdisc_qlen(struct Qdisc *q) { - BUG_ON((int) q->q.qlen < 0); return q->q.qlen; } @@ -167,9 +166,7 @@ static inline int qdisc_restart(struct net_device *dev) /* And release queue */ spin_unlock(&dev->queue_lock); - ret = NETDEV_TX_BUSY; - if (!netif_queue_stopped(dev)) - ret = dev_hard_start_xmit(skb, dev); + ret = dev_hard_start_xmit(skb, dev); if (!lockless) netif_tx_unlock(dev); -- cgit v1.2.3 From 334a8132d9950f769f390f0f35c233d099688e7a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 25 Jun 2007 04:35:20 -0700 Subject: [SKBUFF]: Keep track of writable header len of headerless clones Currently NAT (and others) that want to modify cloned skbs copy them, even if in the vast majority of cases its not necessary because the skb is a clone made by TCP and the portion NAT wants to modify is actually writable because TCP release the header reference before cloning. The problem is that there is no clean way for NAT to find out how long the writable header area is, so this patch introduces skb->hdr_len to hold this length. When a headerless skb is cloned skb->hdr_len is set to the current headroom, for regular clones it is copied from the original. A new function skb_clone_writable(skb, len) returns whether the skb is writable up to len bytes from skb->data. To avoid enlarging the skb the mac_len field is reduced to 16 bit and the new hdr_len field is put in the remaining 16 bit. I've done a few rough benchmarks of NAT (not with this exact patch, but a very similar one). As expected it saves huge amounts of system time in case of sendfile, bringing it down to basically the same amount as without NAT, with sendmsg it only helps on loopback, probably because of the large MTU. Transmit a 1GB file using sendfile/sendmsg over eth0/lo with and without NAT: - sendfile eth0, no NAT: sys 0m0.388s - sendfile eth0, NAT: sys 0m1.835s - sendfile eth0: NAT + path: sys 0m0.370s (~ -80%) - sendfile lo, no NAT: sys 0m0.258s - sendfile lo, NAT: sys 0m2.609s - sendfile lo, NAT + patch: sys 0m0.260s (~ -90%) - sendmsg eth0, no NAT: sys 0m2.508s - sendmsg eth0, NAT: sys 0m2.539s - sendmsg eth0, NAT + patch: sys 0m2.445s (no change) - sendmsg lo, no NAT: sys 0m2.151s - sendmsg lo, NAT: sys 0m3.557s - sendmsg lo, NAT + patch: sys 0m2.159s (~ -40%) I expect other users can see a similar performance improvement, packet mangling iptables targets, ipip and ip_gre come to mind .. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++++++++++---- net/core/skbuff.c | 2 ++ net/netfilter/core.c | 4 +++- 3 files changed, 25 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f0b2f7d0010..881fe80f01d0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -147,8 +147,8 @@ struct skb_shared_info { /* We divide dataref into two halves. The higher 16 bits hold references * to the payload part of skb->data. The lower 16 bits hold references to - * the entire skb->data. It is up to the users of the skb to agree on - * where the payload starts. + * the entire skb->data. A clone of a headerless skb holds the length of + * the header in skb->hdr_len. * * All users must obey the rule that the skb->data reference count must be * greater than or equal to the payload reference count. @@ -206,6 +206,7 @@ typedef unsigned char *sk_buff_data_t; * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header + * @hdr_len: writable header length of cloned skb * @csum: Checksum (must include start/offset pair) * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored @@ -260,8 +261,9 @@ struct sk_buff { char cb[48]; unsigned int len, - data_len, - mac_len; + data_len; + __u16 mac_len, + hdr_len; union { __wsum csum; struct { @@ -1321,6 +1323,20 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } +/** + * skb_clone_writable - is the header of a clone writable + * @skb: buffer to check + * @len: length up to which to write + * + * Returns true if modifying the header part of the cloned buffer + * does not requires the data to be copied. + */ +static inline int skb_clone_writable(struct sk_buff *skb, int len) +{ + return !skb_header_cloned(skb) && + skb_headroom(skb) + len <= skb->hdr_len; +} + /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3943c3ad9145..c989c3a0f907 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -415,6 +415,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) C(csum); C(local_df); n->cloned = 1; + n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->nohdr = 0; C(pkt_type); C(ip_summed); @@ -676,6 +677,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; skb->mac_header += off; skb->cloned = 0; + skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a84478ee2ded..3aaabec70d19 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -203,7 +203,9 @@ int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) return 0; /* Not exclusive use of packet? Must copy. */ - if (skb_shared(*pskb) || skb_cloned(*pskb)) + if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len)) + goto copy_skb; + if (skb_shared(*pskb)) goto copy_skb; return pskb_may_pull(*pskb, writable_len); -- cgit v1.2.3 From 1092cb219774a82b1f16781aec7b8d4ec727c981 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 25 Jun 2007 13:49:35 -0700 Subject: [NETLINK]: attr: add nested compat attribute type Add a nested compat attribute type that can be used to convert attributes that contain a structure to nested attributes in a backwards compatible way. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netlink.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/netlink/attr.c | 11 +++++++ 2 files changed, 95 insertions(+) (limited to 'net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 7b510a9edb91..d7b824be5422 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -118,6 +118,9 @@ * Nested Attributes Construction: * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute + * nla_nest_compat_start(skb, type, start a nested compat attribute + * len, data) + * nla_nest_compat_end(skb, type) finalize a nested compat attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction * * Attribute Length Calculations: @@ -152,6 +155,7 @@ * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets + * nla_parse_nested_compat() parse nested compat attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= @@ -170,6 +174,7 @@ enum { NLA_FLAG, NLA_MSECS, NLA_NESTED, + NLA_NESTED_COMPAT, NLA_NUL_STRING, NLA_BINARY, __NLA_TYPE_MAX, @@ -190,6 +195,7 @@ enum { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload + * NLA_NESTED_COMPAT Exact length of structure payload * All other Exact length of attribute payload * * Example: @@ -733,6 +739,39 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, { return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); } + +/** + * nla_parse_nested_compat - parse nested compat attributes + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @nla: attribute containing the nested attributes + * @data: pointer to point to contained structure + * @len: length of contained structure + * @policy: validation policy + * + * Parse a nested compat attribute. The compat attribute contains a structure + * and optionally a set of nested attributes. On success the data pointer + * points to the nested data and tb contains the parsed attributes + * (see nla_parse). + */ +static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + const struct nla_policy *policy, + int len) +{ + if (nla_len(nla) < len) + return -1; + if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr)) + return nla_parse_nested(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + +#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \ +({ data = nla_len(nla) >= len ? nla_data(nla) : NULL; \ + __nla_parse_nested_compat(tb, maxtype, nla, policy, len); }) /** * nla_put_u8 - Add a u16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -964,6 +1003,51 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) return skb->len; } +/** + * nla_nest_compat_start - Start a new level of nested compat attributes + * @skb: socket buffer to add attributes to + * @attrtype: attribute type of container + * @attrlen: length of structure + * @data: pointer to structure + * + * Start a nested compat attribute that contains both a structure and + * a set of nested attributes. + * + * Returns the container attribute + */ +static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb, + int attrtype, int attrlen, + const void *data) +{ + struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); + + if (nla_put(skb, attrtype, attrlen, data) < 0) + return NULL; + if (nla_nest_start(skb, attrtype) == NULL) { + nlmsg_trim(skb, start); + return NULL; + } + return start; +} + +/** + * nla_nest_compat_end - Finalize nesting of compat attributes + * @skb: socket buffer the attribtues are stored in + * @start: container attribute + * + * Corrects the container attribute header to include the all + * appeneded attributes. + * + * Returns the total data length of the skb. + */ +static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start) +{ + struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len); + + start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; + return nla_nest_end(skb, nest); +} + /** * nla_nest_cancel - Cancel nesting of attributes * @skb: socket buffer the message is stored in diff --git a/net/netlink/attr.c b/net/netlink/attr.c index c591212793ee..e4d7bed99c2e 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -72,6 +72,17 @@ static int validate_nla(struct nlattr *nla, int maxtype, return -ERANGE; break; + case NLA_NESTED_COMPAT: + if (attrlen < pt->len) + return -ERANGE; + if (attrlen < NLA_ALIGN(pt->len)) + break; + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) + return -ERANGE; + nla = nla_data(nla) + NLA_ALIGN(pt->len); + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) + return -ERANGE; + break; default: if (pt->len) minlen = pt->len; -- cgit v1.2.3 From afdc3238ec948531205f5c5f77d2de7bae519c71 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 25 Jun 2007 14:30:16 -0700 Subject: [RTNETLINK]: Add nested compat attribute Add a nested compat attribute type that can be used to convert attributes that contain a structure to nested attributes in a backwards compatible way. The attribute looks like this: struct { [ compat contents ] struct rtattr { .rta_len = total size, .rta_type = type, } rta; struct old_structure struct; [ nested top-level attribute ] struct rtattr { .rta_len = nest size, .rta_type = type, } nest_attr; [ optional 0 .. n nested attributes ] struct rtattr { .rta_len = private attribute len, .rta_type = private attribute typ, } nested_attr; struct nested_data data; }; Since both userspace and kernel deal correctly with attributes that are larger than expected old versions will just parse the compat part and ignore the rest. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 14 ++++++++++++++ net/core/rtnetlink.c | 16 ++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 612785848532..6731e7f4cc0f 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -570,6 +570,8 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) } extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); +extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, void **data, int len); #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) @@ -638,6 +640,18 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi ({ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ (skb)->len; }) +#define RTA_NEST_COMPAT(skb, type, attrlen, data) \ +({ struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \ + RTA_PUT(skb, type, attrlen, data); \ + RTA_NEST(skb, type); \ + __start; }) + +#define RTA_NEST_COMPAT_END(skb, start) \ +({ struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \ + (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ + RTA_NEST_END(skb, __nest); \ + (skb)->len; }) + #define RTA_NEST_CANCEL(skb, start) \ ({ if (start) \ skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 06c0c5afabf0..c25d23ba6d5d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -97,6 +97,21 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } +int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, void **data, int len) +{ + if (RTA_PAYLOAD(rta) < len) + return -1; + *data = RTA_DATA(rta); + + if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { + rta = RTA_DATA(rta) + RTA_ALIGN(len); + return rtattr_parse_nested(tb, maxattr, rta); + } + memset(tb, 0, sizeof(struct rtattr *) * maxattr); + return 0; +} + static struct rtnl_link *rtnl_msg_handlers[NPROTO]; static inline int rtm_msgindex(int msgtype) @@ -1297,6 +1312,7 @@ void __init rtnetlink_init(void) EXPORT_SYMBOL(__rta_fill); EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); +EXPORT_SYMBOL(rtattr_parse_nested_compat); EXPORT_SYMBOL(rtnetlink_put_metrics); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); -- cgit v1.2.3 From 2371baa4bdab3268b32009926f75e7a5d3a41506 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 26 Jun 2007 03:23:44 -0700 Subject: [RTNETLINK]: Fix rtnetlink compat attribute patch Sent the wrong patch previously. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 8 ++++++-- net/core/rtnetlink.c | 8 +++----- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 6731e7f4cc0f..c91476ce314a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -570,12 +570,16 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) } extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); -extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, - struct rtattr *rta, void **data, int len); +extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len); #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) +#define rtattr_parse_nested_compat(tb, max, rta, data, len) \ +({ data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \ + __rtattr_parse_nested_compat(tb, max, rta, len); }) + extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, u32 pid); extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c25d23ba6d5d..54c17e4cd28f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -97,13 +97,11 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } -int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, - struct rtattr *rta, void **data, int len) +int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len) { if (RTA_PAYLOAD(rta) < len) return -1; - *data = RTA_DATA(rta); - if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { rta = RTA_DATA(rta) + RTA_ALIGN(len); return rtattr_parse_nested(tb, maxattr, rta); @@ -1312,7 +1310,7 @@ void __init rtnetlink_init(void) EXPORT_SYMBOL(__rta_fill); EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); -EXPORT_SYMBOL(rtattr_parse_nested_compat); +EXPORT_SYMBOL(__rtattr_parse_nested_compat); EXPORT_SYMBOL(rtnetlink_put_metrics); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); -- cgit v1.2.3 From 136ebf08b46f839e2dc9db34322b654e5d9b9936 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Tue, 26 Jun 2007 23:51:41 -0700 Subject: [IPV6] MIP6: Kill unnecessary ifdefs. Kill unnecessary CONFIG_IPV6_MIP6. o It is redundant for RAW socket to keep MH out with the config then it can handle any protocol. o Clean-up at AH. Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- include/net/flow.h | 4 ---- include/net/xfrm.h | 2 -- net/ipv6/ah6.c | 4 ++-- net/ipv6/raw.c | 4 ---- 4 files changed, 2 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/flow.h b/include/net/flow.h index f3cc1f812619..af59fa5cc1f8 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -67,20 +67,16 @@ struct flowi { __be32 spi; -#ifdef CONFIG_IPV6_MIP6 struct { __u8 type; } mht; -#endif } uli_u; #define fl_ip_sport uli_u.ports.sport #define fl_ip_dport uli_u.ports.dport #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi -#ifdef CONFIG_IPV6_MIP6 #define fl_mh_type uli_u.mht.type -#endif __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 311f25af5e1a..7720c1182bb4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -509,11 +509,9 @@ __be16 xfrm_flowi_sport(struct flowi *fl) case IPPROTO_ICMPV6: port = htons(fl->fl_icmp_type); break; -#ifdef CONFIG_IPV6_MIP6 case IPPROTO_MH: port = htons(fl->fl_mh_type); break; -#endif default: port = 0; /*XXX*/ } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 128f94c79c64..01fa302d281e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -132,6 +132,8 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des bad: return; } +#else +static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {} #endif /** @@ -189,10 +191,8 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) while (exthdr.raw < end) { switch (nexthdr) { case NEXTHDR_DEST: -#ifdef CONFIG_IPV6_MIP6 if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); -#endif case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { LIMIT_NETDEBUG( diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a58459a76684..a22c9c93d931 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -611,9 +611,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) struct iovec *iov; u8 __user *type = NULL; u8 __user *code = NULL; -#ifdef CONFIG_IPV6_MIP6 u8 len = 0; -#endif int probed = 0; int i; @@ -646,7 +644,6 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) probed = 1; } break; -#ifdef CONFIG_IPV6_MIP6 case IPPROTO_MH: if (iov->iov_base && iov->iov_len < 1) break; @@ -660,7 +657,6 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) len += iov->iov_len; break; -#endif default: probed = 1; break; -- cgit v1.2.3 From 59fbb3a61e02deaeaa4fb50792217921f3002d64 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Tue, 26 Jun 2007 23:56:32 -0700 Subject: [IPV6] MIP6: Loadable module support for MIPv6. This patch makes MIPv6 loadable module named "mip6". Here is a modprobe.conf(5) example to load it automatically when user application uses XFRM state for MIPv6: alias xfrm-type-10-43 mip6 alias xfrm-type-10-60 mip6 Some MIPv6 feature is not included by this modular, however, it should not be affected to other features like either IPsec or IPv6 with and without the patch. We may discuss XFRM, MH (RAW socket) and ancillary data/sockopt separately for future work. Loadable features: * MH receiving check (to send ICMP error back) * RO header parsing and building (i.e. RH2 and HAO in DSTOPTS) * XFRM policy/state database handling for RO These are NOT covered as loadable: * Home Address flags and its rule on source address selection * XFRM sub policy (depends on its own kernel option) * XFRM functions to receive RO as IPv6 extension header * MH sending/receiving through raw socket if user application opens it (since raw socket allows to do so) * RH2 sending as ancillary data * RH2 operation with setsockopt(2) Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/addrconf.h | 2 +- include/net/mip6.h | 4 ---- include/net/rawv6.h | 9 +++++++++ net/ipv6/Kconfig | 2 +- net/ipv6/Makefile | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/af_inet6.c | 10 +--------- net/ipv6/ah6.c | 8 ++++---- net/ipv6/datagram.c | 2 +- net/ipv6/exthdrs.c | 19 ++++++++++--------- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/mip6.c | 22 +++++++++++++++++++--- net/ipv6/raw.c | 34 +++++++++++++++++++++++++++++++--- net/ipv6/xfrm6_policy.c | 4 ++-- net/ipv6/xfrm6_state.c | 4 ++-- 18 files changed, 88 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 648bd1f0912d..213b63be3c8f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -247,7 +247,7 @@ struct inet6_skb_parm { __u16 lastopt; __u32 nhoff; __u16 flags; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dsthao; #endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f3531d0bcd05..33b593e17441 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -61,7 +61,7 @@ extern int addrconf_set_dstaddr(void __user *arg); extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) extern int ipv6_chk_home_addr(struct in6_addr *addr); #endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, diff --git a/include/net/mip6.h b/include/net/mip6.h index 68263c6d9996..63272610a24a 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -54,8 +54,4 @@ struct ip6_mh { #define IP6_MH_TYPE_BERROR 7 /* Binding Error */ #define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR -extern int mip6_init(void); -extern void mip6_fini(void); -extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb); - #endif diff --git a/include/net/rawv6.h b/include/net/rawv6.h index af8960878ef4..a5819891d525 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#include + #define RAWV6_HTABLE_SIZE MAX_INET_PROTOS extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; extern rwlock_t raw_v6_lock; @@ -23,6 +25,13 @@ extern void rawv6_err(struct sock *sk, int type, int code, int offset, __be32 info); +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +int rawv6_mh_filter_register(int (*filter)(struct sock *sock, + struct sk_buff *skb)); +int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, + struct sk_buff *skb)); +#endif + #endif #endif diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 8e5d54f23b49..eb0b8085949b 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -109,7 +109,7 @@ config INET6_IPCOMP If unsure, say Y. config IPV6_MIP6 - bool "IPv6: Mobility (EXPERIMENTAL)" + tristate "IPv6: Mobility (EXPERIMENTAL)" depends on IPV6 && EXPERIMENTAL select XFRM ---help--- diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index bb33309044c9..87c23a73d284 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -14,7 +14,6 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o -ipv6-$(CONFIG_IPV6_MIP6) += mip6.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-objs += $(ipv6-y) @@ -28,6 +27,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o +obj-$(CONFIG_IPV6_MIP6) += mip6.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 79b79f3de24c..11c0028863b0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1034,7 +1034,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, } /* Rule 4: Prefer home address */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) if (hiscore.rule < 4) { if (ifa_result->flags & IFA_F_HOMEADDRESS) hiscore.attrs |= IPV6_SADDR_SCORE_HOA; @@ -2835,7 +2835,7 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct in6_addr *addr) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6dd377253cf7..eed09373a45d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -58,9 +58,6 @@ #ifdef CONFIG_IPV6_TUNNEL #include #endif -#ifdef CONFIG_IPV6_MIP6 -#include -#endif #include #include @@ -853,9 +850,6 @@ static int __init inet6_init(void) ipv6_frag_init(); ipv6_nodata_init(); ipv6_destopt_init(); -#ifdef CONFIG_IPV6_MIP6 - mip6_init(); -#endif /* Init v6 transport protocols. */ udpv6_init(); @@ -921,9 +915,7 @@ static void __exit inet6_exit(void) /* Cleanup code parts. */ ipv6_packet_cleanup(); -#ifdef CONFIG_IPV6_MIP6 - mip6_fini(); -#endif + addrconf_cleanup(); ip6_flowlabel_cleanup(); ip6_route_cleanup(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 01fa302d281e..cc6884a40be4 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -74,7 +74,7 @@ bad: return 0; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) /** * ipv6_rearrange_destopt - rearrange IPv6 destination options header * @iph: IPv6 header @@ -228,7 +228,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr; char tmp_base[8]; struct { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) struct in6_addr saddr; #endif struct in6_addr daddr; @@ -255,7 +255,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) err = -ENOMEM; goto error; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) memcpy(tmp_ext, &top_iph->saddr, extlen); #else memcpy(tmp_ext, &top_iph->daddr, extlen); @@ -294,7 +294,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph, tmp_base, sizeof(tmp_base)); if (tmp_ext) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) memcpy(&top_iph->saddr, tmp_ext, extlen); #else memcpy(&top_iph->daddr, tmp_ext, extlen); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b1fe7ac5dc90..ba1386dd4168 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -658,7 +658,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (rthdr->type) { case IPV6_SRCRT_TYPE_0: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: #endif break; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 14be0b9b77a5..173a4bb52255 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -42,7 +42,7 @@ #include #include #include -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include #endif @@ -90,6 +90,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) bad: return -1; } +EXPORT_SYMBOL_GPL(ipv6_find_tlv); /* * Parsing tlv encoded headers. @@ -196,7 +197,7 @@ bad: Destination options header. *****************************/ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) { struct sk_buff *skb = *skbp; @@ -270,7 +271,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) #endif static struct tlvtype_proc tlvprocdestopt_lst[] = { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) { .type = IPV6_TLV_HAO, .func = ipv6_dest_hao, @@ -283,7 +284,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; #endif struct dst_entry *dst; @@ -298,7 +299,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) } opt->lastopt = opt->dst1 = skb_network_header_len(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) dstbuf = opt->dst1; #endif @@ -308,7 +309,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) skb = *skbp; skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) opt->nhoff = dstbuf; #else opt->nhoff = opt->dst1; @@ -427,7 +428,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: /* Silently discard type 2 header unless it was * processed by own @@ -463,7 +464,7 @@ looped_back: return -1; } break; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { @@ -520,7 +521,7 @@ looped_back: addr += i - 1; switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e9bcce9e7bdf..4765a29f98a8 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -272,7 +272,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st return 0; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) static void mip6_addr_swap(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4704b5fc3085..31dafaf0b652 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -543,7 +543,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) found_rhdr = 1; break; case NEXTHDR_DEST: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index aa3d07c52a8f..b636c38411fb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -417,7 +417,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { case IPV6_SRCRT_TYPE_0: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: #endif break; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 13b7160fb892..20c78ecff6a1 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -86,7 +87,7 @@ static int mip6_mh_len(int type) return len; } -int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) +static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { struct ip6_mh *mh; @@ -471,7 +472,7 @@ static struct xfrm_type mip6_rthdr_type = .remote_addr = mip6_xfrm_addr, }; -int __init mip6_init(void) +static int __init mip6_init(void) { printk(KERN_INFO "Mobile IPv6\n"); @@ -483,18 +484,33 @@ int __init mip6_init(void) printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); goto mip6_rthdr_xfrm_fail; } + if (rawv6_mh_filter_register(mip6_mh_filter) < 0) { + printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__); + goto mip6_rawv6_mh_fail; + } + + return 0; + mip6_rawv6_mh_fail: + xfrm_unregister_type(&mip6_rthdr_type, AF_INET6); mip6_rthdr_xfrm_fail: xfrm_unregister_type(&mip6_destopt_type, AF_INET6); mip6_destopt_xfrm_fail: return -EAGAIN; } -void __exit mip6_fini(void) +static void __exit mip6_fini(void) { + if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0) + printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__); if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); } + +module_init(mip6_init); +module_exit(mip6_fini); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a22c9c93d931..aac6aeb8de8c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -49,7 +49,7 @@ #include #include #include -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include #endif @@ -137,6 +137,28 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) return 0; } +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); + +int rawv6_mh_filter_register(int (*filter)(struct sock *sock, + struct sk_buff *skb)) +{ + rcu_assign_pointer(mh_filter, filter); + return 0; +} +EXPORT_SYMBOL(rawv6_mh_filter_register); + +int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, + struct sk_buff *skb)) +{ + rcu_assign_pointer(mh_filter, NULL); + synchronize_rcu(); + return 0; +} +EXPORT_SYMBOL(rawv6_mh_filter_unregister); + +#endif + /* * demultiplex raw sockets. * (should consider queueing the skb in the sock receive_queue @@ -178,16 +200,22 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) case IPPROTO_ICMPV6: filtered = icmpv6_filter(sk, skb); break; -#ifdef CONFIG_IPV6_MIP6 + +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPPROTO_MH: + { /* XXX: To validate MH only once for each packet, * this is placed here. It should be after checking * xfrm policy, however it doesn't. The checking xfrm * policy is placed in rawv6_rcv() because it is * required for each socket. */ - filtered = mip6_mh_filter(sk, skb); + int (*filter)(struct sock *sock, struct sk_buff *skb); + + filter = rcu_dereference(mh_filter); + filtered = filter ? filter(sk, skb) : 0; break; + } #endif default: filtered = 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 1faa2ea80afc..3ec0c4770ee3 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -18,7 +18,7 @@ #include #include #include -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include #endif @@ -318,7 +318,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) fl->proto = nexthdr; return; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPPROTO_MH: if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index baa461b9f74e..cdadb4847469 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -65,7 +65,7 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) goto end; /* Rule 2: select MIPv6 RO or inbound trigger */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) for (i = 0; i < n; i++) { if (src[i] && (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || @@ -130,7 +130,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) goto end; /* Rule 2: select MIPv6 RO or inbound trigger */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) for (i = 0; i < n; i++) { if (src[i] && (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || -- cgit v1.2.3 From d3d6dd3adaaad71eae20902ed81808a66a40a5b9 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Tue, 26 Jun 2007 23:57:49 -0700 Subject: [XFRM]: Add module alias for transformation type. It is clean-up for XFRM type modules and adds aliases with its protocol: ESP, AH, IPCOMP, IPIP and IPv6 for IPsec ROUTING and DSTOPTS for MIPv6 It is almost the same thing as XFRM mode alias, but it is added new defines XFRM_PROTO_XXX for preprocessing since some protocols are defined as enum. Signed-off-by: Masahide NAKAMURA Acked-by: Ingo Oeser Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 ++++++++++ net/ipv4/ah4.c | 1 + net/ipv4/esp4.c | 1 + net/ipv4/ipcomp.c | 1 + net/ipv4/xfrm4_tunnel.c | 1 + net/ipv6/ah6.c | 1 + net/ipv6/esp6.c | 1 + net/ipv6/ipcomp6.c | 2 +- net/ipv6/mip6.c | 2 ++ net/ipv6/xfrm6_tunnel.c | 1 + 10 files changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7720c1182bb4..ee3827f053d7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -19,9 +19,19 @@ #include #include +#define XFRM_PROTO_ESP 50 +#define XFRM_PROTO_AH 51 +#define XFRM_PROTO_COMP 108 +#define XFRM_PROTO_IPIP 4 +#define XFRM_PROTO_IPV6 41 +#define XFRM_PROTO_ROUTING IPPROTO_ROUTING +#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS + #define XFRM_ALIGN8(len) (((len) + 7) & ~7) #define MODULE_ALIAS_XFRM_MODE(family, encap) \ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) +#define MODULE_ALIAS_XFRM_TYPE(family, proto) \ + MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) extern struct sock *xfrm_nl; extern u32 sysctl_xfrm_aevent_etime; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 6da8ff597ad3..7a23e59c374a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -339,3 +339,4 @@ static void __exit ah4_fini(void) module_init(ah4_init); module_exit(ah4_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 47c95e8ef045..98767a4f1185 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -481,3 +481,4 @@ static void __exit esp4_fini(void) module_init(esp4_init); module_exit(esp4_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP); diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index ab86137c71d2..e787044a8514 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -485,3 +485,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); MODULE_AUTHOR("James Morris "); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP); diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 568510304553..9275c79119b6 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -109,3 +109,4 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index cc6884a40be4..53f46ab6af70 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -554,3 +554,4 @@ module_init(ah6_init); module_exit(ah6_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 7107bb7e2e62..2db31ce3c7e6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -421,3 +421,4 @@ module_init(esp6_init); module_exit(esp6_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1ee50b5782e1..473f165310ea 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -500,4 +500,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); MODULE_AUTHOR("Mitsuru KANDA "); - +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 20c78ecff6a1..8a1399ce38ce 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -514,3 +514,5 @@ module_init(mip6_init); module_exit(mip6_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5502cc948dfb..6f87dd568ded 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -379,3 +379,4 @@ static void __exit xfrm6_tunnel_fini(void) module_init(xfrm6_tunnel_init); module_exit(xfrm6_tunnel_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6); -- cgit v1.2.3 From d212f87b068c9d72065ef579d85b5ee6b8b59381 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 27 Jun 2007 00:47:37 -0700 Subject: [NET]: IPV6 checksum offloading in network devices The existing model for checksum offload does not correctly handle devices that can offload IPV4 and IPV6 only. The NETIF_F_HW_CSUM flag implies device can do any arbitrary protocol. This patch: * adds NETIF_F_IPV6_CSUM for those devices * fixes bnx2 and tg3 devices that need it * add NETIF_F_IPV6_CSUM to ipv6 output (incl GSO) * fixes assumptions about NETIF_F_ALL_CSUM in nat * adjusts bridge union of checksumming computation Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 6 +++--- drivers/net/tg3.c | 7 +++---- include/linux/netdevice.h | 8 ++++++-- net/bridge/br_if.c | 10 +++++++++- net/core/dev.c | 24 +++++++++++++++++++++--- net/ipv4/af_inet.c | 3 +++ net/ipv4/ip_output.c | 2 +- net/ipv4/netfilter/nf_nat_helper.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 2 +- 9 files changed, 49 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index ce3ed67a878e..0f4f76fda26f 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -6490,10 +6490,10 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) memcpy(dev->perm_addr, bp->mac_addr, 6); bp->name = board_info[ent->driver_data].name; + dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; if (CHIP_NUM(bp) == CHIP_NUM_5709) - dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; - else - dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; + dev->features |= NETIF_F_IPV6_CSUM; + #ifdef BCM_VLAN dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; #endif diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 2f3184184ad9..3a43426ced32 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -11944,12 +11944,11 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, * checksumming. */ if ((tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) == 0) { + dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) - dev->features |= NETIF_F_HW_CSUM; - else - dev->features |= NETIF_F_IP_CSUM; - dev->features |= NETIF_F_SG; + dev->features |= NETIF_F_IPV6_CSUM; + tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS; } else tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7913ee5581c..7a8f22fb4eee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,9 +314,10 @@ struct net_device /* Net device features */ unsigned long features; #define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ #define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ #define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ #define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ #define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ @@ -338,8 +339,11 @@ struct net_device /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6) + #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) +#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) +#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) +#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) struct net_device *next_sched; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 849deaf14108..7b4ce9113be2 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -368,10 +368,18 @@ void br_features_recompute(struct net_bridge *br) list_for_each_entry(p, &br->port_list, list) { unsigned long feature = p->dev->features; + /* if device needs checksumming, downgrade to hw checksumming */ if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM)) checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; + + /* if device can't do all checksum, downgrade to ipv4/ipv6 */ if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM)) - checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM; + checksum ^= NETIF_F_HW_CSUM + | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + + if (checksum & NETIF_F_IPV6_CSUM && !(feature & NETIF_F_IPV6_CSUM)) + checksum &= ~NETIF_F_IPV6_CSUM; + if (!(feature & NETIF_F_IP_CSUM)) checksum = 0; diff --git a/net/core/dev.c b/net/core/dev.c index ee051bb398a0..a0a46e7ed137 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1509,9 +1509,11 @@ int dev_queue_xmit(struct sk_buff *skb) skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - if (!(dev->features & NETIF_F_GEN_CSUM) && - (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP))) + if (!(dev->features & NETIF_F_GEN_CSUM) + || ((dev->features & NETIF_F_IP_CSUM) + && skb->protocol == htons(ETH_P_IP)) + || ((dev->features & NETIF_F_IPV6_CSUM) + && skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } @@ -3107,6 +3109,22 @@ int register_netdevice(struct net_device *dev) } } + /* Fix illegal checksum combinations */ + if ((dev->features & NETIF_F_HW_CSUM) && + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((dev->features & NETIF_F_NO_CSUM) && + (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + + /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && !(dev->features & NETIF_F_ALL_CSUM)) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 041fba3fa0aa..06c08e5740fb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1170,6 +1170,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) int ihl; int id; + if (!(features & NETIF_F_V4_CSUM)) + features &= ~NETIF_F_SG; + if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | SKB_GSO_UDP | diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 34ea4547ebbe..a7dd343d3a03 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -837,7 +837,7 @@ int ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->u.dst.dev->features & NETIF_F_ALL_CSUM && + rt->u.dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen) csummode = CHECKSUM_PARTIAL; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 15b6e5ce3a04..b1aa5983a95b 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -178,7 +178,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen = (*pskb)->len - iph->ihl*4; if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_ALL_CSUM) { + (*pskb)->dev->features & NETIF_F_V4_CSUM) { (*pskb)->ip_summed = CHECKSUM_PARTIAL; (*pskb)->csum_start = skb_headroom(*pskb) + skb_network_offset(*pskb) + @@ -265,7 +265,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_ALL_CSUM) { + (*pskb)->dev->features & NETIF_F_V4_CSUM) { (*pskb)->ip_summed = CHECKSUM_PARTIAL; (*pskb)->csum_start = skb_headroom(*pskb) + skb_network_offset(*pskb) + diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b636c38411fb..1c3506696cb5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -123,7 +123,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) struct ipv6hdr *ipv6h; struct inet6_protocol *ops; - if (!(features & NETIF_F_HW_CSUM)) + if (!(features & NETIF_F_V6_CSUM)) features &= ~NETIF_F_SG; if (unlikely(skb_shinfo(skb)->gso_type & -- cgit v1.2.3 From 75ebe8f73610636be8bbd8d73db883512850e6be Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:25:11 -0700 Subject: [NET]: dev_mcast: unexport dev_mc_upload dev_mc_add/dev_mc_delete take care of uploading the list when necessary and thats the only interface other code should use. Also remove two incorrect calls in DECnet. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev_mcast.c | 1 - net/decnet/dn_dev.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 5a54053386c8..80bb2e374b76 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -292,4 +292,3 @@ void __init dev_mcast_init(void) EXPORT_SYMBOL(dev_mc_add); EXPORT_SYMBOL(dev_mc_delete); -EXPORT_SYMBOL(dev_mc_upload); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index ab41c1879fd4..e31549e9d07a 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -461,7 +461,6 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) { dn_dn2eth(mac_addr, ifa->ifa_local); dev_mc_add(dev, mac_addr, ETH_ALEN, 0); - dev_mc_upload(dev); } } @@ -1064,8 +1063,6 @@ static int dn_eth_up(struct net_device *dev) else dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0); - dev_mc_upload(dev); - dn_db->use_long = 1; return 0; -- cgit v1.2.3 From bf742482d7a647c5c6f03f78eb35a862e159ecf5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:26:19 -0700 Subject: [NET]: dev: introduce generic net_device address lists Introduce struct dev_addr_list and list maintenance functions based on dev_mc_list and the related functions. This will be used by follow-up patches for both multicast and secondary unicast addresses. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ++++++++ net/core/dev.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7a8f22fb4eee..aa389c77aa3e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -177,6 +177,14 @@ struct netif_rx_stats DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat); +struct dev_addr_list +{ + struct dev_addr_list *next; + u8 da_addr[MAX_ADDR_LEN]; + u8 da_addrlen; + int da_users; + int da_gusers; +}; /* * We tag multicasts with these structures. @@ -1008,6 +1016,9 @@ extern void dev_mc_upload(struct net_device *dev); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); +extern int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all); +extern int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly); +extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index a0a46e7ed137..18759ccdf219 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2553,6 +2553,75 @@ void dev_set_allmulti(struct net_device *dev, int inc) dev_mc_upload(dev); } +int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, + int glbl) +{ + struct dev_addr_list *da; + + for (; (da = *list) != NULL; list = &da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + alen == da->da_addrlen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 0; + if (old_glbl == 0) + break; + } + if (--da->da_users) + return 0; + + *list = da->next; + kfree(da); + return 0; + } + } + return -ENOENT; +} + +int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (da = *list; da != NULL; da = da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + da->da_addrlen == alen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 1; + if (old_glbl) + return 0; + } + da->da_users++; + return 0; + } + } + + da = kmalloc(sizeof(*da), GFP_ATOMIC); + if (da == NULL) + return -ENOMEM; + memcpy(da->da_addr, addr, alen); + da->da_addrlen = alen; + da->da_users = 1; + da->da_gusers = glbl ? 1 : 0; + da->next = *list; + *list = da; + return 0; +} + +void __dev_addr_discard(struct dev_addr_list **list) +{ + struct dev_addr_list *tmp; + + while (*list != NULL) { + tmp = *list; + *list = tmp->next; + if (tmp->da_users > tmp->da_gusers) + printk("__dev_addr_discard: address leakage! " + "da_users=%d\n", tmp->da_users); + kfree(tmp); + } +} + unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; -- cgit v1.2.3 From 3fba5a8b1e3df2384b90493538161e83cf15dd5f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:26:58 -0700 Subject: [NET]: dev_mcast: switch to generic net_device address lists Use generic net_device address lists for multicast list handling. Some defines are used to keep drivers working. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 ++++----- net/core/dev_mcast.c | 96 ++++++++--------------------------------------- 2 files changed, 22 insertions(+), 91 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aa389c77aa3e..9e114e77e54d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -189,15 +189,12 @@ struct dev_addr_list /* * We tag multicasts with these structures. */ - -struct dev_mc_list -{ - struct dev_mc_list *next; - __u8 dmi_addr[MAX_ADDR_LEN]; - unsigned char dmi_addrlen; - int dmi_users; - int dmi_gusers; -}; + +#define dev_mc_list dev_addr_list +#define dmi_addr da_addr +#define dmi_addrlen da_addrlen +#define dmi_users da_users +#define dmi_gusers da_gusers struct hh_cache { @@ -400,7 +397,7 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ - struct dev_mc_list *mc_list; /* Multicast mac addresses */ + struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ int promiscuity; int allmulti; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 80bb2e374b76..702907434a47 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -102,47 +102,20 @@ void dev_mc_upload(struct net_device *dev) int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, **dmip; + int err; netif_tx_lock_bh(dev); + err = __dev_addr_delete(&dev->mc_list, addr, alen, glbl); + if (!err) { + dev->mc_count--; - for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { /* - * Find the entry we want to delete. The device could - * have variable length entries so check these too. + * We have altered the list, so the card + * loaded filter is now wrong. Fix it */ - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - alen == dmi->dmi_addrlen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 0; - if (old_glbl == 0) - break; - } - if (--dmi->dmi_users) - goto done; - - /* - * Last user. So delete the entry. - */ - *dmip = dmi->next; - dev->mc_count--; - - kfree(dmi); - - /* - * We have altered the list, so the card - * loaded filter is now wrong. Fix it - */ - __dev_mc_upload(dev); - - netif_tx_unlock_bh(dev); - return 0; - } + + __dev_mc_upload(dev); } - err = -ENOENT; -done: netif_tx_unlock_bh(dev); return err; } @@ -153,46 +126,15 @@ done: int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, *dmi1; - - dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); + int err; netif_tx_lock_bh(dev); - for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - dmi->dmi_addrlen == alen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 1; - if (old_glbl) - goto done; - } - dmi->dmi_users++; - goto done; - } - } - - if ((dmi = dmi1) == NULL) { - netif_tx_unlock_bh(dev); - return -ENOMEM; + err = __dev_addr_add(&dev->mc_list, addr, alen, glbl); + if (!err) { + dev->mc_count++; + __dev_mc_upload(dev); } - memcpy(dmi->dmi_addr, addr, alen); - dmi->dmi_addrlen = alen; - dmi->next = dev->mc_list; - dmi->dmi_users = 1; - dmi->dmi_gusers = glbl ? 1 : 0; - dev->mc_list = dmi; - dev->mc_count++; - - __dev_mc_upload(dev); - netif_tx_unlock_bh(dev); - return 0; - -done: - netif_tx_unlock_bh(dev); - kfree(dmi1); return err; } @@ -203,16 +145,8 @@ done: void dev_mc_discard(struct net_device *dev) { netif_tx_lock_bh(dev); - - while (dev->mc_list != NULL) { - struct dev_mc_list *tmp = dev->mc_list; - dev->mc_list = tmp->next; - if (tmp->dmi_users > tmp->dmi_gusers) - printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); - kfree(tmp); - } + __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; - netif_tx_unlock_bh(dev); } @@ -244,7 +178,7 @@ static void dev_mc_seq_stop(struct seq_file *seq, void *v) static int dev_mc_seq_show(struct seq_file *seq, void *v) { - struct dev_mc_list *m; + struct dev_addr_list *m; struct net_device *dev = v; netif_tx_lock_bh(dev); -- cgit v1.2.3 From 4417da668c0021903464f92db278ddae348e0299 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:28:10 -0700 Subject: [NET]: dev: secondary unicast address support Add support for configuring secondary unicast addresses on network devices. To support this devices capable of filtering multiple unicast addresses need to change their set_multicast_list function to configure unicast filters as well and assign it to dev->set_rx_mode instead of dev->set_multicast_list. Other devices are put into promiscous mode when secondary unicast addresses are present. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 +++- net/core/dev.c | 144 ++++++++++++++++++++++++++++++++++++++++------ net/core/dev_mcast.c | 37 +----------- 3 files changed, 139 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e114e77e54d..2c0cc19edfb2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -397,6 +397,9 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ + struct dev_addr_list *uc_list; /* Secondary unicast mac addresses */ + int uc_count; /* Number of installed ucasts */ + int uc_promisc; struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ int promiscuity; @@ -502,6 +505,8 @@ struct net_device void *saddr, unsigned len); int (*rebuild_header)(struct sk_buff *skb); +#define HAVE_SET_RX_MODE + void (*set_rx_mode)(struct net_device *dev); #define HAVE_MULTICAST void (*set_multicast_list)(struct net_device *dev); #define HAVE_SET_MAC_ADDR @@ -1008,8 +1013,11 @@ extern struct net_device *alloc_netdev(int sizeof_priv, const char *name, void (*setup)(struct net_device *)); extern int register_netdev(struct net_device *dev); extern void unregister_netdev(struct net_device *dev); -/* Functions used for multicast support */ -extern void dev_mc_upload(struct net_device *dev); +/* Functions used for secondary unicast and multicast support */ +extern void dev_set_rx_mode(struct net_device *dev); +extern void __dev_set_rx_mode(struct net_device *dev); +extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); +extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 18759ccdf219..36e9bf8ec4af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -942,7 +942,7 @@ int dev_open(struct net_device *dev) /* * Initialize multicasting status */ - dev_mc_upload(dev); + dev_set_rx_mode(dev); /* * Wakeup transmit queue engine @@ -2498,17 +2498,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - */ -void dev_set_promiscuity(struct net_device *dev, int inc) +static void __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -2517,7 +2507,6 @@ void dev_set_promiscuity(struct net_device *dev, int inc) else dev->flags |= IFF_PROMISC; if (dev->flags != old_flags) { - dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); @@ -2530,6 +2519,25 @@ void dev_set_promiscuity(struct net_device *dev, int inc) } } +/** + * dev_set_promiscuity - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + */ +void dev_set_promiscuity(struct net_device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + __dev_set_promiscuity(dev, inc); + if (dev->flags != old_flags) + dev_set_rx_mode(dev); +} + /** * dev_set_allmulti - update allmulti count on a device * @dev: device @@ -2550,7 +2558,48 @@ void dev_set_allmulti(struct net_device *dev, int inc) if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; if (dev->flags ^ old_flags) - dev_mc_upload(dev); + dev_set_rx_mode(dev); +} + +/* + * Upload unicast and multicast address lists to device and + * configure RX filtering. When the device doesn't support unicast + * filtering it is put in promiscous mode while unicast addresses + * are present. + */ +void __dev_set_rx_mode(struct net_device *dev) +{ + /* dev_open will call this function so the list will stay sane. */ + if (!(dev->flags&IFF_UP)) + return; + + if (!netif_device_present(dev)) + return; + + if (dev->set_rx_mode) + dev->set_rx_mode(dev); + else { + /* Unicast addresses changes may only happen under the rtnl, + * therefore calling __dev_set_promiscuity here is safe. + */ + if (dev->uc_count > 0 && !dev->uc_promisc) { + __dev_set_promiscuity(dev, 1); + dev->uc_promisc = 1; + } else if (dev->uc_count == 0 && dev->uc_promisc) { + __dev_set_promiscuity(dev, -1); + dev->uc_promisc = 0; + } + + if (dev->set_multicast_list) + dev->set_multicast_list(dev); + } +} + +void dev_set_rx_mode(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); } int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, @@ -2622,6 +2671,66 @@ void __dev_addr_discard(struct dev_addr_list **list) } } +/** + * dev_unicast_delete - Release secondary unicast address. + * @dev: device + * + * Release reference to a secondary unicast address and remove it + * from the device if the reference count drop to zero. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_delete(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_delete(&dev->uc_list, addr, alen, 0); + if (!err) { + dev->uc_count--; + __dev_set_rx_mode(dev); + } + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_delete); + +/** + * dev_unicast_add - add a secondary unicast address + * @dev: device + * + * Add a secondary unicast address to the device or increase + * the reference count if it already exists. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_add(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_add(&dev->uc_list, addr, alen, 0); + if (!err) { + dev->uc_count++; + __dev_set_rx_mode(dev); + } + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_add); + +static void dev_unicast_discard(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_addr_discard(&dev->uc_list); + dev->uc_count = 0; + netif_tx_unlock_bh(dev); +} + unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; @@ -2665,7 +2774,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - dev_mc_upload(dev); + dev_set_rx_mode(dev); /* * Have we downed the interface. We handle IFF_UP ourselves @@ -2678,7 +2787,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); if (!ret) - dev_mc_upload(dev); + dev_set_rx_mode(dev); } if (dev->flags & IFF_UP && @@ -3558,8 +3667,9 @@ void unregister_netdevice(struct net_device *dev) raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); /* - * Flush the multicast chain + * Flush the unicast and multicast chains */ + dev_unicast_discard(dev); dev_mc_discard(dev); if (dev->uninit) diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 702907434a47..5cc9b448c443 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -63,39 +63,6 @@ * We block accesses to device mc filters with netif_tx_lock. */ -/* - * Update the multicast list into the physical NIC controller. - */ - -static void __dev_mc_upload(struct net_device *dev) -{ - /* Don't do anything till we up the interface - * [dev_open will call this function so the list will - * stay sane] - */ - - if (!(dev->flags&IFF_UP)) - return; - - /* - * Devices with no set multicast or which have been - * detached don't get set. - */ - - if (dev->set_multicast_list == NULL || - !netif_device_present(dev)) - return; - - dev->set_multicast_list(dev); -} - -void dev_mc_upload(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - __dev_mc_upload(dev); - netif_tx_unlock_bh(dev); -} - /* * Delete a device level multicast */ @@ -114,7 +81,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) * loaded filter is now wrong. Fix it */ - __dev_mc_upload(dev); + __dev_set_rx_mode(dev); } netif_tx_unlock_bh(dev); return err; @@ -132,7 +99,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) err = __dev_addr_add(&dev->mc_list, addr, alen, glbl); if (!err) { dev->mc_count++; - __dev_mc_upload(dev); + __dev_set_rx_mode(dev); } netif_tx_unlock_bh(dev); return err; -- cgit v1.2.3 From 342f0234c71b40da785dd6a7ce1dd481ecbfdb81 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 27 Jun 2007 15:37:46 -0700 Subject: [UDP]: Introduce UDP encapsulation type for L2TP This patch adds a new UDP_ENCAP_L2TPINUDP encapsulation type for UDP sockets. When a UDP socket's encap_type is UDP_ENCAP_L2TPINUDP, the skb is delivered to a function pointed to by the udp_sock's encap_rcv funcptr. If the skb isn't wanted by L2TP, it returns >0, which causes it to be passed through to UDP. Include padding to put the new encap_rcv field on a 4-byte boundary. Previously, the only user of UDP encap sockets was ESP, so when CONFIG_XFRM was not defined, some of the encap code was compiled out. This patch changes that. As a result, udp_encap_rcv() will now do a little more work when CONFIG_XFRM is not defined. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/linux/udp.h | 6 ++++++ net/ipv4/udp.c | 27 +++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index 6de445c31a64..8ec703f462da 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -42,6 +42,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ #define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */ +#define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #ifdef __KERNEL__ #include @@ -70,6 +71,11 @@ struct udp_sock { #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ #define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ + __u8 unused[3]; + /* + * For encapsulation sockets. + */ + int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); }; static inline struct udp_sock *udp_sk(const struct sock *sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index facb7e29304e..b9276f8bdac5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -70,6 +70,7 @@ * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * Derek Atkins : Add Encapulation Support + * James Chapman : Add L2TP encapsulation type. * * * This program is free software; you can redistribute it and/or @@ -923,12 +924,10 @@ int udp_disconnect(struct sock *sk, int flags) * 1 if the UDP system should process it * 0 if we should drop this packet * -1 if it should get processed by xfrm4_rcv_encap + * -2 if it should get processed by l2tp */ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) { -#ifndef CONFIG_XFRM - return 1; -#else struct udp_sock *up = udp_sk(sk); struct udphdr *uh; struct iphdr *iph; @@ -983,8 +982,14 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) /* Must be an IKE packet.. pass it through */ return 1; break; + case UDP_ENCAP_L2TPINUDP: + /* Let caller know to send this to l2tp */ + return -2; } +#ifndef CONFIG_XFRM + return 1; +#else /* At this point we are sure that this is an ESPinUDP packet, * so we need to remove 'len' bytes from the packet (the UDP * header and optional ESP marker bytes) and then modify the @@ -1055,12 +1060,25 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (ret < 0) { + if (ret == -1) { /* process the ESP packet */ ret = xfrm4_rcv_encap(skb, up->encap_type); UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); return -ret; } + if (ret == -2) { + /* process the L2TP packet */ + if (up->encap_rcv != NULL) { + ret = (*up->encap_rcv)(sk, skb); + if (ret <= 0) { + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); + return ret; + } + + /* FALLTHROUGH -- pass up as UDP packet */ + } + } + /* FALLTHROUGH -- it's a UDP Packet */ } @@ -1349,6 +1367,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case 0: case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: + case UDP_ENCAP_L2TPINUDP: up->encap_type = val; break; default: -- cgit v1.2.3 From a298830cd026b4c0cde45ef3679a5f68a17577e6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 28 Jun 2007 13:44:37 -0700 Subject: [NET]: Fix TX checksum feature check This patch fixes a boolean error in the new TX checksum check that causes bogus TSO packets to be generated. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 36e9bf8ec4af..6dce9d2d46f2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1509,11 +1509,11 @@ int dev_queue_xmit(struct sk_buff *skb) skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - if (!(dev->features & NETIF_F_GEN_CSUM) - || ((dev->features & NETIF_F_IP_CSUM) - && skb->protocol == htons(ETH_P_IP)) - || ((dev->features & NETIF_F_IPV6_CSUM) - && skb->protocol == htons(ETH_P_IPV6))) + if (!(dev->features & NETIF_F_GEN_CSUM) && + !((dev->features & NETIF_F_IP_CSUM) && + skb->protocol == htons(ETH_P_IP)) && + !((dev->features & NETIF_F_IPV6_CSUM) && + skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } -- cgit v1.2.3 From f25f4e44808f0f6c9875d94ef1c41ef86c288eb2 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 6 Jul 2007 13:36:20 -0700 Subject: [CORE] Stack changes to add multiqueue hardware support API Add the multiqueue hardware device support API to the core network stack. Allow drivers to allocate multiple queues and manage them at the netdev level if they choose to do so. Added a new field to sk_buff, namely queue_mapping, for drivers to know which tx_ring to select based on OS classification of the flow. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/Kconfig | 8 +++++ include/linux/etherdevice.h | 3 +- include/linux/netdevice.h | 80 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/skbuff.h | 25 ++++++++++++-- net/core/dev.c | 36 ++++++++++++++------ net/core/netpoll.c | 8 +++-- net/core/pktgen.c | 10 ++++-- net/core/skbuff.c | 3 ++ net/ethernet/eth.c | 9 ++--- net/sched/sch_teql.c | 6 +++- 10 files changed, 158 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index c251cca295c1..d4e39ff1545b 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -25,6 +25,14 @@ menuconfig NETDEVICES # that for each of the symbols. if NETDEVICES +config NETDEVICES_MULTIQUEUE + bool "Netdevice multiple hardware queue support" + ---help--- + Say Y here if you want to allow the network stack to use multiple + hardware TX queues on an ethernet device. + + Most people will say N here. + config IFB tristate "Intermediate Functional Block support" depends on NET_CLS_ACT diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index f48eb89efd0f..6cdb97365e47 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -39,7 +39,8 @@ extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev extern int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh); -extern struct net_device *alloc_etherdev(int sizeof_priv); +extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count); +#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c0cc19edfb2..9817821729c4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -108,6 +108,14 @@ struct wireless_dev; #define MAX_HEADER (LL_MAX_HEADER + 48) #endif +struct net_device_subqueue +{ + /* Give a control state for each queue. This struct may contain + * per-queue locks in the future. + */ + unsigned long state; +}; + /* * Network device statistics. Akin to the 2.0 ether stats but * with byte counters. @@ -331,6 +339,7 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -557,6 +566,10 @@ struct net_device /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; + + /* The TX queue control structures */ + unsigned int egress_subqueue_count; + struct net_device_subqueue egress_subqueue[0]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -565,9 +578,7 @@ struct net_device static inline void *netdev_priv(const struct net_device *dev) { - return (char *)dev + ((sizeof(struct net_device) - + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST); + return dev->priv; } #define SET_MODULE_OWNER(dev) do { } while (0) @@ -719,6 +730,62 @@ static inline int netif_running(const struct net_device *dev) return test_bit(__LINK_STATE_START, &dev->state); } +/* + * Routines to manage the subqueues on a device. We only need start + * stop, and a check if it's stopped. All other device management is + * done at the overall netdevice level. + * Also test the device if we're multiqueue. + */ +static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); +#endif +} + +static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#ifdef CONFIG_NETPOLL_TRAP + if (netpoll_trap()) + return; +#endif + set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); +#endif +} + +static inline int netif_subqueue_stopped(const struct net_device *dev, + u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + return test_bit(__LINK_STATE_XOFF, + &dev->egress_subqueue[queue_index].state); +#else + return 0; +#endif +} + +static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#ifdef CONFIG_NETPOLL_TRAP + if (netpoll_trap()) + return; +#endif + if (test_and_clear_bit(__LINK_STATE_XOFF, + &dev->egress_subqueue[queue_index].state)) + __netif_schedule(dev); +#endif +} + +static inline int netif_is_multiqueue(const struct net_device *dev) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + return (!!(NETIF_F_MULTI_QUEUE & dev->features)); +#else + return 0; +#endif +} /* Use this variant when it is known for sure that it * is executing from interrupt context. @@ -1009,8 +1076,11 @@ static inline void netif_tx_disable(struct net_device *dev) extern void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ -extern struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)); +extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int queue_count); +#define alloc_netdev(sizeof_priv, name, setup) \ + alloc_netdev_mq(sizeof_priv, name, setup, 1) extern int register_netdev(struct net_device *dev); extern void unregister_netdev(struct net_device *dev); /* Functions used for secondary unicast and multicast support */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 881fe80f01d0..2d6a14f5f2f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -196,7 +196,6 @@ typedef unsigned char *sk_buff_data_t; * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by - * @iif: ifindex of device we arrived on * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header @@ -231,6 +230,8 @@ typedef unsigned char *sk_buff_data_t; * @nfctinfo: Relationship of this skb to the connection * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c + * @iif: ifindex of device we arrived on + * @queue_mapping: Queue mapping for multiqueue devices * @tc_index: Traffic control index * @tc_verd: traffic control verdict * @dma_cookie: a cookie to one of several possible DMA operations @@ -246,8 +247,6 @@ struct sk_buff { struct sock *sk; ktime_t tstamp; struct net_device *dev; - int iif; - /* 4 byte hole on 64 bit*/ struct dst_entry *dst; struct sec_path *sp; @@ -290,12 +289,18 @@ struct sk_buff { #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif + + int iif; + __u16 queue_mapping; + #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT __u16 tc_verd; /* traffic control verdict */ #endif #endif + /* 2 byte hole */ + #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; #endif @@ -1725,6 +1730,20 @@ static inline void skb_init_secmark(struct sk_buff *skb) { } #endif +static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + skb->queue_mapping = queue_mapping; +#endif +} + +static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + to->queue_mapping = from->queue_mapping; +#endif +} + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/net/core/dev.c b/net/core/dev.c index 6dce9d2d46f2..7ddf66d0ad5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1429,7 +1429,9 @@ gso: skb->next = nskb; return rc; } - if (unlikely(netif_queue_stopped(dev) && skb->next)) + if (unlikely((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) && + skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1547,6 +1549,8 @@ gso: spin_lock(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { + /* reset queue_mapping to zero */ + skb->queue_mapping = 0; rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1576,7 +1580,8 @@ gso: HARD_TX_LOCK(dev, cpu); - if (!netif_queue_stopped(dev)) { + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -3539,16 +3544,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev) } /** - * alloc_netdev - allocate network device + * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device + * @queue_count: the number of subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. + * and performs basic initialization. Also allocates subquue structs + * for each queue on the device at the end of the netdevice. */ -struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)) +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), unsigned int queue_count) { void *p; struct net_device *dev; @@ -3557,7 +3564,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); /* ensure 32-byte alignment of both the device and private area */ - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + + (sizeof(struct net_device_subqueue) * queue_count)) & + ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); @@ -3570,15 +3579,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - if (sizeof_priv) - dev->priv = netdev_priv(dev); + if (sizeof_priv) { + dev->priv = ((char *)dev + + ((sizeof(struct net_device) + + (sizeof(struct net_device_subqueue) * + queue_count) + NETDEV_ALIGN_CONST) + & ~NETDEV_ALIGN_CONST)); + } + + dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; setup(dev); strcpy(dev->name, name); return dev; } -EXPORT_SYMBOL(alloc_netdev); +EXPORT_SYMBOL(alloc_netdev_mq); /** * free_netdev - free network device diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a0efdd7a6b37..4b06d1936375 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -66,8 +66,9 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); netif_tx_lock(dev); - if (netif_queue_stopped(dev) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + if ((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); netif_tx_unlock(dev); local_irq_restore(flags); @@ -254,7 +255,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (netif_tx_trylock(dev)) { - if (!netif_queue_stopped(dev)) + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) status = dev->hard_start_xmit(skb, dev); netif_tx_unlock(dev); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9cd3a1cb60ef..dffe067e7a7b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3139,7 +3139,9 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if (netif_queue_stopped(odev) || need_resched()) { + if ((netif_queue_stopped(odev) || + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) || + need_resched()) { idle_start = getCurUs(); if (!netif_running(odev)) { @@ -3154,7 +3156,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev)) { + if (netif_queue_stopped(odev) || + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3181,7 +3184,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev)) { + if (!netif_queue_stopped(odev) && + !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c989c3a0f907..6a41b96b3d37 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -419,6 +419,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->nohdr = 0; C(pkt_type); C(ip_summed); + skb_copy_queue_mapping(n, skb); C(priority); #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) C(ipvs_property); @@ -460,6 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->sk = NULL; new->dev = old->dev; + skb_copy_queue_mapping(new, old); new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); @@ -1932,6 +1934,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) tail = nskb; nskb->dev = skb->dev; + skb_copy_queue_mapping(nskb, skb); nskb->priority = skb->priority; nskb->protocol = skb->protocol; nskb->dst = dst_clone(skb->dst); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 0ac2524f3b68..1387e5411f77 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -316,9 +316,10 @@ void ether_setup(struct net_device *dev) EXPORT_SYMBOL(ether_setup); /** - * alloc_etherdev - Allocates and sets up an Ethernet device + * alloc_etherdev_mq - Allocates and sets up an Ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated * for this Ethernet device + * @queue_count: The number of queues this device has. * * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. @@ -328,8 +329,8 @@ EXPORT_SYMBOL(ether_setup); * this private data area. */ -struct net_device *alloc_etherdev(int sizeof_priv) +struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count) { - return alloc_netdev(sizeof_priv, "eth%d", ether_setup); + return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count); } -EXPORT_SYMBOL(alloc_etherdev); +EXPORT_SYMBOL(alloc_etherdev_mq); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index f05ad9a30b4c..dfe7e4520988 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -277,6 +277,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) int busy; int nores; int len = skb->len; + int subq = skb->queue_mapping; struct sk_buff *skb_res = NULL; start = master->slaves; @@ -293,7 +294,9 @@ restart: if (slave->qdisc_sleeping != q) continue; - if (netif_queue_stopped(slave) || ! netif_running(slave)) { + if (netif_queue_stopped(slave) || + netif_subqueue_stopped(slave, subq) || + !netif_running(slave)) { busy = 1; continue; } @@ -302,6 +305,7 @@ restart: case 0: if (netif_tx_trylock(slave)) { if (!netif_queue_stopped(slave) && + !netif_subqueue_stopped(slave, subq) && slave->hard_start_xmit(skb, slave) == 0) { netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); -- cgit v1.2.3 From d62733c8e437fdb58325617c4b3331769ba82d70 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Thu, 28 Jun 2007 21:04:31 -0700 Subject: [SCHED]: Qdisc changes and sch_rr added for multiqueue Add the new sch_rr qdisc for multiqueue network device support. Allow sch_prio and sch_rr to be compiled with or without multiqueue hardware support. sch_rr is part of sch_prio, and is referenced from MODULE_ALIAS. This was done since sch_prio and sch_rr only differ in their dequeue routine. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 9 ++++ net/sched/Kconfig | 11 ++++ net/sched/sch_prio.c | 129 ++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 134 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d10f35338507..268c51599eb8 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -101,6 +101,15 @@ struct tc_prio_qopt __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ }; +enum +{ + TCA_PRIO_UNSPEC, + TCA_PRIO_MQ, + __TCA_PRIO_MAX +}; + +#define TCA_PRIO_MAX (__TCA_PRIO_MAX - 1) + /* TBF section */ struct tc_tbf_qopt diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 475df8449be9..f3217942ca87 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -111,6 +111,17 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. +config NET_SCH_RR + tristate "Multi Band Round Robin Queuing (RR)" + select NET_SCH_PRIO + ---help--- + Say Y here if you want to use an n-band round robin packet + scheduler. + + The module uses sch_prio for its framework and is aliased as + sch_rr, so it will load sch_prio, although it is referred + to using sch_rr. + config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 6d7542c26e47..404522046289 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -40,9 +40,11 @@ struct prio_sched_data { int bands; + int curband; /* for round-robin */ struct tcf_proto *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; + int mq; }; @@ -70,14 +72,17 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) #endif if (TC_H_MAJ(band)) band = 0; - return q->queues[q->prio2band[band&TC_PRIO_MAX]]; + band = q->prio2band[band&TC_PRIO_MAX]; + goto out; } band = res.classid; } band = TC_H_MIN(band) - 1; if (band >= q->bands) - return q->queues[q->prio2band[0]]; - + band = q->prio2band[0]; +out: + if (q->mq) + skb_set_queue_mapping(skb, band); return q->queues[band]; } @@ -144,17 +149,58 @@ prio_dequeue(struct Qdisc* sch) struct Qdisc *qdisc; for (prio = 0; prio < q->bands; prio++) { - qdisc = q->queues[prio]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - return skb; + /* Check if the target subqueue is available before + * pulling an skb. This way we avoid excessive requeues + * for slower queues. + */ + if (!netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { + qdisc = q->queues[prio]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; + } } } return NULL; } +static struct sk_buff *rr_dequeue(struct Qdisc* sch) +{ + struct sk_buff *skb; + struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc; + int bandcount; + + /* Only take one pass through the queues. If nothing is available, + * return nothing. + */ + for (bandcount = 0; bandcount < q->bands; bandcount++) { + /* Check if the target subqueue is available before + * pulling an skb. This way we avoid excessive requeues + * for slower queues. If the queue is stopped, try the + * next queue. + */ + if (!netif_subqueue_stopped(sch->dev, + (q->mq ? q->curband : 0))) { + qdisc = q->queues[q->curband]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + return skb; + } + } + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + } + return NULL; +} + static unsigned int prio_drop(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -198,21 +244,41 @@ prio_destroy(struct Qdisc* sch) static int prio_tune(struct Qdisc *sch, struct rtattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); - struct tc_prio_qopt *qopt = RTA_DATA(opt); + struct tc_prio_qopt *qopt; + struct rtattr *tb[TCA_PRIO_MAX]; int i; - if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) + if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt, + sizeof(*qopt))) return -EINVAL; - if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) + q->bands = qopt->bands; + /* If we're multiqueue, make sure the number of incoming bands + * matches the number of queues on the device we're associating with. + * If the number of bands requested is zero, then set q->bands to + * dev->egress_subqueue_count. + */ + q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]); + if (q->mq) { + if (sch->handle != TC_H_ROOT) + return -EINVAL; + if (netif_is_multiqueue(sch->dev)) { + if (q->bands == 0) + q->bands = sch->dev->egress_subqueue_count; + else if (q->bands != sch->dev->egress_subqueue_count) + return -EINVAL; + } else + return -EOPNOTSUPP; + } + + if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) return -EINVAL; for (i=0; i<=TC_PRIO_MAX; i++) { - if (qopt->priomap[i] >= qopt->bands) + if (qopt->priomap[i] >= q->bands) return -EINVAL; } sch_tree_lock(sch); - q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; ibands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); - RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt); + if (q->mq) + RTA_PUT_FLAG(skb, TCA_PRIO_MQ); + RTA_NEST_COMPAT_END(skb, nest); + return skb->len; rtattr_failure: @@ -443,17 +515,44 @@ static struct Qdisc_ops prio_qdisc_ops = { .owner = THIS_MODULE, }; +static struct Qdisc_ops rr_qdisc_ops = { + .next = NULL, + .cl_ops = &prio_class_ops, + .id = "rr", + .priv_size = sizeof(struct prio_sched_data), + .enqueue = prio_enqueue, + .dequeue = rr_dequeue, + .requeue = prio_requeue, + .drop = prio_drop, + .init = prio_init, + .reset = prio_reset, + .destroy = prio_destroy, + .change = prio_tune, + .dump = prio_dump, + .owner = THIS_MODULE, +}; + static int __init prio_module_init(void) { - return register_qdisc(&prio_qdisc_ops); + int err; + + err = register_qdisc(&prio_qdisc_ops); + if (err < 0) + return err; + err = register_qdisc(&rr_qdisc_ops); + if (err < 0) + unregister_qdisc(&prio_qdisc_ops); + return err; } static void __exit prio_module_exit(void) { unregister_qdisc(&prio_qdisc_ops); + unregister_qdisc(&rr_qdisc_ops); } module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_ALIAS("sch_rr"); -- cgit v1.2.3 From 61cbc2fca6335be52788773b21efdc52a2750924 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 30 Jun 2007 13:35:52 -0700 Subject: [NET]: Fix secondary unicast/multicast address count maintenance When a reference to an existing address is increased or decreased without hitting zero, the address count is incorrectly adjusted. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 21 ++++++++++----------- net/core/dev_mcast.c | 11 ++++------- 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9817821729c4..8590d685d935 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1091,8 +1091,8 @@ extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); -extern int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all); -extern int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly); +extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); +extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); diff --git a/net/core/dev.c b/net/core/dev.c index 7ddf66d0ad5e..4221dcda88d7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2607,8 +2607,8 @@ void dev_set_rx_mode(struct net_device *dev) netif_tx_unlock_bh(dev); } -int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, - int glbl) +int __dev_addr_delete(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) { struct dev_addr_list *da; @@ -2626,13 +2626,15 @@ int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, *list = da->next; kfree(da); + (*count)--; return 0; } } return -ENOENT; } -int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl) +int __dev_addr_add(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) { struct dev_addr_list *da; @@ -2659,6 +2661,7 @@ int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl) da->da_gusers = glbl ? 1 : 0; da->next = *list; *list = da; + (*count)++; return 0; } @@ -2692,11 +2695,9 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); netif_tx_lock_bh(dev); - err = __dev_addr_delete(&dev->uc_list, addr, alen, 0); - if (!err) { - dev->uc_count--; + err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) __dev_set_rx_mode(dev); - } netif_tx_unlock_bh(dev); return err; } @@ -2718,11 +2719,9 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); netif_tx_lock_bh(dev); - err = __dev_addr_add(&dev->uc_list, addr, alen, 0); - if (!err) { - dev->uc_count++; + err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) __dev_set_rx_mode(dev); - } netif_tx_unlock_bh(dev); return err; } diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 5cc9b448c443..aa38100601fb 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -72,10 +72,9 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) int err; netif_tx_lock_bh(dev); - err = __dev_addr_delete(&dev->mc_list, addr, alen, glbl); + err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, + addr, alen, glbl); if (!err) { - dev->mc_count--; - /* * We have altered the list, so the card * loaded filter is now wrong. Fix it @@ -96,11 +95,9 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) int err; netif_tx_lock_bh(dev); - err = __dev_addr_add(&dev->mc_list, addr, alen, glbl); - if (!err) { - dev->mc_count++; + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + if (!err) __dev_set_rx_mode(dev); - } netif_tx_unlock_bh(dev); return err; } -- cgit v1.2.3 From d0410051164bbbc597e15f068b53c06a954ae0d4 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 2 Jul 2007 22:07:22 -0700 Subject: [TCP]: SACK fastpath did override adjusted fackets_out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do same adjustment to SACK fastpath counters provided that they're valid. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53232dd6fb48..20aea1595c4d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -699,6 +699,14 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss tp->fackets_out -= diff; if ((int)tp->fackets_out < 0) tp->fackets_out = 0; + /* SACK fastpath might overwrite it unless dealt with */ + if (tp->fastpath_skb_hint != NULL && + after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, + TCP_SKB_CB(skb)->seq)) { + tp->fastpath_cnt_hint -= diff; + if ((int)tp->fastpath_cnt_hint < 0) + tp->fastpath_cnt_hint = 0; + } } } -- cgit v1.2.3 From eef6caf8a916f32f8d9b2a02d4fa7674736c00ac Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 2 Jul 2007 22:36:38 -0700 Subject: [MAC80211]: Set low initial rate in rc80211_simple The initial rate for STA's using rc80211_simple is set to the last rate in the rate table. For situations for which the signal is weak, the rate may be too high for authentication and association. Although the rc80211_simple module will adjust the speed, the response may not be fast enough for a successful connection. This modification sets the initial rate to the lowest supported value. Signed-off-by: Larry Finger Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- net/mac80211/rc80211_simple.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c index 2048cfd1ca70..5ae7fc454665 100644 --- a/net/mac80211/rc80211_simple.c +++ b/net/mac80211/rc80211_simple.c @@ -283,14 +283,16 @@ static void rate_control_simple_rate_init(void *priv, void *priv_sta, int i; sta->txrate = 0; mode = local->oper_hw_mode; - /* TODO: what is a good starting rate for STA? About middle? Maybe not - * the lowest or the highest rate.. Could consider using RSSI from - * previous packets? Need to have IEEE 802.1X auth succeed immediately - * after assoc.. */ + /* TODO: This routine should consider using RSSI from previous packets + * as we need to have IEEE 802.1X auth succeed immediately after assoc.. + * Until that method is implemented, we will use the lowest supported rate + * as a workaround, */ for (i = 0; i < mode->num_rates; i++) { if ((sta->supp_rates & BIT(i)) && - (mode->rates[i].flags & IEEE80211_RATE_SUPPORTED)) + (mode->rates[i].flags & IEEE80211_RATE_SUPPORTED)) { sta->txrate = i; + break; + } } } -- cgit v1.2.3 From 16dab72f65a6aab0aa72866e00c91b58a2794082 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 2 Jul 2007 22:39:50 -0700 Subject: [PKTGEN]: Centralize packet overhead tracking Track the extra packet overhead for VLAN tags, MPLS, IPSEC etc Signed-off-by: Jamal Hadi Salim Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index dffe067e7a7b..9f0a780aa916 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -228,6 +228,7 @@ struct pktgen_dev { int min_pkt_size; /* = ETH_ZLEN; */ int max_pkt_size; /* = ETH_ZLEN; */ + int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; __u32 delay_us; /* Default delay */ __u32 delay_ns; @@ -2075,6 +2076,13 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) pkt_dev->idle_acc += now - start; } +static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) +{ + pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); + pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); +} + /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2323,9 +2331,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen + - pkt_dev->nr_labels*sizeof(u32) + - VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), - GFP_ATOMIC); + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2368,7 +2374,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - - pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead; if (datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); @@ -2391,8 +2397,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->nr_labels * sizeof(u32) - - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev)); + pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; @@ -2662,9 +2667,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + - pkt_dev->nr_labels*sizeof(u32) + - VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), - GFP_ATOMIC); + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2708,7 +2711,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - sizeof(struct ipv6hdr) - sizeof(struct udphdr) - - pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead; if (datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); @@ -2738,8 +2741,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->nr_labels * sizeof(u32) - - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev)); + pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; @@ -2857,6 +2859,7 @@ static void pktgen_run(struct pktgen_thread *t) pkt_dev->started_at = getCurUs(); pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ pkt_dev->next_tx_ns = 0; + set_pkt_overhead(pkt_dev); strcpy(pkt_dev->result, "Starting"); started++; -- cgit v1.2.3 From 007a531b0a0c902392a3deff730acd28ce6625c7 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 2 Jul 2007 22:40:36 -0700 Subject: [PKTGEN]: Introduce sequential flows By default all flows in pktgen are randomly selected. This patch introduces ability to have all defined flows to be sent sequentially. Robert defined randomness to be the default behavior. Signed-off-by: Jamal Hadi Salim Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9f0a780aa916..683da7065886 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -181,6 +181,7 @@ #define F_MPLS_RND (1<<8) /* Random MPLS labels */ #define F_VID_RND (1<<9) /* Random VLAN ID */ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ +#define F_FLOW_SEQ (1<<11) /* Sequential flows */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -207,8 +208,12 @@ static struct proc_dir_entry *pg_proc_dir = NULL; struct flow_state { __be32 cur_daddr; int count; + __u32 flags; }; +/* flow flag bits */ +#define F_INIT (1<<0) /* flow has been initialized */ + struct pktgen_dev { /* * Try to keep frequent/infrequent used vars. separated. @@ -342,6 +347,7 @@ struct pktgen_dev { unsigned cflows; /* Concurrent flows (config) */ unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ + unsigned curfl; /* current sequenced flow (state)*/ char result[512]; }; @@ -691,6 +697,13 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_MPLS_RND) seq_printf(seq, "MPLS_RND "); + if (pkt_dev->cflows) { + if (pkt_dev->flags & F_FLOW_SEQ) + seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ + else + seq_printf(seq, "FLOW_RND "); + } + if (pkt_dev->flags & F_MACSRC_RND) seq_printf(seq, "MACSRC_RND "); @@ -1182,6 +1195,9 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!SVID_RND") == 0) pkt_dev->flags &= ~F_SVID_RND; + else if (strcmp(f, "FLOW_SEQ") == 0) + pkt_dev->flags |= F_FLOW_SEQ; + else if (strcmp(f, "!IPV6") == 0) pkt_dev->flags &= ~F_IPV6; @@ -1190,7 +1206,7 @@ static ssize_t pktgen_if_write(struct file *file, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -2083,6 +2099,37 @@ static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); } +static inline int f_seen(struct pktgen_dev *pkt_dev, int flow) +{ + + if (pkt_dev->flows[flow].flags & F_INIT) + return 1; + else + return 0; +} + +static inline int f_pick(struct pktgen_dev *pkt_dev) +{ + int flow = pkt_dev->curfl; + + if (pkt_dev->flags & F_FLOW_SEQ) { + if (pkt_dev->flows[flow].count >= pkt_dev->lflow) { + /* reset time */ + pkt_dev->flows[flow].count = 0; + pkt_dev->curfl += 1; + if (pkt_dev->curfl >= pkt_dev->cflows) + pkt_dev->curfl = 0; /*reset */ + } + } else { + flow = random32() % pkt_dev->cflows; + + if (pkt_dev->flows[flow].count > pkt_dev->lflow) + pkt_dev->flows[flow].count = 0; + } + + return pkt_dev->curfl; +} + /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2092,12 +2139,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 imx; int flow = 0; - if (pkt_dev->cflows) { - flow = random32() % pkt_dev->cflows; - - if (pkt_dev->flows[flow].count > pkt_dev->lflow) - pkt_dev->flows[flow].count = 0; - } + if (pkt_dev->cflows) + flow = f_pick(pkt_dev); /* Deal with source MAC */ if (pkt_dev->src_mac_count > 1) { @@ -2213,7 +2256,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_saddr = htonl(t); } - if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { + if (pkt_dev->cflows && f_seen(pkt_dev, flow)) { pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; } else { imn = ntohl(pkt_dev->daddr_min); @@ -2243,6 +2286,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } } if (pkt_dev->cflows) { + pkt_dev->flows[flow].flags |= F_INIT; pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; pkt_dev->nflows++; -- cgit v1.2.3 From 628529b6ee334fedc8d25ce56205bb99566572b9 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 2 Jul 2007 22:41:14 -0700 Subject: [XFRM] Introduce standalone SAD lookup This allows other in-kernel functions to do SAD lookups. The only known user at the moment is pktgen. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++++ net/xfrm/xfrm_state.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ee3827f053d7..d3a898b18fb4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -928,6 +928,10 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family); +extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr, + xfrm_address_t *saddr, + unsigned short family, + u8 mode, u8 proto, u32 reqid); extern int xfrm_state_check_expire(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index dfacb9c2a6e3..e070c3f938fb 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -686,6 +686,37 @@ out: return x; } +struct xfrm_state * +xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family, u8 mode, u8 proto, u32 reqid) +{ + unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); + struct xfrm_state *rx = NULL, *x = NULL; + struct hlist_node *entry; + + spin_lock(&xfrm_state_lock); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && + xfrm_state_addr_check(x, daddr, saddr, family) && + mode == x->props.mode && + proto == x->id.proto && + x->km.state == XFRM_STATE_VALID) { + rx = x; + break; + } + } + + if (rx) + xfrm_state_hold(rx); + spin_unlock(&xfrm_state_lock); + + + return rx; +} +EXPORT_SYMBOL(xfrm_stateonly_find); + static void __xfrm_state_insert(struct xfrm_state *x) { unsigned int h; -- cgit v1.2.3 From a553e4a6317b2cfc7659542c10fe43184ffe53da Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 2 Jul 2007 22:41:59 -0700 Subject: [PKTGEN]: IPSEC support Added transport mode ESP support for starters. I will send more of these modes and types once i have resolved the tunnel mode isses. Signed-off-by: Jamal Hadi Salim Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 152 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 683da7065886..75215331b045 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -152,6 +152,9 @@ #include #include #include +#ifdef CONFIG_XFRM +#include +#endif #include #include #include @@ -182,6 +185,7 @@ #define F_VID_RND (1<<9) /* Random VLAN ID */ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ #define F_FLOW_SEQ (1<<11) /* Sequential flows */ +#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -208,6 +212,9 @@ static struct proc_dir_entry *pg_proc_dir = NULL; struct flow_state { __be32 cur_daddr; int count; +#ifdef CONFIG_XFRM + struct xfrm_state *x; +#endif __u32 flags; }; @@ -348,7 +355,10 @@ struct pktgen_dev { unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ unsigned curfl; /* current sequenced flow (state)*/ - +#ifdef CONFIG_XFRM + __u8 ipsmode; /* IPSEC mode (config) */ + __u8 ipsproto; /* IPSEC type (config) */ +#endif char result[512]; }; @@ -704,6 +714,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, "FLOW_RND "); } +#ifdef CONFIG_XFRM + if (pkt_dev->flags & F_IPSEC_ON) + seq_printf(seq, "IPSEC "); +#endif + if (pkt_dev->flags & F_MACSRC_RND) seq_printf(seq, "MACSRC_RND "); @@ -1198,6 +1213,11 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "FLOW_SEQ") == 0) pkt_dev->flags |= F_FLOW_SEQ; +#ifdef CONFIG_XFRM + else if (strcmp(f, "IPSEC") == 0) + pkt_dev->flags |= F_IPSEC_ON; +#endif + else if (strcmp(f, "!IPV6") == 0) pkt_dev->flags &= ~F_IPV6; @@ -1206,7 +1226,7 @@ static ssize_t pktgen_if_write(struct file *file, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -2094,6 +2114,7 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) { + pkt_dev->pkt_overhead = 0; pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); @@ -2130,6 +2151,31 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) return pkt_dev->curfl; } + +#ifdef CONFIG_XFRM +/* If there was already an IPSEC SA, we keep it as is, else + * we go look for it ... +*/ +inline +void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) +{ + struct xfrm_state *x = pkt_dev->flows[flow].x; + if (!x) { + /*slow path: we dont already have xfrm_state*/ + x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, + (xfrm_address_t *)&pkt_dev->cur_saddr, + AF_INET, + pkt_dev->ipsmode, + pkt_dev->ipsproto, 0); + if (x) { + pkt_dev->flows[flow].x = x; + set_pkt_overhead(pkt_dev); + pkt_dev->pkt_overhead+=x->props.header_len; + } + + } +} +#endif /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2289,6 +2335,10 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].flags |= F_INIT; pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; +#ifdef CONFIG_XFRM + if (pkt_dev->flags & F_IPSEC_ON) + get_ipsec_sa(pkt_dev, flow); +#endif pkt_dev->nflows++; } } @@ -2329,6 +2379,91 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].count++; } + +#ifdef CONFIG_XFRM +static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) +{ + struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; + int err = 0; + struct iphdr *iph; + + if (!x) + return 0; + /* XXX: we dont support tunnel mode for now until + * we resolve the dst issue */ + if (x->props.mode != XFRM_MODE_TRANSPORT) + return 0; + + spin_lock(&x->lock); + iph = ip_hdr(skb); + + err = x->mode->output(x, skb); + if (err) + goto error; + err = x->type->output(x, skb); + if (err) + goto error; + + x->curlft.bytes +=skb->len; + x->curlft.packets++; + spin_unlock(&x->lock); + +error: + spin_unlock(&x->lock); + return err; +} + +static inline void free_SAs(struct pktgen_dev *pkt_dev) +{ + if (pkt_dev->cflows) { + /* let go of the SAs if we have them */ + int i = 0; + for (; i < pkt_dev->nflows; i++){ + struct xfrm_state *x = pkt_dev->flows[i].x; + if (x) { + xfrm_state_put(x); + pkt_dev->flows[i].x = NULL; + } + } + } +} + +static inline int process_ipsec(struct pktgen_dev *pkt_dev, + struct sk_buff *skb, __be16 protocol) +{ + if (pkt_dev->flags & F_IPSEC_ON) { + struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; + int nhead = 0; + if (x) { + int ret; + __u8 *eth; + nhead = x->props.header_len - skb_headroom(skb); + if (nhead >0) { + ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + if (ret < 0) { + printk("Error expanding ipsec packet %d\n",ret); + return 0; + } + } + + /* ipsec is not expecting ll header */ + skb_pull(skb, ETH_HLEN); + ret = pktgen_output_ipsec(skb, pkt_dev); + if (ret) { + printk("Error creating ipsec packet %d\n",ret); + kfree_skb(skb); + return 0; + } + /* restore ll */ + eth = (__u8 *) skb_push(skb, ETH_HLEN); + memcpy(eth, pkt_dev->hh, 12); + *(u16 *) & eth[12] = protocol; + } + } + return 1; +} +#endif + static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) { unsigned i; @@ -2512,6 +2647,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pgh->tv_usec = htonl(timestamp.tv_usec); } +#ifdef CONFIG_XFRM + if (!process_ipsec(pkt_dev, skb, protocol)) + return NULL; +#endif + return skb; } @@ -3497,11 +3637,18 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) } pkt_dev->entry->proc_fops = &pktgen_if_fops; pkt_dev->entry->data = pkt_dev; +#ifdef CONFIG_XFRM + pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; + pkt_dev->ipsproto = IPPROTO_ESP; +#endif return add_dev_to_thread(t, pkt_dev); out2: dev_put(pkt_dev->odev); out1: +#ifdef CONFIG_XFRM + free_SAs(pkt_dev); +#endif if (pkt_dev->flows) vfree(pkt_dev->flows); kfree(pkt_dev); @@ -3596,6 +3743,9 @@ static int pktgen_remove_device(struct pktgen_thread *t, if (pkt_dev->entry) remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); +#ifdef CONFIG_XFRM + free_SAs(pkt_dev); +#endif if (pkt_dev->flows) vfree(pkt_dev->flows); kfree(pkt_dev); -- cgit v1.2.3 From 876d48aabf30e4981653f1a0a7ae1e262b8c8b6f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:46:07 -0700 Subject: [NET_SCHED]: Remove CONFIG_NET_ESTIMATOR option The generic estimator is always built in anways and all the config options does is prevent including a minimal amount of code for setting it up. Additionally the option is already automatically selected for most cases. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/Kconfig | 12 ------------ net/sched/act_api.c | 6 ------ net/sched/act_police.c | 18 ------------------ net/sched/sch_api.c | 6 ------ net/sched/sch_cbq.c | 8 -------- net/sched/sch_generic.c | 2 -- net/sched/sch_hfsc.c | 8 -------- 7 files changed, 60 deletions(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f3217942ca87..b4662888bdbd 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -286,7 +286,6 @@ config CLS_U32_MARK config NET_CLS_RSVP tristate "IPv4 Resource Reservation Protocol (RSVP)" select NET_CLS - select NET_ESTIMATOR ---help--- The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this @@ -301,7 +300,6 @@ config NET_CLS_RSVP config NET_CLS_RSVP6 tristate "IPv6 Resource Reservation Protocol (RSVP6)" select NET_CLS - select NET_ESTIMATOR ---help--- The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this @@ -393,7 +391,6 @@ config NET_EMATCH_TEXT config NET_CLS_ACT bool "Actions" - select NET_ESTIMATOR ---help--- Say Y here if you want to use traffic control actions. Actions get attached to classifiers and are invoked after a successful @@ -476,7 +473,6 @@ config NET_ACT_SIMP config NET_CLS_POLICE bool "Traffic Policing (obsolete)" depends on NET_CLS_ACT!=y - select NET_ESTIMATOR ---help--- Say Y here if you want to do traffic policing, i.e. strict bandwidth limiting. This option is obsoleted by the traffic @@ -491,14 +487,6 @@ config NET_CLS_IND classification based on the incoming device. This option is likely to disappear in favour of the metadata ematch. -config NET_ESTIMATOR - bool "Rate estimator" - ---help--- - Say Y here to allow using rate estimators to estimate the current - rate-of-flow for network devices, queues, etc. This module is - automatically selected if needed but can be selected manually for - statistical purposes. - endif # NET_SCHED endmenu diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 711dd26c95c3..72bb9bd1a22a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -42,10 +42,8 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) write_lock_bh(hinfo->lock); *p1p = p->tcfc_next; write_unlock_bh(hinfo->lock); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&p->tcfc_bstats, &p->tcfc_rate_est); -#endif kfree(p); return; } @@ -236,11 +234,9 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; -#ifdef CONFIG_NET_ESTIMATOR if (est) gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, p->tcfc_stats_lock, est); -#endif a->priv = (void *) p; return p; } @@ -614,9 +610,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, goto errout; if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 616f465f407e..580698db578a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -118,10 +118,8 @@ void tcf_police_destroy(struct tcf_police *p) write_lock_bh(&police_lock); *p1p = p->tcf_next; write_unlock_bh(&police_lock); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&p->tcf_bstats, &p->tcf_rate_est); -#endif if (p->tcfp_R_tab) qdisc_put_rtab(p->tcfp_R_tab); if (p->tcfp_P_tab) @@ -227,7 +225,6 @@ override: police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); police->tcf_action = parm->action; -#ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) police->tcfp_ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); @@ -235,7 +232,6 @@ override: gen_replace_estimator(&police->tcf_bstats, &police->tcf_rate_est, police->tcf_stats_lock, est); -#endif spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -281,14 +277,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, police->tcf_bstats.bytes += skb->len; police->tcf_bstats.packets++; -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; spin_unlock(&police->tcf_lock); return police->tcf_action; } -#endif if (skb->len <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { @@ -348,10 +342,8 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) if (police->tcfp_result) RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &police->tcfp_result); -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate) RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); -#endif return skb->len; rtattr_failure: @@ -477,14 +469,12 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) goto failure; police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); } -#ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) goto failure; police->tcfp_ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); } -#endif police->tcfp_toks = police->tcfp_burst = parm->burst; police->tcfp_mtu = parm->mtu; if (police->tcfp_mtu == 0) { @@ -498,11 +488,9 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) police->tcf_index = parm->index ? parm->index : tcf_police_new_index(); police->tcf_action = parm->action; -#ifdef CONFIG_NET_ESTIMATOR if (est) gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, police->tcf_stats_lock, est); -#endif h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); police->tcf_next = tcf_police_ht[h]; @@ -528,14 +516,12 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police) police->tcf_bstats.bytes += skb->len; police->tcf_bstats.packets++; -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; spin_unlock(&police->tcf_lock); return police->tcf_action; } -#endif if (skb->len <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { spin_unlock(&police->tcf_lock); @@ -591,10 +577,8 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) if (police->tcfp_result) RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &police->tcfp_result); -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate) RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); -#endif return skb->len; rtattr_failure: @@ -612,9 +596,7 @@ int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) goto errout; if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bec600af03ca..0f9e1c71746a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -515,7 +515,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, sch->stats_lock, @@ -531,7 +530,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) goto err_out3; } } -#endif qdisc_lock_tree(dev); list_add_tail(&sch->list, &dev->qdisc_list); qdisc_unlock_tree(dev); @@ -559,11 +557,9 @@ static int qdisc_change(struct Qdisc *sch, struct rtattr **tca) if (err) return err; } -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&sch->bstats, &sch->rate_est, sch->stats_lock, tca[TCA_RATE-1]); -#endif return 0; } @@ -839,9 +835,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto rtattr_failure; if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &q->qstats) < 0) goto rtattr_failure; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ee2d5967d109..bf1ea9e75cd9 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1653,9 +1653,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, cl->xstats.undertime = cl->undertime - q->now; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || -#endif gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; @@ -1726,9 +1724,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); -#endif if (cl != &q->link) kfree(cl); } @@ -1873,11 +1869,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, cl->stats_lock, tca[TCA_RATE-1]); -#endif return 0; } @@ -1963,11 +1957,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, cl->stats_lock, tca[TCA_RATE-1]); -#endif *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2488dbb17b60..e525fd723c12 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -514,9 +514,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; list_del(&qdisc->list); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -#endif if (ops->reset) ops->reset(qdisc); if (ops->destroy) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9d124c4ee3a7..7ccdf63a0cb5 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1054,11 +1054,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, cl->stats_lock, tca[TCA_RATE-1]); -#endif return 0; } @@ -1112,11 +1110,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, cl->stats_lock, tca[TCA_RATE-1]); -#endif *arg = (unsigned long)cl; return 0; } @@ -1128,9 +1124,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->qdisc); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); -#endif if (cl != &q->root) kfree(cl); } @@ -1384,9 +1378,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.rtwork = cl->cl_cumul; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || -#endif gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; -- cgit v1.2.3 From 4bdf39911e7a887c4499161422423cbaf16684e8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:47:37 -0700 Subject: [NET_SCHED]: Remove unnecessary stats_lock pointers Remove stats_lock pointers from qdisc-internal structures, in all cases it points to dev->queue_lock. The only case where it is necessary is for top-level qdiscs, where it might also point to dev->ingress_lock in case of the ingress qdisc. Also remove it from actions completely, it always points to the actions internal lock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 7 +++---- net/sched/act_police.c | 8 +++----- net/sched/sch_atm.c | 1 - net/sched/sch_cbq.c | 8 +++----- net/sched/sch_hfsc.c | 10 +++------- 6 files changed, 12 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8b06c2f3657f..2f0273feabd3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -19,7 +19,6 @@ struct tcf_common { struct gnet_stats_basic tcfc_bstats; struct gnet_stats_queue tcfc_qstats; struct gnet_stats_rate_est tcfc_rate_est; - spinlock_t *tcfc_stats_lock; spinlock_t tcfc_lock; }; #define tcf_next common.tcfc_next @@ -32,7 +31,6 @@ struct tcf_common { #define tcf_bstats common.tcfc_bstats #define tcf_qstats common.tcfc_qstats #define tcf_rate_est common.tcfc_rate_est -#define tcf_stats_lock common.tcfc_stats_lock #define tcf_lock common.tcfc_lock struct tcf_police { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 72bb9bd1a22a..32cc191d9f90 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -230,13 +230,12 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti p->tcfc_bindcnt = 1; spin_lock_init(&p->tcfc_lock); - p->tcfc_stats_lock = &p->tcfc_lock; p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; if (est) gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - p->tcfc_stats_lock, est); + &p->tcfc_lock, est); a->priv = (void *) p; return p; } @@ -595,12 +594,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); + TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - h->tcf_stats_lock, &d); + &h->tcf_lock, &d); if (err < 0) goto errout; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 580698db578a..3e8716dd738c 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -183,7 +183,6 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, ret = ACT_P_CREATED; police->tcf_refcnt = 1; spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; if (bind) police->tcf_bindcnt = 1; override: @@ -231,7 +230,7 @@ override: if (est) gen_replace_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); + &police->tcf_lock, est); spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -450,7 +449,6 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) police->tcf_refcnt = 1; spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; if (parm->rate.rate) { police->tcfp_R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); @@ -490,7 +488,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) police->tcf_action = parm->action; if (est) gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); + &police->tcf_lock, est); h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); police->tcf_next = tcf_police_ht[h]; @@ -591,7 +589,7 @@ int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) struct gnet_dump d; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, police->tcf_stats_lock, + TCA_XSTATS, &police->tcf_lock, &d) < 0) goto errout; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d1c383fca82c..16fe802a66f7 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -71,7 +71,6 @@ struct atm_flow_data { int ref; /* reference count */ struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; - spinlock_t *stats_lock; struct atm_flow_data *next; struct atm_flow_data *excess; /* flow for excess traffic; NULL to set CLP instead */ diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index bf1ea9e75cd9..b093d8fce789 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -148,7 +148,6 @@ struct cbq_class struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; @@ -1442,7 +1441,6 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) q->link.ewma_log = TC_CBQ_DEF_EWMA; q->link.avpkt = q->link.allot/2; q->link.minidle = -0x7FFFFFFF; - q->link.stats_lock = &sch->dev->queue_lock; qdisc_watchdog_init(&q->watchdog, sch); hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); @@ -1871,7 +1869,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); + &sch->dev->queue_lock, + tca[TCA_RATE-1]); return 0; } @@ -1929,7 +1928,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->allot = parent->allot; cl->quantum = cl->allot; cl->weight = cl->R_tab->rate.rate; - cl->stats_lock = &sch->dev->queue_lock; sch_tree_lock(sch); cbq_link_class(cl); @@ -1959,7 +1957,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); + &sch->dev->queue_lock, tca[TCA_RATE-1]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 7ccdf63a0cb5..7130a2441b0d 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -122,7 +122,6 @@ struct hfsc_class struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ @@ -1056,7 +1055,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); + &sch->dev->queue_lock, + tca[TCA_RATE-1]); return 0; } @@ -1096,7 +1096,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; - cl->stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&cl->children); cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; @@ -1112,7 +1111,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); + &sch->dev->queue_lock, tca[TCA_RATE-1]); *arg = (unsigned long)cl; return 0; } @@ -1440,8 +1439,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) return -EINVAL; qopt = RTA_DATA(opt); - sch->stats_lock = &sch->dev->queue_lock; - q->defcls = qopt->defcls; for (i = 0; i < HFSC_HSIZE; i++) INIT_LIST_HEAD(&q->clhash[i]); @@ -1456,7 +1453,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; - q->root.stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&q->root.children); q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; -- cgit v1.2.3 From ee39e10c27ca5293c72addb95bff864095e19904 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:48:13 -0700 Subject: [NET_SCHED]: sch_htb: use generic estimator Use the generic estimator instead of reimplementing (parts of) it. For compatibility always create a default estimator for new classes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 85 +++++++++++++++-------------------------------------- 1 file changed, 24 insertions(+), 61 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 035788c5b7f8..26f81b848bfd 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -69,8 +69,6 @@ */ #define HTB_HSIZE 16 /* classid hash size */ -#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#define HTB_RATECM 1 /* whether to use rate computer */ #define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ @@ -95,12 +93,6 @@ struct htb_class { struct tc_htb_xstats xstats; /* our special stats */ int refcnt; /* usage count of this class */ -#ifdef HTB_RATECM - /* rate measurement counters */ - unsigned long rate_bytes, sum_bytes; - unsigned long rate_packets, sum_packets; -#endif - /* topology */ int level; /* our level (see above) */ struct htb_class *parent; /* parent class */ @@ -194,10 +186,6 @@ struct htb_sched { int rate2quantum; /* quant = rate / rate2quantum */ psched_time_t now; /* cached dequeue time */ struct qdisc_watchdog watchdog; -#ifdef HTB_RATECM - struct timer_list rttim; /* rate computer timer */ - int recmp_bucket; /* which hash bucket to recompute next */ -#endif /* non shaped skbs; let them go directly thru */ struct sk_buff_head direct_queue; @@ -677,34 +665,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -#ifdef HTB_RATECM -#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 -static void htb_rate_timer(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc *)arg; - struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; - - - /* lock queue so that we can muck with it */ - spin_lock_bh(&sch->dev->queue_lock); - - q->rttim.expires = jiffies + HZ; - add_timer(&q->rttim); - - /* scan and recompute one bucket at time */ - if (++q->recmp_bucket >= HTB_HSIZE) - q->recmp_bucket = 0; - - hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) { - RT_GEN(cl->sum_bytes, cl->rate_bytes); - RT_GEN(cl->sum_packets, cl->rate_packets); - } - spin_unlock_bh(&sch->dev->queue_lock); -} -#endif - /** * htb_charge_class - charges amount "bytes" to leaf and ancestors * @@ -750,11 +710,6 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } -#ifdef HTB_RATECM - /* update rate counters */ - cl->sum_bytes += bytes; - cl->sum_packets++; -#endif /* update byte stats except for leaves which are already updated */ if (cl->level) { @@ -1095,13 +1050,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; -#ifdef HTB_RATECM - init_timer(&q->rttim); - q->rttim.function = htb_rate_timer; - q->rttim.data = (unsigned long)sch; - q->rttim.expires = jiffies + HZ; - add_timer(&q->rttim); -#endif if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; @@ -1175,11 +1123,6 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; -#ifdef HTB_RATECM - cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE); - cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE); -#endif - if (!cl->level && cl->un.leaf.q) cl->qstats.qlen = cl->un.leaf.q->q.qlen; cl->xstats.tokens = cl->tokens; @@ -1277,6 +1220,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) BUG_TRAP(cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); } + gen_kill_estimator(&cl->bstats, &cl->rate_est); qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); @@ -1305,9 +1249,6 @@ static void htb_destroy(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); -#ifdef HTB_RATECM - del_timer_sync(&q->rttim); -#endif /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call @@ -1403,6 +1344,20 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) { /* new class */ struct Qdisc *new_q; int prio; + struct { + struct rtattr rta; + struct gnet_estimator opt; + } est = { + .rta = { + .rta_len = RTA_LENGTH(sizeof(est.opt)), + .rta_type = TCA_RATE, + }, + .opt = { + /* 4s interval, 16s averaging constant */ + .interval = 2, + .ewma_log = 2, + }, + }; /* check for valid classid */ if (!classid || TC_H_MAJ(classid ^ sch->handle) @@ -1418,6 +1373,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; + gen_new_estimator(&cl->bstats, &cl->rate_est, + &sch->dev->queue_lock, + tca[TCA_RATE-1] ? : &est.rta); cl->refcnt = 1; INIT_LIST_HEAD(&cl->sibling); INIT_HLIST_NODE(&cl->hlist); @@ -1469,8 +1427,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); - } else + } else { + if (tca[TCA_RATE-1]) + gen_replace_estimator(&cl->bstats, &cl->rate_est, + &sch->dev->queue_lock, + tca[TCA_RATE-1]); sch_tree_lock(sch); + } /* it used to be a nasty bug here, we have to check that node is really leaf before changing cl->un.leaf ! */ -- cgit v1.2.3 From 0ba48053831d5b89ee2afaefaae1c06eae80cb05 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 2 Jul 2007 22:49:07 -0700 Subject: [NET_SCHED]: Remove unnecessary includes Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/act_api.c | 10 ---------- net/sched/act_gact.c | 11 ----------- net/sched/act_ipt.c | 12 ------------ net/sched/act_mirred.c | 12 ------------ net/sched/act_pedit.c | 11 ----------- net/sched/act_police.c | 10 ---------- net/sched/act_simple.c | 1 - net/sched/cls_api.c | 10 ---------- net/sched/cls_basic.c | 1 - net/sched/cls_fw.c | 19 +------------------ net/sched/cls_route.c | 20 +++----------------- net/sched/cls_rsvp.c | 17 +---------------- net/sched/cls_rsvp6.c | 16 ---------------- net/sched/cls_tcindex.c | 3 --- net/sched/cls_u32.c | 18 +----------------- net/sched/ematch.c | 2 -- net/sched/sch_api.c | 12 ------------ net/sched/sch_atm.c | 3 --- net/sched/sch_blackhole.c | 1 - net/sched/sch_cbq.c | 18 +----------------- net/sched/sch_dsmark.c | 1 - net/sched/sch_fifo.c | 1 - net/sched/sch_generic.c | 8 -------- net/sched/sch_gred.c | 1 - net/sched/sch_hfsc.c | 3 --- net/sched/sch_htb.c | 18 +----------------- net/sched/sch_ingress.c | 9 +-------- net/sched/sch_netem.c | 2 -- net/sched/sch_prio.c | 16 ---------------- net/sched/sch_red.c | 1 - net/sched/sch_sfq.c | 18 ++---------------- net/sched/sch_tbf.c | 19 +------------------ net/sched/sch_teql.c | 18 ++---------------- 33 files changed, 14 insertions(+), 308 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 32cc191d9f90..feef366cad5d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -11,23 +11,13 @@ * */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include -#include #include #include #include diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 7517f3791541..a9631e426d91 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -10,26 +10,15 @@ * */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include #include -#include #include #include #include diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 00b05f422d45..6b407ece953c 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -11,27 +11,15 @@ * Copyright: Jamal Hadi Salim (2002-4) */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include -#include #include -#include #include #include #include diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index de21c92faaa2..579578944ae7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -12,31 +12,19 @@ * */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include #include -#include #include #include #include -#include #include #define MIRRED_TAB_MASK 7 diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 6f8684b5617e..b46fab5fb323 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -9,26 +9,15 @@ * Authors: Jamal Hadi Salim (2002-4) */ -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include #include -#include #include #include #include diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 3e8716dd738c..d20403890877 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -10,25 +10,15 @@ * J Hadi Salim (action changes) */ -#include -#include -#include #include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include -#include #include #include diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 36e1edad5990..fb84ef33d14f 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ebf94edf0478..36b72aab1bde 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -14,26 +14,16 @@ * */ -#include -#include -#include #include #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include #include -#include #include #include diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index c885412d79d5..8dbcf2771a46 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index bbec4a0d4dcb..8adbd6a37d14 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -19,29 +19,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include #include diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index cc941d0ee3a5..0a8409c1d28a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,28 +10,14 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include +#include +#include #include #include diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c index 0a683c07c648..cbb5e0d600f3 100644 --- a/net/sched/cls_rsvp.c +++ b/net/sched/cls_rsvp.c @@ -10,27 +10,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include #include #include diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c index 93b6abed57db..dd08aea2aee5 100644 --- a/net/sched/cls_rsvp6.c +++ b/net/sched/cls_rsvp6.c @@ -10,28 +10,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include -#include #include -#include #include #include #include diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 47ac0c556429..2314820a080a 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -9,12 +9,9 @@ #include #include #include -#include -#include #include #include #include -#include /* diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c7a347bd6d70..77961e2314dc 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -30,30 +30,14 @@ * nfmark match added by Catalin(ux aka Dino) BOIE */ -#include -#include -#include #include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include +#include #include #include diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 63146d339d81..24837391640d 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -84,9 +84,7 @@ #include #include #include -#include #include -#include #include #include #include diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 0f9e1c71746a..d92ea26982c5 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -19,30 +19,18 @@ #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include #include #include #include -#include #include #include -#include #include -#include -#include -#include - static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 16fe802a66f7..54b92d22796c 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -8,15 +8,12 @@ #include #include #include -#include #include #include -#include #include #include /* for fput */ #include #include -#include extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index cb0c456aa349..f914fc43a124 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index b093d8fce789..b184c3545145 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -11,28 +11,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 3c6fd181263f..4d2c233a8611 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -9,7 +9,6 @@ #include #include #include -#include /* for pkt_sched */ #include #include #include diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index c2689f4ba8de..c264308f17c1 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e525fd723c12..c81649cf0b9e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -11,27 +11,19 @@ * - Ingress support */ -#include -#include #include #include #include #include #include #include -#include -#include -#include -#include #include -#include #include #include #include #include #include #include -#include #include /* Main transmission queue. */ diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index fa1b4fe7a5fd..3cc6dda02e2e 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 7130a2441b0d..874452c41a01 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -62,13 +61,11 @@ #include #include #include -#include #include #include #include #include #include -#include #include /* diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 26f81b848bfd..c031486b675f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -28,32 +28,16 @@ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include +#include #include -#include #include -#include /* HTB algorithm. Author: devik@cdi.cz diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index f8b9f1cdf738..cd0aab6a2a7c 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -9,21 +9,14 @@ #include #include +#include #include -#include #include #include #include #include -#include #include #include -#include -#include -#include -#include -#include -#include #undef DEBUG_INGRESS diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5d9d8bc9cc3a..9e5e87e81f00 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -14,11 +14,9 @@ */ #include -#include #include #include #include -#include #include #include diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 404522046289..2d8c08493d6e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -12,28 +12,12 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include #include diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 00db53eb8159..9b95fefb70f4 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 96dfdf78d32c..957957309859 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -10,31 +10,17 @@ */ #include -#include -#include -#include #include #include #include #include -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include #include -#include -#include #include -#include #include -#include +#include +#include #include diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 53862953baaf..22e431dace54 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -13,29 +13,12 @@ */ #include -#include -#include -#include #include #include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index dfe7e4520988..0968184ea6be 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -9,30 +9,17 @@ */ #include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include #include -#include -#include #include -#include -#include #include -#include -#include #include #include -#include +#include +#include #include /* @@ -225,7 +212,6 @@ static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt) return 0; } -/* "teql*" netdevice routines */ static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) -- cgit v1.2.3 From 89da1ecf5483e6aa29b456a15ad6d05a6797c5a5 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 2 Jul 2007 22:54:18 -0700 Subject: [IrDA]: Netlink layer. First IrDA configuration netlink layer implementation. Currently, we only support the set/get mode commands. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/linux/irda.h | 27 ++++++++ include/net/irda/irda.h | 3 + include/net/irda/irlap.h | 2 + net/irda/Makefile | 2 +- net/irda/irmod.c | 48 +++++++++++-- net/irda/irnetlink.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 247 insertions(+), 5 deletions(-) create mode 100644 net/irda/irnetlink.c (limited to 'net') diff --git a/include/linux/irda.h b/include/linux/irda.h index 945ba3110874..35911bd4dbe8 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -216,6 +216,33 @@ struct if_irda_req { #define ifr_dtr ifr_ifru.ifru_line.dtr #define ifr_rts ifr_ifru.ifru_line.rts + +/* IrDA netlink definitions */ +#define IRDA_NL_NAME "irda" +#define IRDA_NL_VERSION 1 + +enum irda_nl_commands { + IRDA_NL_CMD_UNSPEC, + IRDA_NL_CMD_SET_MODE, + IRDA_NL_CMD_GET_MODE, + + __IRDA_NL_CMD_AFTER_LAST +}; +#define IRDA_NL_CMD_MAX (__IRDA_NL_CMD_AFTER_LAST - 1) + +enum nl80211_attrs { + IRDA_NL_ATTR_UNSPEC, + IRDA_NL_ATTR_IFNAME, + IRDA_NL_ATTR_MODE, + + __IRDA_NL_ATTR_AFTER_LAST +}; +#define IRDA_NL_ATTR_MAX (__IRDA_NL_ATTR_AFTER_LAST - 1) + +/* IrDA modes */ +#define IRDA_MODE_PRIMARY 0x1 +#define IRDA_MODE_SECONDARY 0x2 + #endif /* KERNEL_IRDA_H */ diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h index 36bee441aa56..08387553b57e 100644 --- a/include/net/irda/irda.h +++ b/include/net/irda/irda.h @@ -125,6 +125,9 @@ extern void irda_sysctl_unregister(void); extern int irsock_init(void); extern void irsock_cleanup(void); +extern int irda_nl_register(void); +extern void irda_nl_unregister(void); + extern int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index a3d370efb903..9d0c78ea92f5 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -208,6 +208,8 @@ struct irlap_cb { int xbofs_delay; /* Nr of XBOF's used to MTT */ int bofs_count; /* Negotiated extra BOFs */ int next_bofs; /* Negotiated extra BOFs after next frame */ + + int mode; /* IrLAP mode (primary, secondary or monitor) */ }; /* diff --git a/net/irda/Makefile b/net/irda/Makefile index d1366c2a39cb..187f6c563a4b 100644 --- a/net/irda/Makefile +++ b/net/irda/Makefile @@ -10,6 +10,6 @@ obj-$(CONFIG_IRCOMM) += ircomm/ irda-y := iriap.o iriap_event.o irlmp.o irlmp_event.o irlmp_frame.o \ irlap.o irlap_event.o irlap_frame.o timer.o qos.o irqueue.o \ irttp.o irda_device.o irias_object.o wrapper.o af_irda.o \ - discovery.o parameters.o irmod.o + discovery.o parameters.o irnetlink.o irmod.o irda-$(CONFIG_PROC_FS) += irproc.o irda-$(CONFIG_SYSCTL) += irsysctl.o diff --git a/net/irda/irmod.c b/net/irda/irmod.c index c7fad2c5b9f3..1900937b3328 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -88,16 +88,23 @@ EXPORT_SYMBOL(irda_notify_init); */ static int __init irda_init(void) { + int ret = 0; + IRDA_DEBUG(0, "%s()\n", __FUNCTION__); /* Lower layer of the stack */ irlmp_init(); irlap_init(); + /* Driver/dongle support */ + irda_device_init(); + /* Higher layers of the stack */ iriap_init(); irttp_init(); - irsock_init(); + ret = irsock_init(); + if (ret < 0) + goto out_err_1; /* Add IrDA packet type (Start receiving packets) */ dev_add_pack(&irda_packet_type); @@ -107,13 +114,44 @@ static int __init irda_init(void) irda_proc_register(); #endif #ifdef CONFIG_SYSCTL - irda_sysctl_register(); + ret = irda_sysctl_register(); + if (ret < 0) + goto out_err_2; #endif - /* Driver/dongle support */ - irda_device_init(); + ret = irda_nl_register(); + if (ret < 0) + goto out_err_3; return 0; + + out_err_3: +#ifdef CONFIG_SYSCTL + irda_sysctl_unregister(); +#endif + out_err_2: +#ifdef CONFIG_PROC_FS + irda_proc_unregister(); +#endif + + /* Remove IrDA packet type (stop receiving packets) */ + dev_remove_pack(&irda_packet_type); + + /* Remove higher layers */ + irsock_cleanup(); + out_err_1: + irttp_cleanup(); + iriap_cleanup(); + + /* Remove lower layers */ + irda_device_cleanup(); + irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */ + + /* Remove middle layer */ + irlmp_cleanup(); + + + return ret; } /* @@ -125,6 +163,8 @@ static int __init irda_init(void) static void __exit irda_cleanup(void) { /* Remove External APIs */ + irda_nl_unregister(); + #ifdef CONFIG_SYSCTL irda_sysctl_unregister(); #endif diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c new file mode 100644 index 000000000000..db716580e1ae --- /dev/null +++ b/net/irda/irnetlink.c @@ -0,0 +1,170 @@ +/* + * IrDA netlink layer, for stack configuration. + * + * Copyright (c) 2007 Samuel Ortiz + * + * Partly based on the 802.11 nelink implementation + * (see net/wireless/nl80211.c) which is: + * Copyright 2006 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include + + + +static struct genl_family irda_nl_family = { + .id = GENL_ID_GENERATE, + .name = IRDA_NL_NAME, + .hdrsize = 0, + .version = IRDA_NL_VERSION, + .maxattr = IRDA_NL_CMD_MAX, +}; + +static struct net_device * ifname_to_netdev(struct genl_info *info) +{ + char * ifname; + + if (!info->attrs[IRDA_NL_ATTR_IFNAME]) + return NULL; + + ifname = nla_data(info->attrs[IRDA_NL_ATTR_IFNAME]); + + IRDA_DEBUG(5, "%s(): Looking for %s\n", __FUNCTION__, ifname); + + return dev_get_by_name(ifname); +} + +static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device * dev; + struct irlap_cb * irlap; + u32 mode; + + if (!info->attrs[IRDA_NL_ATTR_MODE]) + return -EINVAL; + + mode = nla_get_u32(info->attrs[IRDA_NL_ATTR_MODE]); + + IRDA_DEBUG(5, "%s(): Switching to mode: %d\n", __FUNCTION__, mode); + + dev = ifname_to_netdev(info); + if (!dev) + return -ENODEV; + + irlap = (struct irlap_cb *)dev->atalk_ptr; + if (!irlap) { + dev_put(dev); + return -ENODEV; + } + + irlap->mode = mode; + + dev_put(dev); + + return 0; +} + +static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device * dev; + struct irlap_cb * irlap; + struct sk_buff *msg; + void *hdr; + int ret = -ENOBUFS; + + dev = ifname_to_netdev(info); + if (!dev) + return -ENODEV; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) { + dev_put(dev); + return -ENOMEM; + } + + irlap = (struct irlap_cb *)dev->atalk_ptr; + if (!irlap) { + ret = -ENODEV; + goto err_out; + } + + hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); + if (IS_ERR(hdr)) { + ret = PTR_ERR(hdr); + goto err_out; + } + + if(nla_put_string(msg, IRDA_NL_ATTR_IFNAME, + dev->name)); + goto err_out; + + if(nla_put_u32(msg, IRDA_NL_ATTR_MODE, irlap->mode)) + goto err_out; + + genlmsg_end(msg, hdr); + + return genlmsg_unicast(msg, info->snd_pid); + + err_out: + nlmsg_free(msg); + dev_put(dev); + + return ret; +} + +static struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = { + [IRDA_NL_ATTR_IFNAME] = { .type = NLA_NUL_STRING, + .len = IFNAMSIZ-1 }, + [IRDA_NL_ATTR_MODE] = { .type = NLA_U32 }, +}; + +static struct genl_ops irda_nl_ops[] = { + { + .cmd = IRDA_NL_CMD_SET_MODE, + .doit = irda_nl_set_mode, + .policy = irda_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IRDA_NL_CMD_GET_MODE, + .doit = irda_nl_get_mode, + .policy = irda_nl_policy, + /* can be retrieved by unprivileged users */ + }, + +}; + +int irda_nl_register(void) +{ + int err, i; + + err = genl_register_family(&irda_nl_family); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(irda_nl_ops); i++) { + err = genl_register_ops(&irda_nl_family, &irda_nl_ops[i]); + if (err) + goto err_out; + } + return 0; + err_out: + genl_unregister_family(&irda_nl_family); + return err; +} + +void irda_nl_unregister(void) +{ + genl_unregister_family(&irda_nl_family); +} -- cgit v1.2.3 From 411725280bd0058ebb83c0e32133b7a94902c3a6 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 2 Jul 2007 22:55:31 -0700 Subject: [IrDA]: Monitor mode. Through the IrDA netlink set mode command, we switch to IrDA monitor mode, where one IrLAP instance receives all the packets on the media, without ever responding to them. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/linux/irda.h | 1 + net/irda/irlap_frame.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'net') diff --git a/include/linux/irda.h b/include/linux/irda.h index 35911bd4dbe8..8e3735714c1c 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -242,6 +242,7 @@ enum nl80211_attrs { /* IrDA modes */ #define IRDA_MODE_PRIMARY 0x1 #define IRDA_MODE_SECONDARY 0x2 +#define IRDA_MODE_MONITOR 0x4 #endif /* KERNEL_IRDA_H */ diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 3013c49ab975..25a3444a9234 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -101,6 +101,13 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb) irlap_insert_info(self, skb); + if (unlikely(self->mode & IRDA_MODE_MONITOR)) { + IRDA_DEBUG(3, "%s(): %s is in monitor mode\n", __FUNCTION__, + self->netdev->name); + dev_kfree_skb(skb); + return; + } + dev_queue_xmit(skb); } -- cgit v1.2.3 From 93cce3d3657bfb5d04789afcd5104f8c48700f32 Mon Sep 17 00:00:00 2001 From: "G. Liakhovetski" Date: Mon, 2 Jul 2007 22:56:57 -0700 Subject: [IrDA]: tsap init routine factorisation. This patch extracts common code from irttp_open_tsap() and irttp_dup() into a new function to 1) avoid code duplication, 2) help avoid forgetting object initialization in the tsap duplication path in the future. Signed-off-by: G. Liakhovetski Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- net/irda/irttp.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 7069e4a58257..ce4647542b9e 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -368,6 +368,20 @@ static int irttp_param_max_sdu_size(void *instance, irda_param_t *param, /************************** LMP CALLBACKS **************************/ /* Everything is happily mixed up. Waiting for next clean up - Jean II */ +/* + * Initialization, that has to be done on new tsap + * instance allocation and on duplication + */ +static void irttp_init_tsap(struct tsap_cb *tsap) +{ + spin_lock_init(&tsap->lock); + init_timer(&tsap->todo_timer); + + skb_queue_head_init(&tsap->rx_queue); + skb_queue_head_init(&tsap->tx_queue); + skb_queue_head_init(&tsap->rx_fragments); +} + /* * Function irttp_open_tsap (stsap, notify) * @@ -395,10 +409,11 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) IRDA_DEBUG(0, "%s(), unable to kmalloc!\n", __FUNCTION__); return NULL; } - spin_lock_init(&self->lock); + + /* Initialize internal objects */ + irttp_init_tsap(self); /* Initialise todo timer */ - init_timer(&self->todo_timer); self->todo_timer.data = (unsigned long) self; self->todo_timer.function = &irttp_todo_expired; @@ -418,9 +433,6 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) self->magic = TTP_TSAP_MAGIC; self->connected = FALSE; - skb_queue_head_init(&self->rx_queue); - skb_queue_head_init(&self->tx_queue); - skb_queue_head_init(&self->rx_fragments); /* * Create LSAP at IrLMP layer */ @@ -1455,12 +1467,9 @@ struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance) /* Not everything should be copied */ new->notify.instance = instance; - spin_lock_init(&new->lock); - init_timer(&new->todo_timer); - skb_queue_head_init(&new->rx_queue); - skb_queue_head_init(&new->tx_queue); - skb_queue_head_init(&new->rx_fragments); + /* Initialize internal objects */ + irttp_init_tsap(new); /* This is locked */ hashbin_insert(irttp->tsaps, (irda_queue_t *) new, (long) new, NULL); -- cgit v1.2.3 From 067b207b281db5e3f03f8d244286c20f61aa2343 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 5 Jul 2007 17:08:05 -0700 Subject: [UDP]: Cleanup UDP encapsulation code This cleanup fell out after adding L2TP support where a new encap_rcv funcptr was added to struct udp_sock. Have XFRM use the new encap_rcv funcptr, which allows us to move the XFRM encap code from udp.c into xfrm4_input.c. Make xfrm4_rcv_encap() static since it is no longer called externally. Signed-off-by: James Chapman Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 +- net/ipv4/udp.c | 156 ++++++++----------------------------------------- net/ipv4/xfrm4_input.c | 114 +++++++++++++++++++++++++++++++++--- 3 files changed, 133 insertions(+), 142 deletions(-) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d3a898b18fb4..ae959e950174 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1003,7 +1003,7 @@ extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); #ifdef CONFIG_XFRM -extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type); +extern int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); extern int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen); extern int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family); #else @@ -1012,12 +1012,13 @@ static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optv return -ENOPROTOOPT; } -static inline int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) +static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) { /* should not happen */ kfree_skb(skb); return 0; } + static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family) { return -EINVAL; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b9276f8bdac5..4ec4a25a8d0c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -920,108 +920,6 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } -/* return: - * 1 if the UDP system should process it - * 0 if we should drop this packet - * -1 if it should get processed by xfrm4_rcv_encap - * -2 if it should get processed by l2tp - */ -static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) -{ - struct udp_sock *up = udp_sk(sk); - struct udphdr *uh; - struct iphdr *iph; - int iphlen, len; - - __u8 *udpdata; - __be32 *udpdata32; - __u16 encap_type = up->encap_type; - - /* if we're overly short, let UDP handle it */ - len = skb->len - sizeof(struct udphdr); - if (len <= 0) - return 1; - - /* if this is not encapsulated socket, then just return now */ - if (!encap_type) - return 1; - - /* If this is a paged skb, make sure we pull up - * whatever data we need to look at. */ - if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) - return 1; - - /* Now we can get the pointers */ - uh = udp_hdr(skb); - udpdata = (__u8 *)uh + sizeof(struct udphdr); - udpdata32 = (__be32 *)udpdata; - - switch (encap_type) { - default: - case UDP_ENCAP_ESPINUDP: - /* Check if this is a keepalive packet. If so, eat it. */ - if (len == 1 && udpdata[0] == 0xff) { - return 0; - } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { - /* ESP Packet without Non-ESP header */ - len = sizeof(struct udphdr); - } else - /* Must be an IKE packet.. pass it through */ - return 1; - break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - /* Check if this is a keepalive packet. If so, eat it. */ - if (len == 1 && udpdata[0] == 0xff) { - return 0; - } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && - udpdata32[0] == 0 && udpdata32[1] == 0) { - - /* ESP Packet with Non-IKE marker */ - len = sizeof(struct udphdr) + 2 * sizeof(u32); - } else - /* Must be an IKE packet.. pass it through */ - return 1; - break; - case UDP_ENCAP_L2TPINUDP: - /* Let caller know to send this to l2tp */ - return -2; - } - -#ifndef CONFIG_XFRM - return 1; -#else - /* At this point we are sure that this is an ESPinUDP packet, - * so we need to remove 'len' bytes from the packet (the UDP - * header and optional ESP marker bytes) and then modify the - * protocol to ESP, and then call into the transform receiver. - */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return 0; - - /* Now we can update and verify the packet length... */ - iph = ip_hdr(skb); - iphlen = iph->ihl << 2; - iph->tot_len = htons(ntohs(iph->tot_len) - len); - if (skb->len < iphlen + len) { - /* packet is too small!?! */ - return 0; - } - - /* pull the data buffer up to the ESP header and set the - * transport header to point to ESP. Keep UDP on the stack - * for later. - */ - __skb_pull(skb, len); - skb_reset_transport_header(skb); - - /* modify the protocol (it's ESP!) */ - iph->protocol = IPPROTO_ESP; - - /* and let the caller know to send this into the ESP processor... */ - return -1; -#endif -} - /* returns: * -1: error * 0: success @@ -1044,44 +942,36 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if (up->encap_type) { /* - * This is an encapsulation socket, so let's see if this is - * an encapsulated packet. - * If it's a keepalive packet, then just eat it. - * If it's an encapsulateed packet, then pass it to the - * IPsec xfrm input and return the response - * appropriately. Otherwise, just fall through and - * pass this up the UDP socket. + * This is an encapsulation socket so pass the skb to + * the socket's udp_encap_rcv() hook. Otherwise, just + * fall through and pass this up the UDP socket. + * up->encap_rcv() returns the following value: + * =0 if skb was successfully passed to the encap + * handler or was discarded by it. + * >0 if skb should be passed on to UDP. + * <0 if skb should be resubmitted as proto -N */ - int ret; + unsigned int len; - ret = udp_encap_rcv(sk, skb); - if (ret == 0) { - /* Eat the packet .. */ - kfree_skb(skb); - return 0; - } - if (ret == -1) { - /* process the ESP packet */ - ret = xfrm4_rcv_encap(skb, up->encap_type); - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); - return -ret; - } - if (ret == -2) { - /* process the L2TP packet */ - if (up->encap_rcv != NULL) { - ret = (*up->encap_rcv)(sk, skb); - if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); - return ret; - } - - /* FALLTHROUGH -- pass up as UDP packet */ + /* if we're overly short, let UDP handle it */ + len = skb->len - sizeof(struct udphdr); + if (len <= 0) + goto udp; + + if (up->encap_rcv != NULL) { + int ret; + + ret = (*up->encap_rcv)(sk, skb); + if (ret <= 0) { + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); + return -ret; } } /* FALLTHROUGH -- it's a UDP Packet */ } +udp: /* * UDP-Lite specific tests, ignored on UDP sockets */ @@ -1367,6 +1257,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case 0: case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: + up->encap_rcv = xfrm4_udp_encap_rcv; + /* FALLTHROUGH */ case UDP_ENCAP_L2TPINUDP: up->encap_type = val; break; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index fa1902dc81b8..2fa108245413 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -16,13 +16,6 @@ #include #include -int xfrm4_rcv(struct sk_buff *skb) -{ - return xfrm4_rcv_encap(skb, 0); -} - -EXPORT_SYMBOL(xfrm4_rcv); - static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { switch (nexthdr) { @@ -53,7 +46,7 @@ drop: } #endif -int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) +static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { __be32 spi, seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; @@ -167,3 +160,108 @@ drop: kfree_skb(skb); return 0; } + +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + struct udphdr *uh; + struct iphdr *iph; + int iphlen, len; + int ret; + + __u8 *udpdata; + __be32 *udpdata32; + __u16 encap_type = up->encap_type; + + /* if this is not encapsulated socket, then just return now */ + if (!encap_type) + return 1; + + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + len = skb->len - sizeof(struct udphdr); + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = udp_hdr(skb); + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__be32 *)udpdata; + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { + /* ESP Packet without Non-ESP header */ + len = sizeof(struct udphdr); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && + udpdata32[0] == 0 && udpdata32[1] == 0) { + + /* ESP Packet with Non-IKE marker */ + len = sizeof(struct udphdr) + 2 * sizeof(u32); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + } + + /* At this point we are sure that this is an ESPinUDP packet, + * so we need to remove 'len' bytes from the packet (the UDP + * header and optional ESP marker bytes) and then modify the + * protocol to ESP, and then call into the transform receiver. + */ + if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + /* Now we can update and verify the packet length... */ + iph = ip_hdr(skb); + iphlen = iph->ihl << 2; + iph->tot_len = htons(ntohs(iph->tot_len) - len); + if (skb->len < iphlen + len) { + /* packet is too small!?! */ + goto drop; + } + + /* pull the data buffer up to the ESP header and set the + * transport header to point to ESP. Keep UDP on the stack + * for later. + */ + __skb_pull(skb, len); + skb_reset_transport_header(skb); + + /* modify the protocol (it's ESP!) */ + iph->protocol = IPPROTO_ESP; + + /* process ESP */ + ret = xfrm4_rcv_encap(skb, encap_type); + return ret; + +drop: + kfree_skb(skb); + return 0; +} + +int xfrm4_rcv(struct sk_buff *skb) +{ + return xfrm4_rcv_encap(skb, 0); +} + +EXPORT_SYMBOL(xfrm4_rcv); -- cgit v1.2.3 From 558585aad0c0ef83d3d14a1c7576b1e404ca1fbc Mon Sep 17 00:00:00 2001 From: Jing Min Zhao Date: Sat, 7 Jul 2007 22:13:17 -0700 Subject: [NETFILTER]: nf_conntrack_h323: check range first in sequence extension Check range before checking STOP flag. This optimization may save a nanosecond or less :) Signed-off-by: Jing Min Zhao Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_h323_asn1.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 6b7eaa019d4c..a869403b2294 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -555,15 +555,6 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level) /* Decode the extension components */ for (opt = 0; opt < bmp2_len; opt++, i++, son++) { - if (i < f->ub && son->attr & STOP) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", - son->name); - return H323_ERROR_STOP; - } - - if (!((0x80000000 >> opt) & bmp2)) /* Not present */ - continue; - /* Check Range */ if (i >= f->ub) { /* Newer Version? */ CHECK_BOUND(bs, 2); @@ -573,6 +564,15 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level) continue; } + if (son->attr & STOP) { + PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + son->name); + return H323_ERROR_STOP; + } + + if (!((0x80000000 >> opt) & bmp2)) /* Not present */ + continue; + CHECK_BOUND(bs, 2); len = get_len(bs); CHECK_BOUND(bs, len); -- cgit v1.2.3 From cff533ac12494fa002e2c46acc94d670e5f636a2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:15:12 -0700 Subject: [NETFILTER]: x_tables: switch hotdrop to bool Switch the "hotdrop" variables to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 8 ++++---- net/ipv4/netfilter/ipt_addrtype.c | 2 +- net/ipv4/netfilter/ipt_ah.c | 4 ++-- net/ipv4/netfilter/ipt_ecn.c | 6 +++--- net/ipv4/netfilter/ipt_iprange.c | 2 +- net/ipv4/netfilter/ipt_owner.c | 2 +- net/ipv4/netfilter/ipt_recent.c | 4 ++-- net/ipv4/netfilter/ipt_tos.c | 2 +- net/ipv4/netfilter/ipt_ttl.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 12 ++++++------ net/ipv6/netfilter/ip6t_ah.c | 6 +++--- net/ipv6/netfilter/ip6t_eui64.c | 4 ++-- net/ipv6/netfilter/ip6t_frag.c | 6 +++--- net/ipv6/netfilter/ip6t_hbh.c | 6 +++--- net/ipv6/netfilter/ip6t_hl.c | 2 +- net/ipv6/netfilter/ip6t_ipv6header.c | 2 +- net/ipv6/netfilter/ip6t_mh.c | 6 +++--- net/ipv6/netfilter/ip6t_owner.c | 2 +- net/ipv6/netfilter/ip6t_rt.c | 6 +++--- net/netfilter/xt_comment.c | 2 +- net/netfilter/xt_connbytes.c | 2 +- net/netfilter/xt_connmark.c | 2 +- net/netfilter/xt_conntrack.c | 2 +- net/netfilter/xt_dccp.c | 12 ++++++------ net/netfilter/xt_dscp.c | 4 ++-- net/netfilter/xt_esp.c | 4 ++-- net/netfilter/xt_hashlimit.c | 4 ++-- net/netfilter/xt_helper.c | 2 +- net/netfilter/xt_length.c | 4 ++-- net/netfilter/xt_limit.c | 2 +- net/netfilter/xt_mac.c | 2 +- net/netfilter/xt_mark.c | 2 +- net/netfilter/xt_multiport.c | 8 ++++---- net/netfilter/xt_physdev.c | 2 +- net/netfilter/xt_pkttype.c | 2 +- net/netfilter/xt_policy.c | 2 +- net/netfilter/xt_quota.c | 2 +- net/netfilter/xt_realm.c | 2 +- net/netfilter/xt_sctp.c | 8 ++++---- net/netfilter/xt_state.c | 2 +- net/netfilter/xt_statistic.c | 2 +- net/netfilter/xt_string.c | 2 +- net/netfilter/xt_tcpmss.c | 4 ++-- net/netfilter/xt_tcpudp.c | 16 ++++++++-------- 46 files changed, 92 insertions(+), 92 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 7e733a6ba4f6..b8577d18d10d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -148,7 +148,7 @@ struct xt_match const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop); + bool *hotdrop); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index cae41215e3c7..1d75a5cd7b44 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -224,7 +224,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; struct arphdr *arp; - int hotdrop = 0; + bool hotdrop = false; struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9bacf1a03630..e2a893825656 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -188,7 +188,7 @@ int do_match(struct ipt_entry_match *m, const struct net_device *in, const struct net_device *out, int offset, - int *hotdrop) + bool *hotdrop) { /* Stop iteration if it doesn't match */ if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, @@ -216,7 +216,7 @@ ipt_do_table(struct sk_buff **pskb, u_int16_t offset; struct iphdr *ip; u_int16_t datalen; - int hotdrop = 0; + bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; @@ -2122,7 +2122,7 @@ icmp_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct icmphdr _icmph, *ic; const struct ipt_icmp *icmpinfo = matchinfo; @@ -2137,7 +2137,7 @@ icmp_match(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index a652a1451552..a9a9b750ff2d 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -30,7 +30,7 @@ static inline int match_type(__be32 addr, u_int16_t mask) static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_addrtype_info *info = matchinfo; const struct iphdr *iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 18a16782cf40..9a244e406a48 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -44,7 +44,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct ip_auth_hdr _ahdr, *ah; const struct ipt_ah *ahinfo = matchinfo; @@ -60,7 +60,7 @@ match(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("Dropping evil AH tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 26218122f865..a47f3745553b 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -30,7 +30,7 @@ static inline int match_ip(const struct sk_buff *skb, static inline int match_tcp(const struct sk_buff *skb, const struct ipt_ecn_info *einfo, - int *hotdrop) + bool *hotdrop) { struct tcphdr _tcph, *th; @@ -39,7 +39,7 @@ static inline int match_tcp(const struct sk_buff *skb, */ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (th == NULL) { - *hotdrop = 0; + *hotdrop = false; return 0; } @@ -69,7 +69,7 @@ static inline int match_tcp(const struct sk_buff *skb, static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_ecn_info *info = matchinfo; diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index 33af9e940887..86f225c1d067 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c @@ -29,7 +29,7 @@ match(const struct sk_buff *skb, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_iprange_info *info = matchinfo; const struct iphdr *iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 7fae9aa8944c..92be562c4aca 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -29,7 +29,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct ipt_owner_info *info = matchinfo; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 15a9e8bbb7cc..81f1a017f311 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -173,7 +173,7 @@ static int ipt_recent_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; @@ -201,7 +201,7 @@ ipt_recent_match(const struct sk_buff *skb, goto out; e = recent_entry_init(t, addr, ttl); if (e == NULL) - *hotdrop = 1; + *hotdrop = true; ret ^= 1; goto out; } diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c index d314844af12b..803ed4c35b55 100644 --- a/net/ipv4/netfilter/ipt_tos.c +++ b/net/ipv4/netfilter/ipt_tos.c @@ -26,7 +26,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct ipt_tos_info *info = matchinfo; diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index ab02d9e3139c..e7316b27d2c5 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -21,7 +21,7 @@ MODULE_LICENSE("GPL"); static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_ttl_info *info = matchinfo; const u8 ttl = ip_hdr(skb)->ttl; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9aa624026688..13c66a75c21c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -102,7 +102,7 @@ ip6_packet_match(const struct sk_buff *skb, const char *outdev, const struct ip6t_ip6 *ip6info, unsigned int *protoff, - int *fragoff, int *hotdrop) + int *fragoff, bool *hotdrop) { size_t i; unsigned long ret; @@ -162,7 +162,7 @@ ip6_packet_match(const struct sk_buff *skb, protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off); if (protohdr < 0) { if (_frag_off == 0) - *hotdrop = 1; + *hotdrop = true; return 0; } *fragoff = _frag_off; @@ -225,7 +225,7 @@ int do_match(struct ip6t_entry_match *m, const struct net_device *out, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { /* Stop iteration if it doesn't match */ if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, @@ -252,7 +252,7 @@ ip6t_do_table(struct sk_buff **pskb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); int offset = 0; unsigned int protoff = 0; - int hotdrop = 0; + bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; @@ -1299,7 +1299,7 @@ icmp6_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct icmp6hdr _icmp, *ic; const struct ip6t_icmp *icmpinfo = matchinfo; @@ -1313,7 +1313,7 @@ icmp6_match(const struct sk_buff *skb, /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index d3c154371b41..27b7bd279c0e 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -49,7 +49,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct ip_auth_hdr *ah, _ah; const struct ip6t_ah *ahinfo = matchinfo; @@ -60,13 +60,13 @@ match(const struct sk_buff *skb, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = 1; + *hotdrop = true; return 0; } ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); if (ah == NULL) { - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 0f3dd932f0a6..69e79e19040e 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -27,7 +27,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { unsigned char eui64[8]; int i = 0; @@ -35,7 +35,7 @@ match(const struct sk_buff *skb, if (!(skb_mac_header(skb) >= skb->head && (skb_mac_header(skb) + ETH_HLEN) <= skb->data) && offset != 0) { - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 5a5da71321b6..740fdcafa5f3 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -48,7 +48,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct frag_hdr _frag, *fh; const struct ip6t_frag *fraginfo = matchinfo; @@ -58,13 +58,13 @@ match(const struct sk_buff *skb, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = 1; + *hotdrop = true; return 0; } fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); if (fh == NULL) { - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index d2373c7cd354..5633de160c6d 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -55,7 +55,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; @@ -71,13 +71,13 @@ match(const struct sk_buff *skb, err = ipv6_find_hdr(skb, &ptr, match->data, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = 1; + *hotdrop = true; return 0; } oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); if (oh == NULL) { - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index d606c0e6d6fd..cbf49cffa067 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -22,7 +22,7 @@ MODULE_LICENSE("GPL"); static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { const struct ip6t_hl_info *info = matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index fd6a0869099b..469dec27c649 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -34,7 +34,7 @@ ipv6header_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct ip6t_ipv6header_info *info = matchinfo; unsigned int temp; diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index c2a909893a64..c27647b6c274 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -48,7 +48,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct ip6_mh _mh, *mh; const struct ip6t_mh *mhinfo = matchinfo; @@ -62,14 +62,14 @@ match(const struct sk_buff *skb, /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil MH tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } if (mh->ip6mh_proto != IPPROTO_NONE) { duprintf("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 43738bba00b5..f90f7c32cc9e 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -31,7 +31,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct ip6t_owner_info *info = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 81ab00d8c182..2bb88214cfda 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -50,7 +50,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct ipv6_rt_hdr _route, *rh; const struct ip6t_rt *rtinfo = matchinfo; @@ -64,13 +64,13 @@ match(const struct sk_buff *skb, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = 1; + *hotdrop = true; return 0; } rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); if (rh == NULL) { - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 7db492d65220..20690ea0d466 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -23,7 +23,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protooff, - int *hotdrop) + bool *hotdrop) { /* We always match */ return 1; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 804afe55e141..8fe5775901e1 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -23,7 +23,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_connbytes_info *sinfo = matchinfo; struct nf_conn *ct; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e1803256c792..8a6d58ab5d2b 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -38,7 +38,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_connmark_info *info = matchinfo; struct nf_conn *ct; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 189ded5f378b..915c730d3b72 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -27,7 +27,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_conntrack_info *sinfo = matchinfo; struct nf_conn *ct; diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 2c9c0dee8aaf..3172e7308b35 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -36,7 +36,7 @@ dccp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, const struct dccp_hdr *dh, - int *hotdrop) + bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ unsigned char *op; @@ -45,7 +45,7 @@ dccp_find_option(u_int8_t option, unsigned int i; if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) { - *hotdrop = 1; + *hotdrop = true; return 0; } @@ -57,7 +57,7 @@ dccp_find_option(u_int8_t option, if (op == NULL) { /* If we don't have the whole header, drop packet. */ spin_unlock_bh(&dccp_buflock); - *hotdrop = 1; + *hotdrop = true; return 0; } @@ -86,7 +86,7 @@ match_types(const struct dccp_hdr *dh, u_int16_t typemask) static inline int match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, - const struct dccp_hdr *dh, int *hotdrop) + const struct dccp_hdr *dh, bool *hotdrop) { return dccp_find_option(option, skb, protoff, dh, hotdrop); } @@ -99,7 +99,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_dccp_info *info = matchinfo; struct dccp_hdr _dh, *dh; @@ -109,7 +109,7 @@ match(const struct sk_buff *skb, dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh); if (dh == NULL) { - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 56b247ecc283..c106d738da6d 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -29,7 +29,7 @@ static int match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_dscp_info *info = matchinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; @@ -44,7 +44,7 @@ static int match6(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_dscp_info *info = matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 7c95f149d942..5d3421bcd850 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -50,7 +50,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct ip_esp_hdr _esp, *eh; const struct xt_esp *espinfo = matchinfo; @@ -65,7 +65,7 @@ match(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ESP tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d3043fa32ebc..cd5cba6978c3 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -440,7 +440,7 @@ hashlimit_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct xt_hashlimit_info *r = ((struct xt_hashlimit_info *)matchinfo)->u.master; @@ -487,7 +487,7 @@ hashlimit_match(const struct sk_buff *skb, return 0; hotdrop: - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index c139b2f43a10..0aa090776e27 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -36,7 +36,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_helper_info *info = matchinfo; struct nf_conn *ct; diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index 77288c5ada78..621c9ee6d1c9 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -28,7 +28,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_length_info *info = matchinfo; u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); @@ -44,7 +44,7 @@ match6(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_length_info *info = matchinfo; const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) + diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 571a72ab89ad..1133b4ca4904 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -65,7 +65,7 @@ ipt_limit_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct xt_rateinfo *r = ((struct xt_rateinfo *)matchinfo)->master; unsigned long now = jiffies; diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 1d3a1d98b885..0e6a28647206 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -32,7 +32,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_mac_info *info = matchinfo; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 39911dddb011..944d1ea56029 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -27,7 +27,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_mark_info *info = matchinfo; diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 4dce2a81702a..1dc53ded9887 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -102,7 +102,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { __be16 _ports[2], *pptr; const struct xt_multiport *multiinfo = matchinfo; @@ -116,7 +116,7 @@ match(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } @@ -133,7 +133,7 @@ match_v1(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { __be16 _ports[2], *pptr; const struct xt_multiport_v1 *multiinfo = matchinfo; @@ -147,7 +147,7 @@ match_v1(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 35a0fe200c39..a6de512fa840 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -31,7 +31,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { int i; static const char nulldevname[IFNAMSIZ]; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index e1409fc5c288..692581f40c5f 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -28,7 +28,7 @@ static int match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { u_int8_t type; const struct xt_pkttype_info *info = matchinfo; diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 15b45a95ec13..6878482cd527 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -115,7 +115,7 @@ static int match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_policy_info *info = matchinfo; int ret; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index bfdde06ca0b7..53c71ac980fc 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -20,7 +20,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { struct xt_quota_info *q = ((struct xt_quota_info *)matchinfo)->master; int ret = q->flags & XT_QUOTA_INVERT ? 1 : 0; diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index c2017f8af9c4..41451f57919c 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -29,7 +29,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_realm_info *info = matchinfo; struct dst_entry *dst = skb->dst; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index f86d8d769d47..e581afe89098 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -47,7 +47,7 @@ match_packet(const struct sk_buff *skb, int chunk_match_type, const struct xt_sctp_flag_info *flag_info, const int flag_count, - int *hotdrop) + bool *hotdrop) { u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)]; sctp_chunkhdr_t _sch, *sch; @@ -64,7 +64,7 @@ match_packet(const struct sk_buff *skb, sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); if (sch == NULL || sch->length == 0) { duprintf("Dropping invalid SCTP packet.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } @@ -127,7 +127,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_sctp_info *info = matchinfo; sctp_sctphdr_t _sh, *sh; @@ -140,7 +140,7 @@ match(const struct sk_buff *skb, sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh); if (sh == NULL) { duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 149294f7df71..74fe069fc3aa 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -28,7 +28,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_state_info *sinfo = matchinfo; enum ip_conntrack_info ctinfo; diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 091a9f89f5d5..4e5ed81e9ce1 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -28,7 +28,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; int ret = info->flags & XT_STATISTIC_INVERT ? 1 : 0; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 999a005dbd0c..7552d8927570 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -28,7 +28,7 @@ static int match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_string_info *conf = matchinfo; struct ts_state state; diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 80571d0749f7..0db4f5362180 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -31,7 +31,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_tcpmss_match_info *info = matchinfo; struct tcphdr _tcph, *th; @@ -77,7 +77,7 @@ out: return info->invert; dropit: - *hotdrop = 1; + *hotdrop = true; return 0; } diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 46414b562a19..ca9ccdd931bc 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -42,7 +42,7 @@ tcp_find_option(u_int8_t option, unsigned int protoff, unsigned int optlen, int invert, - int *hotdrop) + bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; @@ -57,7 +57,7 @@ tcp_find_option(u_int8_t option, op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr), optlen, _opt); if (op == NULL) { - *hotdrop = 1; + *hotdrop = true; return 0; } @@ -78,7 +78,7 @@ tcp_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct tcphdr _tcph, *th; const struct xt_tcp *tcpinfo = matchinfo; @@ -92,7 +92,7 @@ tcp_match(const struct sk_buff *skb, */ if (offset == 1) { duprintf("Dropping evil TCP offset=1 frag.\n"); - *hotdrop = 1; + *hotdrop = true; } /* Must not be a fragment. */ return 0; @@ -105,7 +105,7 @@ tcp_match(const struct sk_buff *skb, /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } @@ -123,7 +123,7 @@ tcp_match(const struct sk_buff *skb, return 0; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { - *hotdrop = 1; + *hotdrop = true; return 0; } if (!tcp_find_option(tcpinfo->option, skb, protoff, @@ -157,7 +157,7 @@ udp_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct udphdr _udph, *uh; const struct xt_udp *udpinfo = matchinfo; @@ -171,7 +171,7 @@ udp_match(const struct sk_buff *skb, /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil UDP tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } -- cgit v1.2.3 From 1d93a9cbad608f6398ba6c5b588c504ccd35a2ca Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:15:35 -0700 Subject: [NETFILTER]: x_tables: switch xt_match->match to bool Switch the return type of match functions to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 16 +++++++------- net/ipv4/netfilter/ip_tables.c | 26 +++++++++++----------- net/ipv4/netfilter/ipt_addrtype.c | 12 +++++------ net/ipv4/netfilter/ipt_ah.c | 10 ++++----- net/ipv4/netfilter/ipt_ecn.c | 38 ++++++++++++++++---------------- net/ipv4/netfilter/ipt_iprange.c | 8 +++---- net/ipv4/netfilter/ipt_owner.c | 10 ++++----- net/ipv4/netfilter/ipt_recent.c | 12 +++++------ net/ipv4/netfilter/ipt_tos.c | 2 +- net/ipv4/netfilter/ipt_ttl.c | 12 +++++------ net/ipv6/netfilter/ip6_tables.c | 42 ++++++++++++++++++------------------ net/ipv6/netfilter/ip6t_ah.c | 12 +++++------ net/ipv6/netfilter/ip6t_eui64.c | 8 +++---- net/ipv6/netfilter/ip6t_frag.c | 12 +++++------ net/ipv6/netfilter/ip6t_hbh.c | 18 ++++++++-------- net/ipv6/netfilter/ip6t_hl.c | 12 +++++------ net/ipv6/netfilter/ip6t_ipv6header.c | 6 +++--- net/ipv6/netfilter/ip6t_mh.c | 17 ++++++--------- net/ipv6/netfilter/ip6t_owner.c | 10 ++++----- net/ipv6/netfilter/ip6t_rt.c | 26 +++++++++++----------- net/netfilter/xt_comment.c | 4 ++-- net/netfilter/xt_connbytes.c | 4 ++-- net/netfilter/xt_connmark.c | 4 ++-- net/netfilter/xt_conntrack.c | 24 ++++++++++----------- net/netfilter/xt_dccp.c | 22 +++++++++---------- net/netfilter/xt_dscp.c | 32 +++++++++++++-------------- net/netfilter/xt_esp.c | 12 +++++------ net/netfilter/xt_hashlimit.c | 17 ++++++++------- net/netfilter/xt_helper.c | 6 +++--- net/netfilter/xt_length.c | 4 ++-- net/netfilter/xt_limit.c | 6 +++--- net/netfilter/xt_mac.c | 2 +- net/netfilter/xt_mark.c | 2 +- net/netfilter/xt_multiport.c | 34 ++++++++++++++--------------- net/netfilter/xt_physdev.c | 34 ++++++++++++++--------------- net/netfilter/xt_pkttype.c | 2 +- net/netfilter/xt_policy.c | 26 +++++++++++----------- net/netfilter/xt_quota.c | 6 +++--- net/netfilter/xt_realm.c | 2 +- net/netfilter/xt_sctp.c | 26 +++++++++++----------- net/netfilter/xt_state.c | 2 +- net/netfilter/xt_statistic.c | 8 +++---- net/netfilter/xt_string.c | 16 +++++++------- net/netfilter/xt_tcpmss.c | 4 ++-- net/netfilter/xt_tcpudp.c | 39 ++++++++++++++++----------------- 45 files changed, 320 insertions(+), 327 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b8577d18d10d..304fce356a43 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -141,14 +141,14 @@ struct xt_match /* Arguments changed since 2.6.9, as this must now handle non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ - int (*match)(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop); + bool (*match)(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e2a893825656..b9c792dd4890 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -183,19 +183,19 @@ ipt_error(struct sk_buff **pskb, } static inline -int do_match(struct ipt_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - bool *hotdrop) +bool do_match(struct ipt_entry_match *m, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int offset, + bool *hotdrop) { /* Stop iteration if it doesn't match */ if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, offset, ip_hdrlen(skb), hotdrop)) - return 1; + return true; else - return 0; + return false; } static inline struct ipt_entry * @@ -2105,16 +2105,16 @@ void ipt_unregister_table(struct xt_table *table) } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline int +static inline bool icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, u_int8_t type, u_int8_t code, - int invert) + bool invert) { return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code)) ^ invert; } -static int +static bool icmp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -2129,7 +2129,7 @@ icmp_match(const struct sk_buff *skb, /* Must not be a fragment. */ if (offset) - return 0; + return false; ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); if (ic == NULL) { @@ -2138,7 +2138,7 @@ icmp_match(const struct sk_buff *skb, */ duprintf("Dropping evil ICMP tinygram.\n"); *hotdrop = true; - return 0; + return false; } return icmp_type_code_match(icmpinfo->type, diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index a9a9b750ff2d..abea446a4437 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -22,19 +22,19 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("iptables addrtype match"); -static inline int match_type(__be32 addr, u_int16_t mask) +static inline bool match_type(__be32 addr, u_int16_t mask) { return !!(mask & (1 << inet_addr_type(addr))); } -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, bool *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_addrtype_info *info = matchinfo; const struct iphdr *iph = ip_hdr(skb); - int ret = 1; + bool ret = true; if (info->source) ret &= match_type(iph->saddr, info->source)^info->invert_source; diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 9a244e406a48..3da39ee92d8b 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -25,10 +25,10 @@ MODULE_DESCRIPTION("iptables AH SPI match module"); #endif /* Returns 1 if the spi is matched by the range, 0 otherwise */ -static inline int -spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { - int r=0; + bool r; duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', min,spi,max); r=(spi >= min && spi <= max) ^ invert; @@ -36,7 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -51,7 +51,7 @@ match(const struct sk_buff *skb, /* Must not be a fragment. */ if (offset) - return 0; + return false; ah = skb_header_pointer(skb, protoff, sizeof(_ahdr), &_ahdr); diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index a47f3745553b..ba3a17e0f848 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -22,15 +22,15 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables ECN matching module"); MODULE_LICENSE("GPL"); -static inline int match_ip(const struct sk_buff *skb, - const struct ipt_ecn_info *einfo) +static inline bool match_ip(const struct sk_buff *skb, + const struct ipt_ecn_info *einfo) { return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; } -static inline int match_tcp(const struct sk_buff *skb, - const struct ipt_ecn_info *einfo, - bool *hotdrop) +static inline bool match_tcp(const struct sk_buff *skb, + const struct ipt_ecn_info *einfo, + bool *hotdrop) { struct tcphdr _tcph, *th; @@ -40,51 +40,51 @@ static inline int match_tcp(const struct sk_buff *skb, th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (th == NULL) { *hotdrop = false; - return 0; + return false; } if (einfo->operation & IPT_ECN_OP_MATCH_ECE) { if (einfo->invert & IPT_ECN_OP_MATCH_ECE) { if (th->ece == 1) - return 0; + return false; } else { if (th->ece == 0) - return 0; + return false; } } if (einfo->operation & IPT_ECN_OP_MATCH_CWR) { if (einfo->invert & IPT_ECN_OP_MATCH_CWR) { if (th->cwr == 1) - return 0; + return false; } else { if (th->cwr == 0) - return 0; + return false; } } - return 1; + return true; } -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, bool *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_ecn_info *info = matchinfo; if (info->operation & IPT_ECN_OP_MATCH_IP) if (!match_ip(skb, info)) - return 0; + return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { if (ip_hdr(skb)->protocol != IPPROTO_TCP) - return 0; + return false; if (!match_tcp(skb, info, hotdrop)) - return 0; + return false; } - return 1; + return true; } static int checkentry(const char *tablename, const void *ip_void, diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index 86f225c1d067..b266d98aac8c 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c @@ -23,7 +23,7 @@ MODULE_DESCRIPTION("iptables arbitrary IP range match module"); #define DEBUGP(format, args...) #endif -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -44,7 +44,7 @@ match(const struct sk_buff *skb, info->flags & IPRANGE_SRC_INV ? "(INV) " : "", NIPQUAD(info->src.min_ip), NIPQUAD(info->src.max_ip)); - return 0; + return false; } } if (info->flags & IPRANGE_DST) { @@ -57,10 +57,10 @@ match(const struct sk_buff *skb, info->flags & IPRANGE_DST_INV ? "(INV) " : "", NIPQUAD(info->dst.min_ip), NIPQUAD(info->dst.max_ip)); - return 0; + return false; } } - return 1; + return true; } static struct xt_match iprange_match = { diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 92be562c4aca..8f441cef5504 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -21,7 +21,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables owner match"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -34,21 +34,21 @@ match(const struct sk_buff *skb, const struct ipt_owner_info *info = matchinfo; if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) - return 0; + return false; if(info->match & IPT_OWNER_UID) { if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ !!(info->invert & IPT_OWNER_UID)) - return 0; + return false; } if(info->match & IPT_OWNER_GID) { if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ !!(info->invert & IPT_OWNER_GID)) - return 0; + return false; } - return 1; + return true; } static int diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 81f1a017f311..2e513ed9b6e9 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -169,7 +169,7 @@ static void recent_table_flush(struct recent_table *t) } } -static int +static bool ipt_recent_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, @@ -180,7 +180,7 @@ ipt_recent_match(const struct sk_buff *skb, struct recent_entry *e; __be32 addr; u_int8_t ttl; - int ret = info->invert; + bool ret = info->invert; if (info->side == IPT_RECENT_DEST) addr = ip_hdr(skb)->daddr; @@ -202,15 +202,15 @@ ipt_recent_match(const struct sk_buff *skb, e = recent_entry_init(t, addr, ttl); if (e == NULL) *hotdrop = true; - ret ^= 1; + ret = !ret; goto out; } if (info->check_set & IPT_RECENT_SET) - ret ^= 1; + ret = !ret; else if (info->check_set & IPT_RECENT_REMOVE) { recent_entry_remove(t, e); - ret ^= 1; + ret = !ret; } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { unsigned long t = jiffies - info->seconds * HZ; unsigned int i, hits = 0; @@ -219,7 +219,7 @@ ipt_recent_match(const struct sk_buff *skb, if (info->seconds && time_after(t, e->stamps[i])) continue; if (++hits >= info->hit_count) { - ret ^= 1; + ret = !ret; break; } } diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c index 803ed4c35b55..67699ae46d37 100644 --- a/net/ipv4/netfilter/ipt_tos.c +++ b/net/ipv4/netfilter/ipt_tos.c @@ -18,7 +18,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("iptables TOS match module"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index e7316b27d2c5..82fe4ea8ab79 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -18,10 +18,10 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("IP tables TTL matching module"); MODULE_LICENSE("GPL"); -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, bool *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_ttl_info *info = matchinfo; const u8 ttl = ip_hdr(skb)->ttl; @@ -42,10 +42,10 @@ static int match(const struct sk_buff *skb, default: printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", info->mode); - return 0; + return false; } - return 0; + return false; } static struct xt_match ttl_match = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 13c66a75c21c..31f42e82184a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -96,7 +96,7 @@ ip6t_ext_hdr(u8 nexthdr) } /* Returns whether matches rule or not. */ -static inline int +static inline bool ip6_packet_match(const struct sk_buff *skb, const char *indev, const char *outdev, @@ -122,7 +122,7 @@ ip6_packet_match(const struct sk_buff *skb, dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, ipinfo->dmsk.s_addr, ipinfo->dst.s_addr, ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/ - return 0; + return false; } /* Look for ifname matches; this should unroll nicely. */ @@ -136,7 +136,7 @@ ip6_packet_match(const struct sk_buff *skb, dprintf("VIA in mismatch (%s vs %s).%s\n", indev, ip6info->iniface, ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":""); - return 0; + return false; } for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { @@ -149,7 +149,7 @@ ip6_packet_match(const struct sk_buff *skb, dprintf("VIA out mismatch (%s vs %s).%s\n", outdev, ip6info->outiface, ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":""); - return 0; + return false; } /* ... might want to do something with class and flowlabel here ... */ @@ -163,7 +163,7 @@ ip6_packet_match(const struct sk_buff *skb, if (protohdr < 0) { if (_frag_off == 0) *hotdrop = true; - return 0; + return false; } *fragoff = _frag_off; @@ -174,17 +174,17 @@ ip6_packet_match(const struct sk_buff *skb, if (ip6info->proto == protohdr) { if(ip6info->invflags & IP6T_INV_PROTO) { - return 0; + return false; } - return 1; + return true; } /* We need match for the '-p all', too! */ if ((ip6info->proto != 0) && !(ip6info->invflags & IP6T_INV_PROTO)) - return 0; + return false; } - return 1; + return true; } /* should be ip6 safe */ @@ -219,20 +219,20 @@ ip6t_error(struct sk_buff **pskb, } static inline -int do_match(struct ip6t_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - unsigned int protoff, - bool *hotdrop) +bool do_match(struct ip6t_entry_match *m, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int offset, + unsigned int protoff, + bool *hotdrop) { /* Stop iteration if it doesn't match */ if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, offset, protoff, hotdrop)) - return 1; + return true; else - return 0; + return false; } static inline struct ip6t_entry * @@ -1291,7 +1291,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, ^ invert; } -static int +static bool icmp6_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -1306,7 +1306,7 @@ icmp6_match(const struct sk_buff *skb, /* Must not be a fragment. */ if (offset) - return 0; + return false; ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp); if (ic == NULL) { @@ -1314,7 +1314,7 @@ icmp6_match(const struct sk_buff *skb, can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); *hotdrop = true; - return 0; + return false; } return icmp6_type_code_match(icmpinfo->type, diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 27b7bd279c0e..607c2eb1296f 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -30,10 +30,10 @@ MODULE_AUTHOR("Andras Kis-Szabo "); #endif /* Returns 1 if the spi is matched by the range, 0 otherwise */ -static inline int -spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { - int r=0; + bool r; DEBUGP("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', min,spi,max); r = (spi >= min && spi <= max) ^ invert; @@ -41,7 +41,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -61,13 +61,13 @@ match(const struct sk_buff *skb, if (err < 0) { if (err != -ENOENT) *hotdrop = true; - return 0; + return false; } ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); if (ah == NULL) { *hotdrop = true; - return 0; + return false; } hdrlen = (ah->hdrlen + 2) << 2; diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 69e79e19040e..bebb12a1d0e6 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -19,7 +19,7 @@ MODULE_DESCRIPTION("IPv6 EUI64 address checking match"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo "); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -36,7 +36,7 @@ match(const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) <= skb->data) && offset != 0) { *hotdrop = true; - return 0; + return false; } memset(eui64, 0, sizeof(eui64)); @@ -55,11 +55,11 @@ match(const struct sk_buff *skb, i++; if (i == 8) - return 1; + return true; } } - return 0; + return false; } static struct xt_match eui64_match = { diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 740fdcafa5f3..0ed5fbcf1f18 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -29,10 +29,10 @@ MODULE_AUTHOR("Andras Kis-Szabo "); #endif /* Returns 1 if the id is matched by the range, 0 otherwise */ -static inline int -id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) +static inline bool +id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { - int r = 0; + bool r; DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', min, id, max); r = (id >= min && id <= max) ^ invert; @@ -40,7 +40,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -59,13 +59,13 @@ match(const struct sk_buff *skb, if (err < 0) { if (err != -ENOENT) *hotdrop = true; - return 0; + return false; } fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); if (fh == NULL) { *hotdrop = true; - return 0; + return false; } DEBUGP("INFO %04X ", fh->frag_off); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 5633de160c6d..4b05393faa68 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -47,7 +47,7 @@ MODULE_ALIAS("ip6t_dst"); * 5 -> RTALERT 2 x x */ -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -62,7 +62,7 @@ match(const struct sk_buff *skb, unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; - unsigned int ret = 0; + bool ret = false; u8 _opttype, *tp = NULL; u8 _optlen, *lp = NULL; unsigned int optlen; @@ -72,19 +72,19 @@ match(const struct sk_buff *skb, if (err < 0) { if (err != -ENOENT) *hotdrop = true; - return 0; + return false; } oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); if (oh == NULL) { *hotdrop = true; - return 0; + return false; } hdrlen = ipv6_optlen(oh); if (skb->len - ptr < hdrlen) { /* Packet smaller than it's length field */ - return 0; + return false; } DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); @@ -123,7 +123,7 @@ match(const struct sk_buff *skb, DEBUGP("Tbad %02X %02X\n", *tp, (optinfo->opts[temp] & 0xFF00) >> 8); - return 0; + return false; } else { DEBUGP("Tok "); } @@ -144,7 +144,7 @@ match(const struct sk_buff *skb, if (spec_len != 0x00FF && spec_len != *lp) { DEBUGP("Lbad %02X %04X\n", *lp, spec_len); - return 0; + return false; } DEBUGP("Lok "); optlen = *lp + 2; @@ -167,10 +167,10 @@ match(const struct sk_buff *skb, if (temp == optinfo->optsnr) return ret; else - return 0; + return false; } - return 0; + return false; } /* Called when user tries to insert an entry of this type. */ diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index cbf49cffa067..b933e84a06a4 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -19,10 +19,10 @@ MODULE_AUTHOR("Maciej Soltysiak "); MODULE_DESCRIPTION("IP tables Hop Limit matching module"); MODULE_LICENSE("GPL"); -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, bool *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) { const struct ip6t_hl_info *info = matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -43,10 +43,10 @@ static int match(const struct sk_buff *skb, default: printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", info->mode); - return 0; + return false; } - return 0; + return false; } static struct xt_match hl_match = { diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 469dec27c649..3222e8959426 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -26,7 +26,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 headers match"); MODULE_AUTHOR("Andras Kis-Szabo "); -static int +static bool ipv6header_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -58,7 +58,7 @@ ipv6header_match(const struct sk_buff *skb, /* Is there enough space for the next ext header? */ if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; + return false; /* No more exthdr -> evaluate */ if (nexthdr == NEXTHDR_NONE) { temp |= MASK_NONE; @@ -99,7 +99,7 @@ ipv6header_match(const struct sk_buff *skb, temp |= MASK_DSTOPTS; break; default: - return 0; + return false; break; } diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index c27647b6c274..ddffe03a8b37 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -31,16 +31,13 @@ MODULE_LICENSE("GPL"); #endif /* Returns 1 if the type is matched by the range, 0 otherwise */ -static inline int -type_match(u_int8_t min, u_int8_t max, u_int8_t type, int invert) +static inline bool +type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) { - int ret; - - ret = (type >= min && type <= max) ^ invert; - return ret; + return (type >= min && type <= max) ^ invert; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -55,7 +52,7 @@ match(const struct sk_buff *skb, /* Must not be a fragment. */ if (offset) - return 0; + return false; mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh); if (mh == NULL) { @@ -63,14 +60,14 @@ match(const struct sk_buff *skb, can't. Hence, no choice but to drop. */ duprintf("Dropping evil MH tinygram.\n"); *hotdrop = true; - return 0; + return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { duprintf("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); *hotdrop = true; - return 0; + return false; } return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index f90f7c32cc9e..cadd0a64fed7 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -23,7 +23,7 @@ MODULE_DESCRIPTION("IP6 tables owner matching module"); MODULE_LICENSE("GPL"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -36,21 +36,21 @@ match(const struct sk_buff *skb, const struct ip6t_owner_info *info = matchinfo; if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) - return 0; + return false; if (info->match & IP6T_OWNER_UID) { if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ !!(info->invert & IP6T_OWNER_UID)) - return 0; + return false; } if (info->match & IP6T_OWNER_GID) { if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ !!(info->invert & IP6T_OWNER_GID)) - return 0; + return false; } - return 1; + return true; } static int diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 2bb88214cfda..7966f4a5e9b7 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -31,10 +31,10 @@ MODULE_AUTHOR("Andras Kis-Szabo "); #endif /* Returns 1 if the id is matched by the range, 0 otherwise */ -static inline int -segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) +static inline bool +segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { - int r = 0; + bool r; DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', min, id, max); r = (id >= min && id <= max) ^ invert; @@ -42,7 +42,7 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -57,7 +57,7 @@ match(const struct sk_buff *skb, unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; - unsigned int ret = 0; + bool ret = false; struct in6_addr *ap, _addr; int err; @@ -65,19 +65,19 @@ match(const struct sk_buff *skb, if (err < 0) { if (err != -ENOENT) *hotdrop = true; - return 0; + return false; } rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); if (rh == NULL) { *hotdrop = true; - return 0; + return false; } hdrlen = ipv6_optlen(rh); if (skb->len - ptr < hdrlen) { /* Pcket smaller than its length field */ - return 0; + return false; } DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); @@ -136,7 +136,7 @@ match(const struct sk_buff *skb, DEBUGP("Not strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { DEBUGP("There isn't enough space\n"); - return 0; + return false; } else { unsigned int i = 0; @@ -164,13 +164,13 @@ match(const struct sk_buff *skb, if (i == rtinfo->addrnr) return ret; else - return 0; + return false; } } else { DEBUGP("Strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { DEBUGP("There isn't enough space\n"); - return 0; + return false; } else { DEBUGP("#%d ", rtinfo->addrnr); for (temp = 0; temp < rtinfo->addrnr; temp++) { @@ -190,11 +190,11 @@ match(const struct sk_buff *skb, (temp == (unsigned int)((hdrlen - 8) / 16))) return ret; else - return 0; + return false; } } - return 0; + return false; } /* Called when user tries to insert an entry of this type. */ diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 20690ea0d466..aa9503ff90ba 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -15,7 +15,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_comment"); MODULE_ALIAS("ip6t_comment"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -26,7 +26,7 @@ match(const struct sk_buff *skb, bool *hotdrop) { /* We always match */ - return 1; + return true; } static struct xt_match xt_comment_match[] = { diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 8fe5775901e1..aada7b797549 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -15,7 +15,7 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); MODULE_ALIAS("ipt_connbytes"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -35,7 +35,7 @@ match(const struct sk_buff *skb, ct = nf_ct_get(skb, &ctinfo); if (!ct) - return 0; + return false; counters = ct->counters; switch (sinfo->what) { diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 8a6d58ab5d2b..3321b80aff4f 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -30,7 +30,7 @@ MODULE_DESCRIPTION("IP tables connmark match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connmark"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -46,7 +46,7 @@ match(const struct sk_buff *skb, ct = nf_ct_get(skb, &ctinfo); if (!ct) - return 0; + return false; return (((ct->mark) & info->mask) == info->mark) ^ info->invert; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 915c730d3b72..26901f95bf4b 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -19,7 +19,7 @@ MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables connection tracking match module"); MODULE_ALIAS("ipt_conntrack"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -54,53 +54,53 @@ match(const struct sk_buff *skb, } if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) - return 0; + return false; } if (ct == NULL) { if (sinfo->flags & ~XT_CONNTRACK_STATE) - return 0; - return 1; + return false; + return true; } if (sinfo->flags & XT_CONNTRACK_PROTO && FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_ORIGSRC && FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip & sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_ORIGDST && FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip & sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_REPLSRC && FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip & sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_REPLDST && FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip & sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_STATUS && FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) - return 0; + return false; if(sinfo->flags & XT_CONNTRACK_EXPIRES) { unsigned long expires = timer_pending(&ct->timeout) ? @@ -109,9 +109,9 @@ match(const struct sk_buff *skb, if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) - return 0; + return false; } - return 1; + return true; } static int diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 3172e7308b35..b0eba4e2c53f 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -31,7 +31,7 @@ MODULE_ALIAS("ipt_dccp"); static unsigned char *dccp_optbuf; static DEFINE_SPINLOCK(dccp_buflock); -static inline int +static inline bool dccp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, @@ -46,11 +46,11 @@ dccp_find_option(u_int8_t option, if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) { *hotdrop = true; - return 0; + return false; } if (!optlen) - return 0; + return false; spin_lock_bh(&dccp_buflock); op = skb_header_pointer(skb, protoff + optoff, optlen, dccp_optbuf); @@ -58,13 +58,13 @@ dccp_find_option(u_int8_t option, /* If we don't have the whole header, drop packet. */ spin_unlock_bh(&dccp_buflock); *hotdrop = true; - return 0; + return false; } for (i = 0; i < optlen; ) { if (op[i] == option) { spin_unlock_bh(&dccp_buflock); - return 1; + return true; } if (op[i] < 2) @@ -74,24 +74,24 @@ dccp_find_option(u_int8_t option, } spin_unlock_bh(&dccp_buflock); - return 0; + return false; } -static inline int +static inline bool match_types(const struct dccp_hdr *dh, u_int16_t typemask) { return (typemask & (1 << dh->dccph_type)); } -static inline int +static inline bool match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, const struct dccp_hdr *dh, bool *hotdrop) { return dccp_find_option(option, skb, protoff, dh, hotdrop); } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -105,12 +105,12 @@ match(const struct sk_buff *skb, struct dccp_hdr _dh, *dh; if (offset) - return 0; + return false; dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh); if (dh == NULL) { *hotdrop = true; - return 0; + return false; } return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0]) diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index c106d738da6d..c9c6518907a2 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -22,22 +22,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_dscp"); MODULE_ALIAS("ip6t_dscp"); -static int match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) -{ - const struct xt_dscp_info *info = matchinfo; - u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; - - return (dscp == info->dscp) ^ !!info->invert; -} - -static int match6(const struct sk_buff *skb, +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, @@ -45,6 +30,21 @@ static int match6(const struct sk_buff *skb, int offset, unsigned int protoff, bool *hotdrop) +{ + const struct xt_dscp_info *info = matchinfo; + u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; + + return (dscp == info->dscp) ^ !!info->invert; +} + +static bool match6(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) { const struct xt_dscp_info *info = matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 5d3421bcd850..1a945cb7c359 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -31,10 +31,10 @@ MODULE_ALIAS("ip6t_esp"); #endif /* Returns 1 if the spi is matched by the range, 0 otherwise */ -static inline int -spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { - int r = 0; + bool r; duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; @@ -42,7 +42,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -57,7 +57,7 @@ match(const struct sk_buff *skb, /* Must not be a fragment. */ if (offset) - return 0; + return false; eh = skb_header_pointer(skb, protoff, sizeof(_esp), &_esp); if (eh == NULL) { @@ -66,7 +66,7 @@ match(const struct sk_buff *skb, */ duprintf("Dropping evil ESP tinygram.\n"); *hotdrop = true; - return 0; + return false; } return spi_match(espinfo->spis[0], espinfo->spis[1], ntohl(eh->spi), diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index cd5cba6978c3..21597b755cea 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -94,7 +94,8 @@ static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */ static HLIST_HEAD(hashlimit_htables); static struct kmem_cache *hashlimit_cachep __read_mostly; -static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) +static inline bool dst_cmp(const struct dsthash_ent *ent, + struct dsthash_dst *b) { return !memcmp(&ent->dst, b, sizeof(ent->dst)); } @@ -227,18 +228,18 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family) return 0; } -static int select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) +static bool select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) { return 1; } -static int select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) +static bool select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) { return (jiffies >= he->expires); } static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, - int (*select)(struct xt_hashlimit_htable *ht, + bool (*select)(struct xt_hashlimit_htable *ht, struct dsthash_ent *he)) { unsigned int i; @@ -432,7 +433,7 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst, return 0; } -static int +static bool hashlimit_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -478,17 +479,17 @@ hashlimit_match(const struct sk_buff *skb, /* We're underlimit. */ dh->rateinfo.credit -= dh->rateinfo.cost; spin_unlock_bh(&hinfo->lock); - return 1; + return true; } spin_unlock_bh(&hinfo->lock); /* default case: we're overlimit, thus don't match */ - return 0; + return false; hotdrop: *hotdrop = true; - return 0; + return false; } static int diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 0aa090776e27..10c629b34abf 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip6t_helper"); #define DEBUGP(format, args...) #endif -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -42,7 +42,7 @@ match(const struct sk_buff *skb, struct nf_conn *ct; struct nf_conn_help *master_help; enum ip_conntrack_info ctinfo; - int ret = info->invert; + bool ret = info->invert; ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); if (!ct) { @@ -67,7 +67,7 @@ match(const struct sk_buff *skb, ct->master->helper->name, info->name); if (info->name[0] == '\0') - ret ^= 1; + ret = !ret; else ret ^= !strncmp(master_help->helper->name, info->name, strlen(master_help->helper->name)); diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index 621c9ee6d1c9..57bcfacde594 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -20,7 +20,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_length"); MODULE_ALIAS("ip6t_length"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -36,7 +36,7 @@ match(const struct sk_buff *skb, return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } -static int +static bool match6(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 1133b4ca4904..0cfe241a0493 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -57,7 +57,7 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) -static int +static bool ipt_limit_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -79,11 +79,11 @@ ipt_limit_match(const struct sk_buff *skb, /* We're not limited. */ r->credit -= r->cost; spin_unlock_bh(&limit_lock); - return 1; + return true; } spin_unlock_bh(&limit_lock); - return 0; + return false; } /* Precision saver. */ diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 0e6a28647206..86022027dd63 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -24,7 +24,7 @@ MODULE_DESCRIPTION("iptables mac matching module"); MODULE_ALIAS("ipt_mac"); MODULE_ALIAS("ip6t_mac"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 944d1ea56029..10c6799cd56a 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -19,7 +19,7 @@ MODULE_DESCRIPTION("iptables mark matching module"); MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 1dc53ded9887..55feb3d737d4 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -33,24 +33,24 @@ MODULE_ALIAS("ip6t_multiport"); #endif /* Returns 1 if the port is matched by the test, 0 otherwise. */ -static inline int +static inline bool ports_match(const u_int16_t *portlist, enum xt_multiport_flags flags, u_int8_t count, u_int16_t src, u_int16_t dst) { unsigned int i; for (i = 0; i < count; i++) { if (flags != XT_MULTIPORT_DESTINATION && portlist[i] == src) - return 1; + return true; if (flags != XT_MULTIPORT_SOURCE && portlist[i] == dst) - return 1; + return true; } - return 0; + return false; } /* Returns 1 if the port is matched by the test, 0 otherwise. */ -static inline int +static inline bool ports_match_v1(const struct xt_multiport_v1 *minfo, u_int16_t src, u_int16_t dst) { @@ -67,34 +67,34 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, if (minfo->flags == XT_MULTIPORT_SOURCE && src >= s && src <= e) - return 1 ^ minfo->invert; + return true ^ minfo->invert; if (minfo->flags == XT_MULTIPORT_DESTINATION && dst >= s && dst <= e) - return 1 ^ minfo->invert; + return true ^ minfo->invert; if (minfo->flags == XT_MULTIPORT_EITHER && ((dst >= s && dst <= e) || (src >= s && src <= e))) - return 1 ^ minfo->invert; + return true ^ minfo->invert; } else { /* exact port matching */ duprintf("src or dst matches with %d?\n", s); if (minfo->flags == XT_MULTIPORT_SOURCE && src == s) - return 1 ^ minfo->invert; + return true ^ minfo->invert; if (minfo->flags == XT_MULTIPORT_DESTINATION && dst == s) - return 1 ^ minfo->invert; + return true ^ minfo->invert; if (minfo->flags == XT_MULTIPORT_EITHER && (src == s || dst == s)) - return 1 ^ minfo->invert; + return true ^ minfo->invert; } } return minfo->invert; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -108,7 +108,7 @@ match(const struct sk_buff *skb, const struct xt_multiport *multiinfo = matchinfo; if (offset) - return 0; + return false; pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports); if (pptr == NULL) { @@ -117,7 +117,7 @@ match(const struct sk_buff *skb, */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); *hotdrop = true; - return 0; + return false; } return ports_match(multiinfo->ports, @@ -125,7 +125,7 @@ match(const struct sk_buff *skb, ntohs(pptr[0]), ntohs(pptr[1])); } -static int +static bool match_v1(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -139,7 +139,7 @@ match_v1(const struct sk_buff *skb, const struct xt_multiport_v1 *multiinfo = matchinfo; if (offset) - return 0; + return false; pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports); if (pptr == NULL) { @@ -148,7 +148,7 @@ match_v1(const struct sk_buff *skb, */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); *hotdrop = true; - return 0; + return false; } return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1])); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index a6de512fa840..70de6708e884 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -14,8 +14,6 @@ #include #include #include -#define MATCH 1 -#define NOMATCH 0 MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer "); @@ -23,7 +21,7 @@ MODULE_DESCRIPTION("iptables bridge physical device match module"); MODULE_ALIAS("ipt_physdev"); MODULE_ALIAS("ip6t_physdev"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -36,7 +34,7 @@ match(const struct sk_buff *skb, int i; static const char nulldevname[IFNAMSIZ]; const struct xt_physdev_info *info = matchinfo; - unsigned int ret; + bool ret; const char *indev, *outdev; struct nf_bridge_info *nf_bridge; @@ -47,58 +45,58 @@ match(const struct sk_buff *skb, /* Return MATCH if the invert flags of the used options are on */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && !(info->invert & XT_PHYSDEV_OP_BRIDGED)) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_ISIN) && !(info->invert & XT_PHYSDEV_OP_ISIN)) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_ISOUT) && !(info->invert & XT_PHYSDEV_OP_ISOUT)) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_IN) && !(info->invert & XT_PHYSDEV_OP_IN)) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_OUT) && !(info->invert & XT_PHYSDEV_OP_OUT)) - return NOMATCH; - return MATCH; + return false; + return true; } /* This only makes sense in the FORWARD and POSTROUTING chains */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && (!!(nf_bridge->mask & BRNF_BRIDGED) ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_ISIN && (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) || (info->bitmask & XT_PHYSDEV_OP_ISOUT && (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT)))) - return NOMATCH; + return false; if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) { + for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { ret |= (((const unsigned int *)indev)[i] ^ ((const unsigned int *)info->physindev)[i]) & ((const unsigned int *)info->in_mask)[i]; } - if ((ret == 0) ^ !(info->invert & XT_PHYSDEV_OP_IN)) - return NOMATCH; + if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) + return false; match_outdev: if (!(info->bitmask & XT_PHYSDEV_OP_OUT)) - return MATCH; + return true; outdev = nf_bridge->physoutdev ? nf_bridge->physoutdev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) { + for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { ret |= (((const unsigned int *)outdev)[i] ^ ((const unsigned int *)info->physoutdev)[i]) & ((const unsigned int *)info->out_mask)[i]; } - return (ret != 0) ^ !(info->invert & XT_PHYSDEV_OP_OUT); + return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); } static int diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 692581f40c5f..63239727bc22 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -21,7 +21,7 @@ MODULE_DESCRIPTION("IP tables match to match on linklayer packet type"); MODULE_ALIAS("ipt_pkttype"); MODULE_ALIAS("ip6t_pkttype"); -static int match(const struct sk_buff *skb, +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 6878482cd527..0aa487b1f3b8 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -20,7 +20,7 @@ MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("Xtables IPsec policy matching module"); MODULE_LICENSE("GPL"); -static inline int +static inline bool xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m, const union xt_policy_addr *a2, unsigned short family) { @@ -30,10 +30,10 @@ xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m, case AF_INET6: return !ipv6_masked_addr_cmp(&a1->a6, &m->a6, &a2->a6); } - return 0; + return false; } -static inline int +static inline bool match_xfrm_state(struct xfrm_state *x, const struct xt_policy_elem *e, unsigned short family) { @@ -108,14 +108,14 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, return strict ? i == info->len : 0; } -static int match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) { const struct xt_policy_info *info = matchinfo; int ret; @@ -126,9 +126,9 @@ static int match(const struct sk_buff *skb, ret = match_policy_out(skb, info, match->family); if (ret < 0) - ret = info->flags & XT_POLICY_MATCH_NONE ? 1 : 0; + ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; else if (info->flags & XT_POLICY_MATCH_NONE) - ret = 0; + ret = false; return ret; } diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 53c71ac980fc..6091347e38b3 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -16,19 +16,19 @@ MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { struct xt_quota_info *q = ((struct xt_quota_info *)matchinfo)->master; - int ret = q->flags & XT_QUOTA_INVERT ? 1 : 0; + bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh("a_lock); if (q->quota >= skb->len) { q->quota -= skb->len; - ret ^= 1; + ret = !ret; } else { /* we do not allow even small packets from now on */ q->quota = 0; diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 41451f57919c..ad82c132694c 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -21,7 +21,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("X_tables realm match"); MODULE_ALIAS("ipt_realm"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index e581afe89098..a118a4c71563 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_sctp"); #define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ || (!!((invflag) & (option)) ^ (cond))) -static int +static bool match_flags(const struct xt_sctp_flag_info *flag_info, const int flag_count, u_int8_t chunktype, @@ -37,10 +37,10 @@ match_flags(const struct xt_sctp_flag_info *flag_info, } } - return 1; + return true; } -static inline int +static inline bool match_packet(const struct sk_buff *skb, unsigned int offset, const u_int32_t *chunkmap, @@ -65,7 +65,7 @@ match_packet(const struct sk_buff *skb, if (sch == NULL || sch->length == 0) { duprintf("Dropping invalid SCTP packet.\n"); *hotdrop = true; - return 0; + return false; } duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", @@ -80,7 +80,7 @@ match_packet(const struct sk_buff *skb, case SCTP_CHUNK_MATCH_ANY: if (match_flags(flag_info, flag_count, sch->type, sch->flags)) { - return 1; + return true; } break; @@ -94,14 +94,14 @@ match_packet(const struct sk_buff *skb, case SCTP_CHUNK_MATCH_ONLY: if (!match_flags(flag_info, flag_count, sch->type, sch->flags)) { - return 0; + return false; } break; } } else { switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ONLY: - return 0; + return false; } } } while (offset < skb->len); @@ -110,16 +110,16 @@ match_packet(const struct sk_buff *skb, case SCTP_CHUNK_MATCH_ALL: return SCTP_CHUNKMAP_IS_CLEAR(chunkmap); case SCTP_CHUNK_MATCH_ANY: - return 0; + return false; case SCTP_CHUNK_MATCH_ONLY: - return 1; + return true; } /* This will never be reached, but required to stop compiler whine */ - return 0; + return false; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -134,14 +134,14 @@ match(const struct sk_buff *skb, if (offset) { duprintf("Dropping non-first fragment.. FIXME\n"); - return 0; + return false; } sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh); if (sh == NULL) { duprintf("Dropping evil TCP offset=0 tinygram.\n"); *hotdrop = true; - return 0; + return false; } duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 74fe069fc3aa..f77f74ad5c97 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("ip[6]_tables connection tracking state match module"); MODULE_ALIAS("ipt_state"); MODULE_ALIAS("ip6t_state"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 4e5ed81e9ce1..989924f9024e 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -24,26 +24,26 @@ MODULE_ALIAS("ip6t_statistic"); static DEFINE_SPINLOCK(nth_lock); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; - int ret = info->flags & XT_STATISTIC_INVERT ? 1 : 0; + bool ret = info->flags & XT_STATISTIC_INVERT; switch (info->mode) { case XT_STATISTIC_MODE_RANDOM: if ((net_random() & 0x7FFFFFFF) < info->u.random.probability) - ret ^= 1; + ret = !ret; break; case XT_STATISTIC_MODE_NTH: info = info->master; spin_lock_bh(&nth_lock); if (info->u.nth.count++ == info->u.nth.every) { info->u.nth.count = 0; - ret ^= 1; + ret = !ret; } spin_unlock_bh(&nth_lock); break; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 7552d8927570..3aea43d37339 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -21,14 +21,14 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); -static int match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) { const struct xt_string_info *conf = matchinfo; struct ts_state state; diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 0db4f5362180..e9bfd3dd3c81 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -23,7 +23,7 @@ MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables TCP MSS match module"); MODULE_ALIAS("ipt_tcpmss"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -78,7 +78,7 @@ out: dropit: *hotdrop = true; - return 0; + return false; } static struct xt_match xt_tcpmss_match[] = { diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index ca9ccdd931bc..9ecc4a5bd529 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -27,21 +27,18 @@ MODULE_ALIAS("ip6t_tcp"); /* Returns 1 if the port is matched by the range, 0 otherwise */ -static inline int -port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert) +static inline bool +port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert) { - int ret; - - ret = (port >= min && port <= max) ^ invert; - return ret; + return (port >= min && port <= max) ^ invert; } -static int +static bool tcp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, unsigned int optlen, - int invert, + bool invert, bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ @@ -58,7 +55,7 @@ tcp_find_option(u_int8_t option, optlen, _opt); if (op == NULL) { *hotdrop = true; - return 0; + return false; } for (i = 0; i < optlen; ) { @@ -70,7 +67,7 @@ tcp_find_option(u_int8_t option, return invert; } -static int +static bool tcp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -95,7 +92,7 @@ tcp_match(const struct sk_buff *skb, *hotdrop = true; } /* Must not be a fragment. */ - return 0; + return false; } #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg)) @@ -106,33 +103,33 @@ tcp_match(const struct sk_buff *skb, can't. Hence, no choice but to drop. */ duprintf("Dropping evil TCP offset=0 tinygram.\n"); *hotdrop = true; - return 0; + return false; } if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1], ntohs(th->source), !!(tcpinfo->invflags & XT_TCP_INV_SRCPT))) - return 0; + return false; if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) - return 0; + return false; if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp, XT_TCP_INV_FLAGS)) - return 0; + return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { *hotdrop = true; - return 0; + return false; } if (!tcp_find_option(tcpinfo->option, skb, protoff, th->doff*4 - sizeof(_tcph), tcpinfo->invflags & XT_TCP_INV_OPTION, hotdrop)) - return 0; + return false; } - return 1; + return true; } /* Called when user tries to insert an entry of this type. */ @@ -149,7 +146,7 @@ tcp_checkentry(const char *tablename, return !(tcpinfo->invflags & ~XT_TCP_INV_MASK); } -static int +static bool udp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -164,7 +161,7 @@ udp_match(const struct sk_buff *skb, /* Must not be a fragment. */ if (offset) - return 0; + return false; uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph); if (uh == NULL) { @@ -172,7 +169,7 @@ udp_match(const struct sk_buff *skb, can't. Hence, no choice but to drop. */ duprintf("Dropping evil UDP tinygram.\n"); *hotdrop = true; - return 0; + return false; } return port_match(udpinfo->spts[0], udpinfo->spts[1], -- cgit v1.2.3 From ccb79bdce71f2c04cfa9bfcbaf4d37e2f963d684 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:16:00 -0700 Subject: [NETFILTER]: x_tables: switch xt_match->checkentry to bool Switch the return type of match functions to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 10 +++++----- net/ipv4/netfilter/ip_tables.c | 10 +++++----- net/ipv4/netfilter/ipt_ah.c | 6 +++--- net/ipv4/netfilter/ipt_ecn.c | 14 +++++++------- net/ipv4/netfilter/ipt_owner.c | 6 +++--- net/ipv4/netfilter/ipt_recent.c | 14 +++++++------- net/ipv6/netfilter/ip6_tables.c | 14 +++++++------- net/ipv6/netfilter/ip6t_ah.c | 6 +++--- net/ipv6/netfilter/ip6t_frag.c | 6 +++--- net/ipv6/netfilter/ip6t_hbh.c | 6 +++--- net/ipv6/netfilter/ip6t_ipv6header.c | 6 +++--- net/ipv6/netfilter/ip6t_mh.c | 2 +- net/ipv6/netfilter/ip6t_owner.c | 6 +++--- net/ipv6/netfilter/ip6t_rt.c | 8 ++++---- net/netfilter/xt_connbytes.c | 18 +++++++++--------- net/netfilter/xt_connmark.c | 8 ++++---- net/netfilter/xt_conntrack.c | 6 +++--- net/netfilter/xt_dccp.c | 2 +- net/netfilter/xt_dscp.c | 14 +++++++------- net/netfilter/xt_esp.c | 6 +++--- net/netfilter/xt_hashlimit.c | 16 ++++++++-------- net/netfilter/xt_helper.c | 14 +++++++------- net/netfilter/xt_limit.c | 6 +++--- net/netfilter/xt_mark.c | 6 +++--- net/netfilter/xt_multiport.c | 10 +++++----- net/netfilter/xt_physdev.c | 8 ++++---- net/netfilter/xt_policy.c | 16 ++++++++-------- net/netfilter/xt_quota.c | 6 +++--- net/netfilter/xt_sctp.c | 2 +- net/netfilter/xt_state.c | 14 +++++++------- net/netfilter/xt_statistic.c | 6 +++--- net/netfilter/xt_string.c | 20 ++++++++++---------- net/netfilter/xt_tcpudp.c | 4 ++-- 33 files changed, 148 insertions(+), 148 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 304fce356a43..5130dd60a2fc 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -152,11 +152,11 @@ struct xt_match /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ - int (*checkentry)(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask); + bool (*checkentry)(const char *tablename, + const void *ip, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_match *match, void *matchinfo); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b9c792dd4890..7962306df585 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -152,20 +152,20 @@ ip_packet_match(const struct iphdr *ip, return 1; } -static inline int +static inline bool ip_checkentry(const struct ipt_ip *ip) { if (ip->flags & ~IPT_F_MASK) { duprintf("Unknown flag bits set: %08X\n", ip->flags & ~IPT_F_MASK); - return 0; + return false; } if (ip->invflags & ~IPT_INV_MASK) { duprintf("Unknown invflag bits set: %08X\n", ip->invflags & ~IPT_INV_MASK); - return 0; + return false; } - return 1; + return true; } static unsigned int @@ -2149,7 +2149,7 @@ icmp_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool icmp_checkentry(const char *tablename, const void *info, const struct xt_match *match, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 3da39ee92d8b..6b5b7c9f7392 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -70,7 +70,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, @@ -82,9 +82,9 @@ checkentry(const char *tablename, /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags); - return 0; + return false; } - return 1; + return true; } static struct xt_match ah_match = { diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index ba3a17e0f848..ba4f5497add3 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -87,27 +87,27 @@ static bool match(const struct sk_buff *skb, return true; } -static int checkentry(const char *tablename, const void *ip_void, - const struct xt_match *match, - void *matchinfo, unsigned int hook_mask) +static bool checkentry(const char *tablename, const void *ip_void, + const struct xt_match *match, + void *matchinfo, unsigned int hook_mask) { const struct ipt_ecn_info *info = matchinfo; const struct ipt_ip *ip = ip_void; if (info->operation & IPT_ECN_OP_MATCH_MASK) - return 0; + return false; if (info->invert & IPT_ECN_OP_MATCH_MASK) - return 0; + return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && ip->proto != IPPROTO_TCP) { printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" " non-tcp packets\n"); - return 0; + return false; } - return 1; + return true; } static struct xt_match ecn_match = { diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 8f441cef5504..deea4b8cc055 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -51,7 +51,7 @@ match(const struct sk_buff *skb, return true; } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -63,9 +63,9 @@ checkentry(const char *tablename, if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { printk("ipt_owner: pid, sid and command matching " "not supported anymore\n"); - return 0; + return false; } - return 1; + return true; } static struct xt_match owner_match = { diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 2e513ed9b6e9..d632e0e6ef16 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -235,7 +235,7 @@ out: return ret; } -static int +static bool ipt_recent_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) @@ -243,24 +243,24 @@ ipt_recent_checkentry(const char *tablename, const void *ip, const struct ipt_recent_info *info = matchinfo; struct recent_table *t; unsigned i; - int ret = 0; + bool ret = false; if (hweight8(info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE | IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1) - return 0; + return false; if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) && (info->seconds || info->hit_count)) - return 0; + return false; if (info->name[0] == '\0' || strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN) - return 0; + return false; mutex_lock(&recent_mutex); t = recent_table_lookup(info->name); if (t != NULL) { t->refcnt++; - ret = 1; + ret = true; goto out; } @@ -287,7 +287,7 @@ ipt_recent_checkentry(const char *tablename, const void *ip, spin_lock_bh(&recent_lock); list_add_tail(&t->list, &tables); spin_unlock_bh(&recent_lock); - ret = 1; + ret = true; out: mutex_unlock(&recent_mutex); return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 31f42e82184a..7fe4d29708cb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -188,20 +188,20 @@ ip6_packet_match(const struct sk_buff *skb, } /* should be ip6 safe */ -static inline int +static inline bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { if (ipv6->flags & ~IP6T_F_MASK) { duprintf("Unknown flag bits set: %08X\n", ipv6->flags & ~IP6T_F_MASK); - return 0; + return false; } if (ipv6->invflags & ~IP6T_INV_MASK) { duprintf("Unknown invflag bits set: %08X\n", ipv6->invflags & ~IP6T_INV_MASK); - return 0; + return false; } - return 1; + return true; } static unsigned int @@ -1282,10 +1282,10 @@ void ip6t_unregister_table(struct xt_table *table) } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline int +static inline bool icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, u_int8_t type, u_int8_t code, - int invert) + bool invert) { return (type == test_type && code >= min_code && code <= max_code) ^ invert; @@ -1325,7 +1325,7 @@ icmp6_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool icmp6_checkentry(const char *tablename, const void *entry, const struct xt_match *match, diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 607c2eb1296f..8fc00bdfc38b 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -103,7 +103,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -114,9 +114,9 @@ checkentry(const char *tablename, if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { DEBUGP("ip6t_ah: unknown flags %X\n", ahinfo->invflags); - return 0; + return false; } - return 1; + return true; } static struct xt_match ah_match = { diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 0ed5fbcf1f18..f0aed898e8b7 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -120,7 +120,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -131,9 +131,9 @@ checkentry(const char *tablename, if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { DEBUGP("ip6t_frag: unknown flags %X\n", fraginfo->invflags); - return 0; + return false; } - return 1; + return true; } static struct xt_match frag_match = { diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 4b05393faa68..6fdd79785f32 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -174,7 +174,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -185,9 +185,9 @@ checkentry(const char *tablename, if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); - return 0; + return false; } - return 1; + return true; } static struct xt_match opts_match[] = { diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 3222e8959426..5ba6ef0f1b1b 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -124,7 +124,7 @@ ipv6header_match(const struct sk_buff *skb, } } -static int +static bool ipv6header_checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -136,9 +136,9 @@ ipv6header_checkentry(const char *tablename, /* invflags is 0 or 0xff in hard mode */ if ((!info->modeflag) && info->invflags != 0x00 && info->invflags != 0xFF) - return 0; + return false; - return 1; + return true; } static struct xt_match ip6t_ipv6header_match = { diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index ddffe03a8b37..a3008b41d24b 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -75,7 +75,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool mh_checkentry(const char *tablename, const void *entry, const struct xt_match *match, diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index cadd0a64fed7..8cb6c94b4a20 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -53,7 +53,7 @@ match(const struct sk_buff *skb, return true; } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -65,9 +65,9 @@ checkentry(const char *tablename, if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { printk("ipt_owner: pid and sid matching " "not supported anymore\n"); - return 0; + return false; } - return 1; + return true; } static struct xt_match owner_match = { diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 7966f4a5e9b7..e991ed4a692e 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -198,7 +198,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -209,17 +209,17 @@ checkentry(const char *tablename, if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { DEBUGP("ip6t_rt: unknown flags %X\n", rtinfo->invflags); - return 0; + return false; } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) || (rtinfo->invflags & IP6T_RT_INV_TYP))) { DEBUGP("`--rt-type 0' required before `--rt-0-*'"); - return 0; + return false; } - return 1; + return true; } static struct xt_match rt_match = { diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index aada7b797549..12541784109a 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -95,31 +95,31 @@ match(const struct sk_buff *skb, return (what >= sinfo->count.from); } -static int check(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *ip, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { const struct xt_connbytes_info *sinfo = matchinfo; if (sinfo->what != XT_CONNBYTES_PKTS && sinfo->what != XT_CONNBYTES_BYTES && sinfo->what != XT_CONNBYTES_AVGPKT) - return 0; + return false; if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL && sinfo->direction != XT_CONNBYTES_DIR_REPLY && sinfo->direction != XT_CONNBYTES_DIR_BOTH) - return 0; + return false; if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } - return 1; + return true; } static void diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 3321b80aff4f..94d5251b3d88 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -51,7 +51,7 @@ match(const struct sk_buff *skb, return (((ct->mark) & info->mask) == info->mark) ^ info->invert; } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -62,14 +62,14 @@ checkentry(const char *tablename, if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { printk(KERN_WARNING "connmark: only support 32bit mark\n"); - return 0; + return false; } if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } - return 1; + return true; } static void diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 26901f95bf4b..87364f58a4b9 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -114,7 +114,7 @@ match(const struct sk_buff *skb, return true; } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -124,9 +124,9 @@ checkentry(const char *tablename, if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } - return 1; + return true; } static void destroy(const struct xt_match *match, void *matchinfo) diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index b0eba4e2c53f..24895902cfe0 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -126,7 +126,7 @@ match(const struct sk_buff *skb, XT_DCCP_OPTION, info->flags, info->invflags); } -static int +static bool checkentry(const char *tablename, const void *inf, const struct xt_match *match, diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index c9c6518907a2..35cabca28eff 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -52,20 +52,20 @@ static bool match6(const struct sk_buff *skb, return (dscp == info->dscp) ^ !!info->invert; } -static int checkentry(const char *tablename, - const void *info, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, + const void *info, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; if (dscp > XT_DSCP_MAX) { printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); - return 0; + return false; } - return 1; + return true; } static struct xt_match xt_dscp_match[] = { diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 1a945cb7c359..1a6ae8a047c7 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -74,7 +74,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, @@ -85,10 +85,10 @@ checkentry(const char *tablename, if (espinfo->invflags & ~XT_ESP_INV_MASK) { duprintf("xt_esp: unknown flags %X\n", espinfo->invflags); - return 0; + return false; } - return 1; + return true; } static struct xt_match xt_esp_match[] = { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 21597b755cea..a1b5996447dd 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -492,7 +492,7 @@ hotdrop: return false; } -static int +static bool hashlimit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, @@ -506,20 +506,20 @@ hashlimit_checkentry(const char *tablename, user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) { printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n", r->cfg.avg, r->cfg.burst); - return 0; + return false; } if (r->cfg.mode == 0 || r->cfg.mode > (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT)) - return 0; + return false; if (!r->cfg.gc_interval) - return 0; + return false; if (!r->cfg.expire) - return 0; + return false; if (r->name[sizeof(r->name) - 1] != '\0') - return 0; + return false; /* This is the best we've got: We cannot release and re-grab lock, * since checkentry() is called before x_tables.c grabs xt_mutex. @@ -531,13 +531,13 @@ hashlimit_checkentry(const char *tablename, r->hinfo = htable_find_get(r->name, match->family); if (!r->hinfo && htable_create(r, match->family) != 0) { mutex_unlock(&hlimit_mutex); - return 0; + return false; } mutex_unlock(&hlimit_mutex); /* Ugly hack: For SMP, we only want to use one set */ r->u.master = r; - return 1; + return true; } static void diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 10c629b34abf..a2688b807a99 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -76,21 +76,21 @@ out_unlock: return ret; } -static int check(const char *tablename, - const void *inf, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *inf, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { struct xt_helper_info *info = matchinfo; if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } info->name[29] = '\0'; - return 1; + return true; } static void diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 0cfe241a0493..2717aa65246a 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -98,7 +98,7 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE; } -static int +static bool ipt_limit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, @@ -112,7 +112,7 @@ ipt_limit_checkentry(const char *tablename, || user2credits(r->avg * r->burst) < user2credits(r->avg)) { printk("Overflow in xt_limit, try lower: %u/%u\n", r->avg, r->burst); - return 0; + return false; } /* For SMP, we only want to use one set of counters. */ @@ -125,7 +125,7 @@ ipt_limit_checkentry(const char *tablename, r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ r->cost = user2credits(r->avg); } - return 1; + return true; } #ifdef CONFIG_COMPAT diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 10c6799cd56a..83ed806764b4 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -34,7 +34,7 @@ match(const struct sk_buff *skb, return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -45,9 +45,9 @@ checkentry(const char *tablename, if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { printk(KERN_WARNING "mark: only supports 32bit mark\n"); - return 0; + return false; } - return 1; + return true; } #ifdef CONFIG_COMPAT diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 55feb3d737d4..3d69d6208965 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -154,7 +154,7 @@ match_v1(const struct sk_buff *skb, return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1])); } -static inline int +static inline bool check(u_int16_t proto, u_int8_t ip_invflags, u_int8_t match_flags, @@ -172,7 +172,7 @@ check(u_int16_t proto, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *info, const struct xt_match *match, @@ -186,7 +186,7 @@ checkentry(const char *tablename, multiinfo->count); } -static int +static bool checkentry_v1(const char *tablename, const void *info, const struct xt_match *match, @@ -200,7 +200,7 @@ checkentry_v1(const char *tablename, multiinfo->count); } -static int +static bool checkentry6(const char *tablename, const void *info, const struct xt_match *match, @@ -214,7 +214,7 @@ checkentry6(const char *tablename, multiinfo->count); } -static int +static bool checkentry6_v1(const char *tablename, const void *info, const struct xt_match *match, diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 70de6708e884..34f0d3e44ea7 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -99,7 +99,7 @@ match_outdev: return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -110,7 +110,7 @@ checkentry(const char *tablename, if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) - return 0; + return false; if (info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && @@ -120,9 +120,9 @@ checkentry(const char *tablename, "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " "traffic is not supported anymore.\n"); if (hook_mask & (1 << NF_IP_LOCAL_OUT)) - return 0; + return false; } - return 1; + return true; } static struct xt_match xt_physdev_match[] = { diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 0aa487b1f3b8..1534de55cdb6 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -133,35 +133,35 @@ static bool match(const struct sk_buff *skb, return ret; } -static int checkentry(const char *tablename, const void *ip_void, - const struct xt_match *match, - void *matchinfo, unsigned int hook_mask) +static bool checkentry(const char *tablename, const void *ip_void, + const struct xt_match *match, + void *matchinfo, unsigned int hook_mask) { struct xt_policy_info *info = matchinfo; if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { printk(KERN_ERR "xt_policy: neither incoming nor " "outgoing policy selected\n"); - return 0; + return false; } /* hook values are equal for IPv4 and IPv6 */ if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN) && info->flags & XT_POLICY_MATCH_OUT) { printk(KERN_ERR "xt_policy: output policy not valid in " "PRE_ROUTING and INPUT\n"); - return 0; + return false; } if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) && info->flags & XT_POLICY_MATCH_IN) { printk(KERN_ERR "xt_policy: input policy not valid in " "POST_ROUTING and OUTPUT\n"); - return 0; + return false; } if (info->len > XT_POLICY_MAX_ELEM) { printk(KERN_ERR "xt_policy: too many policy elements\n"); - return 0; + return false; } - return 1; + return true; } static struct xt_match xt_policy_match[] = { diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 6091347e38b3..e13d62a8caba 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -38,7 +38,7 @@ match(const struct sk_buff *skb, return ret; } -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) @@ -46,10 +46,10 @@ checkentry(const char *tablename, const void *entry, struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; if (q->flags & ~XT_QUOTA_MASK) - return 0; + return false; /* For SMP, we only want to use one set of counters. */ q->master = q; - return 1; + return true; } static struct xt_match xt_quota_match[] = { diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index a118a4c71563..22df338b3934 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -158,7 +158,7 @@ match(const struct sk_buff *skb, XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } -static int +static bool checkentry(const char *tablename, const void *inf, const struct xt_match *match, diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index f77f74ad5c97..5b9c59aa14d3 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -44,18 +44,18 @@ match(const struct sk_buff *skb, return (sinfo->statemask & statebit); } -static int check(const char *tablename, - const void *inf, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *inf, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } - return 1; + return true; } static void diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 989924f9024e..0af42892e9dc 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -52,7 +52,7 @@ match(const struct sk_buff *skb, return ret; } -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) @@ -61,9 +61,9 @@ checkentry(const char *tablename, const void *entry, if (info->mode > XT_STATISTIC_MODE_MAX || info->flags & ~XT_STATISTIC_MASK) - return 0; + return false; info->master = info; - return 1; + return true; } static struct xt_match xt_statistic_match[] = { diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 3aea43d37339..ab761b17f811 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -42,30 +42,30 @@ static bool match(const struct sk_buff *skb, #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m) -static int checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, + const void *ip, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { struct xt_string_info *conf = matchinfo; struct ts_config *ts_conf; /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) - return 0; + return false; if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') - return 0; + return false; if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) - return 0; + return false; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, TS_AUTOLOAD); if (IS_ERR(ts_conf)) - return 0; + return false; conf->config = ts_conf; - return 1; + return true; } static void destroy(const struct xt_match *match, void *matchinfo) diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 9ecc4a5bd529..0dd3022cc79a 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -133,7 +133,7 @@ tcp_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool tcp_checkentry(const char *tablename, const void *info, const struct xt_match *match, @@ -181,7 +181,7 @@ udp_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool udp_checkentry(const char *tablename, const void *info, const struct xt_match *match, -- cgit v1.2.3 From e1931b784a8de324abf310fa3b5e3f25d3988233 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:16:26 -0700 Subject: [NETFILTER]: x_tables: switch xt_target->checkentry to bool Switch the return type of target checkentry functions to boolean. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 10 +++++----- net/ipv4/netfilter/arpt_mangle.c | 8 ++++---- net/ipv4/netfilter/ipt_CLUSTERIP.c | 28 ++++++++++++++-------------- net/ipv4/netfilter/ipt_ECN.c | 30 +++++++++++++++--------------- net/ipv4/netfilter/ipt_LOG.c | 16 ++++++++-------- net/ipv4/netfilter/ipt_MASQUERADE.c | 8 ++++---- net/ipv4/netfilter/ipt_NETMAP.c | 8 ++++---- net/ipv4/netfilter/ipt_REDIRECT.c | 8 ++++---- net/ipv4/netfilter/ipt_REJECT.c | 16 ++++++++-------- net/ipv4/netfilter/ipt_SAME.c | 14 +++++++------- net/ipv4/netfilter/ipt_TOS.c | 6 +++--- net/ipv4/netfilter/ipt_TTL.c | 8 ++++---- net/ipv4/netfilter/ipt_ULOG.c | 16 ++++++++-------- net/ipv4/netfilter/nf_nat_rule.c | 28 ++++++++++++++-------------- net/ipv6/netfilter/ip6t_HL.c | 8 ++++---- net/ipv6/netfilter/ip6t_LOG.c | 16 ++++++++-------- net/ipv6/netfilter/ip6t_REJECT.c | 16 ++++++++-------- net/netfilter/xt_CONNMARK.c | 10 +++++----- net/netfilter/xt_CONNSECMARK.c | 12 ++++++------ net/netfilter/xt_DSCP.c | 14 +++++++------- net/netfilter/xt_MARK.c | 14 +++++++------- net/netfilter/xt_NFLOG.c | 8 ++++---- net/netfilter/xt_SECMARK.c | 24 ++++++++++++------------ net/netfilter/xt_TCPMSS.c | 22 +++++++++++----------- 24 files changed, 174 insertions(+), 174 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5130dd60a2fc..64f425a855bb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -202,11 +202,11 @@ struct xt_target hook_mask is a bitmask of hooks from which it can be called. */ /* Should return true or false. */ - int (*checkentry)(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask); + bool (*checkentry)(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_target *target, void *targinfo); diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 6298d404e7c7..497a16e0b064 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -65,7 +65,7 @@ target(struct sk_buff **pskb, return mangle->target; } -static int +static bool checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { @@ -73,12 +73,12 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) - return 0; + return false; if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && mangle->target != ARPT_CONTINUE) - return 0; - return 1; + return false; + return true; } static struct arpt_target arpt_mangle_reg = { diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 40e273421398..e82339a78c01 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -220,17 +220,17 @@ clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) return 0; } -static int +static bool clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { if (nodenum == 0 || nodenum > c->num_total_nodes) - return 1; + return true; if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) - return 0; + return false; - return 1; + return true; } #endif @@ -370,7 +370,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool checkentry(const char *tablename, const void *e_void, const struct xt_target *target, @@ -387,13 +387,13 @@ checkentry(const char *tablename, cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", cipinfo->hash_mode); - return 0; + return false; } if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); - return 0; + return false; } /* FIXME: further sanity checks */ @@ -407,7 +407,7 @@ checkentry(const char *tablename, if (cipinfo->config != config) { printk(KERN_ERR "CLUSTERIP: Reloaded entry " "has invalid config pointer!\n"); - return 0; + return false; } } else { /* Case B: This is a new rule referring to an existing @@ -418,19 +418,19 @@ checkentry(const char *tablename, /* Case C: This is a completely new clusterip config */ if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); - return 0; + return false; } else { struct net_device *dev; if (e->ip.iniface[0] == '\0') { printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); - return 0; + return false; } dev = dev_get_by_name(e->ip.iniface); if (!dev) { printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); - return 0; + return false; } config = clusterip_config_init(cipinfo, @@ -438,7 +438,7 @@ checkentry(const char *tablename, if (!config) { printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); dev_put(dev); - return 0; + return false; } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } @@ -448,10 +448,10 @@ checkentry(const char *tablename, if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", target->family); - return 0; + return false; } - return 1; + return true; } /* drop reference count of cluster config when rule is deleted */ diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 918ca92e534a..02367012fc74 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -24,8 +24,8 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables ECN modification module"); /* set ECT codepoint from IP header. - * return 0 if there was an error. */ -static inline int + * return false if there was an error. */ +static inline bool set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct iphdr *iph = ip_hdr(*pskb); @@ -33,18 +33,18 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { __u8 oldtos; if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return 0; + return false; iph = ip_hdr(*pskb); oldtos = iph->tos; iph->tos &= ~IPT_ECN_IP_MASK; iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); } - return 1; + return true; } -/* Return 0 if there was an error. */ -static inline int +/* Return false if there was an error. */ +static inline bool set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; @@ -54,16 +54,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb), sizeof(_tcph), &_tcph); if (!tcph) - return 0; + return false; if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) || tcph->ece == einfo->proto.tcp.ece) && ((!(einfo->operation & IPT_ECN_OP_SET_CWR) || tcph->cwr == einfo->proto.tcp.cwr))) - return 1; + return true; if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) - return 0; + return false; tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb); oldval = ((__be16 *)tcph)[6]; @@ -74,7 +74,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) nf_proto_csum_replace2(&tcph->check, *pskb, oldval, ((__be16 *)tcph)[6], 0); - return 1; + return true; } static unsigned int @@ -99,7 +99,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool checkentry(const char *tablename, const void *e_void, const struct xt_target *target, @@ -112,20 +112,20 @@ checkentry(const char *tablename, if (einfo->operation & IPT_ECN_OP_MASK) { printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", einfo->operation); - return 0; + return false; } if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n", einfo->ip_ect); - return 0; + return false; } if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { printk(KERN_WARNING "ECN: cannot use TCP operations on a " "non-tcp rule\n"); - return 0; + return false; } - return 1; + return true; } static struct xt_target ipt_ecn_reg = { diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index a42c5cd968b1..bbff6c352ef8 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -435,24 +435,24 @@ ipt_log_target(struct sk_buff **pskb, return XT_CONTINUE; } -static int ipt_log_checkentry(const char *tablename, - const void *e, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_log_checkentry(const char *tablename, + const void *e, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const struct ipt_log_info *loginfo = targinfo; if (loginfo->level >= 8) { DEBUGP("LOG: level %u >= 8\n", loginfo->level); - return 0; + return false; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { DEBUGP("LOG: prefix term %i\n", loginfo->prefix[sizeof(loginfo->prefix)-1]); - return 0; + return false; } - return 1; + return true; } static struct xt_target ipt_log_reg = { diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d4f2d7775330..b5b216408ee7 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -37,7 +37,7 @@ MODULE_DESCRIPTION("iptables MASQUERADE target module"); static DEFINE_RWLOCK(masq_lock); /* FIXME: Multiple targets. --RR */ -static int +static bool masquerade_check(const char *tablename, const void *e, const struct xt_target *target, @@ -48,13 +48,13 @@ masquerade_check(const char *tablename, if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { DEBUGP("masquerade_check: bad MAP_IPS.\n"); - return 0; + return false; } if (mr->rangesize != 1) { DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize); - return 0; + return false; } - return 1; + return true; } static unsigned int diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 068c69bce30e..a902c71218bf 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -29,7 +29,7 @@ MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target"); #define DEBUGP(format, args...) #endif -static int +static bool check(const char *tablename, const void *e, const struct xt_target *target, @@ -40,13 +40,13 @@ check(const char *tablename, if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { DEBUGP(MODULENAME":check: bad MAP_IPS.\n"); - return 0; + return false; } if (mr->rangesize != 1) { DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize); - return 0; + return false; } - return 1; + return true; } static unsigned int diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 68cc76a198eb..2a04103b50d1 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -32,7 +32,7 @@ MODULE_DESCRIPTION("iptables REDIRECT target module"); #endif /* FIXME: Take multiple ranges --RR */ -static int +static bool redirect_check(const char *tablename, const void *e, const struct xt_target *target, @@ -43,13 +43,13 @@ redirect_check(const char *tablename, if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { DEBUGP("redirect_check: bad MAP_IPS.\n"); - return 0; + return false; } if (mr->rangesize != 1) { DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize); - return 0; + return false; } - return 1; + return true; } static unsigned int diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 9041e0741f6f..5c3270d325f3 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -217,27 +217,27 @@ static unsigned int reject(struct sk_buff **pskb, return NF_DROP; } -static int check(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *e_void, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const struct ipt_reject_info *rejinfo = targinfo; const struct ipt_entry *e = e_void; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { printk("REJECT: ECHOREPLY no longer supported.\n"); - return 0; + return false; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n"); - return 0; + return false; } } - return 1; + return true; } static struct xt_target ipt_reject_reg = { diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 511e5ff84938..3649fabc04ea 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -33,7 +33,7 @@ MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip"); #define DEBUGP(format, args...) #endif -static int +static bool same_check(const char *tablename, const void *e, const struct xt_target *target, @@ -47,13 +47,13 @@ same_check(const char *tablename, if (mr->rangesize < 1) { DEBUGP("same_check: need at least one dest range.\n"); - return 0; + return false; } if (mr->rangesize > IPT_SAME_MAX_RANGE) { DEBUGP("same_check: too many ranges specified, maximum " "is %u ranges\n", IPT_SAME_MAX_RANGE); - return 0; + return false; } for (count = 0; count < mr->rangesize; count++) { if (ntohl(mr->range[count].min_ip) > @@ -62,11 +62,11 @@ same_check(const char *tablename, "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n", NIPQUAD(mr->range[count].min_ip), NIPQUAD(mr->range[count].max_ip)); - return 0; + return false; } if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) { DEBUGP("same_check: bad MAP_IPS.\n"); - return 0; + return false; } rangeip = (ntohl(mr->range[count].max_ip) - ntohl(mr->range[count].min_ip) + 1); @@ -81,7 +81,7 @@ same_check(const char *tablename, DEBUGP("same_check: Couldn't allocate %u bytes " "for %u ipaddresses!\n", (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); - return 0; + return false; } DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n", (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); @@ -97,7 +97,7 @@ same_check(const char *tablename, index++; } } - return 1; + return true; } static void diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 0ad02f249837..ac43e86afbcf 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -43,7 +43,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool checkentry(const char *tablename, const void *e_void, const struct xt_target *target, @@ -58,9 +58,9 @@ checkentry(const char *tablename, && tos != IPTOS_MINCOST && tos != IPTOS_NORMALSVC) { printk(KERN_WARNING "TOS: bad tos value %#x\n", tos); - return 0; + return false; } - return 1; + return true; } static struct xt_target ipt_tos_reg = { diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index a991ec7bd4e7..96b6e3514c22 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -62,7 +62,7 @@ ipt_ttl_target(struct sk_buff **pskb, return XT_CONTINUE; } -static int ipt_ttl_checkentry(const char *tablename, +static bool ipt_ttl_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, @@ -73,11 +73,11 @@ static int ipt_ttl_checkentry(const char *tablename, if (info->mode > IPT_TTL_MAXMODE) { printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", info->mode); - return 0; + return false; } if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) - return 0; - return 1; + return false; + return true; } static struct xt_target ipt_TTL = { diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 23b607b33b32..dfa7afd84763 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -328,25 +328,25 @@ static void ipt_logfn(unsigned int pf, ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static int ipt_ulog_checkentry(const char *tablename, - const void *e, - const struct xt_target *target, - void *targinfo, - unsigned int hookmask) +static bool ipt_ulog_checkentry(const char *tablename, + const void *e, + const struct xt_target *target, + void *targinfo, + unsigned int hookmask) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { DEBUGP("ipt_ULOG: prefix term %i\n", loginfo->prefix[sizeof(loginfo->prefix) - 1]); - return 0; + return false; } if (loginfo->qthreshold > ULOG_MAX_QLEN) { DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n", loginfo->qthreshold); - return 0; + return false; } - return 1; + return true; } #ifdef CONFIG_COMPAT diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 6740736c5e79..fc3d9437beba 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -140,36 +140,36 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &mr->range[0], hooknum); } -static int ipt_snat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_snat_checkentry(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { struct nf_nat_multi_range_compat *mr = targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { printk("SNAT: multiple ranges no longer supported\n"); - return 0; + return false; } - return 1; + return true; } -static int ipt_dnat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_dnat_checkentry(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { struct nf_nat_multi_range_compat *mr = targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { printk("DNAT: multiple ranges no longer supported\n"); - return 0; + return false; } - return 1; + return true; } inline unsigned int diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 4115a576ba25..82966c09fd64 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -58,7 +58,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, return XT_CONTINUE; } -static int ip6t_hl_checkentry(const char *tablename, +static bool ip6t_hl_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, @@ -69,14 +69,14 @@ static int ip6t_hl_checkentry(const char *tablename, if (info->mode > IP6T_HL_MAXMODE) { printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", info->mode); - return 0; + return false; } if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) { printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " "make sense with value 0\n"); - return 0; + return false; } - return 1; + return true; } static struct xt_target ip6t_HL = { diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 5bb9cd349350..aa4b9a14a11c 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -448,24 +448,24 @@ ip6t_log_target(struct sk_buff **pskb, } -static int ip6t_log_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ip6t_log_checkentry(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const struct ip6t_log_info *loginfo = targinfo; if (loginfo->level >= 8) { DEBUGP("LOG: level %u >= 8\n", loginfo->level); - return 0; + return false; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { DEBUGP("LOG: prefix term %i\n", loginfo->prefix[sizeof(loginfo->prefix)-1]); - return 0; + return false; } - return 1; + return true; } static struct xt_target ip6t_log_reg = { diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index cb3d2415a064..8639a0599bf5 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -221,27 +221,27 @@ static unsigned int reject6_target(struct sk_buff **pskb, return NF_DROP; } -static int check(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; const struct ip6t_entry *e = entry; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); - return 0; + return false; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); - return 0; + return false; } } - return 1; + return true; } static struct xt_target ip6t_reject_reg = { diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index b03ce009d0bf..4e8aa1b0cba2 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -76,7 +76,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_target *target, @@ -88,21 +88,21 @@ checkentry(const char *tablename, if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", target->family); - return 0; + return false; } if (matchinfo->mode == XT_CONNMARK_RESTORE) { if (strcmp(tablename, "mangle") != 0) { printk(KERN_WARNING "CONNMARK: restore can only be " "called from \"mangle\" table, not \"%s\"\n", tablename); - return 0; + return false; } } if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); - return 0; + return false; } - return 1; + return true; } static void diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 81c0c58bab47..ab2f0d016953 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -85,16 +85,16 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, return XT_CONTINUE; } -static int checkentry(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { struct xt_connsecmark_target_info *info = targinfo; if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", target->family); - return 0; + return false; } switch (info->mode) { case CONNSECMARK_SAVE: @@ -103,10 +103,10 @@ static int checkentry(const char *tablename, const void *entry, default: printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); - return 0; + return false; } - return 1; + return true; } static void diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 9f2f2201f6ae..2d779f6902dc 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -66,19 +66,19 @@ static unsigned int target6(struct sk_buff **pskb, return XT_CONTINUE; } -static int checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, + const void *e_void, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; if ((dscp > XT_DSCP_MAX)) { printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); - return 0; + return false; } - return 1; + return true; } static struct xt_target xt_dscp_target[] = { diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 43817808d865..bd9cdf29cc3b 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -65,7 +65,7 @@ target_v1(struct sk_buff **pskb, } -static int +static bool checkentry_v0(const char *tablename, const void *entry, const struct xt_target *target, @@ -76,12 +76,12 @@ checkentry_v0(const char *tablename, if (markinfo->mark > 0xffffffff) { printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return 0; + return false; } - return 1; + return true; } -static int +static bool checkentry_v1(const char *tablename, const void *entry, const struct xt_target *target, @@ -95,13 +95,13 @@ checkentry_v1(const char *tablename, && markinfo->mode != XT_MARK_OR) { printk(KERN_WARNING "MARK: unknown mode %u\n", markinfo->mode); - return 0; + return false; } if (markinfo->mark > 0xffffffff) { printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return 0; + return false; } - return 1; + return true; } #ifdef CONFIG_COMPAT diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 901ed7abaa1b..0c6f2838cc98 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -38,7 +38,7 @@ nflog_target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool nflog_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targetinfo, unsigned int hookmask) @@ -46,10 +46,10 @@ nflog_checkentry(const char *tablename, const void *entry, struct xt_nflog_info *info = targetinfo; if (info->flags & ~XT_NFLOG_MASK) - return 0; + return false; if (info->prefix[sizeof(info->prefix) - 1] != '\0') - return 0; - return 1; + return false; + return true; } static struct xt_target xt_nflog_target[] = { diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 705f0e830a79..f3e78c592f3a 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -51,7 +51,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, return XT_CONTINUE; } -static int checkentry_selinux(struct xt_secmark_target_info *info) +static bool checkentry_selinux(struct xt_secmark_target_info *info) { int err; struct xt_secmark_target_selinux_info *sel = &info->u.sel; @@ -63,50 +63,50 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) if (err == -EINVAL) printk(KERN_INFO PFX "invalid SELinux context \'%s\'\n", sel->selctx); - return 0; + return false; } if (!sel->selsid) { printk(KERN_INFO PFX "unable to map SELinux context \'%s\'\n", sel->selctx); - return 0; + return false; } err = selinux_relabel_packet_permission(sel->selsid); if (err) { printk(KERN_INFO PFX "unable to obtain relabeling permission\n"); - return 0; + return false; } - return 1; + return true; } -static int checkentry(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { struct xt_secmark_target_info *info = targinfo; if (mode && mode != info->mode) { printk(KERN_INFO PFX "mode already set to %hu cannot mix with " "rules for mode %hu\n", mode, info->mode); - return 0; + return false; } switch (info->mode) { case SECMARK_MODE_SEL: if (!checkentry_selinux(info)) - return 0; + return false; break; default: printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); - return 0; + return false; } if (!mode) mode = info->mode; - return 1; + return true; } static struct xt_target xt_secmark_target[] = { diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 15fe8f649510..075051acb554 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -197,19 +197,19 @@ xt_tcpmss_target6(struct sk_buff **pskb, #define TH_SYN 0x02 /* Must specify -p tcp --syn */ -static inline int find_syn_match(const struct xt_entry_match *m) +static inline bool find_syn_match(const struct xt_entry_match *m) { const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; if (strcmp(m->u.kernel.match->name, "tcp") == 0 && tcpinfo->flg_cmp & TH_SYN && !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) - return 1; + return true; - return 0; + return false; } -static int +static bool xt_tcpmss_checkentry4(const char *tablename, const void *entry, const struct xt_target *target, @@ -225,16 +225,16 @@ xt_tcpmss_checkentry4(const char *tablename, (1 << NF_IP_POST_ROUTING))) != 0) { printk("xt_TCPMSS: path-MTU clamping only supported in " "FORWARD, OUTPUT and POSTROUTING hooks\n"); - return 0; + return false; } if (IPT_MATCH_ITERATE(e, find_syn_match)) - return 1; + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); - return 0; + return false; } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static int +static bool xt_tcpmss_checkentry6(const char *tablename, const void *entry, const struct xt_target *target, @@ -250,12 +250,12 @@ xt_tcpmss_checkentry6(const char *tablename, (1 << NF_IP6_POST_ROUTING))) != 0) { printk("xt_TCPMSS: path-MTU clamping only supported in " "FORWARD, OUTPUT and POSTROUTING hooks\n"); - return 0; + return false; } if (IP6T_MATCH_ITERATE(e, find_syn_match)) - return 1; + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); - return 0; + return false; } #endif -- cgit v1.2.3 From a47362a226456d8db8207e618324a2278d05d3a7 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:16:55 -0700 Subject: [NETFILTER]: add some consts, remove some casts Make a number of variables const and/or remove unneeded casts. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 11 ++++++----- net/ipv4/netfilter/ipt_LOG.c | 26 +++++++++++++++++--------- net/ipv4/netfilter/ipt_MASQUERADE.c | 8 ++++---- net/ipv4/netfilter/ipt_REJECT.c | 2 +- net/ipv4/netfilter/ipt_TTL.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 6 +++--- net/ipv4/netfilter/ipt_ah.c | 3 ++- net/ipv4/netfilter/ipt_ecn.c | 3 ++- net/ipv4/netfilter/ipt_recent.c | 2 +- net/ipv4/netfilter/nf_nat_helper.c | 4 ++-- net/ipv6/netfilter/ip6t_HL.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 27 ++++++++++++++++++--------- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 3 ++- net/ipv6/netfilter/ip6t_frag.c | 3 ++- net/ipv6/netfilter/ip6t_hbh.c | 9 ++++++--- net/ipv6/netfilter/ip6t_mh.c | 3 ++- net/ipv6/netfilter/ip6t_rt.c | 10 ++++++---- net/netfilter/core.c | 6 +++--- net/netfilter/xt_CONNMARK.c | 6 +++--- net/netfilter/xt_CONNSECMARK.c | 4 ++-- net/netfilter/xt_MARK.c | 8 ++++---- net/netfilter/xt_NFLOG.c | 2 +- net/netfilter/xt_connbytes.c | 2 +- net/netfilter/xt_connmark.c | 8 ++++---- net/netfilter/xt_conntrack.c | 8 ++++---- net/netfilter/xt_dccp.c | 2 +- net/netfilter/xt_hashlimit.c | 30 ++++++++++++++++++------------ net/netfilter/xt_helper.c | 6 +++--- net/netfilter/xt_limit.c | 7 ++++--- net/netfilter/xt_mark.c | 4 ++-- net/netfilter/xt_physdev.c | 2 +- net/netfilter/xt_policy.c | 6 +++--- net/netfilter/xt_quota.c | 5 +++-- net/netfilter/xt_realm.c | 2 +- net/netfilter/xt_statistic.c | 2 +- 36 files changed, 136 insertions(+), 100 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index e82339a78c01..2de7ae0180aa 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -235,12 +235,13 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) #endif static inline u_int32_t -clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) +clusterip_hashfn(const struct sk_buff *skb, + const struct clusterip_config *config) { - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); unsigned long hashval; u_int16_t sport, dport; - u_int16_t *ports; + const u_int16_t *ports; switch (iph->protocol) { case IPPROTO_TCP: @@ -249,7 +250,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) case IPPROTO_SCTP: case IPPROTO_DCCP: case IPPROTO_ICMP: - ports = (void *)iph+iph->ihl*4; + ports = (const void *)iph+iph->ihl*4; sport = ports[0]; dport = ports[1]; break; @@ -289,7 +290,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) } static inline int -clusterip_responsible(struct clusterip_config *config, u_int32_t hash) +clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) { return test_bit(hash - 1, &config->local_nodes); } diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index bbff6c352ef8..bcc43a625e72 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -41,7 +41,8 @@ static void dump_packet(const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int iphoff) { - struct iphdr _iph, *ih; + struct iphdr _iph; + const struct iphdr *ih; unsigned int logflags; if (info->type == NF_LOG_TYPE_LOG) @@ -100,7 +101,8 @@ static void dump_packet(const struct nf_loginfo *info, switch (ih->protocol) { case IPPROTO_TCP: { - struct tcphdr _tcph, *th; + struct tcphdr _tcph; + const struct tcphdr *th; /* Max length: 10 "PROTO=TCP " */ printk("PROTO=TCP "); @@ -151,7 +153,7 @@ static void dump_packet(const struct nf_loginfo *info, if ((logflags & IPT_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; - unsigned char *op; + const unsigned char *op; unsigned int i, optsize; optsize = th->doff * 4 - sizeof(struct tcphdr); @@ -173,7 +175,8 @@ static void dump_packet(const struct nf_loginfo *info, } case IPPROTO_UDP: case IPPROTO_UDPLITE: { - struct udphdr _udph, *uh; + struct udphdr _udph; + const struct udphdr *uh; if (ih->protocol == IPPROTO_UDP) /* Max length: 10 "PROTO=UDP " */ @@ -200,7 +203,8 @@ static void dump_packet(const struct nf_loginfo *info, break; } case IPPROTO_ICMP: { - struct icmphdr _icmph, *ich; + struct icmphdr _icmph; + const struct icmphdr *ich; static const size_t required_len[NR_ICMP_TYPES+1] = { [ICMP_ECHOREPLY] = 4, [ICMP_DEST_UNREACH] @@ -285,7 +289,8 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max Length */ case IPPROTO_AH: { - struct ip_auth_hdr _ahdr, *ah; + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; if (ntohs(ih->frag_off) & IP_OFFSET) break; @@ -307,7 +312,8 @@ static void dump_packet(const struct nf_loginfo *info, break; } case IPPROTO_ESP: { - struct ip_esp_hdr _esph, *eh; + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; /* Max length: 10 "PROTO=ESP " */ printk("PROTO=ESP "); @@ -385,11 +391,13 @@ ipt_log_packet(unsigned int pf, out ? out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) { - struct net_device *physindev = skb->nf_bridge->physindev; - struct net_device *physoutdev = skb->nf_bridge->physoutdev; + const struct net_device *physindev; + const struct net_device *physoutdev; + physindev = skb->nf_bridge->physindev; if (physindev && in != physindev) printk("PHYSIN=%s ", physindev->name); + physoutdev = skb->nf_bridge->physoutdev; if (physoutdev && out != physoutdev) printk("PHYSOUT=%s ", physoutdev->name); } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index b5b216408ee7..846a0e727218 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -70,7 +70,7 @@ masquerade_target(struct sk_buff **pskb, enum ip_conntrack_info ctinfo; struct nf_nat_range newrange; const struct nf_nat_multi_range_compat *mr; - struct rtable *rt; + const struct rtable *rt; __be32 newsrc; NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); @@ -112,7 +112,7 @@ masquerade_target(struct sk_buff **pskb, static inline int device_cmp(struct nf_conn *i, void *ifindex) { - struct nf_conn_nat *nat = nfct_nat(i); + const struct nf_conn_nat *nat = nfct_nat(i); int ret; if (!nat) @@ -129,7 +129,7 @@ static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + const struct net_device *dev = ptr; if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for @@ -147,7 +147,7 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; if (event == NETDEV_DOWN) { /* IP address was deleted. Search entire table for diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 5c3270d325f3..90f7b7093785 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -122,7 +122,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->check = 0; tcph->check = tcp_v4_check(sizeof(struct tcphdr), niph->saddr, niph->daddr, - csum_partial((char *)tcph, + csum_partial(tcph, sizeof(struct tcphdr), 0)); /* Set DF, id = 0 */ diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 96b6e3514c22..f53f2c4ca4a1 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -68,7 +68,7 @@ static bool ipt_ttl_checkentry(const char *tablename, void *targinfo, unsigned int hook_mask) { - struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = targinfo; if (info->mode > IPT_TTL_MAXMODE) { printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index dfa7afd84763..282eb00fc471 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -334,7 +334,7 @@ static bool ipt_ulog_checkentry(const char *tablename, void *targinfo, unsigned int hookmask) { - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; + const struct ipt_ulog_info *loginfo = targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { DEBUGP("ipt_ULOG: prefix term %i\n", @@ -359,7 +359,7 @@ struct compat_ipt_ulog_info { static void compat_from_user(void *dst, void *src) { - struct compat_ipt_ulog_info *cl = src; + const struct compat_ipt_ulog_info *cl = src; struct ipt_ulog_info l = { .nl_group = cl->nl_group, .copy_range = cl->copy_range, @@ -372,7 +372,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct ipt_ulog_info *l = src; + const struct ipt_ulog_info *l = src; struct compat_ipt_ulog_info cl = { .nl_group = l->nl_group, .copy_range = l->copy_range, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 6b5b7c9f7392..49d503cbab09 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -46,7 +46,8 @@ match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct ip_auth_hdr _ahdr, *ah; + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; const struct ipt_ah *ahinfo = matchinfo; /* Must not be a fragment. */ diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index ba4f5497add3..3129e3106162 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -32,7 +32,8 @@ static inline bool match_tcp(const struct sk_buff *skb, const struct ipt_ecn_info *einfo, bool *hotdrop) { - struct tcphdr _tcph, *th; + struct tcphdr _tcph; + const struct tcphdr *th; /* In practice, TCP match does this, so can't fail. But let's * be good citizens. diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index d632e0e6ef16..d03e6a6eb767 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -323,7 +323,7 @@ struct recent_iter_state { static void *recent_seq_start(struct seq_file *seq, loff_t *pos) { struct recent_iter_state *st = seq->private; - struct recent_table *t = st->table; + const struct recent_table *t = st->table; struct recent_entry *e; loff_t p = *pos; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index b1aa5983a95b..ef0a99e09fd1 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -190,7 +190,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, tcph->check = 0; tcph->check = tcp_v4_check(datalen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, + csum_partial(tcph, datalen, 0)); } } else @@ -278,7 +278,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, udph->check = 0; udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, - csum_partial((char *)udph, + csum_partial(udph, datalen, 0)); if (!udph->check) udph->check = CSUM_MANGLED_0; diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 82966c09fd64..20047ff5492f 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -64,7 +64,7 @@ static bool ip6t_hl_checkentry(const char *tablename, void *targinfo, unsigned int hook_mask) { - struct ip6t_HL_info *info = targinfo; + const struct ip6t_HL_info *info = targinfo; if (info->mode > IP6T_HL_MAXMODE) { printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index aa4b9a14a11c..996168d2ca25 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -48,7 +48,8 @@ static void dump_packet(const struct nf_loginfo *info, { u_int8_t currenthdr; int fragment; - struct ipv6hdr _ip6h, *ih; + struct ipv6hdr _ip6h; + const struct ipv6hdr *ih; unsigned int ptr; unsigned int hdrlen = 0; unsigned int logflags; @@ -78,7 +79,8 @@ static void dump_packet(const struct nf_loginfo *info, ptr = ip6hoff + sizeof(struct ipv6hdr); currenthdr = ih->nexthdr; while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { - struct ipv6_opt_hdr _hdr, *hp; + struct ipv6_opt_hdr _hdr; + const struct ipv6_opt_hdr *hp; hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); if (hp == NULL) { @@ -92,7 +94,8 @@ static void dump_packet(const struct nf_loginfo *info, switch (currenthdr) { case IPPROTO_FRAGMENT: { - struct frag_hdr _fhdr, *fh; + struct frag_hdr _fhdr; + const struct frag_hdr *fh; printk("FRAG:"); fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), @@ -131,7 +134,8 @@ static void dump_packet(const struct nf_loginfo *info, /* Max Length */ case IPPROTO_AH: if (logflags & IP6T_LOG_IPOPT) { - struct ip_auth_hdr _ahdr, *ah; + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; /* Max length: 3 "AH " */ printk("AH "); @@ -162,7 +166,8 @@ static void dump_packet(const struct nf_loginfo *info, break; case IPPROTO_ESP: if (logflags & IP6T_LOG_IPOPT) { - struct ip_esp_hdr _esph, *eh; + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; /* Max length: 4 "ESP " */ printk("ESP "); @@ -202,7 +207,8 @@ static void dump_packet(const struct nf_loginfo *info, switch (currenthdr) { case IPPROTO_TCP: { - struct tcphdr _tcph, *th; + struct tcphdr _tcph; + const struct tcphdr *th; /* Max length: 10 "PROTO=TCP " */ printk("PROTO=TCP "); @@ -250,7 +256,8 @@ static void dump_packet(const struct nf_loginfo *info, if ((logflags & IP6T_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { - u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; + u_int8_t _opt[60 - sizeof(struct tcphdr)]; + const u_int8_t *op; unsigned int i; unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr); @@ -273,7 +280,8 @@ static void dump_packet(const struct nf_loginfo *info, } case IPPROTO_UDP: case IPPROTO_UDPLITE: { - struct udphdr _udph, *uh; + struct udphdr _udph; + const struct udphdr *uh; if (currenthdr == IPPROTO_UDP) /* Max length: 10 "PROTO=UDP " */ @@ -298,7 +306,8 @@ static void dump_packet(const struct nf_loginfo *info, break; } case IPPROTO_ICMPV6: { - struct icmp6hdr _icmp6h, *ic; + struct icmp6hdr _icmp6h; + const struct icmp6hdr *ic; /* Max length: 13 "PROTO=ICMPv6 " */ printk("PROTO=ICMPv6 "); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8639a0599bf5..4df07f0adf1d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -159,7 +159,7 @@ static void send_reset(struct sk_buff *oldskb) tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, &ipv6_hdr(nskb)->daddr, sizeof(struct tcphdr), IPPROTO_TCP, - csum_partial((char *)tcph, + csum_partial(tcph, sizeof(struct tcphdr), 0)); nf_ct_attach(nskb, oldskb); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 8fc00bdfc38b..b4b1d282761c 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -51,7 +51,8 @@ match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct ip_auth_hdr *ah, _ah; + struct ip_auth_hdr _ah; + const struct ip_auth_hdr *ah; const struct ip6t_ah *ahinfo = matchinfo; unsigned int ptr; unsigned int hdrlen = 0; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index f0aed898e8b7..e0e416bb284a 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -50,7 +50,8 @@ match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct frag_hdr _frag, *fh; + struct frag_hdr _frag; + const struct frag_hdr *fh; const struct ip6t_frag *fraginfo = matchinfo; unsigned int ptr; int err; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 6fdd79785f32..bbd2615ad2e1 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -57,14 +57,17 @@ match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct ipv6_opt_hdr _optsh, *oh; + struct ipv6_opt_hdr _optsh; + const struct ipv6_opt_hdr *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; bool ret = false; - u8 _opttype, *tp = NULL; - u8 _optlen, *lp = NULL; + u8 _opttype; + u8 _optlen; + const u_int8_t *tp = NULL; + const u_int8_t *lp = NULL; unsigned int optlen; int err; diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index a3008b41d24b..e94fdd82f284 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -47,7 +47,8 @@ match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct ip6_mh _mh, *mh; + struct ip6_mh _mh; + const struct ip6_mh *mh; const struct ip6t_mh *mhinfo = matchinfo; /* Must not be a fragment. */ diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index e991ed4a692e..bc5ff4b1af39 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -52,13 +52,15 @@ match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct ipv6_rt_hdr _route, *rh; + struct ipv6_rt_hdr _route; + const struct ipv6_rt_hdr *rh; const struct ip6t_rt *rtinfo = matchinfo; unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; bool ret = false; - struct in6_addr *ap, _addr; + struct in6_addr _addr; + const struct in6_addr *ap; int err; err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); @@ -100,9 +102,9 @@ match(const struct sk_buff *skb, !!(rtinfo->invflags & IP6T_RT_INV_LEN)))); DEBUGP("res %02X %02X %02X ", (rtinfo->flags & IP6T_RT_RES), - ((struct rt0_hdr *)rh)->reserved, + ((const struct rt0_hdr *)rh)->reserved, !((rtinfo->flags & IP6T_RT_RES) && - (((struct rt0_hdr *)rh)->reserved))); + (((const struct rt0_hdr *)rh)->reserved))); ret = (rh != NULL) && diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 3aaabec70d19..381a77cf0c9e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -231,13 +231,13 @@ void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, { __be32 diff[] = { ~from, to }; if (skb->ip_summed != CHECKSUM_PARTIAL) { - *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), + *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_partial((char *)diff, sizeof(diff), + skb->csum = ~csum_partial(diff, sizeof(diff), ~skb->csum); } else if (pseudohdr) - *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), + *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); } EXPORT_SYMBOL(nf_proto_csum_replace4); diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 4e8aa1b0cba2..4284a59b03e1 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -83,7 +83,7 @@ checkentry(const char *tablename, void *targinfo, unsigned int hook_mask) { - struct xt_connmark_target_info *matchinfo = targinfo; + const struct xt_connmark_target_info *matchinfo = targinfo; if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " @@ -121,7 +121,7 @@ struct compat_xt_connmark_target_info { static void compat_from_user(void *dst, void *src) { - struct compat_xt_connmark_target_info *cm = src; + const struct compat_xt_connmark_target_info *cm = src; struct xt_connmark_target_info m = { .mark = cm->mark, .mask = cm->mask, @@ -132,7 +132,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_connmark_target_info *m = src; + const struct xt_connmark_target_info *m = src; struct compat_xt_connmark_target_info cm = { .mark = m->mark, .mask = m->mask, diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index ab2f0d016953..8d5e154013d6 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -33,7 +33,7 @@ MODULE_ALIAS("ip6t_CONNSECMARK"); * If the packet has a security mark and the connection does not, copy * the security mark from the packet to the connection. */ -static void secmark_save(struct sk_buff *skb) +static void secmark_save(const struct sk_buff *skb) { if (skb->secmark) { struct nf_conn *ct; @@ -89,7 +89,7 @@ static bool checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { - struct xt_connsecmark_target_info *info = targinfo; + const struct xt_connsecmark_target_info *info = targinfo; if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index bd9cdf29cc3b..6b7369fc263f 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -72,7 +72,7 @@ checkentry_v0(const char *tablename, void *targinfo, unsigned int hook_mask) { - struct xt_mark_target_info *markinfo = targinfo; + const struct xt_mark_target_info *markinfo = targinfo; if (markinfo->mark > 0xffffffff) { printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); @@ -88,7 +88,7 @@ checkentry_v1(const char *tablename, void *targinfo, unsigned int hook_mask) { - struct xt_mark_target_info_v1 *markinfo = targinfo; + const struct xt_mark_target_info_v1 *markinfo = targinfo; if (markinfo->mode != XT_MARK_SET && markinfo->mode != XT_MARK_AND @@ -114,7 +114,7 @@ struct compat_xt_mark_target_info_v1 { static void compat_from_user_v1(void *dst, void *src) { - struct compat_xt_mark_target_info_v1 *cm = src; + const struct compat_xt_mark_target_info_v1 *cm = src; struct xt_mark_target_info_v1 m = { .mark = cm->mark, .mode = cm->mode, @@ -124,7 +124,7 @@ static void compat_from_user_v1(void *dst, void *src) static int compat_to_user_v1(void __user *dst, void *src) { - struct xt_mark_target_info_v1 *m = src; + const struct xt_mark_target_info_v1 *m = src; struct compat_xt_mark_target_info_v1 cm = { .mark = m->mark, .mode = m->mode, diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 0c6f2838cc98..20e55d588a3c 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -43,7 +43,7 @@ nflog_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targetinfo, unsigned int hookmask) { - struct xt_nflog_info *info = targetinfo; + const struct xt_nflog_info *info = targetinfo; if (info->flags & ~XT_NFLOG_MASK) return false; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 12541784109a..99c246e45c42 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -26,7 +26,7 @@ match(const struct sk_buff *skb, bool *hotdrop) { const struct xt_connbytes_info *sinfo = matchinfo; - struct nf_conn *ct; + const struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 94d5251b3d88..71f3c1a5d5e5 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -41,7 +41,7 @@ match(const struct sk_buff *skb, bool *hotdrop) { const struct xt_connmark_info *info = matchinfo; - struct nf_conn *ct; + const struct nf_conn *ct; enum ip_conntrack_info ctinfo; ct = nf_ct_get(skb, &ctinfo); @@ -58,7 +58,7 @@ checkentry(const char *tablename, void *matchinfo, unsigned int hook_mask) { - struct xt_connmark_info *cm = matchinfo; + const struct xt_connmark_info *cm = matchinfo; if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { printk(KERN_WARNING "connmark: only support 32bit mark\n"); @@ -88,7 +88,7 @@ struct compat_xt_connmark_info { static void compat_from_user(void *dst, void *src) { - struct compat_xt_connmark_info *cm = src; + const struct compat_xt_connmark_info *cm = src; struct xt_connmark_info m = { .mark = cm->mark, .mask = cm->mask, @@ -99,7 +99,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_connmark_info *m = src; + const struct xt_connmark_info *m = src; struct compat_xt_connmark_info cm = { .mark = m->mark, .mask = m->mask, diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 87364f58a4b9..9e3ec31f2016 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -30,11 +30,11 @@ match(const struct sk_buff *skb, bool *hotdrop) { const struct xt_conntrack_info *sinfo = matchinfo; - struct nf_conn *ct; + const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int statebit; - ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) @@ -150,7 +150,7 @@ struct compat_xt_conntrack_info static void compat_from_user(void *dst, void *src) { - struct compat_xt_conntrack_info *cm = src; + const struct compat_xt_conntrack_info *cm = src; struct xt_conntrack_info m = { .statemask = cm->statemask, .statusmask = cm->statusmask, @@ -167,7 +167,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_conntrack_info *m = src; + const struct xt_conntrack_info *m = src; struct compat_xt_conntrack_info cm = { .statemask = m->statemask, .statusmask = m->statusmask, diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 24895902cfe0..1b77c5bcb348 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -39,7 +39,7 @@ dccp_find_option(u_int8_t option, bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ - unsigned char *op; + const unsigned char *op; unsigned int optoff = __dccp_hdr_len(dh); unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh); unsigned int i; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a1b5996447dd..deb5890aa3ac 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -95,7 +95,7 @@ static HLIST_HEAD(hashlimit_htables); static struct kmem_cache *hashlimit_cachep __read_mostly; static inline bool dst_cmp(const struct dsthash_ent *ent, - struct dsthash_dst *b) + const struct dsthash_dst *b) { return !memcmp(&ent->dst, b, sizeof(ent->dst)); } @@ -107,7 +107,8 @@ hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) } static struct dsthash_ent * -dsthash_find(const struct xt_hashlimit_htable *ht, struct dsthash_dst *dst) +dsthash_find(const struct xt_hashlimit_htable *ht, + const struct dsthash_dst *dst) { struct dsthash_ent *ent; struct hlist_node *pos; @@ -123,7 +124,8 @@ dsthash_find(const struct xt_hashlimit_htable *ht, struct dsthash_dst *dst) /* allocate dsthash_ent, initialize dst, put in htable and lock it */ static struct dsthash_ent * -dsthash_alloc_init(struct xt_hashlimit_htable *ht, struct dsthash_dst *dst) +dsthash_alloc_init(struct xt_hashlimit_htable *ht, + const struct dsthash_dst *dst) { struct dsthash_ent *ent; @@ -228,19 +230,21 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family) return 0; } -static bool select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) +static bool select_all(const struct xt_hashlimit_htable *ht, + const struct dsthash_ent *he) { return 1; } -static bool select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) +static bool select_gc(const struct xt_hashlimit_htable *ht, + const struct dsthash_ent *he) { return (jiffies >= he->expires); } static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, - bool (*select)(struct xt_hashlimit_htable *ht, - struct dsthash_ent *he)) + bool (*select)(const struct xt_hashlimit_htable *ht, + const struct dsthash_ent *he)) { unsigned int i; @@ -283,7 +287,8 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo) vfree(hinfo); } -static struct xt_hashlimit_htable *htable_find_get(char *name, int family) +static struct xt_hashlimit_htable *htable_find_get(const char *name, + int family) { struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; @@ -368,7 +373,8 @@ static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) } static int -hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst, +hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, + struct dsthash_dst *dst, const struct sk_buff *skb, unsigned int protoff) { __be16 _ports[2], *ports; @@ -443,8 +449,8 @@ hashlimit_match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct xt_hashlimit_info *r = - ((struct xt_hashlimit_info *)matchinfo)->u.master; + const struct xt_hashlimit_info *r = + ((const struct xt_hashlimit_info *)matchinfo)->u.master; struct xt_hashlimit_htable *hinfo = r->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; @@ -543,7 +549,7 @@ hashlimit_checkentry(const char *tablename, static void hashlimit_destroy(const struct xt_match *match, void *matchinfo) { - struct xt_hashlimit_info *r = matchinfo; + const struct xt_hashlimit_info *r = matchinfo; htable_put(r->hinfo); } diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index a2688b807a99..047d0046b28c 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -39,12 +39,12 @@ match(const struct sk_buff *skb, bool *hotdrop) { const struct xt_helper_info *info = matchinfo; - struct nf_conn *ct; - struct nf_conn_help *master_help; + const struct nf_conn *ct; + const struct nf_conn_help *master_help; enum ip_conntrack_info ctinfo; bool ret = info->invert; - ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (!ct) { DEBUGP("xt_helper: Eek! invalid conntrack?\n"); return ret; diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 2717aa65246a..b042419462af 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -67,7 +67,8 @@ ipt_limit_match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct xt_rateinfo *r = ((struct xt_rateinfo *)matchinfo)->master; + struct xt_rateinfo *r = + ((const struct xt_rateinfo *)matchinfo)->master; unsigned long now = jiffies; spin_lock_bh(&limit_lock); @@ -144,7 +145,7 @@ struct compat_xt_rateinfo { * master pointer, which does not need to be preserved. */ static void compat_from_user(void *dst, void *src) { - struct compat_xt_rateinfo *cm = src; + const struct compat_xt_rateinfo *cm = src; struct xt_rateinfo m = { .avg = cm->avg, .burst = cm->burst, @@ -158,7 +159,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_rateinfo *m = src; + const struct xt_rateinfo *m = src; struct compat_xt_rateinfo cm = { .avg = m->avg, .burst = m->burst, diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 83ed806764b4..b8ab79452f08 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -60,7 +60,7 @@ struct compat_xt_mark_info { static void compat_from_user(void *dst, void *src) { - struct compat_xt_mark_info *cm = src; + const struct compat_xt_mark_info *cm = src; struct xt_mark_info m = { .mark = cm->mark, .mask = cm->mask, @@ -71,7 +71,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_mark_info *m = src; + const struct xt_mark_info *m = src; struct compat_xt_mark_info cm = { .mark = m->mark, .mask = m->mask, diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 34f0d3e44ea7..467b2dcf7e6b 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -36,7 +36,7 @@ match(const struct sk_buff *skb, const struct xt_physdev_info *info = matchinfo; bool ret; const char *indev, *outdev; - struct nf_bridge_info *nf_bridge; + const struct nf_bridge_info *nf_bridge; /* Not a bridged IP packet or no info available yet: * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 1534de55cdb6..5ab6d71f8d05 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -34,7 +34,7 @@ xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m, } static inline bool -match_xfrm_state(struct xfrm_state *x, const struct xt_policy_elem *e, +match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e, unsigned short family) { #define MATCH_ADDR(x,y,z) (!e->match.x || \ @@ -55,7 +55,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; - struct sec_path *sp = skb->sp; + const struct sec_path *sp = skb->sp; int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; @@ -85,7 +85,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; - struct dst_entry *dst = skb->dst; + const struct dst_entry *dst = skb->dst; int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index e13d62a8caba..feb130d14f2c 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -22,7 +22,8 @@ match(const struct sk_buff *skb, const struct xt_match *match, const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { - struct xt_quota_info *q = ((struct xt_quota_info *)matchinfo)->master; + struct xt_quota_info *q = + ((const struct xt_quota_info *)matchinfo)->master; bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh("a_lock); @@ -43,7 +44,7 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) { - struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; + struct xt_quota_info *q = matchinfo; if (q->flags & ~XT_QUOTA_MASK) return false; diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index ad82c132694c..44b807d279ad 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -32,7 +32,7 @@ match(const struct sk_buff *skb, bool *hotdrop) { const struct xt_realm_info *info = matchinfo; - struct dst_entry *dst = skb->dst; + const struct dst_entry *dst = skb->dst; return (info->id == (dst->tclassid & info->mask)) ^ info->invert; } diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 0af42892e9dc..3da4978287f3 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -57,7 +57,7 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) { - struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; + struct xt_statistic_info *info = matchinfo; if (info->mode > XT_STATISTIC_MODE_MAX || info->flags & ~XT_STATISTIC_MASK) -- cgit v1.2.3 From 170b197c0afc621179f0f82284e331e3c252b7cf Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:17:36 -0700 Subject: [NETFILTER]: Remove incorrect inline markers device_cmp: the function's address is taken (call to nf_ct_iterate_cleanup) alloc_null_binding: referenced externally Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 846a0e727218..f136ef7f23f8 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -109,7 +109,7 @@ masquerade_target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &newrange, hooknum); } -static inline int +static int device_cmp(struct nf_conn *i, void *ifindex) { const struct nf_conn_nat *nat = nfct_nat(i); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index fc3d9437beba..ea1a07c74fe3 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -172,7 +172,7 @@ static bool ipt_dnat_checkentry(const char *tablename, return true; } -inline unsigned int +unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { /* Force range to this IP; let proto decide mapping for -- cgit v1.2.3 From 7c4e36bc172ae1accde835b880fdc4a2c2a3df57 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:19:08 -0700 Subject: [NETFILTER]: Remove redundant parentheses/braces Removes redundant parentheses and braces (And add one pair in a xt_tcpudp.c macro). Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 11 ++++------- net/ipv4/netfilter/ipt_ECN.c | 4 ++-- net/ipv4/netfilter/ipt_TTL.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 9 +++------ net/ipv4/netfilter/ipt_iprange.c | 8 ++++---- net/ipv4/netfilter/ipt_recent.c | 9 +++------ net/ipv4/netfilter/ipt_ttl.c | 12 ++++-------- net/ipv6/netfilter/ip6t_HL.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 12 ++++++------ net/ipv6/netfilter/ip6t_eui64.c | 6 +++--- net/ipv6/netfilter/ip6t_frag.c | 18 +++++++++--------- net/ipv6/netfilter/ip6t_hbh.c | 2 +- net/ipv6/netfilter/ip6t_hl.c | 8 ++++---- net/ipv6/netfilter/ip6t_ipv6header.c | 6 +++--- net/ipv6/netfilter/ip6t_owner.c | 6 ++---- net/ipv6/netfilter/ip6t_rt.c | 16 ++++++++-------- net/netfilter/xt_DSCP.c | 2 +- net/netfilter/xt_TCPMSS.c | 4 ++-- net/netfilter/xt_connbytes.c | 4 ++-- net/netfilter/xt_connmark.c | 2 +- net/netfilter/xt_dccp.c | 10 +++++----- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_length.c | 4 ++-- net/netfilter/xt_mac.c | 10 +++++----- net/netfilter/xt_pkttype.c | 4 ++-- net/netfilter/xt_sctp.c | 23 +++++++++-------------- net/netfilter/xt_tcpudp.c | 2 +- 28 files changed, 90 insertions(+), 110 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2de7ae0180aa..5de13b44b1ca 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -122,9 +122,8 @@ __clusterip_config_find(__be32 clusterip) list_for_each(pos, &clusterip_configs) { struct clusterip_config *c = list_entry(pos, struct clusterip_config, list); - if (c->clusterip == clusterip) { + if (c->clusterip == clusterip) return c; - } } return NULL; @@ -155,9 +154,8 @@ clusterip_config_init_nodelist(struct clusterip_config *c, { int n; - for (n = 0; n < i->num_local_nodes; n++) { + for (n = 0; n < i->num_local_nodes; n++) set_bit(i->local_nodes[n] - 1, &c->local_nodes); - } } static struct clusterip_config * @@ -255,10 +253,9 @@ clusterip_hashfn(const struct sk_buff *skb, dport = ports[1]; break; default: - if (net_ratelimit()) { + if (net_ratelimit()) printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", iph->protocol); - } sport = dport = 0; } @@ -286,7 +283,7 @@ clusterip_hashfn(const struct sk_buff *skb, } /* node numbers are 1..n, not 0..n */ - return ((hashval % config->num_total_nodes)+1); + return (hashval % config->num_total_nodes) + 1; } static inline int diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 02367012fc74..a647c1db86dd 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -58,8 +58,8 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) || tcph->ece == einfo->proto.tcp.ece) && - ((!(einfo->operation & IPT_ECN_OP_SET_CWR) || - tcph->cwr == einfo->proto.tcp.cwr))) + (!(einfo->operation & IPT_ECN_OP_SET_CWR) || + tcph->cwr == einfo->proto.tcp.cwr)) return true; if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index f53f2c4ca4a1..737830b68ade 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -75,7 +75,7 @@ static bool ipt_ttl_checkentry(const char *tablename, info->mode); return false; } - if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) + if (info->mode != IPT_TTL_SET && info->ttl == 0) return false; return true; } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 282eb00fc471..5b25ca688784 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -179,12 +179,10 @@ static void ipt_ulog_packet(unsigned int hooknum, unsigned int groupnum = ffs(loginfo->nl_group) - 1; /* calculate the size of the skb needed */ - if ((loginfo->copy_range == 0) || - (loginfo->copy_range > skb->len)) { + if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len) copy_len = skb->len; - } else { + else copy_len = loginfo->copy_range; - } size = NLMSG_SPACE(sizeof(*pm) + copy_len); @@ -257,9 +255,8 @@ static void ipt_ulog_packet(unsigned int hooknum, BUG(); /* check if we are building multi-part messages */ - if (ub->qlen > 1) { + if (ub->qlen > 1) ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; - } ub->lastnlh = nlh; diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index b266d98aac8c..854281c62008 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c @@ -35,8 +35,8 @@ match(const struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); if (info->flags & IPRANGE_SRC) { - if (((ntohl(iph->saddr) < ntohl(info->src.min_ip)) - || (ntohl(iph->saddr) > ntohl(info->src.max_ip))) + if ((ntohl(iph->saddr) < ntohl(info->src.min_ip) + || ntohl(iph->saddr) > ntohl(info->src.max_ip)) ^ !!(info->flags & IPRANGE_SRC_INV)) { DEBUGP("src IP %u.%u.%u.%u NOT in range %s" "%u.%u.%u.%u-%u.%u.%u.%u\n", @@ -48,8 +48,8 @@ match(const struct sk_buff *skb, } } if (info->flags & IPRANGE_DST) { - if (((ntohl(iph->daddr) < ntohl(info->dst.min_ip)) - || (ntohl(iph->daddr) > ntohl(info->dst.max_ip))) + if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip) + || ntohl(iph->daddr) > ntohl(info->dst.max_ip)) ^ !!(info->flags & IPRANGE_DST_INV)) { DEBUGP("dst IP %u.%u.%u.%u NOT in range %s" "%u.%u.%u.%u-%u.%u.%u.%u\n", diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index d03e6a6eb767..68f7181e412d 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -163,10 +163,9 @@ static void recent_table_flush(struct recent_table *t) struct recent_entry *e, *next; unsigned int i; - for (i = 0; i < ip_list_hash_size; i++) { + for (i = 0; i < ip_list_hash_size; i++) list_for_each_entry_safe(e, next, &t->iphash[i], list) recent_entry_remove(t, e); - } } static bool @@ -329,12 +328,10 @@ static void *recent_seq_start(struct seq_file *seq, loff_t *pos) spin_lock_bh(&recent_lock); - for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) { - list_for_each_entry(e, &t->iphash[st->bucket], list) { + for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) + list_for_each_entry(e, &t->iphash[st->bucket], list) if (p-- == 0) return e; - } - } return NULL; } diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index 82fe4ea8ab79..59a644db4d74 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -28,17 +28,13 @@ static bool match(const struct sk_buff *skb, switch (info->mode) { case IPT_TTL_EQ: - return (ttl == info->ttl); - break; + return ttl == info->ttl; case IPT_TTL_NE: - return (!(ttl == info->ttl)); - break; + return ttl != info->ttl; case IPT_TTL_LT: - return (ttl < info->ttl); - break; + return ttl < info->ttl; case IPT_TTL_GT: - return (ttl > info->ttl); - break; + return ttl > info->ttl; default: printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", info->mode); diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 20047ff5492f..33c4cb8a5c43 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -71,7 +71,7 @@ static bool ip6t_hl_checkentry(const char *tablename, info->mode); return false; } - if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) { + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " "make sense with value 0\n"); return false; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 4df07f0adf1d..0fa1f2cf9fba 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -69,7 +69,7 @@ static void send_reset(struct sk_buff *oldskb) otcplen = oldskb->len - tcphoff; /* IP header checks: fragment, too short. */ - if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) { + if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n", proto, otcplen); return; diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index b4b1d282761c..fbf3d7748dc2 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -78,9 +78,9 @@ match(const struct sk_buff *skb, DEBUGP("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi)); DEBUGP("IPv6 AH spi %02X ", - (spi_match(ahinfo->spis[0], ahinfo->spis[1], - ntohl(ah->spi), - !!(ahinfo->invflags & IP6T_AH_INV_SPI)))); + spi_match(ahinfo->spis[0], ahinfo->spis[1], + ntohl(ah->spi), + !!(ahinfo->invflags & IP6T_AH_INV_SPI))); DEBUGP("len %02X %04X %02X ", ahinfo->hdrlen, hdrlen, (!ahinfo->hdrlen || @@ -92,9 +92,9 @@ match(const struct sk_buff *skb, return (ah != NULL) && - (spi_match(ahinfo->spis[0], ahinfo->spis[1], - ntohl(ah->spi), - !!(ahinfo->invflags & IP6T_AH_INV_SPI))) + spi_match(ahinfo->spis[0], ahinfo->spis[1], + ntohl(ah->spi), + !!(ahinfo->invflags & IP6T_AH_INV_SPI)) && (!ahinfo->hdrlen || (ahinfo->hdrlen == hdrlen) ^ diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index bebb12a1d0e6..2af99fc6bdc9 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -33,7 +33,7 @@ match(const struct sk_buff *skb, int i = 0; if (!(skb_mac_header(skb) >= skb->head && - (skb_mac_header(skb) + ETH_HLEN) <= skb->data) && + skb_mac_header(skb) + ETH_HLEN <= skb->data) && offset != 0) { *hotdrop = true; return false; @@ -50,8 +50,8 @@ match(const struct sk_buff *skb, eui64[0] |= 0x02; i = 0; - while ((ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i]) - && (i < 8)) + while (ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i] + && i < 8) i++; if (i == 8) diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index e0e416bb284a..65482af711de 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -77,35 +77,35 @@ match(const struct sk_buff *skb, ntohl(fh->identification)); DEBUGP("IPv6 FRAG id %02X ", - (id_match(fraginfo->ids[0], fraginfo->ids[1], + id_match(fraginfo->ids[0], fraginfo->ids[1], ntohl(fh->identification), - !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))); + !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))); DEBUGP("res %02X %02X%04X %02X ", - (fraginfo->flags & IP6T_FRAG_RES), fh->reserved, + fraginfo->flags & IP6T_FRAG_RES, fh->reserved, ntohs(fh->frag_off) & 0x6, !((fraginfo->flags & IP6T_FRAG_RES) && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); DEBUGP("first %02X %02X %02X ", - (fraginfo->flags & IP6T_FRAG_FST), + fraginfo->flags & IP6T_FRAG_FST, ntohs(fh->frag_off) & ~0x7, !((fraginfo->flags & IP6T_FRAG_FST) && (ntohs(fh->frag_off) & ~0x7))); DEBUGP("mf %02X %02X %02X ", - (fraginfo->flags & IP6T_FRAG_MF), + fraginfo->flags & IP6T_FRAG_MF, ntohs(fh->frag_off) & IP6_MF, !((fraginfo->flags & IP6T_FRAG_MF) && !((ntohs(fh->frag_off) & IP6_MF)))); DEBUGP("last %02X %02X %02X\n", - (fraginfo->flags & IP6T_FRAG_NMF), + fraginfo->flags & IP6T_FRAG_NMF, ntohs(fh->frag_off) & IP6_MF, !((fraginfo->flags & IP6T_FRAG_NMF) && (ntohs(fh->frag_off) & IP6_MF))); return (fh != NULL) && - (id_match(fraginfo->ids[0], fraginfo->ids[1], - ntohl(fh->identification), - !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))) + id_match(fraginfo->ids[0], fraginfo->ids[1], + ntohl(fh->identification), + !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) && !((fraginfo->flags & IP6T_FRAG_RES) && (fh->reserved || (ntohs(fh->frag_off) & 0x6))) diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index bbd2615ad2e1..8eecac14ddaa 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -160,7 +160,7 @@ match(const struct sk_buff *skb, DEBUGP("len%04X \n", optlen); if ((ptr > skb->len - optlen || hdrlen < optlen) && - (temp < optinfo->optsnr - 1)) { + temp < optinfo->optsnr - 1) { DEBUGP("new pointer is too large! \n"); break; } diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index b933e84a06a4..ddee088f5f10 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -29,16 +29,16 @@ static bool match(const struct sk_buff *skb, switch (info->mode) { case IP6T_HL_EQ: - return (ip6h->hop_limit == info->hop_limit); + return ip6h->hop_limit == info->hop_limit; break; case IP6T_HL_NE: - return (!(ip6h->hop_limit == info->hop_limit)); + return ip6h->hop_limit != info->hop_limit; break; case IP6T_HL_LT: - return (ip6h->hop_limit < info->hop_limit); + return ip6h->hop_limit < info->hop_limit; break; case IP6T_HL_GT: - return (ip6h->hop_limit > info->hop_limit); + return ip6h->hop_limit > info->hop_limit; break; default: printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 5ba6ef0f1b1b..ca020ce1c4a3 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -74,9 +74,9 @@ ipv6header_match(const struct sk_buff *skb, BUG_ON(hp == NULL); /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { + if (nexthdr == NEXTHDR_FRAGMENT) hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) + else if (nexthdr == NEXTHDR_AUTH) hdrlen = (hp->hdrlen + 2) << 2; else hdrlen = ipv6_optlen(hp); @@ -110,7 +110,7 @@ ipv6header_match(const struct sk_buff *skb, break; } - if ((nexthdr != NEXTHDR_NONE) && (nexthdr != NEXTHDR_ESP)) + if (nexthdr != NEXTHDR_NONE && nexthdr != NEXTHDR_ESP) temp |= MASK_PROTO; if (info->modeflag) diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 8cb6c94b4a20..d2bf3204aeac 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -38,17 +38,15 @@ match(const struct sk_buff *skb, if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) return false; - if (info->match & IP6T_OWNER_UID) { + if (info->match & IP6T_OWNER_UID) if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ !!(info->invert & IP6T_OWNER_UID)) return false; - } - if (info->match & IP6T_OWNER_GID) { + if (info->match & IP6T_OWNER_GID) if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ !!(info->invert & IP6T_OWNER_GID)) return false; - } return true; } diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index bc5ff4b1af39..f86fdcdd8cb8 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -87,9 +87,9 @@ match(const struct sk_buff *skb, DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); DEBUGP("IPv6 RT segsleft %02X ", - (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS)))); + segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], + rh->segments_left, + !!(rtinfo->invflags & IP6T_RT_INV_SGS))); DEBUGP("type %02X %02X %02X ", rtinfo->rt_type, rh->type, (!(rtinfo->flags & IP6T_RT_TYP) || @@ -97,11 +97,11 @@ match(const struct sk_buff *skb, !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); DEBUGP("len %02X %04X %02X ", rtinfo->hdrlen, hdrlen, - (!(rtinfo->flags & IP6T_RT_LEN) || + !(rtinfo->flags & IP6T_RT_LEN) || ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN)))); + !!(rtinfo->invflags & IP6T_RT_INV_LEN))); DEBUGP("res %02X %02X %02X ", - (rtinfo->flags & IP6T_RT_RES), + rtinfo->flags & IP6T_RT_RES, ((const struct rt0_hdr *)rh)->reserved, !((rtinfo->flags & IP6T_RT_RES) && (((const struct rt0_hdr *)rh)->reserved))); @@ -188,8 +188,8 @@ match(const struct sk_buff *skb, break; } DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr); - if ((temp == rtinfo->addrnr) && - (temp == (unsigned int)((hdrlen - 8) / 16))) + if (temp == rtinfo->addrnr && + temp == (unsigned int)((hdrlen - 8) / 16)) return ret; else return false; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 2d779f6902dc..ed6b524064f0 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -74,7 +74,7 @@ static bool checkentry(const char *tablename, { const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; - if ((dscp > XT_DSCP_MAX)) { + if (dscp > XT_DSCP_MAX) { printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); return false; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 075051acb554..6ae6df993aa2 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -93,7 +93,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb, return 0; opt[i+2] = (newmss & 0xff00) >> 8; - opt[i+3] = (newmss & 0x00ff); + opt[i+3] = newmss & 0x00ff; nf_proto_csum_replace2(&tcph->check, *pskb, htons(oldmss), htons(newmss), 0); @@ -126,7 +126,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb, opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; - opt[3] = (newmss & 0x00ff); + opt[3] = newmss & 0x00ff; nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 99c246e45c42..d9b2e75fbab2 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -90,9 +90,9 @@ match(const struct sk_buff *skb, } if (sinfo->count.to) - return (what <= sinfo->count.to && what >= sinfo->count.from); + return what <= sinfo->count.to && what >= sinfo->count.from; else - return (what >= sinfo->count.from); + return what >= sinfo->count.from; } static bool check(const char *tablename, diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 71f3c1a5d5e5..3a6e16d4edcd 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -48,7 +48,7 @@ match(const struct sk_buff *skb, if (!ct) return false; - return (((ct->mark) & info->mask) == info->mark) ^ info->invert; + return ((ct->mark & info->mask) == info->mark) ^ info->invert; } static bool diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 1b77c5bcb348..f07a68d445ca 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -81,7 +81,7 @@ dccp_find_option(u_int8_t option, static inline bool match_types(const struct dccp_hdr *dh, u_int16_t typemask) { - return (typemask & (1 << dh->dccph_type)); + return typemask & (1 << dh->dccph_type); } static inline bool @@ -113,11 +113,11 @@ match(const struct sk_buff *skb, return false; } - return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0]) - && (ntohs(dh->dccph_sport) <= info->spts[1])), + return DCCHECK(ntohs(dh->dccph_sport) >= info->spts[0] + && ntohs(dh->dccph_sport) <= info->spts[1], XT_DCCP_SRC_PORTS, info->flags, info->invflags) - && DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0]) - && (ntohs(dh->dccph_dport) <= info->dpts[1])), + && DCCHECK(ntohs(dh->dccph_dport) >= info->dpts[0] + && ntohs(dh->dccph_dport) <= info->dpts[1], XT_DCCP_DEST_PORTS, info->flags, info->invflags) && DCCHECK(match_types(dh, info->typemask), XT_DCCP_TYPE, info->flags, info->invflags) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index deb5890aa3ac..094da6e066b8 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -239,7 +239,7 @@ static bool select_all(const struct xt_hashlimit_htable *ht, static bool select_gc(const struct xt_hashlimit_htable *ht, const struct dsthash_ent *he) { - return (jiffies >= he->expires); + return jiffies >= he->expires; } static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index 57bcfacde594..ea4880bd31ea 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -47,8 +47,8 @@ match6(const struct sk_buff *skb, bool *hotdrop) { const struct xt_length_info *info = matchinfo; - const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) + - sizeof(struct ipv6hdr)); + const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr); return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 86022027dd63..28ec08e7511b 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -37,11 +37,11 @@ match(const struct sk_buff *skb, const struct xt_mac_info *info = matchinfo; /* Is mac pointer valid? */ - return (skb_mac_header(skb) >= skb->head && - (skb_mac_header(skb) + ETH_HLEN) <= skb->data - /* If so, compare... */ - && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr)) - ^ info->invert)); + return skb_mac_header(skb) >= skb->head && + skb_mac_header(skb) + ETH_HLEN <= skb->data + /* If so, compare... */ + && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr)) + ^ info->invert); } static struct xt_match xt_mac_match[] = { diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 63239727bc22..e4c420b5713b 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -34,9 +34,9 @@ static bool match(const struct sk_buff *skb, const struct xt_pkttype_info *info = matchinfo; if (skb->pkt_type == PACKET_LOOPBACK) - type = (MULTICAST(ip_hdr(skb)->daddr) + type = MULTICAST(ip_hdr(skb)->daddr) ? PACKET_MULTICAST - : PACKET_BROADCAST); + : PACKET_BROADCAST; else type = skb->pkt_type; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 22df338b3934..fefc846188f3 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -31,11 +31,9 @@ match_flags(const struct xt_sctp_flag_info *flag_info, { int i; - for (i = 0; i < flag_count; i++) { - if (flag_info[i].chunktype == chunktype) { + for (i = 0; i < flag_count; i++) + if (flag_info[i].chunktype == chunktype) return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag; - } - } return true; } @@ -56,9 +54,8 @@ match_packet(const struct sk_buff *skb, int i = 0; #endif - if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) { + if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap); - } do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); @@ -86,16 +83,14 @@ match_packet(const struct sk_buff *skb, case SCTP_CHUNK_MATCH_ALL: if (match_flags(flag_info, flag_count, - sch->type, sch->flags)) { + sch->type, sch->flags)) SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type); - } break; case SCTP_CHUNK_MATCH_ONLY: if (!match_flags(flag_info, flag_count, - sch->type, sch->flags)) { + sch->type, sch->flags)) return false; - } break; } } else { @@ -145,11 +140,11 @@ match(const struct sk_buff *skb, } duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); - return SCCHECK(((ntohs(sh->source) >= info->spts[0]) - && (ntohs(sh->source) <= info->spts[1])), + return SCCHECK(ntohs(sh->source) >= info->spts[0] + && ntohs(sh->source) <= info->spts[1], XT_SCTP_SRC_PORTS, info->flags, info->invflags) - && SCCHECK(((ntohs(sh->dest) >= info->dpts[0]) - && (ntohs(sh->dest) <= info->dpts[1])), + && SCCHECK(ntohs(sh->dest) >= info->dpts[0] + && ntohs(sh->dest) <= info->dpts[1], XT_SCTP_DEST_PORTS, info->flags, info->invflags) && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), info->chunkmap, info->chunk_match_type, diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 0dd3022cc79a..5cb345aeeca8 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -95,7 +95,7 @@ tcp_match(const struct sk_buff *skb, return false; } -#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg)) +#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) { -- cgit v1.2.3 From f4a607bfae30d15aad46e75d2ed7a39f7ce7708b Mon Sep 17 00:00:00 2001 From: Jerome Borsboom Date: Sat, 7 Jul 2007 22:19:48 -0700 Subject: [NETFILTER]: nf_nat_sip: only perform RTP DNAT if SIP session was SNATed DNAT of the the RTP session is only necessary if the SIP session has been SNATed. Signed-off-by: Jerome Borsboom Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_sip.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index fac97cf51ae5..a32d746c4592 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -260,7 +260,11 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, DEBUGP("ip_nat_sdp():\n"); /* Connection will come from reply */ - newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + if (ct->tuplehash[dir].tuple.src.u3.ip == + ct->tuplehash[!dir].tuple.dst.u3.ip) + newip = exp->tuple.dst.u3.ip; + else + newip = ct->tuplehash[!dir].tuple.dst.u3.ip; exp->saved_ip = exp->tuple.dst.u3.ip; exp->tuple.dst.u3.ip = newip; -- cgit v1.2.3 From 1b50b8a371e90a5e110f466e4ac02cf6b5f681de Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:20:36 -0700 Subject: [NETFILTER]: Add u32 match Along comes... xt_u32, a revamped ipt_u32 from POM-NG, Plus: * 2007-06-02: added ipv6 support * 2007-06-05: uses kmalloc for the big buffer * 2007-06-05: added inversion * 2007-06-20: use skb_copy_bits() and get rid of the big buffer and lock (suggested by Pablo Neira Ayuso) Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_u32.h | 40 ++++++++++++ net/netfilter/Kconfig | 13 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_u32.c | 135 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 189 insertions(+) create mode 100644 include/linux/netfilter/xt_u32.h create mode 100644 net/netfilter/xt_u32.c (limited to 'net') diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h new file mode 100644 index 000000000000..9947f56cdbdd --- /dev/null +++ b/include/linux/netfilter/xt_u32.h @@ -0,0 +1,40 @@ +#ifndef _XT_U32_H +#define _XT_U32_H 1 + +enum xt_u32_ops { + XT_U32_AND, + XT_U32_LEFTSH, + XT_U32_RIGHTSH, + XT_U32_AT, +}; + +struct xt_u32_location_element { + u_int32_t number; + u_int8_t nextop; +}; + +struct xt_u32_value_element { + u_int32_t min; + u_int32_t max; +}; + +/* + * Any way to allow for an arbitrary number of elements? + * For now, I settle with a limit of 10 each. + */ +#define XT_U32_MAXSIZE 10 + +struct xt_u32_test { + struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; + struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; + u_int8_t nnums; + u_int8_t nvalues; +}; + +struct xt_u32 { + struct xt_u32_test tests[XT_U32_MAXSIZE+1]; + u_int8_t ntests; + u_int8_t invert; +}; + +#endif /* _XT_U32_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index a567dae8e5fd..aa567faa2a88 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -635,6 +635,19 @@ config NETFILTER_XT_MATCH_TCPMSS To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_U32 + tristate '"u32" match support' + depends on NETFILTER_XTABLES + ---help--- + u32 allows you to extract quantities of up to 4 bytes from a packet, + AND them with specified masks, shift them by specified amounts and + test whether the results are in any of a set of specified ranges. + The specification of what to extract is general enough to skip over + headers with lengths stored in the packet, as in IP or TCP header + lengths. + + Details and examples are in the kernel module source. + config NETFILTER_XT_MATCH_HASHLIMIT tristate '"hashlimit" match support' depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b2b5c7566b26..3cf5b9cd6fe1 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -72,4 +72,5 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o +obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c new file mode 100644 index 000000000000..07068751a35c --- /dev/null +++ b/net/netfilter/xt_u32.c @@ -0,0 +1,135 @@ +/* + * xt_u32 - kernel module to match u32 packet content + * + * Original author: Don Cohen + * © Jan Engelhardt , 2007 + */ + +#include +#include +#include +#include +#include +#include +#include + +static bool u32_match_it(const struct xt_u32 *data, + const struct sk_buff *skb) +{ + const struct xt_u32_test *ct; + unsigned int testind; + unsigned int nnums; + unsigned int nvals; + unsigned int i; + u_int32_t pos; + u_int32_t val; + u_int32_t at; + int ret; + + /* + * Small example: "0 >> 28 == 4 && 8 & 0xFF0000 >> 16 = 6, 17" + * (=IPv4 and (TCP or UDP)). Outer loop runs over the "&&" operands. + */ + for (testind = 0; testind < data->ntests; ++testind) { + ct = &data->tests[testind]; + at = 0; + pos = ct->location[0].number; + + if (skb->len < 4 || pos > skb->len - 4); + return false; + + ret = skb_copy_bits(skb, pos, &val, sizeof(val)); + BUG_ON(ret < 0); + val = ntohl(val); + nnums = ct->nnums; + + /* Inner loop runs over "&", "<<", ">>" and "@" operands */ + for (i = 1; i < nnums; ++i) { + u_int32_t number = ct->location[i].number; + switch (ct->location[i].nextop) { + case XT_U32_AND: + val &= number; + break; + case XT_U32_LEFTSH: + val <<= number; + break; + case XT_U32_RIGHTSH: + val >>= number; + break; + case XT_U32_AT: + if (at + val < at) + return false; + at += val; + pos = number; + if (at + 4 < at || skb->len < at + 4 || + pos > skb->len - at - 4) + return false; + + ret = skb_copy_bits(skb, at + pos, &val, + sizeof(val)); + BUG_ON(ret < 0); + val = ntohl(val); + break; + } + } + + /* Run over the "," and ":" operands */ + nvals = ct->nvalues; + for (i = 0; i < nvals; ++i) + if (ct->value[i].min <= val && val <= ct->value[i].max) + break; + + if (i >= ct->nvalues) + return false; + } + + return true; +} + +static bool u32_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) +{ + const struct xt_u32 *data = matchinfo; + bool ret; + + ret = u32_match_it(data, skb); + return ret ^ data->invert; +} + +static struct xt_match u32_reg[] = { + { + .name = "u32", + .family = AF_INET, + .match = u32_match, + .matchsize = sizeof(struct xt_u32), + .me = THIS_MODULE, + }, + { + .name = "u32", + .family = AF_INET6, + .match = u32_match, + .matchsize = sizeof(struct xt_u32), + .me = THIS_MODULE, + }, +}; + +static int __init xt_u32_init(void) +{ + return xt_register_matches(u32_reg, ARRAY_SIZE(u32_reg)); +} + +static void __exit xt_u32_exit(void) +{ + xt_unregister_matches(u32_reg, ARRAY_SIZE(u32_reg)); +} + +module_init(xt_u32_init); +module_exit(xt_u32_exit); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_DESCRIPTION("netfilter u32 match module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_u32"); +MODULE_ALIAS("ip6t_u32"); -- cgit v1.2.3 From ba9dda3ab5a865542e69dfe01edb2436857c9420 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Jul 2007 22:21:23 -0700 Subject: [NETFILTER]: x_tables: add TRACE target The TRACE target can be used to follow IP and IPv6 packets through the ruleset. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick NcHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 +- net/core/skbuff.c | 8 +++ net/ipv4/ip_output.c | 4 ++ net/ipv4/netfilter/ip_tables.c | 127 +++++++++++++++++++++++++++++++++++---- net/ipv6/ip6_output.c | 4 ++ net/ipv6/netfilter/ip6_tables.c | 128 ++++++++++++++++++++++++++++++++++++---- net/netfilter/Kconfig | 12 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_TRACE.c | 53 +++++++++++++++++ 9 files changed, 314 insertions(+), 27 deletions(-) create mode 100644 net/netfilter/xt_TRACE.c (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2d6a14f5f2f1..625d73b07ab7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -227,6 +227,7 @@ typedef unsigned char *sk_buff_data_t; * @mark: Generic packet mark * @nfct: Associated connection, if any * @ipvs_property: skbuff is owned by ipvs + * @nf_trace: netfilter packet trace flag * @nfctinfo: Relationship of this skb to the connection * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c @@ -278,7 +279,8 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + nf_trace:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6a41b96b3d37..0583e8498f13 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -428,6 +428,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->destructor = NULL; C(mark); __nf_copy(n, skb); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + C(nf_trace); +#endif #ifdef CONFIG_NET_SCHED C(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -485,6 +489,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->destructor = NULL; new->mark = old->mark; __nf_copy(new, old); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + new->nf_trace = old->nf_trace; +#endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a7dd343d3a03..c9e2b5e6305e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -399,6 +399,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + to->nf_trace = from->nf_trace; +#endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) to->ipvs_property = from->ipvs_property; #endif diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7962306df585..650ab52e9157 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -204,6 +204,112 @@ get_entry(void *base, unsigned int offset) return (struct ipt_entry *)(base + offset); } +/* All zeroes == unconditional rule. */ +static inline int +unconditional(const struct ipt_ip *ip) +{ + unsigned int i; + + for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) + if (((__u32 *)ip)[i]) + return 0; + + return 1; +} + +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +static const char *hooknames[] = { + [NF_IP_PRE_ROUTING] = "PREROUTING", + [NF_IP_LOCAL_IN] = "INPUT", + [NF_IP_FORWARD] = "FORWARD", + [NF_IP_LOCAL_OUT] = "OUTPUT", + [NF_IP_POST_ROUTING] = "POSTROUTING", +}; + +enum nf_ip_trace_comments { + NF_IP_TRACE_COMMENT_RULE, + NF_IP_TRACE_COMMENT_RETURN, + NF_IP_TRACE_COMMENT_POLICY, +}; + +static const char *comments[] = { + [NF_IP_TRACE_COMMENT_RULE] = "rule", + [NF_IP_TRACE_COMMENT_RETURN] = "return", + [NF_IP_TRACE_COMMENT_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static inline int +get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, + char *hookname, char **chainname, + char **comment, unsigned int *rulenum) +{ + struct ipt_standard_target *t = (void *)ipt_get_target(s); + + if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { + /* Head of user chain: ERROR target with chainname */ + *chainname = t->target.data; + (*rulenum) = 0; + } else if (s == e) { + (*rulenum)++; + + if (s->target_offset == sizeof(struct ipt_entry) + && strcmp(t->target.u.kernel.target->name, + IPT_STANDARD_TARGET) == 0 + && t->verdict < 0 + && unconditional(&s->ip)) { + /* Tail of chains: STANDARD target (return/policy) */ + *comment = *chainname == hookname + ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY] + : (char *)comments[NF_IP_TRACE_COMMENT_RETURN]; + } + return 1; + } else + (*rulenum)++; + + return 0; +} + +static void trace_packet(struct sk_buff *skb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + char *tablename, + struct xt_table_info *private, + struct ipt_entry *e) +{ + void *table_base; + struct ipt_entry *root; + char *hookname, *chainname, *comment; + unsigned int rulenum = 0; + + table_base = (void *)private->entries[smp_processor_id()]; + root = get_entry(table_base, private->hook_entry[hook]); + + hookname = chainname = (char *)hooknames[hook]; + comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE]; + + IPT_ENTRY_ITERATE(root, + private->size - private->hook_entry[hook], + get_chainname_rulenum, + e, hookname, &chainname, &comment, &rulenum); + + nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); +} +#endif + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff **pskb, @@ -261,6 +367,14 @@ ipt_do_table(struct sk_buff **pskb, t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); + +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + /* The packet is traced: log it */ + if (unlikely((*pskb)->nf_trace)) + trace_packet(*pskb, hook, in, out, + table->name, private, e); +#endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; @@ -341,19 +455,6 @@ ipt_do_table(struct sk_buff **pskb, #endif } -/* All zeroes == unconditional rule. */ -static inline int -unconditional(const struct ipt_ip *ip) -{ - unsigned int i; - - for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) - if (((__u32 *)ip)[i]) - return 0; - - return 1; -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31dafaf0b652..50d86e94d9ed 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -521,6 +521,10 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + to->nf_trace = from->nf_trace; +#endif skb_copy_secmark(to, from); } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 7fe4d29708cb..4f93b79163aa 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -241,6 +241,113 @@ get_entry(void *base, unsigned int offset) return (struct ip6t_entry *)(base + offset); } +/* All zeroes == unconditional rule. */ +static inline int +unconditional(const struct ip6t_ip6 *ipv6) +{ + unsigned int i; + + for (i = 0; i < sizeof(*ipv6); i++) + if (((char *)ipv6)[i]) + break; + + return (i == sizeof(*ipv6)); +} + +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +/* This cries for unification! */ +static const char *hooknames[] = { + [NF_IP6_PRE_ROUTING] = "PREROUTING", + [NF_IP6_LOCAL_IN] = "INPUT", + [NF_IP6_FORWARD] = "FORWARD", + [NF_IP6_LOCAL_OUT] = "OUTPUT", + [NF_IP6_POST_ROUTING] = "POSTROUTING", +}; + +enum nf_ip_trace_comments { + NF_IP6_TRACE_COMMENT_RULE, + NF_IP6_TRACE_COMMENT_RETURN, + NF_IP6_TRACE_COMMENT_POLICY, +}; + +static const char *comments[] = { + [NF_IP6_TRACE_COMMENT_RULE] = "rule", + [NF_IP6_TRACE_COMMENT_RETURN] = "return", + [NF_IP6_TRACE_COMMENT_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static inline int +get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, + char *hookname, char **chainname, + char **comment, unsigned int *rulenum) +{ + struct ip6t_standard_target *t = (void *)ip6t_get_target(s); + + if (strcmp(t->target.u.kernel.target->name, IP6T_ERROR_TARGET) == 0) { + /* Head of user chain: ERROR target with chainname */ + *chainname = t->target.data; + (*rulenum) = 0; + } else if (s == e) { + (*rulenum)++; + + if (s->target_offset == sizeof(struct ip6t_entry) + && strcmp(t->target.u.kernel.target->name, + IP6T_STANDARD_TARGET) == 0 + && t->verdict < 0 + && unconditional(&s->ipv6)) { + /* Tail of chains: STANDARD target (return/policy) */ + *comment = *chainname == hookname + ? (char *)comments[NF_IP6_TRACE_COMMENT_POLICY] + : (char *)comments[NF_IP6_TRACE_COMMENT_RETURN]; + } + return 1; + } else + (*rulenum)++; + + return 0; +} + +static void trace_packet(struct sk_buff *skb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + char *tablename, + struct xt_table_info *private, + struct ip6t_entry *e) +{ + void *table_base; + struct ip6t_entry *root; + char *hookname, *chainname, *comment; + unsigned int rulenum = 0; + + table_base = (void *)private->entries[smp_processor_id()]; + root = get_entry(table_base, private->hook_entry[hook]); + + hookname = chainname = (char *)hooknames[hook]; + comment = (char *)comments[NF_IP6_TRACE_COMMENT_RULE]; + + IP6T_ENTRY_ITERATE(root, + private->size - private->hook_entry[hook], + get_chainname_rulenum, + e, hookname, &chainname, &comment, &rulenum); + + nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); +} +#endif + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(struct sk_buff **pskb, @@ -298,6 +405,14 @@ ip6t_do_table(struct sk_buff **pskb, t = ip6t_get_target(e); IP_NF_ASSERT(t->u.kernel.target); + +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + /* The packet is traced: log it */ + if (unlikely((*pskb)->nf_trace)) + trace_packet(*pskb, hook, in, out, + table->name, private, e); +#endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; @@ -377,19 +492,6 @@ ip6t_do_table(struct sk_buff **pskb, #endif } -/* All zeroes == unconditional rule. */ -static inline int -unconditional(const struct ip6t_ip6 *ipv6) -{ - unsigned int i; - - for (i = 0; i < sizeof(*ipv6); i++) - if (((char *)ipv6)[i]) - break; - - return (i == sizeof(*ipv6)); -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa567faa2a88..df5e8dab871d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -343,6 +343,18 @@ config NETFILTER_XT_TARGET_NOTRACK If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_TARGET_TRACE + tristate '"TRACE" target support' + depends on NETFILTER_XTABLES + depends on IP_NF_RAW || IP6_NF_RAW + help + The TRACE target allows you to mark packets so that the kernel + will log every rule which match the packets as those traverse + the tables, chains, rules. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + config NETFILTER_XT_TARGET_SECMARK tristate '"SECMARK" target support' depends on NETFILTER_XTABLES && NETWORK_SECMARK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3cf5b9cd6fe1..3b792687f001 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c new file mode 100644 index 000000000000..b82fc4680344 --- /dev/null +++ b/net/netfilter/xt_TRACE.c @@ -0,0 +1,53 @@ +/* This is a module which is used to mark packets for tracing. + */ +#include +#include + +#include + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_TRACE"); +MODULE_ALIAS("ip6t_TRACE"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + (*pskb)->nf_trace = 1; + return XT_CONTINUE; +} + +static struct xt_target xt_trace_target[] = { + { + .name = "TRACE", + .family = AF_INET, + .target = target, + .table = "raw", + .me = THIS_MODULE, + }, + { + .name = "TRACE", + .family = AF_INET6, + .target = target, + .table = "raw", + .me = THIS_MODULE, + }, +}; + +static int __init xt_trace_init(void) +{ + return xt_register_targets(xt_trace_target, + ARRAY_SIZE(xt_trace_target)); +} + +static void __exit xt_trace_fini(void) +{ + xt_unregister_targets(xt_trace_target, ARRAY_SIZE(xt_trace_target)); +} + +module_init(xt_trace_init); +module_exit(xt_trace_fini); -- cgit v1.2.3 From 9f15c5302de4e8b0aac7ca24c36bf26a7fe1a513 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:22:02 -0700 Subject: [NETFILTER]: x_tables: mark matches and targets __read_mostly Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/arpt_mangle.c | 2 +- net/ipv4/netfilter/ip_tables.c | 6 +++--- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/ipv4/netfilter/ipt_LOG.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/ipt_NETMAP.c | 2 +- net/ipv4/netfilter/ipt_REDIRECT.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 2 +- net/ipv4/netfilter/ipt_SAME.c | 2 +- net/ipv4/netfilter/ipt_TOS.c | 2 +- net/ipv4/netfilter/ipt_TTL.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- net/ipv4/netfilter/ipt_addrtype.c | 2 +- net/ipv4/netfilter/ipt_ah.c | 2 +- net/ipv4/netfilter/ipt_ecn.c | 2 +- net/ipv4/netfilter/ipt_iprange.c | 2 +- net/ipv4/netfilter/ipt_owner.c | 2 +- net/ipv4/netfilter/ipt_recent.c | 2 +- net/ipv4/netfilter/ipt_tos.c | 2 +- net/ipv4/netfilter/ipt_ttl.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 6 +++--- net/ipv6/netfilter/ip6t_HL.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 2 +- net/ipv6/netfilter/ip6t_eui64.c | 2 +- net/ipv6/netfilter/ip6t_frag.c | 2 +- net/ipv6/netfilter/ip6t_hbh.c | 2 +- net/ipv6/netfilter/ip6t_hl.c | 2 +- net/ipv6/netfilter/ip6t_ipv6header.c | 2 +- net/ipv6/netfilter/ip6t_mh.c | 2 +- net/ipv6/netfilter/ip6t_owner.c | 2 +- net/ipv6/netfilter/ip6t_rt.c | 2 +- net/netfilter/xt_CLASSIFY.c | 2 +- net/netfilter/xt_CONNMARK.c | 2 +- net/netfilter/xt_CONNSECMARK.c | 2 +- net/netfilter/xt_DSCP.c | 2 +- net/netfilter/xt_MARK.c | 2 +- net/netfilter/xt_NFLOG.c | 2 +- net/netfilter/xt_NFQUEUE.c | 2 +- net/netfilter/xt_NOTRACK.c | 2 +- net/netfilter/xt_SECMARK.c | 2 +- net/netfilter/xt_TCPMSS.c | 2 +- net/netfilter/xt_TRACE.c | 2 +- net/netfilter/xt_comment.c | 2 +- net/netfilter/xt_connbytes.c | 2 +- net/netfilter/xt_connmark.c | 2 +- net/netfilter/xt_conntrack.c | 2 +- net/netfilter/xt_dccp.c | 2 +- net/netfilter/xt_dscp.c | 2 +- net/netfilter/xt_esp.c | 2 +- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_helper.c | 2 +- net/netfilter/xt_length.c | 2 +- net/netfilter/xt_limit.c | 2 +- net/netfilter/xt_mac.c | 2 +- net/netfilter/xt_mark.c | 2 +- net/netfilter/xt_multiport.c | 2 +- net/netfilter/xt_physdev.c | 2 +- net/netfilter/xt_pkttype.c | 2 +- net/netfilter/xt_policy.c | 2 +- net/netfilter/xt_quota.c | 2 +- net/netfilter/xt_realm.c | 2 +- net/netfilter/xt_sctp.c | 2 +- net/netfilter/xt_state.c | 2 +- net/netfilter/xt_statistic.c | 2 +- net/netfilter/xt_string.c | 2 +- net/netfilter/xt_tcpmss.c | 2 +- net/netfilter/xt_tcpudp.c | 2 +- net/netfilter/xt_u32.c | 2 +- 73 files changed, 79 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1d75a5cd7b44..e981232942a1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1140,13 +1140,13 @@ void arpt_unregister_table(struct arpt_table *table) } /* The built-in targets: standard (NULL) and error. */ -static struct arpt_target arpt_standard_target = { +static struct arpt_target arpt_standard_target __read_mostly = { .name = ARPT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NF_ARP, }; -static struct arpt_target arpt_error_target = { +static struct arpt_target arpt_error_target __read_mostly = { .name = ARPT_ERROR_TARGET, .target = arpt_error, .targetsize = ARPT_FUNCTION_MAXNAMELEN, diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 497a16e0b064..c4bdab47597f 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -81,7 +81,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, return true; } -static struct arpt_target arpt_mangle_reg = { +static struct arpt_target arpt_mangle_reg __read_mostly = { .name = "mangle", .target = target, .targetsize = sizeof(struct arpt_mangle), diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 650ab52e9157..2ba5bd9c8c1b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2264,7 +2264,7 @@ icmp_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct xt_target ipt_standard_target = { +static struct xt_target ipt_standard_target __read_mostly = { .name = IPT_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET, @@ -2275,7 +2275,7 @@ static struct xt_target ipt_standard_target = { #endif }; -static struct xt_target ipt_error_target = { +static struct xt_target ipt_error_target __read_mostly = { .name = IPT_ERROR_TARGET, .target = ipt_error, .targetsize = IPT_FUNCTION_MAXNAMELEN, @@ -2298,7 +2298,7 @@ static struct nf_sockopt_ops ipt_sockopts = { #endif }; -static struct xt_match icmp_matchstruct = { +static struct xt_match icmp_matchstruct __read_mostly = { .name = "icmp", .match = icmp_match, .matchsize = sizeof(struct ipt_icmp), diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 5de13b44b1ca..1cef3b09c326 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -466,7 +466,7 @@ static void destroy(const struct xt_target *target, void *targinfo) nf_ct_l3proto_module_put(target->family); } -static struct xt_target clusterip_tgt = { +static struct xt_target clusterip_tgt __read_mostly = { .name = "CLUSTERIP", .family = AF_INET, .target = target, diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index a647c1db86dd..f1253bd3837f 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -128,7 +128,7 @@ checkentry(const char *tablename, return true; } -static struct xt_target ipt_ecn_reg = { +static struct xt_target ipt_ecn_reg __read_mostly = { .name = "ECN", .family = AF_INET, .target = target, diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index bcc43a625e72..9bfce614ec28 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -463,7 +463,7 @@ static bool ipt_log_checkentry(const char *tablename, return true; } -static struct xt_target ipt_log_reg = { +static struct xt_target ipt_log_reg __read_mostly = { .name = "LOG", .family = AF_INET, .target = ipt_log_target, diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index f136ef7f23f8..bc033e0f424d 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -169,7 +169,7 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -static struct xt_target masquerade = { +static struct xt_target masquerade __read_mostly = { .name = "MASQUERADE", .family = AF_INET, .target = masquerade_target, diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index a902c71218bf..0a7ce15bbdd0 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -85,7 +85,7 @@ target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &newrange, hooknum); } -static struct xt_target target_module = { +static struct xt_target target_module __read_mostly = { .name = MODULENAME, .family = AF_INET, .target = target, diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 2a04103b50d1..61e1e4772e37 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -101,7 +101,7 @@ redirect_target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &newrange, hooknum); } -static struct xt_target redirect_reg = { +static struct xt_target redirect_reg __read_mostly = { .name = "REDIRECT", .family = AF_INET, .target = redirect_target, diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 90f7b7093785..dd5432c3f365 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -240,7 +240,7 @@ static bool check(const char *tablename, return true; } -static struct xt_target ipt_reject_reg = { +static struct xt_target ipt_reject_reg __read_mostly = { .name = "REJECT", .family = AF_INET, .target = reject, diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 3649fabc04ea..3a0d7dac0af8 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -161,7 +161,7 @@ same_target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &newrange, hooknum); } -static struct xt_target same_reg = { +static struct xt_target same_reg __read_mostly = { .name = "SAME", .family = AF_INET, .target = same_target, diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index ac43e86afbcf..25f5d0b39065 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -63,7 +63,7 @@ checkentry(const char *tablename, return true; } -static struct xt_target ipt_tos_reg = { +static struct xt_target ipt_tos_reg __read_mostly = { .name = "TOS", .family = AF_INET, .target = target, diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 737830b68ade..2b54e7b0cfe8 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -80,7 +80,7 @@ static bool ipt_ttl_checkentry(const char *tablename, return true; } -static struct xt_target ipt_TTL = { +static struct xt_target ipt_TTL __read_mostly = { .name = "TTL", .family = AF_INET, .target = ipt_ttl_target, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 5b25ca688784..226750d1f89b 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -381,7 +381,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_target ipt_ulog_reg = { +static struct xt_target ipt_ulog_reg __read_mostly = { .name = "ULOG", .family = AF_INET, .target = ipt_ulog_target, diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index abea446a4437..59f01f7ba6b4 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -44,7 +44,7 @@ static bool match(const struct sk_buff *skb, return ret; } -static struct xt_match addrtype_match = { +static struct xt_match addrtype_match __read_mostly = { .name = "addrtype", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 49d503cbab09..61b017fd743c 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -88,7 +88,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match ah_match = { +static struct xt_match ah_match __read_mostly = { .name = "ah", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 3129e3106162..d6925c674069 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -111,7 +111,7 @@ static bool checkentry(const char *tablename, const void *ip_void, return true; } -static struct xt_match ecn_match = { +static struct xt_match ecn_match __read_mostly = { .name = "ecn", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index 854281c62008..6a3a033a6808 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c @@ -63,7 +63,7 @@ match(const struct sk_buff *skb, return true; } -static struct xt_match iprange_match = { +static struct xt_match iprange_match __read_mostly = { .name = "iprange", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index deea4b8cc055..b14e77da7a33 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -68,7 +68,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match owner_match = { +static struct xt_match owner_match __read_mostly = { .name = "owner", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 68f7181e412d..a7b14f2ae2dc 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -460,7 +460,7 @@ static const struct file_operations recent_fops = { }; #endif /* CONFIG_PROC_FS */ -static struct xt_match recent_match = { +static struct xt_match recent_match __read_mostly = { .name = "recent", .family = AF_INET, .match = ipt_recent_match, diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c index 67699ae46d37..e740441c973d 100644 --- a/net/ipv4/netfilter/ipt_tos.c +++ b/net/ipv4/netfilter/ipt_tos.c @@ -33,7 +33,7 @@ match(const struct sk_buff *skb, return (ip_hdr(skb)->tos == info->tos) ^ info->invert; } -static struct xt_match tos_match = { +static struct xt_match tos_match __read_mostly = { .name = "tos", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index 59a644db4d74..a439900a4ba5 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -44,7 +44,7 @@ static bool match(const struct sk_buff *skb, return false; } -static struct xt_match ttl_match = { +static struct xt_match ttl_match __read_mostly = { .name = "ttl", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index ea1a07c74fe3..080393a143d7 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -228,7 +228,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, return ret; } -static struct xt_target ipt_snat_reg = { +static struct xt_target ipt_snat_reg __read_mostly = { .name = "SNAT", .target = ipt_snat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), @@ -238,7 +238,7 @@ static struct xt_target ipt_snat_reg = { .family = AF_INET, }; -static struct xt_target ipt_dnat_reg = { +static struct xt_target ipt_dnat_reg __read_mostly = { .name = "DNAT", .target = ipt_dnat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4f93b79163aa..254c769b750a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1441,13 +1441,13 @@ icmp6_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct xt_target ip6t_standard_target = { +static struct xt_target ip6t_standard_target __read_mostly = { .name = IP6T_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET6, }; -static struct xt_target ip6t_error_target = { +static struct xt_target ip6t_error_target __read_mostly = { .name = IP6T_ERROR_TARGET, .target = ip6t_error, .targetsize = IP6T_FUNCTION_MAXNAMELEN, @@ -1464,7 +1464,7 @@ static struct nf_sockopt_ops ip6t_sockopts = { .get = do_ip6t_get_ctl, }; -static struct xt_match icmp6_matchstruct = { +static struct xt_match icmp6_matchstruct __read_mostly = { .name = "icmp6", .match = &icmp6_match, .matchsize = sizeof(struct ip6t_icmp), diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 33c4cb8a5c43..ad4d94310b87 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -79,7 +79,7 @@ static bool ip6t_hl_checkentry(const char *tablename, return true; } -static struct xt_target ip6t_HL = { +static struct xt_target ip6t_HL __read_mostly = { .name = "HL", .family = AF_INET6, .target = ip6t_hl_target, diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 996168d2ca25..540bf14b851c 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -477,7 +477,7 @@ static bool ip6t_log_checkentry(const char *tablename, return true; } -static struct xt_target ip6t_log_reg = { +static struct xt_target ip6t_log_reg __read_mostly = { .name = "LOG", .family = AF_INET6, .target = ip6t_log_target, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0fa1f2cf9fba..14008dc6a197 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -244,7 +244,7 @@ static bool check(const char *tablename, return true; } -static struct xt_target ip6t_reject_reg = { +static struct xt_target ip6t_reject_reg __read_mostly = { .name = "REJECT", .family = AF_INET6, .target = reject6_target, diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index fbf3d7748dc2..a9fe2aa97072 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -120,7 +120,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match ah_match = { +static struct xt_match ah_match __read_mostly = { .name = "ah", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 2af99fc6bdc9..34ba150bfe5d 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -62,7 +62,7 @@ match(const struct sk_buff *skb, return false; } -static struct xt_match eui64_match = { +static struct xt_match eui64_match __read_mostly = { .name = "eui64", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 65482af711de..bb1cfa82b47c 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -137,7 +137,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match frag_match = { +static struct xt_match frag_match __read_mostly = { .name = "frag", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 8eecac14ddaa..6247d4cdad99 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -193,7 +193,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match opts_match[] = { +static struct xt_match opts_match[] __read_mostly = { { .name = "hbh", .family = AF_INET6, diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index ddee088f5f10..ca29ec00dc18 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -49,7 +49,7 @@ static bool match(const struct sk_buff *skb, return false; } -static struct xt_match hl_match = { +static struct xt_match hl_match __read_mostly = { .name = "hl", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index ca020ce1c4a3..2c65c2f9a4ab 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -141,7 +141,7 @@ ipv6header_checkentry(const char *tablename, return true; } -static struct xt_match ip6t_ipv6header_match = { +static struct xt_match ip6t_ipv6header_match __read_mostly = { .name = "ipv6header", .family = AF_INET6, .match = &ipv6header_match, diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index e94fdd82f284..0fa714092dc9 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -89,7 +89,7 @@ mh_checkentry(const char *tablename, return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); } -static struct xt_match mh_match = { +static struct xt_match mh_match __read_mostly = { .name = "mh", .family = AF_INET6, .checkentry = mh_checkentry, diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index d2bf3204aeac..6036613aef36 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -68,7 +68,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match owner_match = { +static struct xt_match owner_match __read_mostly = { .name = "owner", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index f86fdcdd8cb8..549deea26418 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -224,7 +224,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match rt_match = { +static struct xt_match rt_match __read_mostly = { .name = "rt", .family = AF_INET6, .match = match, diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 30884833e665..519428566829 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -39,7 +39,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target xt_classify_target[] = { +static struct xt_target xt_classify_target[] __read_mostly = { { .family = AF_INET, .name = "CLASSIFY", diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 4284a59b03e1..5a00c5444334 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -142,7 +142,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_target xt_connmark_target[] = { +static struct xt_target xt_connmark_target[] __read_mostly = { { .name = "CONNMARK", .family = AF_INET, diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 8d5e154013d6..63d73138c1b9 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -115,7 +115,7 @@ destroy(const struct xt_target *target, void *targinfo) nf_ct_l3proto_module_put(target->family); } -static struct xt_target xt_connsecmark_target[] = { +static struct xt_target xt_connsecmark_target[] __read_mostly = { { .name = "CONNSECMARK", .family = AF_INET, diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index ed6b524064f0..798ab731009d 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -81,7 +81,7 @@ static bool checkentry(const char *tablename, return true; } -static struct xt_target xt_dscp_target[] = { +static struct xt_target xt_dscp_target[] __read_mostly = { { .name = "DSCP", .family = AF_INET, diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 6b7369fc263f..f30fe0baf7de 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -133,7 +133,7 @@ static int compat_to_user_v1(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_target xt_mark_target[] = { +static struct xt_target xt_mark_target[] __read_mostly = { { .name = "MARK", .family = AF_INET, diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 20e55d588a3c..d3594c7ccb26 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -52,7 +52,7 @@ nflog_checkentry(const char *tablename, const void *entry, return true; } -static struct xt_target xt_nflog_target[] = { +static struct xt_target xt_nflog_target[] __read_mostly = { { .name = "NFLOG", .family = AF_INET, diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 201155b316e0..13f59f3e8c38 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -36,7 +36,7 @@ target(struct sk_buff **pskb, return NF_QUEUE_NR(tinfo->queuenum); } -static struct xt_target xt_nfqueue_target[] = { +static struct xt_target xt_nfqueue_target[] __read_mostly = { { .name = "NFQUEUE", .family = AF_INET, diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 5085fb3d1e2d..b7d6312fccc7 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -33,7 +33,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target xt_notrack_target[] = { +static struct xt_target xt_notrack_target[] __read_mostly = { { .name = "NOTRACK", .family = AF_INET, diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index f3e78c592f3a..c83779a941a1 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -109,7 +109,7 @@ static bool checkentry(const char *tablename, const void *entry, return true; } -static struct xt_target xt_secmark_target[] = { +static struct xt_target xt_secmark_target[] __read_mostly = { { .name = "SECMARK", .family = AF_INET, diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 6ae6df993aa2..d40f7e4b1289 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -259,7 +259,7 @@ xt_tcpmss_checkentry6(const char *tablename, } #endif -static struct xt_target xt_tcpmss_reg[] = { +static struct xt_target xt_tcpmss_reg[] __read_mostly = { { .family = AF_INET, .name = "TCPMSS", diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index b82fc4680344..4df2dedcc0b5 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -21,7 +21,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target xt_trace_target[] = { +static struct xt_target xt_trace_target[] __read_mostly = { { .name = "TRACE", .family = AF_INET, diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index aa9503ff90ba..64bcdb0fe1e6 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -29,7 +29,7 @@ match(const struct sk_buff *skb, return true; } -static struct xt_match xt_comment_match[] = { +static struct xt_match xt_comment_match[] __read_mostly = { { .name = "comment", .family = AF_INET, diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d9b2e75fbab2..dd4d79b8fc9d 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -128,7 +128,7 @@ destroy(const struct xt_match *match, void *matchinfo) nf_ct_l3proto_module_put(match->family); } -static struct xt_match xt_connbytes_match[] = { +static struct xt_match xt_connbytes_match[] __read_mostly = { { .name = "connbytes", .family = AF_INET, diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 3a6e16d4edcd..e73fa9b46cf7 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -109,7 +109,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_match xt_connmark_match[] = { +static struct xt_match xt_connmark_match[] __read_mostly = { { .name = "connmark", .family = AF_INET, diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 9e3ec31f2016..ca4b69f020a8 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -183,7 +183,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif -static struct xt_match conntrack_match = { +static struct xt_match conntrack_match __read_mostly = { .name = "conntrack", .match = match, .checkentry = checkentry, diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index f07a68d445ca..83224ec89cc0 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -140,7 +140,7 @@ checkentry(const char *tablename, && !(info->invflags & ~info->flags); } -static struct xt_match xt_dccp_match[] = { +static struct xt_match xt_dccp_match[] __read_mostly = { { .name = "dccp", .family = AF_INET, diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 35cabca28eff..dde6d66e0d33 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -68,7 +68,7 @@ static bool checkentry(const char *tablename, return true; } -static struct xt_match xt_dscp_match[] = { +static struct xt_match xt_dscp_match[] __read_mostly = { { .name = "dscp", .family = AF_INET, diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 1a6ae8a047c7..b11378e001b6 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -91,7 +91,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match xt_esp_match[] = { +static struct xt_match xt_esp_match[] __read_mostly = { { .name = "esp", .family = AF_INET, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 094da6e066b8..5a6ea9b9108c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -578,7 +578,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif -static struct xt_match xt_hashlimit[] = { +static struct xt_match xt_hashlimit[] __read_mostly = { { .name = "hashlimit", .family = AF_INET, diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 047d0046b28c..d03acb032cc8 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -99,7 +99,7 @@ destroy(const struct xt_match *match, void *matchinfo) nf_ct_l3proto_module_put(match->family); } -static struct xt_match xt_helper_match[] = { +static struct xt_match xt_helper_match[] __read_mostly = { { .name = "helper", .family = AF_INET, diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index ea4880bd31ea..3dad173d9735 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -53,7 +53,7 @@ match6(const struct sk_buff *skb, return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } -static struct xt_match xt_length_match[] = { +static struct xt_match xt_length_match[] __read_mostly = { { .name = "length", .family = AF_INET, diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index b042419462af..4fcca797150f 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -173,7 +173,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_match xt_limit_match[] = { +static struct xt_match xt_limit_match[] __read_mostly = { { .name = "limit", .family = AF_INET, diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 28ec08e7511b..00490d777a0f 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -44,7 +44,7 @@ match(const struct sk_buff *skb, ^ info->invert); } -static struct xt_match xt_mac_match[] = { +static struct xt_match xt_mac_match[] __read_mostly = { { .name = "mac", .family = AF_INET, diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index b8ab79452f08..c02a7f8f3925 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -81,7 +81,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_match xt_mark_match[] = { +static struct xt_match xt_mark_match[] __read_mostly = { { .name = "mark", .family = AF_INET, diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 3d69d6208965..e8ae10284acd 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -228,7 +228,7 @@ checkentry6_v1(const char *tablename, multiinfo->count); } -static struct xt_match xt_multiport_match[] = { +static struct xt_match xt_multiport_match[] __read_mostly = { { .name = "multiport", .family = AF_INET, diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 467b2dcf7e6b..f47cab7a696d 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -125,7 +125,7 @@ checkentry(const char *tablename, return true; } -static struct xt_match xt_physdev_match[] = { +static struct xt_match xt_physdev_match[] __read_mostly = { { .name = "physdev", .family = AF_INET, diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index e4c420b5713b..a52925f12f35 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -43,7 +43,7 @@ static bool match(const struct sk_buff *skb, return (type == info->pkttype) ^ info->invert; } -static struct xt_match xt_pkttype_match[] = { +static struct xt_match xt_pkttype_match[] __read_mostly = { { .name = "pkttype", .family = AF_INET, diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 5ab6d71f8d05..6d6d3b7fcbb5 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -164,7 +164,7 @@ static bool checkentry(const char *tablename, const void *ip_void, return true; } -static struct xt_match xt_policy_match[] = { +static struct xt_match xt_policy_match[] __read_mostly = { { .name = "policy", .family = AF_INET, diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index feb130d14f2c..dae97445b87b 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -53,7 +53,7 @@ checkentry(const char *tablename, const void *entry, return true; } -static struct xt_match xt_quota_match[] = { +static struct xt_match xt_quota_match[] __read_mostly = { { .name = "quota", .family = AF_INET, diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 44b807d279ad..cc3e76d77a99 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -37,7 +37,7 @@ match(const struct sk_buff *skb, return (info->id == (dst->tclassid & info->mask)) ^ info->invert; } -static struct xt_match realm_match = { +static struct xt_match realm_match __read_mostly = { .name = "realm", .match = match, .matchsize = sizeof(struct xt_realm_info), diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index fefc846188f3..c002153b80ab 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -172,7 +172,7 @@ checkentry(const char *tablename, | SCTP_CHUNK_MATCH_ONLY))); } -static struct xt_match xt_sctp_match[] = { +static struct xt_match xt_sctp_match[] __read_mostly = { { .name = "sctp", .family = AF_INET, diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5b9c59aa14d3..e0a528df19a7 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -64,7 +64,7 @@ destroy(const struct xt_match *match, void *matchinfo) nf_ct_l3proto_module_put(match->family); } -static struct xt_match xt_state_match[] = { +static struct xt_match xt_state_match[] __read_mostly = { { .name = "state", .family = AF_INET, diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 3da4978287f3..4089dae4e286 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -66,7 +66,7 @@ checkentry(const char *tablename, const void *entry, return true; } -static struct xt_match xt_statistic_match[] = { +static struct xt_match xt_statistic_match[] __read_mostly = { { .name = "statistic", .family = AF_INET, diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index ab761b17f811..864133442cda 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -73,7 +73,7 @@ static void destroy(const struct xt_match *match, void *matchinfo) textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); } -static struct xt_match xt_string_match[] = { +static struct xt_match xt_string_match[] __read_mostly = { { .name = "string", .family = AF_INET, diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index e9bfd3dd3c81..cd5f6d758c68 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -81,7 +81,7 @@ dropit: return false; } -static struct xt_match xt_tcpmss_match[] = { +static struct xt_match xt_tcpmss_match[] __read_mostly = { { .name = "tcpmss", .family = AF_INET, diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 5cb345aeeca8..ab7d845224fc 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -194,7 +194,7 @@ udp_checkentry(const char *tablename, return !(udpinfo->invflags & ~XT_UDP_INV_MASK); } -static struct xt_match xt_tcpudp_match[] = { +static struct xt_match xt_tcpudp_match[] __read_mostly = { { .name = "tcp", .family = AF_INET, diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index 07068751a35c..04b677ae8dae 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -99,7 +99,7 @@ static bool u32_match(const struct sk_buff *skb, return ret ^ data->invert; } -static struct xt_match u32_reg[] = { +static struct xt_match u32_reg[] __read_mostly = { { .name = "u32", .family = AF_INET, -- cgit v1.2.3 From ecfab2c9fe5597221c2b30dec48634a2361a0d08 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:23:21 -0700 Subject: [NETFILTER]: nf_conntrack: introduce extension infrastructure Old space allocator of conntrack had problems about extensibility. - It required slab cache per combination of extensions. - It expected what extensions would be assigned, but it was impossible to expect that completely, then we allocated bigger memory object than really required. - It needed to search helper twice due to lock issue. Now basic informations of a connection are stored in 'struct nf_conn'. And a storage for extension (helper, NAT) is allocated by kmalloc. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 3 + include/net/netfilter/nf_conntrack_extend.h | 80 ++++++++++++ net/netfilter/Makefile | 2 +- net/netfilter/nf_conntrack_core.c | 4 + net/netfilter/nf_conntrack_extend.c | 195 ++++++++++++++++++++++++++++ 5 files changed, 283 insertions(+), 1 deletion(-) create mode 100644 include/net/netfilter/nf_conntrack_extend.h create mode 100644 net/netfilter/nf_conntrack_extend.c (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 12a0e793cc0b..c31382d3ef11 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -131,6 +131,9 @@ struct nf_conn /* Storage reserved for other modules: */ union nf_conntrack_proto proto; + /* Extensions */ + struct nf_ct_ext *ext; + /* features dynamically at the end: helper, nat (both optional) */ char data[0]; }; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h new file mode 100644 index 000000000000..8a988d136465 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -0,0 +1,80 @@ +#ifndef _NF_CONNTRACK_EXTEND_H +#define _NF_CONNTRACK_EXTEND_H + +#include + +enum nf_ct_ext_id +{ + NF_CT_EXT_NUM, +}; + +/* Extensions: optional stuff which isn't permanently in struct. */ +struct nf_ct_ext { + u8 offset[NF_CT_EXT_NUM]; + u8 len; + u8 real_len; + char data[0]; +}; + +static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id) +{ + return (ct->ext && ct->ext->offset[id]); +} + +static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) +{ + if (!nf_ct_ext_exist(ct, id)) + return NULL; + + return (void *)ct->ext + ct->ext->offset[id]; +} +#define nf_ct_ext_find(ext, id) \ + ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) + +/* Destroy all relationships */ +extern void __nf_ct_ext_destroy(struct nf_conn *ct); +static inline void nf_ct_ext_destroy(struct nf_conn *ct) +{ + if (ct->ext) + __nf_ct_ext_destroy(ct); +} + +/* Free operation. If you want to free a object referred from private area, + * please implement __nf_ct_ext_free() and call it. + */ +static inline void nf_ct_ext_free(struct nf_conn *ct) +{ + if (ct->ext) + kfree(ct->ext); +} + +/* Add this type, returns pointer to data or NULL. */ +void * +__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); +#define nf_ct_ext_add(ct, id, gfp) \ + ((id##_TYPE *)__nf_ct_ext_add((ct), (id), (gfp))) + +#define NF_CT_EXT_F_PREALLOC 0x0001 + +struct nf_ct_ext_type +{ + /* Destroys relationships (can be NULL). */ + void (*destroy)(struct nf_conn *ct); + /* Called when realloacted (can be NULL). + Contents has already been moved. */ + void (*move)(struct nf_conn *ct, void *old); + + enum nf_ct_ext_id id; + + unsigned int flags; + + /* Length and min alignment. */ + u8 len; + u8 align; + /* initial size of nf_ct_ext. */ + u8 alloc_size; +}; + +int nf_ct_extend_register(struct nf_ct_ext_type *type); +void nf_ct_extend_unregister(struct nf_ct_ext_type *type); +#endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3b792687f001..58b4245a1723 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,6 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7a15e30356f2..b56f954895bb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -36,6 +36,7 @@ #include #include #include +#include #define NF_CONNTRACK_VERSION "0.5.0" @@ -317,6 +318,8 @@ destroy_conntrack(struct nf_conntrack *nfct) if (l4proto && l4proto->destroy) l4proto->destroy(ct); + nf_ct_ext_destroy(ct); + destroyed = rcu_dereference(nf_conntrack_destroyed); if (destroyed) destroyed(ct); @@ -650,6 +653,7 @@ void nf_conntrack_free(struct nf_conn *conntrack) { u_int32_t features = conntrack->features; NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM); + nf_ct_ext_free(conntrack); DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features, conntrack); kmem_cache_free(nf_ct_cache[features].cachep, conntrack); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c new file mode 100644 index 000000000000..a1a65a1313b3 --- /dev/null +++ b/net/netfilter/nf_conntrack_extend.c @@ -0,0 +1,195 @@ +/* Structure dynamic extension infrastructure + * Copyright (C) 2004 Rusty Russell IBM Corporation + * Copyright (C) 2007 Netfilter Core Team + * Copyright (C) 2007 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; +static DEFINE_MUTEX(nf_ct_ext_type_mutex); + +/* Horrible trick to figure out smallest amount worth kmallocing. */ +#define CACHE(x) (x) + 0 * +enum { + NF_CT_EXT_MIN_SIZE = +#include + 1 }; +#undef CACHE + +void __nf_ct_ext_destroy(struct nf_conn *ct) +{ + unsigned int i; + struct nf_ct_ext_type *t; + + for (i = 0; i < NF_CT_EXT_NUM; i++) { + if (!nf_ct_ext_exist(ct, i)) + continue; + + rcu_read_lock(); + t = rcu_dereference(nf_ct_ext_types[i]); + + /* Here the nf_ct_ext_type might have been unregisterd. + * I.e., it has responsible to cleanup private + * area in all conntracks when it is unregisterd. + */ + if (t && t->destroy) + t->destroy(ct); + rcu_read_unlock(); + } +} +EXPORT_SYMBOL(__nf_ct_ext_destroy); + +static void * +nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) +{ + unsigned int off, len, real_len; + struct nf_ct_ext_type *t; + + rcu_read_lock(); + t = rcu_dereference(nf_ct_ext_types[id]); + BUG_ON(t == NULL); + off = ALIGN(sizeof(struct nf_ct_ext), t->align); + len = off + t->len; + real_len = t->alloc_size; + rcu_read_unlock(); + + *ext = kzalloc(real_len, gfp); + if (!*ext) + return NULL; + + (*ext)->offset[id] = off; + (*ext)->len = len; + (*ext)->real_len = real_len; + + return (void *)(*ext) + off; +} + +void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) +{ + struct nf_ct_ext *new; + int i, newlen, newoff; + struct nf_ct_ext_type *t; + + if (!ct->ext) + return nf_ct_ext_create(&ct->ext, id, gfp); + + if (nf_ct_ext_exist(ct, id)) + return NULL; + + rcu_read_lock(); + t = rcu_dereference(nf_ct_ext_types[id]); + BUG_ON(t == NULL); + + newoff = ALIGN(ct->ext->len, t->align); + newlen = newoff + t->len; + rcu_read_unlock(); + + if (newlen >= ct->ext->real_len) { + new = kmalloc(newlen, gfp); + if (!new) + return NULL; + + memcpy(new, ct->ext, ct->ext->len); + + for (i = 0; i < NF_CT_EXT_NUM; i++) { + if (!nf_ct_ext_exist(ct, i)) + continue; + + rcu_read_lock(); + t = rcu_dereference(nf_ct_ext_types[i]); + if (t && t->move) + t->move(ct, ct->ext + ct->ext->offset[id]); + rcu_read_unlock(); + } + kfree(ct->ext); + new->real_len = newlen; + ct->ext = new; + } + + ct->ext->offset[id] = newoff; + ct->ext->len = newlen; + memset((void *)ct->ext + newoff, 0, newlen - newoff); + return (void *)ct->ext + newoff; +} +EXPORT_SYMBOL(__nf_ct_ext_add); + +static void update_alloc_size(struct nf_ct_ext_type *type) +{ + int i, j; + struct nf_ct_ext_type *t1, *t2; + enum nf_ct_ext_id min = 0, max = NF_CT_EXT_NUM - 1; + + /* unnecessary to update all types */ + if ((type->flags & NF_CT_EXT_F_PREALLOC) == 0) { + min = type->id; + max = type->id; + } + + /* This assumes that extended areas in conntrack for the types + whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ + for (i = min; i <= max; i++) { + t1 = nf_ct_ext_types[i]; + if (!t1) + continue; + + t1->alloc_size = sizeof(struct nf_ct_ext) + + ALIGN(sizeof(struct nf_ct_ext), t1->align) + + t1->len; + for (j = 0; j < NF_CT_EXT_NUM; j++) { + t2 = nf_ct_ext_types[j]; + if (t2 == NULL || t2 == t1 || + (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) + continue; + + t1->alloc_size = ALIGN(t1->alloc_size, t2->align) + + t2->len; + } + if (t1->alloc_size < NF_CT_EXT_MIN_SIZE) + t1->alloc_size = NF_CT_EXT_MIN_SIZE; + } +} + +/* This MUST be called in process context. */ +int nf_ct_extend_register(struct nf_ct_ext_type *type) +{ + int ret = 0; + + mutex_lock(&nf_ct_ext_type_mutex); + if (nf_ct_ext_types[type->id]) { + ret = -EBUSY; + goto out; + } + + /* This ensures that nf_ct_ext_create() can allocate enough area + before updating alloc_size */ + type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align) + + type->len; + rcu_assign_pointer(nf_ct_ext_types[type->id], type); + update_alloc_size(type); +out: + mutex_unlock(&nf_ct_ext_type_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_extend_register); + +/* This MUST be called in process context. */ +void nf_ct_extend_unregister(struct nf_ct_ext_type *type) +{ + mutex_lock(&nf_ct_ext_type_mutex); + rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); + update_alloc_size(type); + mutex_unlock(&nf_ct_ext_type_mutex); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); -- cgit v1.2.3 From ceceae1b1555a9afcb8dacf90df5fa1f20fd5466 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:23:42 -0700 Subject: [NETFILTER]: nf_conntrack: use extension infrastructure for helper Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 26 ---------- include/net/netfilter/nf_conntrack_core.h | 3 ++ include/net/netfilter/nf_conntrack_extend.h | 3 ++ include/net/netfilter/nf_conntrack_helper.h | 5 ++ net/ipv4/netfilter/nf_nat_standalone.c | 10 ---- net/netfilter/nf_conntrack_core.c | 80 ++++++++++++++++++----------- net/netfilter/nf_conntrack_helper.c | 27 ++++++---- net/netfilter/nf_conntrack_netlink.c | 37 +++++++------ 8 files changed, 99 insertions(+), 92 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c31382d3ef11..f1e0fee9aa9c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -294,32 +294,6 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) offset = ALIGN(offset, __alignof__(struct nf_conn_nat)); return (struct nf_conn_nat *) ((void *)ct + offset); } - -static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) -{ - unsigned int offset = sizeof(struct nf_conn); - - if (!(ct->features & NF_CT_F_HELP)) - return NULL; - if (ct->features & NF_CT_F_NAT) { - offset = ALIGN(offset, __alignof__(struct nf_conn_nat)); - offset += sizeof(struct nf_conn_nat); - } - - offset = ALIGN(offset, __alignof__(struct nf_conn_help)); - return (struct nf_conn_help *) ((void *)ct + offset); -} -#else /* No NAT */ -static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) -{ - unsigned int offset = sizeof(struct nf_conn); - - if (!(ct->features & NF_CT_F_HELP)) - return NULL; - - offset = ALIGN(offset, __alignof__(struct nf_conn_help)); - return (struct nf_conn_help *) ((void *)ct + offset); -} #endif /* CONFIG_NF_NAT_NEEDED */ #endif /* __KERNEL__ */ #endif /* _NF_CONNTRACK_H */ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 9fb906688ffa..3bf7d05ea64d 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -30,6 +30,9 @@ extern void nf_conntrack_cleanup(void); extern int nf_conntrack_proto_init(void); extern void nf_conntrack_proto_fini(void); +extern int nf_conntrack_helper_init(void); +extern void nf_conntrack_helper_fini(void); + struct nf_conntrack_l3proto; extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf); /* Like above, but you already have conntrack read lock. */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 8a988d136465..05357dc5d2d2 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -5,9 +5,12 @@ enum nf_ct_ext_id { + NF_CT_EXT_HELPER, NF_CT_EXT_NUM, }; +#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help + /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 8c72ac9f0ab8..b43a75ba44ac 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -10,6 +10,7 @@ #ifndef _NF_CONNTRACK_HELPER_H #define _NF_CONNTRACK_HELPER_H #include +#include struct module; @@ -52,4 +53,8 @@ extern void nf_ct_helper_put(struct nf_conntrack_helper *helper); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); +} #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 55dac36dbc85..0b2f0c33f7cd 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -338,14 +338,6 @@ static int __init nf_nat_standalone_init(void) return ret; } - size = ALIGN(size, __alignof__(struct nf_conn_help)) + - sizeof(struct nf_conn_help); - ret = nf_conntrack_register_cache(NF_CT_F_NAT|NF_CT_F_HELP, - "nf_nat:help", size); - if (ret < 0) { - printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n"); - goto cleanup_register_cache; - } #ifdef CONFIG_XFRM BUG_ON(ip_nat_decode_session != NULL); ip_nat_decode_session = nat_decode_session; @@ -370,8 +362,6 @@ static int __init nf_nat_standalone_init(void) ip_nat_decode_session = NULL; synchronize_net(); #endif - nf_conntrack_unregister_cache(NF_CT_F_NAT|NF_CT_F_HELP); - cleanup_register_cache: nf_conntrack_unregister_cache(NF_CT_F_NAT); return ret; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b56f954895bb..914506e6c787 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -566,7 +566,6 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, u_int32_t features) { struct nf_conn *conntrack = NULL; - struct nf_conntrack_helper *helper; if (unlikely(!nf_conntrack_hash_rnd_initted)) { get_random_bytes(&nf_conntrack_hash_rnd, 4); @@ -593,14 +592,6 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, /* find features needed by this conntrack. */ features |= l3proto->get_features(orig); - /* FIXME: protect helper list per RCU */ - read_lock_bh(&nf_conntrack_lock); - helper = __nf_ct_helper_find(repl); - /* NAT might want to assign a helper later */ - if (helper || features & NF_CT_F_NAT) - features |= NF_CT_F_HELP; - read_unlock_bh(&nf_conntrack_lock); - DEBUGP("nf_conntrack_alloc: features=0x%x\n", features); read_lock_bh(&nf_ct_cache_lock); @@ -681,12 +672,6 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - read_lock_bh(&nf_conntrack_lock); - exp = __nf_conntrack_expect_find(tuple); - if (exp && exp->helper) - features = NF_CT_F_HELP; - read_unlock_bh(&nf_conntrack_lock); - conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features); if (conntrack == NULL || IS_ERR(conntrack)) { DEBUGP("Can't allocate conntrack.\n"); @@ -701,16 +686,21 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, write_lock_bh(&nf_conntrack_lock); exp = find_expectation(tuple); - - help = nfct_help(conntrack); if (exp) { DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", conntrack, exp); /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = exp->master; - if (exp->helper) - rcu_assign_pointer(help->helper, exp->helper); + if (exp->helper) { + help = nf_ct_ext_add(conntrack, NF_CT_EXT_HELPER, + GFP_ATOMIC); + if (help) + rcu_assign_pointer(help->helper, exp->helper); + else + DEBUGP("failed to add helper extension area"); + } + #ifdef CONFIG_NF_CONNTRACK_MARK conntrack->mark = exp->master->mark; #endif @@ -720,10 +710,18 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); } else { - if (help) { - /* not in hash table yet, so not strictly necessary */ - rcu_assign_pointer(help->helper, - __nf_ct_helper_find(&repl_tuple)); + struct nf_conntrack_helper *helper; + + helper = __nf_ct_helper_find(&repl_tuple); + if (helper) { + help = nf_ct_ext_add(conntrack, NF_CT_EXT_HELPER, + GFP_ATOMIC); + if (help) + /* not in hash table yet, so not strictly + necessary */ + rcu_assign_pointer(help->helper, helper); + else + DEBUGP("failed to add helper extension area"); } NF_CT_STAT_INC(new); } @@ -892,6 +890,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, const struct nf_conntrack_tuple *newreply) { struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_helper *helper; write_lock_bh(&nf_conntrack_lock); /* Should be unconfirmed, so not in hash table yet */ @@ -901,14 +900,28 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, NF_CT_DUMP_TUPLE(newreply); ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; - if (!ct->master && help && help->expecting == 0) { - struct nf_conntrack_helper *helper; - helper = __nf_ct_helper_find(newreply); - if (helper) - memset(&help->help, 0, sizeof(help->help)); - /* not in hash table yet, so not strictly necessary */ - rcu_assign_pointer(help->helper, helper); + if (ct->master || (help && help->expecting != 0)) + goto out; + + helper = __nf_ct_helper_find(newreply); + if (helper == NULL) { + if (help) + rcu_assign_pointer(help->helper, NULL); + goto out; } + + if (help == NULL) { + help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_ATOMIC); + if (help == NULL) { + DEBUGP("failed to add helper extension area"); + goto out; + } + } else { + memset(&help->help, 0, sizeof(help->help)); + } + + rcu_assign_pointer(help->helper, helper); +out: write_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); @@ -1150,6 +1163,7 @@ void nf_conntrack_cleanup(void) nf_conntrack_htable_size); nf_conntrack_proto_fini(); + nf_conntrack_helper_fini(); } static struct list_head *alloc_hashtable(int size, int *vmalloced) @@ -1272,6 +1286,10 @@ int __init nf_conntrack_init(void) if (ret < 0) goto out_free_expect_slab; + ret = nf_conntrack_helper_init(); + if (ret < 0) + goto out_fini_proto; + /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); @@ -1284,6 +1302,8 @@ int __init nf_conntrack_init(void) return ret; +out_fini_proto: + nf_conntrack_proto_fini(); out_free_expect_slab: kmem_cache_destroy(nf_conntrack_expect_cachep); err_free_conntrack_slab: diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index f868b7fbd9b4..6d32399d64e1 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -26,6 +26,7 @@ #include #include #include +#include static __read_mostly LIST_HEAD(helpers); @@ -100,18 +101,8 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { - int size, ret; - BUG_ON(me->timeout == 0); - size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_help)) + - sizeof(struct nf_conn_help); - ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", - size); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n"); - return ret; - } write_lock_bh(&nf_conntrack_lock); list_add(&me->list, &helpers); write_unlock_bh(&nf_conntrack_lock); @@ -153,3 +144,19 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) synchronize_net(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); + +struct nf_ct_ext_type helper_extend = { + .len = sizeof(struct nf_conn_help), + .align = __alignof__(struct nf_conn_help), + .id = NF_CT_EXT_HELPER, +}; + +int nf_conntrack_helper_init() +{ + return nf_ct_extend_register(&helper_extend); +} + +void nf_conntrack_helper_fini() +{ + nf_ct_extend_unregister(&helper_extend); +} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d0fe3d769828..3d56f36074f7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -856,23 +856,23 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) return 0; } - if (!help) { - /* FIXME: we need to reallocate and rehash */ - return -EBUSY; - } - helper = __nf_conntrack_helper_find_byname(helpname); if (helper == NULL) return -EINVAL; - if (help->helper == helper) - return 0; - - if (help->helper) - return -EBUSY; + if (help) { + if (help->helper == helper) + return 0; + if (help->helper) + return -EBUSY; + /* need to zero data of old helper */ + memset(&help->help, 0, sizeof(help->help)); + } else { + help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_KERNEL); + if (help == NULL) + return -ENOMEM; + } - /* need to zero data of old helper */ - memset(&help->help, 0, sizeof(help->help)); rcu_assign_pointer(help->helper, helper); return 0; @@ -957,7 +957,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], struct nf_conn *ct; int err = -EINVAL; struct nf_conn_help *help; - struct nf_conntrack_helper *helper = NULL; + struct nf_conntrack_helper *helper; ct = nf_conntrack_alloc(otuple, rtuple); if (ct == NULL || IS_ERR(ct)) @@ -987,9 +987,14 @@ ctnetlink_create_conntrack(struct nfattr *cda[], ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif - help = nfct_help(ct); - if (help) { - helper = nf_ct_helper_find_get(rtuple); + helper = nf_ct_helper_find_get(rtuple); + if (helper) { + help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_KERNEL); + if (help == NULL) { + nf_ct_helper_put(helper); + err = -ENOMEM; + goto err; + } /* not in hash table yet so not strictly necessary */ rcu_assign_pointer(help->helper, helper); } -- cgit v1.2.3 From e54cbc1f91dea4f98b6209e693d3b5eae46321bd Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:24:04 -0700 Subject: [NETFILTER]: nf_nat: add reference to conntrack from entry of bysource list I will split 'struct nf_nat_info' out from conntrack. So I cannot use 'offsetof' to get the pointer to conntrack from it. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_nat.h | 5 +++-- net/ipv4/netfilter/nf_nat_core.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 47d3dc107a6a..575dc8ac48dc 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -53,11 +53,14 @@ struct nf_nat_multi_range_compat #include #include +struct nf_conn; + /* The structure embedded in the conntrack structure. */ struct nf_nat_info { struct list_head bysource; struct nf_nat_seq seq[IP_CT_DIR_MAX]; + struct nf_conn *ct; }; /* per conntrack: nat application helper private data */ @@ -77,8 +80,6 @@ struct nf_conn_nat #endif }; -struct nf_conn; - /* Set up the info structure to map into this range. */ extern unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index ea02f00d2dac..ac7e8abbbdeb 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -97,6 +97,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *conn) nat = nfct_nat(conn); write_lock_bh(&nf_nat_lock); list_del(&nat->info.bysource); + nat->info.ct = NULL; write_unlock_bh(&nf_nat_lock); } @@ -169,7 +170,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, read_lock_bh(&nf_nat_lock); list_for_each_entry(nat, &bysource[h], info.bysource) { - ct = (struct nf_conn *)((char *)nat - offsetof(struct nf_conn, data)); + ct = nat->info.ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, @@ -337,6 +338,7 @@ nf_nat_setup_info(struct nf_conn *ct, srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); write_lock_bh(&nf_nat_lock); + info->ct = ct; list_add(&info->bysource, &bysource[srchash]); write_unlock_bh(&nf_nat_lock); } -- cgit v1.2.3 From 2d59e5ca8c7113ad91452f0f9259a4b55ee90323 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:24:28 -0700 Subject: [NETFILTER]: nf_nat: use extension infrastructure Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 17 -------- include/net/netfilter/nf_conntrack_extend.h | 2 + include/net/netfilter/nf_nat.h | 6 +++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 -- net/ipv4/netfilter/nf_nat_core.c | 60 ++++++++++++++++++++++++-- net/ipv4/netfilter/nf_nat_standalone.c | 21 ++++----- 6 files changed, 73 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f1e0fee9aa9c..b2083d386f33 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -278,22 +278,5 @@ nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size); extern void nf_conntrack_unregister_cache(u_int32_t features); -/* valid combinations: - * basic: nf_conn, nf_conn .. nf_conn_help - * nat: nf_conn .. nf_conn_nat, nf_conn .. nf_conn_nat .. nf_conn help - */ -#ifdef CONFIG_NF_NAT_NEEDED -#include -static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) -{ - unsigned int offset = sizeof(struct nf_conn); - - if (!(ct->features & NF_CT_F_NAT)) - return NULL; - - offset = ALIGN(offset, __alignof__(struct nf_conn_nat)); - return (struct nf_conn_nat *) ((void *)ct + offset); -} -#endif /* CONFIG_NF_NAT_NEEDED */ #endif /* __KERNEL__ */ #endif /* _NF_CONNTRACK_H */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 05357dc5d2d2..73b5711faf32 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -6,10 +6,12 @@ enum nf_ct_ext_id { NF_CT_EXT_HELPER, + NF_CT_EXT_NAT, NF_CT_EXT_NUM, }; #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help +#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 575dc8ac48dc..0425e28c6a78 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -52,6 +52,7 @@ struct nf_nat_multi_range_compat #ifdef __KERNEL__ #include #include +#include struct nf_conn; @@ -89,6 +90,11 @@ extern unsigned int nf_nat_setup_info(struct nf_conn *ct, extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); +static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) +{ + return nf_ct_ext_find(ct, NF_CT_EXT_NAT); +} + extern int nf_nat_module_is_loaded; #else /* !__KERNEL__: iptables wants this to compile. */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 6dc72a815f77..96f641d07a40 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -108,9 +108,6 @@ EXPORT_SYMBOL_GPL(nf_nat_module_is_loaded); static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) { - if (nf_nat_module_is_loaded) - return NF_CT_F_NAT; - return NF_CT_F_BASIC; } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index ac7e8abbbdeb..4ce82d7014ff 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -297,11 +297,21 @@ nf_nat_setup_info(struct nf_conn *ct, unsigned int hooknum) { struct nf_conntrack_tuple curr_tuple, new_tuple; - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_info *info = &nat->info; + struct nf_conn_nat *nat; + struct nf_nat_info *info; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + /* nat helper or nfctnetlink also setup binding */ + nat = nfct_nat(ct); + if (!nat) { + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + DEBUGP("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN || @@ -338,6 +348,8 @@ nf_nat_setup_info(struct nf_conn *ct, srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); write_lock_bh(&nf_nat_lock); + /* nf_conntrack_alter_reply might re-allocate exntension aera */ + info = &nfct_nat(ct)->info; info->ct = ct; list_add(&info->bysource, &bysource[srchash]); write_unlock_bh(&nf_nat_lock); @@ -592,17 +604,52 @@ nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range) EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr); #endif +static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) +{ + struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT); + struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old; + struct nf_conn *ct = old_nat->info.ct; + unsigned int srchash; + + if (!(ct->status & IPS_NAT_DONE_MASK)) + return; + + srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + write_lock_bh(&nf_nat_lock); + list_replace(&old_nat->info.bysource, &new_nat->info.bysource); + new_nat->info.ct = ct; + write_unlock_bh(&nf_nat_lock); +} + +struct nf_ct_ext_type nat_extend = { + .len = sizeof(struct nf_conn_nat), + .align = __alignof__(struct nf_conn_nat), + .move = nf_nat_move_storage, + .id = NF_CT_EXT_NAT, + .flags = NF_CT_EXT_F_PREALLOC, +}; + static int __init nf_nat_init(void) { size_t i; + int ret; + + ret = nf_ct_extend_register(&nat_extend); + if (ret < 0) { + printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); + return ret; + } /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; /* One vmalloc for both hash tables */ bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size); - if (!bysource) - return -ENOMEM; + if (!bysource) { + ret = -ENOMEM; + goto cleanup_extend; + } /* Sew in builtin protocols. */ write_lock_bh(&nf_nat_lock); @@ -626,6 +673,10 @@ static int __init nf_nat_init(void) l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); return 0; + + cleanup_extend: + nf_ct_extend_unregister(&nat_extend); + return ret; } /* Clear NAT section of all conntracks, in case we're loaded again. */ @@ -647,6 +698,7 @@ static void __exit nf_nat_cleanup(void) synchronize_rcu(); vfree(bysource); nf_ct_l3proto_put(l3proto); + nf_ct_extend_unregister(&nat_extend); } MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 0b2f0c33f7cd..51a2708f7bf0 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -113,8 +114,13 @@ nf_nat_fn(unsigned int hooknum, return NF_ACCEPT; nat = nfct_nat(ct); - if (!nat) - return NF_ACCEPT; + if (!nat) { + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + DEBUGP("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } switch (ctinfo) { case IP_CT_RELATED: @@ -326,18 +332,10 @@ static struct nf_hook_ops nf_nat_ops[] = { static int __init nf_nat_standalone_init(void) { - int size, ret = 0; + int ret = 0; need_conntrack(); - size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_nat)) + - sizeof(struct nf_conn_nat); - ret = nf_conntrack_register_cache(NF_CT_F_NAT, "nf_nat:base", size); - if (ret < 0) { - printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n"); - return ret; - } - #ifdef CONFIG_XFRM BUG_ON(ip_nat_decode_session != NULL); ip_nat_decode_session = nat_decode_session; @@ -362,7 +360,6 @@ static int __init nf_nat_standalone_init(void) ip_nat_decode_session = NULL; synchronize_net(); #endif - nf_conntrack_unregister_cache(NF_CT_F_NAT); return ret; } -- cgit v1.2.3 From ff09b7493c8f433d3ffd6a31ad58d190f82ef0c5 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:25:28 -0700 Subject: [NETFILTER]: nf_nat: remove unused nf_nat_module_is_loaded Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_nat.h | 2 -- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 --- net/ipv4/netfilter/nf_nat_standalone.c | 2 -- 3 files changed, 7 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 0425e28c6a78..0541eed5008f 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -95,8 +95,6 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) return nf_ct_ext_find(ct, NF_CT_EXT_NAT); } -extern int nf_nat_module_is_loaded; - #else /* !__KERNEL__: iptables wants this to compile. */ #define nf_nat_multi_range nf_nat_multi_range_compat #endif /*__KERNEL__*/ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 96f641d07a40..7411dd16d779 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -103,9 +103,6 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, return NF_ACCEPT; } -int nf_nat_module_is_loaded = 0; -EXPORT_SYMBOL_GPL(nf_nat_module_is_loaded); - static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) { return NF_CT_F_BASIC; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 51a2708f7bf0..30eeaa4c645c 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -350,7 +350,6 @@ static int __init nf_nat_standalone_init(void) printk("nf_nat_init: can't register hooks.\n"); goto cleanup_rule_init; } - nf_nat_module_is_loaded = 1; return ret; cleanup_rule_init: @@ -367,7 +366,6 @@ static void __exit nf_nat_standalone_fini(void) { nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); nf_nat_rule_cleanup(); - nf_nat_module_is_loaded = 0; #ifdef CONFIG_XFRM ip_nat_decode_session = NULL; synchronize_net(); -- cgit v1.2.3 From dacd2a1a5cf621288833aa3c6e815b86a1536538 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:25:51 -0700 Subject: [NETFILTER]: nf_conntrack: remove old memory allocator of conntrack Now memory space for help and NAT are allocated by extension infrastructure. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 14 -- include/net/netfilter/nf_conntrack_l3proto.h | 2 - net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 - net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 - net/netfilter/nf_conntrack_core.c | 222 ++----------------------- net/netfilter/nf_conntrack_l3proto_generic.c | 7 - 6 files changed, 15 insertions(+), 242 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b2083d386f33..71386e5c4bb7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -117,9 +117,6 @@ struct nf_conn /* Unique ID that identifies this conntrack*/ unsigned int id; - /* features - nat, helper, ... used by allocating system */ - u_int32_t features; - #if defined(CONFIG_NF_CONNTRACK_MARK) u_int32_t mark; #endif @@ -133,9 +130,6 @@ struct nf_conn /* Extensions */ struct nf_ct_ext *ext; - - /* features dynamically at the end: helper, nat (both optional) */ - char data[0]; }; static inline struct nf_conn * @@ -265,14 +259,6 @@ do { \ local_bh_enable(); \ } while (0) -/* no helper, no nat */ -#define NF_CT_F_BASIC 0 -/* for helper */ -#define NF_CT_F_HELP 1 -/* for nat. */ -#define NF_CT_F_NAT 2 -#define NF_CT_F_NUM 4 - extern int nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size); extern void diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 96a58d8e1d3f..890752d7f673 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -64,8 +64,6 @@ struct nf_conntrack_l3proto int (*prepare)(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, u_int8_t *protonum); - u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple); - int (*tuple_to_nfattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 7411dd16d779..129a8cccf4a1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -103,11 +103,6 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, return NF_ACCEPT; } -static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) -{ - return NF_CT_F_BASIC; -} - static unsigned int ipv4_confirm(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -419,7 +414,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .print_tuple = ipv4_print_tuple, .print_conntrack = ipv4_print_conntrack, .prepare = ipv4_prepare, - .get_features = ipv4_get_features, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nfattr = ipv4_tuple_to_nfattr, .nfattr_to_tuple = ipv4_nfattr_to_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 1b1797f1f33d..747b01e53132 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -147,11 +147,6 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, return NF_ACCEPT; } -static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple) -{ - return NF_CT_F_BASIC; -} - static unsigned int ipv6_confirm(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -397,7 +392,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .ctl_table_path = nf_net_netfilter_sysctl_path, .ctl_table = nf_ct_ipv6_sysctl_table, #endif - .get_features = ipv6_get_features, .me = THIS_MODULE, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 914506e6c787..a71366652938 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -71,39 +71,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; LIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc __read_mostly; - +static struct kmem_cache *nf_conntrack_cachep __read_mostly; static unsigned int nf_conntrack_next_id; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); -/* - * This scheme offers various size of "struct nf_conn" dependent on - * features(helper, nat, ...) - */ - -#define NF_CT_FEATURES_NAMELEN 256 -static struct { - /* name of slab cache. printed in /proc/slabinfo */ - char *name; - - /* size of slab cache */ - size_t size; - - /* slab cache pointer */ - struct kmem_cache *cachep; - - /* allocated slab cache + modules which uses this slab cache */ - int use; - -} nf_ct_cache[NF_CT_F_NUM]; - -/* protect members of nf_ct_cache except of "use" */ -DEFINE_RWLOCK(nf_ct_cache_lock); - -/* This avoids calling kmem_cache_create() with same name simultaneously */ -static DEFINE_MUTEX(nf_ct_cache_mutex); - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -126,122 +99,6 @@ static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) nf_conntrack_hash_rnd); } -int nf_conntrack_register_cache(u_int32_t features, const char *name, - size_t size) -{ - int ret = 0; - char *cache_name; - struct kmem_cache *cachep; - - DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", - features, name, size); - - if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) { - DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n", - features); - return -EINVAL; - } - - mutex_lock(&nf_ct_cache_mutex); - - write_lock_bh(&nf_ct_cache_lock); - /* e.g: multiple helpers are loaded */ - if (nf_ct_cache[features].use > 0) { - DEBUGP("nf_conntrack_register_cache: already resisterd.\n"); - if ((!strncmp(nf_ct_cache[features].name, name, - NF_CT_FEATURES_NAMELEN)) - && nf_ct_cache[features].size == size) { - DEBUGP("nf_conntrack_register_cache: reusing.\n"); - nf_ct_cache[features].use++; - ret = 0; - } else - ret = -EBUSY; - - write_unlock_bh(&nf_ct_cache_lock); - mutex_unlock(&nf_ct_cache_mutex); - return ret; - } - write_unlock_bh(&nf_ct_cache_lock); - - /* - * The memory space for name of slab cache must be alive until - * cache is destroyed. - */ - cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC); - if (cache_name == NULL) { - DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n"); - ret = -ENOMEM; - goto out_up_mutex; - } - - if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN) - >= NF_CT_FEATURES_NAMELEN) { - printk("nf_conntrack_register_cache: name too long\n"); - ret = -EINVAL; - goto out_free_name; - } - - cachep = kmem_cache_create(cache_name, size, 0, 0, - NULL, NULL); - if (!cachep) { - printk("nf_conntrack_register_cache: Can't create slab cache " - "for the features = 0x%x\n", features); - ret = -ENOMEM; - goto out_free_name; - } - - write_lock_bh(&nf_ct_cache_lock); - nf_ct_cache[features].use = 1; - nf_ct_cache[features].size = size; - nf_ct_cache[features].cachep = cachep; - nf_ct_cache[features].name = cache_name; - write_unlock_bh(&nf_ct_cache_lock); - - goto out_up_mutex; - -out_free_name: - kfree(cache_name); -out_up_mutex: - mutex_unlock(&nf_ct_cache_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_conntrack_register_cache); - -/* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */ -void nf_conntrack_unregister_cache(u_int32_t features) -{ - struct kmem_cache *cachep; - char *name; - - /* - * This assures that kmem_cache_create() isn't called before destroying - * slab cache. - */ - DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features); - mutex_lock(&nf_ct_cache_mutex); - - write_lock_bh(&nf_ct_cache_lock); - if (--nf_ct_cache[features].use > 0) { - write_unlock_bh(&nf_ct_cache_lock); - mutex_unlock(&nf_ct_cache_mutex); - return; - } - cachep = nf_ct_cache[features].cachep; - name = nf_ct_cache[features].name; - nf_ct_cache[features].cachep = NULL; - nf_ct_cache[features].name = NULL; - nf_ct_cache[features].size = 0; - write_unlock_bh(&nf_ct_cache_lock); - - synchronize_net(); - - kmem_cache_destroy(cachep); - kfree(name); - - mutex_unlock(&nf_ct_cache_mutex); -} -EXPORT_SYMBOL_GPL(nf_conntrack_unregister_cache); - int nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, @@ -559,11 +416,8 @@ static int early_drop(struct list_head *chain) return dropped; } -static struct nf_conn * -__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_tuple *repl, - const struct nf_conntrack_l3proto *l3proto, - u_int32_t features) +struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) { struct nf_conn *conntrack = NULL; @@ -589,65 +443,28 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } } - /* find features needed by this conntrack. */ - features |= l3proto->get_features(orig); - - DEBUGP("nf_conntrack_alloc: features=0x%x\n", features); - - read_lock_bh(&nf_ct_cache_lock); - - if (unlikely(!nf_ct_cache[features].use)) { - DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n", - features); - goto out; - } - - conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC); + conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); if (conntrack == NULL) { - DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n"); - goto out; + DEBUGP("nf_conntrack_alloc: Can't alloc conntrack.\n"); + atomic_dec(&nf_conntrack_count); + return ERR_PTR(-ENOMEM); } - memset(conntrack, 0, nf_ct_cache[features].size); - conntrack->features = features; atomic_set(&conntrack->ct_general.use, 1); conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ setup_timer(&conntrack->timeout, death_by_timeout, (unsigned long)conntrack); - read_unlock_bh(&nf_ct_cache_lock); - return conntrack; -out: - read_unlock_bh(&nf_ct_cache_lock); - atomic_dec(&nf_conntrack_count); return conntrack; } - -struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_tuple *repl) -{ - struct nf_conntrack_l3proto *l3proto; - struct nf_conn *ct; - - rcu_read_lock(); - l3proto = __nf_ct_l3proto_find(orig->src.l3num); - ct = __nf_conntrack_alloc(orig, repl, l3proto, 0); - rcu_read_unlock(); - - return ct; -} EXPORT_SYMBOL_GPL(nf_conntrack_alloc); void nf_conntrack_free(struct nf_conn *conntrack) { - u_int32_t features = conntrack->features; - NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM); nf_ct_ext_free(conntrack); - DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features, - conntrack); - kmem_cache_free(nf_ct_cache[features].cachep, conntrack); + kmem_cache_free(nf_conntrack_cachep, conntrack); atomic_dec(&nf_conntrack_count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -665,14 +482,13 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_expect *exp; - u_int32_t features = 0; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { DEBUGP("Can't invert tuple.\n"); return NULL; } - conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features); + conntrack = nf_conntrack_alloc(tuple, &repl_tuple); if (conntrack == NULL || IS_ERR(conntrack)) { DEBUGP("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)conntrack; @@ -1128,8 +944,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush); supposed to kill the mall. */ void nf_conntrack_cleanup(void) { - int i; - rcu_assign_pointer(ip_ct_attach, NULL); /* This makes sure all current packets have passed through @@ -1150,14 +964,7 @@ void nf_conntrack_cleanup(void) rcu_assign_pointer(nf_ct_destroy, NULL); - for (i = 0; i < NF_CT_F_NUM; i++) { - if (nf_ct_cache[i].use == 0) - continue; - - NF_CT_ASSERT(nf_ct_cache[i].use == 1); - nf_ct_cache[i].use = 1; - nf_conntrack_unregister_cache(i); - } + kmem_cache_destroy(nf_conntrack_cachep); kmem_cache_destroy(nf_conntrack_expect_cachep); free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, nf_conntrack_htable_size); @@ -1267,9 +1074,10 @@ int __init nf_conntrack_init(void) goto err_out; } - ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic", - sizeof(struct nf_conn)); - if (ret < 0) { + nf_conntrack_cachep = kmem_cache_create("nf_conntrack", + sizeof(struct nf_conn), + 0, 0, NULL, NULL); + if (!nf_conntrack_cachep) { printk(KERN_ERR "Unable to create nf_conn slab cache\n"); goto err_free_hash; } @@ -1307,7 +1115,7 @@ out_fini_proto: out_free_expect_slab: kmem_cache_destroy(nf_conntrack_expect_cachep); err_free_conntrack_slab: - nf_conntrack_unregister_cache(NF_CT_F_BASIC); + kmem_cache_destroy(nf_conntrack_cachep); err_free_hash: free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, nf_conntrack_htable_size); diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index cbd96f3c1b89..2fd0f11b8fb2 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -76,12 +76,6 @@ generic_prepare(struct sk_buff **pskb, unsigned int hooknum, } -static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple) - -{ - return NF_CT_F_BASIC; -} - struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = { .l3proto = PF_UNSPEC, .name = "unknown", @@ -90,6 +84,5 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = { .print_tuple = generic_print_tuple, .print_conntrack = generic_print_conntrack, .prepare = generic_prepare, - .get_features = generic_get_features, }; EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); -- cgit v1.2.3 From d8a0509a696de60296a66ba4fe4f9eaade497103 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:26:16 -0700 Subject: [NETFILTER]: nf_nat: kill global 'destroy' operation This kills the global 'destroy' operation which was used by NAT. Instead it uses the extension infrastructure so that multiple extensions can register own operations. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 3 --- net/ipv4/netfilter/nf_nat_core.c | 46 +++++++++++++++++------------------- net/netfilter/nf_conntrack_core.c | 8 ------- 3 files changed, 22 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 71386e5c4bb7..ef4a403878a3 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -213,9 +213,6 @@ extern void nf_conntrack_tcp_update(struct sk_buff *skb, struct nf_conn *conntrack, int dir); -/* Call me when a conntrack is destroyed. */ -extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack); - /* Fake conntrack entry for untracked connections */ extern struct nf_conn nf_conntrack_untracked; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 4ce82d7014ff..e370d1568001 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -87,20 +87,6 @@ hash_by_src(const struct nf_conntrack_tuple *tuple) tuple->dst.protonum, 0) % nf_nat_htable_size; } -/* Noone using conntrack by the time this called. */ -static void nf_nat_cleanup_conntrack(struct nf_conn *conn) -{ - struct nf_conn_nat *nat; - if (!(conn->status & IPS_NAT_DONE_MASK)) - return; - - nat = nfct_nat(conn); - write_lock_bh(&nf_nat_lock); - list_del(&nat->info.bysource); - nat->info.ct = NULL; - write_unlock_bh(&nf_nat_lock); -} - /* Is this tuple already taken? (not by us) */ int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, @@ -604,6 +590,22 @@ nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range) EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr); #endif +/* Noone using conntrack by the time this called. */ +static void nf_nat_cleanup_conntrack(struct nf_conn *ct) +{ + struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); + + if (nat == NULL || nat->info.ct == NULL) + return; + + NF_CT_ASSERT(nat->info.ct->status & IPS_NAT_DONE_MASK); + + write_lock_bh(&nf_nat_lock); + list_del(&nat->info.bysource); + nat->info.ct = NULL; + write_unlock_bh(&nf_nat_lock); +} + static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) { struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT); @@ -623,11 +625,12 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) } struct nf_ct_ext_type nat_extend = { - .len = sizeof(struct nf_conn_nat), - .align = __alignof__(struct nf_conn_nat), - .move = nf_nat_move_storage, - .id = NF_CT_EXT_NAT, - .flags = NF_CT_EXT_F_PREALLOC, + .len = sizeof(struct nf_conn_nat), + .align = __alignof__(struct nf_conn_nat), + .destroy = nf_nat_cleanup_conntrack, + .move = nf_nat_move_storage, + .id = NF_CT_EXT_NAT, + .flags = NF_CT_EXT_F_PREALLOC, }; static int __init nf_nat_init(void) @@ -664,10 +667,6 @@ static int __init nf_nat_init(void) INIT_LIST_HEAD(&bysource[i]); } - /* FIXME: Man, this is a hack. */ - NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL); - rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack); - /* Initialize fake conntrack so that NAT will skip it */ nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; @@ -694,7 +693,6 @@ static int clean_nat(struct nf_conn *i, void *data) static void __exit nf_nat_cleanup(void) { nf_ct_iterate_cleanup(&clean_nat, NULL); - rcu_assign_pointer(nf_conntrack_destroyed, NULL); synchronize_rcu(); vfree(bysource); nf_ct_l3proto_put(l3proto); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a71366652938..035eb9f4a61e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -53,9 +53,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_lock); atomic_t nf_conntrack_count = ATOMIC_INIT(0); EXPORT_SYMBOL_GPL(nf_conntrack_count); -void (*nf_conntrack_destroyed)(struct nf_conn *conntrack); -EXPORT_SYMBOL_GPL(nf_conntrack_destroyed); - unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); @@ -157,7 +154,6 @@ destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; struct nf_conntrack_l4proto *l4proto; - typeof(nf_conntrack_destroyed) destroyed; DEBUGP("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); @@ -177,10 +173,6 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_ct_ext_destroy(ct); - destroyed = rcu_dereference(nf_conntrack_destroyed); - if (destroyed) - destroyed(ct); - rcu_read_unlock(); write_lock_bh(&nf_conntrack_lock); -- cgit v1.2.3 From b6b84d4a94e95727a4c65841eea23ac60c6aa329 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:26:35 -0700 Subject: [NETFILTER]: nf_nat: merge nf_conn and nf_nat_info Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_nat.h | 17 ++++++----------- net/ipv4/netfilter/nf_nat_core.c | 25 ++++++++++++------------- net/ipv4/netfilter/nf_nat_helper.c | 11 +++++------ 3 files changed, 23 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 0541eed5008f..d0e5e436dc1b 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -54,16 +54,6 @@ struct nf_nat_multi_range_compat #include #include -struct nf_conn; - -/* The structure embedded in the conntrack structure. */ -struct nf_nat_info -{ - struct list_head bysource; - struct nf_nat_seq seq[IP_CT_DIR_MAX]; - struct nf_conn *ct; -}; - /* per conntrack: nat application helper private data */ union nf_conntrack_nat_help { @@ -71,9 +61,14 @@ union nf_conntrack_nat_help struct nf_nat_pptp nat_pptp_info; }; +struct nf_conn; + +/* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct nf_nat_info info; + struct list_head bysource; + struct nf_nat_seq seq[IP_CT_DIR_MAX]; + struct nf_conn *ct; union nf_conntrack_nat_help help; #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index e370d1568001..7e31777082de 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -155,8 +155,8 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, struct nf_conn *ct; read_lock_bh(&nf_nat_lock); - list_for_each_entry(nat, &bysource[h], info.bysource) { - ct = nat->info.ct; + list_for_each_entry(nat, &bysource[h], bysource) { + ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, @@ -284,7 +284,6 @@ nf_nat_setup_info(struct nf_conn *ct, { struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; - struct nf_nat_info *info; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); @@ -335,9 +334,9 @@ nf_nat_setup_info(struct nf_conn *ct, srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); write_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ - info = &nfct_nat(ct)->info; - info->ct = ct; - list_add(&info->bysource, &bysource[srchash]); + nat = nfct_nat(ct); + nat->ct = ct; + list_add(&nat->bysource, &bysource[srchash]); write_unlock_bh(&nf_nat_lock); } @@ -595,14 +594,14 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); - if (nat == NULL || nat->info.ct == NULL) + if (nat == NULL || nat->ct == NULL) return; - NF_CT_ASSERT(nat->info.ct->status & IPS_NAT_DONE_MASK); + NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); write_lock_bh(&nf_nat_lock); - list_del(&nat->info.bysource); - nat->info.ct = NULL; + list_del(&nat->bysource); + nat->ct = NULL; write_unlock_bh(&nf_nat_lock); } @@ -610,7 +609,7 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) { struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT); struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old; - struct nf_conn *ct = old_nat->info.ct; + struct nf_conn *ct = old_nat->ct; unsigned int srchash; if (!(ct->status & IPS_NAT_DONE_MASK)) @@ -619,8 +618,8 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); write_lock_bh(&nf_nat_lock); - list_replace(&old_nat->info.bysource, &new_nat->info.bysource); - new_nat->info.ct = ct; + list_replace(&old_nat->bysource, &new_nat->bysource); + new_nat->ct = ct; write_unlock_bh(&nf_nat_lock); } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index ef0a99e09fd1..f3383fc14e1c 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -52,8 +52,8 @@ adjust_tcp_sequence(u32 seq, dir = CTINFO2DIR(ctinfo); - this_way = &nat->info.seq[dir]; - other_way = &nat->info.seq[!dir]; + this_way = &nat->seq[dir]; + other_way = &nat->seq[!dir]; DEBUGP("nf_nat_resize_packet: Seq_offset before: "); DUMP_OFFSET(this_way); @@ -372,8 +372,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb, op[1] >= 2+TCPOLEN_SACK_PERBLOCK && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) sack_adjust(*pskb, tcph, optoff+2, - optoff+op[1], - &nat->info.seq[!dir]); + optoff+op[1], &nat->seq[!dir]); optoff += op[1]; } } @@ -394,8 +393,8 @@ nf_nat_seq_adjust(struct sk_buff **pskb, dir = CTINFO2DIR(ctinfo); - this_way = &nat->info.seq[dir]; - other_way = &nat->info.seq[!dir]; + this_way = &nat->seq[dir]; + other_way = &nat->seq[!dir]; if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) return 0; -- cgit v1.2.3 From 61eb3107cd8e0302f95aae26206e552365daf290 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:27:06 -0700 Subject: [NETFILTER]: nf_conntrack_extend: use __read_mostly for struct nf_ct_ext_type Also make them static. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 7e31777082de..04691edf50c2 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -623,7 +623,7 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) write_unlock_bh(&nf_nat_lock); } -struct nf_ct_ext_type nat_extend = { +static struct nf_ct_ext_type nat_extend __read_mostly = { .len = sizeof(struct nf_conn_nat), .align = __alignof__(struct nf_conn_nat), .destroy = nf_nat_cleanup_conntrack, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 6d32399d64e1..dc352f509477 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -145,7 +145,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); -struct nf_ct_ext_type helper_extend = { +static struct nf_ct_ext_type helper_extend __read_mostly = { .len = sizeof(struct nf_conn_help), .align = __alignof__(struct nf_conn_help), .id = NF_CT_EXT_HELPER, -- cgit v1.2.3 From 8e5105a0c36a059dfd0f0bb9e73ee7c97d306247 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:27:33 -0700 Subject: [NETFILTER]: nf_conntrack: round up hashsize to next multiple of PAGE_SIZE Don't let the rest of the page go to waste. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 035eb9f4a61e..54acac5c6ea7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -965,12 +965,14 @@ void nf_conntrack_cleanup(void) nf_conntrack_helper_fini(); } -static struct list_head *alloc_hashtable(int size, int *vmalloced) +static struct list_head *alloc_hashtable(int *sizep, int *vmalloced) { struct list_head *hash; - unsigned int i; + unsigned int size, i; *vmalloced = 0; + + size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct list_head)); hash = (void*)__get_free_pages(GFP_KERNEL, get_order(sizeof(struct list_head) * size)); @@ -1003,7 +1005,7 @@ int set_hashsize(const char *val, struct kernel_param *kp) if (!hashsize) return -EINVAL; - hash = alloc_hashtable(hashsize, &vmalloced); + hash = alloc_hashtable(&hashsize, &vmalloced); if (!hash) return -ENOMEM; @@ -1053,19 +1055,19 @@ int __init nf_conntrack_init(void) if (nf_conntrack_htable_size < 16) nf_conntrack_htable_size = 16; } - nf_conntrack_max = 8 * nf_conntrack_htable_size; - - printk("nf_conntrack version %s (%u buckets, %d max)\n", - NF_CONNTRACK_VERSION, nf_conntrack_htable_size, - nf_conntrack_max); - - nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size, + nf_conntrack_hash = alloc_hashtable(&nf_conntrack_htable_size, &nf_conntrack_vmalloc); if (!nf_conntrack_hash) { printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_out; } + nf_conntrack_max = 8 * nf_conntrack_htable_size; + + printk("nf_conntrack version %s (%u buckets, %d max)\n", + NF_CONNTRACK_VERSION, nf_conntrack_htable_size, + nf_conntrack_max); + nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), 0, 0, NULL, NULL); -- cgit v1.2.3 From f205c5e0c28aa7e0fb6eaaa66e97928f9d9e6994 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:28:14 -0700 Subject: [NETFILTER]: nf_conntrack: use hlists for conntrack hash Convert conntrack hash to hlists to reduce its size and cache footprint. Since the default hashsize to max. entries ratio sucks (1:16), this patch doesn't reduce the amount of memory used for the hash by default, but instead uses a better ratio of 1:8, which results in the same max. entries value. One thing worth noting is early_drop. It really should use LRU, so it now has to iterate over the entire chain to find the last unconfirmed entry. Since chains shouldn't be very long and the entire operation is very rare this shouldn't be a problem. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 4 +- include/net/netfilter/nf_conntrack_tuple.h | 3 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 17 ++-- net/netfilter/nf_conntrack_core.c | 100 ++++++++++++--------- net/netfilter/nf_conntrack_helper.c | 5 +- net/netfilter/nf_conntrack_netlink.c | 6 +- net/netfilter/nf_conntrack_standalone.c | 17 ++-- 7 files changed, 83 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3bf7d05ea64d..6351948654b3 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -84,9 +84,9 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *proto); -extern struct list_head *nf_conntrack_hash; +extern struct hlist_head *nf_conntrack_hash; extern struct list_head nf_conntrack_expect_list; extern rwlock_t nf_conntrack_lock ; -extern struct list_head unconfirmed; +extern struct hlist_head unconfirmed; #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 5d72b16e876f..d02ce876b4ca 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -125,8 +125,7 @@ DEBUGP("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", \ /* Connections have two entries in the hash table: one for each way */ struct nf_conntrack_tuple_hash { - struct list_head list; - + struct hlist_node hnode; struct nf_conntrack_tuple tuple; }; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 89f933e81035..888f27fd884f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -41,35 +41,36 @@ struct ct_iter_state { unsigned int bucket; }; -static struct list_head *ct_get_first(struct seq_file *seq) +static struct hlist_node *ct_get_first(struct seq_file *seq) { struct ct_iter_state *st = seq->private; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - if (!list_empty(&nf_conntrack_hash[st->bucket])) - return nf_conntrack_hash[st->bucket].next; + if (!hlist_empty(&nf_conntrack_hash[st->bucket])) + return nf_conntrack_hash[st->bucket].first; } return NULL; } -static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) +static struct hlist_node *ct_get_next(struct seq_file *seq, + struct hlist_node *head) { struct ct_iter_state *st = seq->private; head = head->next; - while (head == &nf_conntrack_hash[st->bucket]) { + while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = nf_conntrack_hash[st->bucket].next; + head = nf_conntrack_hash[st->bucket].first; } return head; } -static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) +static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *head = ct_get_first(seq); + struct hlist_node *head = ct_get_first(seq); if (head) while (pos && (head = ct_get_next(seq, head))) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 54acac5c6ea7..992d0ef31fa3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -59,14 +59,14 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct list_head *nf_conntrack_hash __read_mostly; +struct hlist_head *nf_conntrack_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; -LIST_HEAD(unconfirmed); +HLIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; static unsigned int nf_conntrack_next_id; @@ -142,8 +142,8 @@ static void clean_from_lists(struct nf_conn *ct) { DEBUGP("clean_from_lists(%p)\n", ct); - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); + hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); + hlist_del(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); @@ -184,8 +184,8 @@ destroy_conntrack(struct nf_conntrack *nfct) /* We overload first tuple to link into unconfirmed list. */ if (!nf_ct_is_confirmed(ct)) { - BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list)); - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode)); + hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); } NF_CT_STAT_INC(delete); @@ -226,9 +226,10 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { struct nf_conntrack_tuple_hash *h; + struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); - list_for_each_entry(h, &nf_conntrack_hash[hash], list) { + hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); @@ -263,10 +264,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int repl_hash) { ct->id = ++nf_conntrack_next_id; - list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, - &nf_conntrack_hash[hash]); - list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, - &nf_conntrack_hash[repl_hash]); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &nf_conntrack_hash[hash]); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, + &nf_conntrack_hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -290,6 +291,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct nf_conn_help *help; + struct hlist_node *n; enum ip_conntrack_info ctinfo; ct = nf_ct_get(*pskb, &ctinfo); @@ -319,17 +321,17 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - list_for_each_entry(h, &nf_conntrack_hash[hash], list) + hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple)) goto out; - list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list) + hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple)) goto out; /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); __nf_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original @@ -378,22 +380,22 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static int early_drop(struct list_head *chain) +static int early_drop(struct hlist_head *chain) { - /* Traverse backwards: gives us oldest, which is roughly LRU */ + /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; struct nf_conn *ct = NULL, *tmp; + struct hlist_node *n; int dropped = 0; read_lock_bh(&nf_conntrack_lock); - list_for_each_entry_reverse(h, chain, list) { + hlist_for_each_entry(h, n, chain, hnode) { tmp = nf_ct_tuplehash_to_ctrack(h); - if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) ct = tmp; - atomic_inc(&ct->ct_general.use); - break; - } } + if (ct) + atomic_inc(&ct->ct_general.use); read_unlock_bh(&nf_conntrack_lock); if (!ct) @@ -535,7 +537,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, } /* Overload tuple linked list to put us in unconfirmed list. */ - list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); + hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &unconfirmed); write_unlock_bh(&nf_conntrack_lock); @@ -873,16 +876,17 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + struct hlist_node *n; write_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) { + hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; } } - list_for_each_entry(h, &unconfirmed, list) { + hlist_for_each_entry(h, n, &unconfirmed, hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) set_bit(IPS_DYING_BIT, &ct->status); @@ -917,13 +921,14 @@ static int kill_all(struct nf_conn *i, void *data) return 1; } -static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) +static void free_conntrack_hash(struct hlist_head *hash, int vmalloced, + int size) { if (vmalloced) vfree(hash); else free_pages((unsigned long)hash, - get_order(sizeof(struct list_head) * size)); + get_order(sizeof(struct hlist_head) * size)); } void nf_conntrack_flush(void) @@ -965,26 +970,26 @@ void nf_conntrack_cleanup(void) nf_conntrack_helper_fini(); } -static struct list_head *alloc_hashtable(int *sizep, int *vmalloced) +static struct hlist_head *alloc_hashtable(int *sizep, int *vmalloced) { - struct list_head *hash; + struct hlist_head *hash; unsigned int size, i; *vmalloced = 0; - size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct list_head)); + size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head)); hash = (void*)__get_free_pages(GFP_KERNEL, - get_order(sizeof(struct list_head) + get_order(sizeof(struct hlist_head) * size)); if (!hash) { *vmalloced = 1; printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); - hash = vmalloc(sizeof(struct list_head) * size); + hash = vmalloc(sizeof(struct hlist_head) * size); } if (hash) for (i = 0; i < size; i++) - INIT_LIST_HEAD(&hash[i]); + INIT_HLIST_HEAD(&hash[i]); return hash; } @@ -994,7 +999,7 @@ int set_hashsize(const char *val, struct kernel_param *kp) int i, bucket, hashsize, vmalloced; int old_vmalloced, old_size; int rnd; - struct list_head *hash, *old_hash; + struct hlist_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; /* On boot, we can set this without any fancy locking. */ @@ -1015,12 +1020,12 @@ int set_hashsize(const char *val, struct kernel_param *kp) write_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { - while (!list_empty(&nf_conntrack_hash[i])) { - h = list_entry(nf_conntrack_hash[i].next, - struct nf_conntrack_tuple_hash, list); - list_del(&h->list); + while (!hlist_empty(&nf_conntrack_hash[i])) { + h = hlist_entry(nf_conntrack_hash[i].first, + struct nf_conntrack_tuple_hash, hnode); + hlist_del(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); - list_add_tail(&h->list, &hash[bucket]); + hlist_add_head(&h->hnode, &hash[bucket]); } } old_size = nf_conntrack_htable_size; @@ -1042,18 +1047,25 @@ module_param_call(hashsize, set_hashsize, param_get_uint, int __init nf_conntrack_init(void) { + int max_factor = 8; int ret; /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB - * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ + * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ if (!nf_conntrack_htable_size) { nf_conntrack_htable_size = (((num_physpages << PAGE_SHIFT) / 16384) - / sizeof(struct list_head)); + / sizeof(struct hlist_head)); if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) - nf_conntrack_htable_size = 8192; - if (nf_conntrack_htable_size < 16) - nf_conntrack_htable_size = 16; + nf_conntrack_htable_size = 16384; + if (nf_conntrack_htable_size < 32) + nf_conntrack_htable_size = 32; + + /* Use a max. factor of four by default to get the same max as + * with the old struct list_heads. When a table size is given + * we use the old value of 8 to avoid reducing the max. + * entries. */ + max_factor = 4; } nf_conntrack_hash = alloc_hashtable(&nf_conntrack_htable_size, &nf_conntrack_vmalloc); @@ -1062,7 +1074,7 @@ int __init nf_conntrack_init(void) goto err_out; } - nf_conntrack_max = 8 * nf_conntrack_htable_size; + nf_conntrack_max = max_factor * nf_conntrack_htable_size; printk("nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index dc352f509477..3fc6e9f0de1a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -116,6 +116,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) unsigned int i; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp, *tmp; + struct hlist_node *n; /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); @@ -132,10 +133,10 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. */ - list_for_each_entry(h, &unconfirmed, list) + hlist_for_each_entry(h, n, &unconfirmed, hnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { - list_for_each_entry(h, &nf_conntrack_hash[i], list) + hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode) unhelp(h, me); } write_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3d56f36074f7..0627559ca470 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -428,7 +428,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; - struct list_head *i; + struct hlist_node *n; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; @@ -436,8 +436,8 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { - h = (struct nf_conntrack_tuple_hash *) i; + hlist_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], + hnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 45baeb0e30f9..fe536b20408b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -60,35 +60,36 @@ struct ct_iter_state { unsigned int bucket; }; -static struct list_head *ct_get_first(struct seq_file *seq) +static struct hlist_node *ct_get_first(struct seq_file *seq) { struct ct_iter_state *st = seq->private; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - if (!list_empty(&nf_conntrack_hash[st->bucket])) - return nf_conntrack_hash[st->bucket].next; + if (!hlist_empty(&nf_conntrack_hash[st->bucket])) + return nf_conntrack_hash[st->bucket].first; } return NULL; } -static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) +static struct hlist_node *ct_get_next(struct seq_file *seq, + struct hlist_node *head) { struct ct_iter_state *st = seq->private; head = head->next; - while (head == &nf_conntrack_hash[st->bucket]) { + while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = nf_conntrack_hash[st->bucket].next; + head = nf_conntrack_hash[st->bucket].first; } return head; } -static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) +static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *head = ct_get_first(seq); + struct hlist_node *head = ct_get_first(seq); if (head) while (pos && (head = ct_get_next(seq, head))) -- cgit v1.2.3 From 330f7db5e578e1e298ba3a41748e5ea333a64a2b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:28:42 -0700 Subject: [NETFILTER]: nf_conntrack: remove 'ignore_conntrack' argument from nf_conntrack_find_get All callers pass NULL, this also doesn't seem very useful for modules. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 3 +-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 7 +++---- net/netfilter/nf_conntrack_netlink.c | 6 +++--- net/netfilter/nf_conntrack_pptp.c | 2 +- 7 files changed, 12 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 6351948654b3..2fa3a1bbc7f2 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -58,8 +58,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack); +nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff **pskb); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 129a8cccf4a1..a103f597d446 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -334,7 +334,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(&tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index f4fc657c1983..91fb277045ef 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -187,13 +187,13 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple, NULL); + h = nf_conntrack_find_get(&innertuple); if (!h) { /* Locally generated ICMPs will match inverted if they haven't been SNAT'ed yet */ /* FIXME: NAT code has to handle half-done double NAT --RR */ if (hooknum == NF_IP_LOCAL_OUT) - h = nf_conntrack_find_get(&origtuple, NULL); + h = nf_conntrack_find_get(&origtuple); if (!h) { DEBUGP("icmp_error_message: no match\n"); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 8814b95b2326..a514661d25dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -193,7 +193,7 @@ icmpv6_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&intuple, NULL); + h = nf_conntrack_find_get(&intuple); if (!h) { DEBUGP("icmpv6_error: no match\n"); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 992d0ef31fa3..8ed761cc0819 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -244,13 +244,12 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) +nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; read_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_find(tuple, ignored_conntrack); + h = __nf_conntrack_find(tuple, NULL); if (h) atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); read_unlock_bh(&nf_conntrack_lock); @@ -574,7 +573,7 @@ resolve_normal_ct(struct sk_buff *skb, } /* look for tuple match */ - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(&tuple); if (!h) { h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff); if (!h) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0627559ca470..d310ec866194 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -689,7 +689,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(&tuple); if (!h) return -ENOENT; @@ -744,7 +744,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(&tuple); if (!h) return -ENOENT; @@ -1426,7 +1426,7 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&master_tuple, NULL); + h = nf_conntrack_find_get(&master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 115bcb5d5a7c..da36c48177e9 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -146,7 +146,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) DEBUGP("trying to timeout ct or exp for tuple "); NF_CT_DUMP_TUPLE(t); - h = nf_conntrack_find_get(t, NULL); + h = nf_conntrack_find_get(t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); DEBUGP("setting timeout of conntrack %p to 0\n", sibling); -- cgit v1.2.3 From ac565e5fc104fe1842a87f2206fcfb7b6dda903d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:30:08 -0700 Subject: [NETFILTER]: nf_conntrack: export hash allocation/destruction functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 4 ++++ net/netfilter/nf_conntrack_core.c | 23 ++++++++++++----------- 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index ef4a403878a3..8f2cbb965f3d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -172,6 +172,10 @@ static inline void nf_ct_put(struct nf_conn *ct) extern int nf_ct_l3proto_try_module_get(unsigned short l3proto); extern void nf_ct_l3proto_module_put(unsigned short l3proto); +extern struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced); +extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, + int size); + extern struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8ed761cc0819..f4c3039728dc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -920,8 +920,7 @@ static int kill_all(struct nf_conn *i, void *data) return 1; } -static void free_conntrack_hash(struct hlist_head *hash, int vmalloced, - int size) +void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, int size) { if (vmalloced) vfree(hash); @@ -929,6 +928,7 @@ static void free_conntrack_hash(struct hlist_head *hash, int vmalloced, free_pages((unsigned long)hash, get_order(sizeof(struct hlist_head) * size)); } +EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); void nf_conntrack_flush(void) { @@ -962,14 +962,14 @@ void nf_conntrack_cleanup(void) kmem_cache_destroy(nf_conntrack_cachep); kmem_cache_destroy(nf_conntrack_expect_cachep); - free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, - nf_conntrack_htable_size); + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_conntrack_htable_size); nf_conntrack_proto_fini(); nf_conntrack_helper_fini(); } -static struct hlist_head *alloc_hashtable(int *sizep, int *vmalloced) +struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced) { struct hlist_head *hash; unsigned int size, i; @@ -992,6 +992,7 @@ static struct hlist_head *alloc_hashtable(int *sizep, int *vmalloced) return hash; } +EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); int set_hashsize(const char *val, struct kernel_param *kp) { @@ -1009,7 +1010,7 @@ int set_hashsize(const char *val, struct kernel_param *kp) if (!hashsize) return -EINVAL; - hash = alloc_hashtable(&hashsize, &vmalloced); + hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced); if (!hash) return -ENOMEM; @@ -1037,7 +1038,7 @@ int set_hashsize(const char *val, struct kernel_param *kp) nf_conntrack_hash_rnd = rnd; write_unlock_bh(&nf_conntrack_lock); - free_conntrack_hash(old_hash, old_vmalloced, old_size); + nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); return 0; } @@ -1066,8 +1067,8 @@ int __init nf_conntrack_init(void) * entries. */ max_factor = 4; } - nf_conntrack_hash = alloc_hashtable(&nf_conntrack_htable_size, - &nf_conntrack_vmalloc); + nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + &nf_conntrack_vmalloc); if (!nf_conntrack_hash) { printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_out; @@ -1122,8 +1123,8 @@ out_free_expect_slab: err_free_conntrack_slab: kmem_cache_destroy(nf_conntrack_cachep); err_free_hash: - free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, - nf_conntrack_htable_size); + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_conntrack_htable_size); err_out: return -ENOMEM; } -- cgit v1.2.3 From 53aba5979e1d964c0234816eda2316f1c2e7946d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:30:27 -0700 Subject: [NETFILTER]: nf_nat: use hlists for bysource hash Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_nat.h | 2 +- net/ipv4/netfilter/nf_nat_core.c | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index d0e5e436dc1b..6ae52f7c9f55 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -66,7 +66,7 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct list_head bysource; + struct hlist_node bysource; struct nf_nat_seq seq[IP_CT_DIR_MAX]; struct nf_conn *ct; union nf_conntrack_nat_help help; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 04691edf50c2..f242ac61b3eb 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -44,8 +43,9 @@ static struct nf_conntrack_l3proto *l3proto = NULL; /* Calculated at init based on memory size */ static unsigned int nf_nat_htable_size; +static int nf_nat_vmalloced; -static struct list_head *bysource; +static struct hlist_head *bysource; #define MAX_IP_NAT_PROTO 256 static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]; @@ -153,9 +153,10 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, unsigned int h = hash_by_src(tuple); struct nf_conn_nat *nat; struct nf_conn *ct; + struct hlist_node *n; read_lock_bh(&nf_nat_lock); - list_for_each_entry(nat, &bysource[h], bysource) { + hlist_for_each_entry(nat, n, &bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -336,7 +337,7 @@ nf_nat_setup_info(struct nf_conn *ct, /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); nat->ct = ct; - list_add(&nat->bysource, &bysource[srchash]); + hlist_add_head(&nat->bysource, &bysource[srchash]); write_unlock_bh(&nf_nat_lock); } @@ -600,7 +601,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); write_lock_bh(&nf_nat_lock); - list_del(&nat->bysource); + hlist_del(&nat->bysource); nat->ct = NULL; write_unlock_bh(&nf_nat_lock); } @@ -618,7 +619,7 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); write_lock_bh(&nf_nat_lock); - list_replace(&old_nat->bysource, &new_nat->bysource); + hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); new_nat->ct = ct; write_unlock_bh(&nf_nat_lock); } @@ -646,8 +647,8 @@ static int __init nf_nat_init(void) /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - /* One vmalloc for both hash tables */ - bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size); + bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, + &nf_nat_vmalloced); if (!bysource) { ret = -ENOMEM; goto cleanup_extend; @@ -663,7 +664,7 @@ static int __init nf_nat_init(void) write_unlock_bh(&nf_nat_lock); for (i = 0; i < nf_nat_htable_size; i++) { - INIT_LIST_HEAD(&bysource[i]); + INIT_HLIST_HEAD(&bysource[i]); } /* Initialize fake conntrack so that NAT will skip it */ @@ -693,7 +694,7 @@ static void __exit nf_nat_cleanup(void) { nf_ct_iterate_cleanup(&clean_nat, NULL); synchronize_rcu(); - vfree(bysource); + nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); } -- cgit v1.2.3 From 6823645d608541c2c69e8a99454936e058c294e0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:30:49 -0700 Subject: [NETFILTER]: nf_conntrack_expect: function naming unification Currently there is a wild mix of nf_conntrack_expect_, nf_ct_exp_, expect_, exp_, ... Consistently use nf_ct_ as prefix for exported functions. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 2 +- include/net/netfilter/nf_conntrack_ecache.h | 17 ++- include/net/netfilter/nf_conntrack_expect.h | 28 ++--- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 6 +- net/ipv4/netfilter/nf_nat_amanda.c | 4 +- net/ipv4/netfilter/nf_nat_ftp.c | 4 +- net/ipv4/netfilter/nf_nat_h323.c | 26 ++--- net/ipv4/netfilter/nf_nat_irc.c | 4 +- net/ipv4/netfilter/nf_nat_pptp.c | 6 +- net/ipv4/netfilter/nf_nat_sip.c | 4 +- net/ipv4/netfilter/nf_nat_tftp.c | 2 +- net/netfilter/nf_conntrack_amanda.c | 11 +- net/netfilter/nf_conntrack_core.c | 12 +- net/netfilter/nf_conntrack_ecache.c | 16 +-- net/netfilter/nf_conntrack_expect.c | 93 +++++++-------- net/netfilter/nf_conntrack_ftp.c | 6 +- net/netfilter/nf_conntrack_h323_main.c | 127 ++++++++++----------- net/netfilter/nf_conntrack_helper.c | 4 +- net/netfilter/nf_conntrack_irc.c | 12 +- net/netfilter/nf_conntrack_netbios_ns.c | 6 +- net/netfilter/nf_conntrack_netlink.c | 40 +++---- net/netfilter/nf_conntrack_pptp.c | 44 +++---- net/netfilter/nf_conntrack_sane.c | 12 +- net/netfilter/nf_conntrack_sip.c | 12 +- net/netfilter/nf_conntrack_tftp.c | 12 +- 25 files changed, 251 insertions(+), 259 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 2fa3a1bbc7f2..a18f79c80db8 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -84,7 +84,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l4proto *proto); extern struct hlist_head *nf_conntrack_hash; -extern struct list_head nf_conntrack_expect_list; +extern struct list_head nf_ct_expect_list; extern rwlock_t nf_conntrack_lock ; extern struct hlist_head unconfirmed; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 811c9073c532..f0b9078235c9 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -49,15 +49,15 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event, atomic_notifier_call_chain(&nf_conntrack_chain, event, ct); } -extern struct atomic_notifier_head nf_conntrack_expect_chain; -extern int nf_conntrack_expect_register_notifier(struct notifier_block *nb); -extern int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb); +extern struct atomic_notifier_head nf_ct_expect_chain; +extern int nf_ct_expect_register_notifier(struct notifier_block *nb); +extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb); static inline void -nf_conntrack_expect_event(enum ip_conntrack_expect_events event, - struct nf_conntrack_expect *exp) +nf_ct_expect_event(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp) { - atomic_notifier_call_chain(&nf_conntrack_expect_chain, event, exp); + atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp); } #else /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -67,9 +67,8 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, static inline void nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) {} static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} -static inline void -nf_conntrack_expect_event(enum ip_conntrack_expect_events event, - struct nf_conntrack_expect *exp) {} +static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp) {} static inline void nf_ct_event_cache_flush(void) {} #endif /* CONFIG_NF_CONNTRACK_EVENTS */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 173c7c1eff23..c0b1d1fb23e1 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -6,8 +6,8 @@ #define _NF_CONNTRACK_EXPECT_H #include -extern struct list_head nf_conntrack_expect_list; -extern struct kmem_cache *nf_conntrack_expect_cachep; +extern struct list_head nf_ct_expect_list; +extern struct kmem_cache *nf_ct_expect_cachep; extern const struct file_operations exp_file_ops; struct nf_conntrack_expect @@ -54,27 +54,27 @@ struct nf_conntrack_expect struct nf_conntrack_expect * -__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -find_expectation(const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); -void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp); +void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); /* Allocate space for an expectation: this is mandatory before calling - nf_conntrack_expect_related. You will have to call put afterwards. */ -struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me); -void nf_conntrack_expect_init(struct nf_conntrack_expect *, int, - union nf_conntrack_address *, - union nf_conntrack_address *, - u_int8_t, __be16 *, __be16 *); -void nf_conntrack_expect_put(struct nf_conntrack_expect *exp); -int nf_conntrack_expect_related(struct nf_conntrack_expect *expect); + nf_ct_expect_related. You will have to call put afterwards. */ +struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); +void nf_ct_expect_init(struct nf_conntrack_expect *, int, + union nf_conntrack_address *, + union nf_conntrack_address *, + u_int8_t, __be16 *, __be16 *); +void nf_ct_expect_put(struct nf_conntrack_expect *exp); +int nf_ct_expect_related(struct nf_conntrack_expect *expect); #endif /*_NF_CONNTRACK_EXPECT_H*/ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 888f27fd884f..12d6a6327b63 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -209,7 +209,7 @@ static const struct file_operations ct_file_ops = { /* expects */ static void *exp_seq_start(struct seq_file *s, loff_t *pos) { - struct list_head *e = &nf_conntrack_expect_list; + struct list_head *e = &nf_ct_expect_list; loff_t i; /* strange seq_file api calls stop even if we fail, @@ -221,7 +221,7 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos) for (i = 0; i <= *pos; i++) { e = e->next; - if (e == &nf_conntrack_expect_list) + if (e == &nf_ct_expect_list) return NULL; } return e; @@ -234,7 +234,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) ++*pos; e = e->next; - if (e == &nf_conntrack_expect_list) + if (e == &nf_ct_expect_list) return NULL; return e; diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 0f17098917bc..bd93a1d71052 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -45,7 +45,7 @@ static unsigned int help(struct sk_buff **pskb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { exp->tuple.dst.u.tcp.port = htons(port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -57,7 +57,7 @@ static unsigned int help(struct sk_buff **pskb, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index e6bc8e5a72f1..cae4b460aee1 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -131,7 +131,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { exp->tuple.dst.u.tcp.port = htons(port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -139,7 +139,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, return NF_DROP; if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return NF_DROP; } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index c5d2a2d690b8..3d760dd657c7 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -237,12 +237,12 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port); nated_port != 0; nated_port += 2) { rtp_exp->tuple.dst.u.udp.port = htons(nated_port); - if (nf_conntrack_expect_related(rtp_exp) == 0) { + if (nf_ct_expect_related(rtp_exp) == 0) { rtcp_exp->tuple.dst.u.udp.port = htons(nated_port + 1); - if (nf_conntrack_expect_related(rtcp_exp) == 0) + if (nf_ct_expect_related(rtcp_exp) == 0) break; - nf_conntrack_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtp_exp); } } @@ -261,8 +261,8 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, info->rtp_port[i][dir] = rtp_port; info->rtp_port[i][!dir] = htons(nated_port); } else { - nf_conntrack_unexpect_related(rtp_exp); - nf_conntrack_unexpect_related(rtcp_exp); + nf_ct_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtcp_exp); return -1; } @@ -299,7 +299,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, /* Try to get same port: if not, try to change it. */ for (; nated_port != 0; nated_port++) { exp->tuple.dst.u.tcp.port = htons(nated_port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -313,7 +313,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, if (set_h245_addr(pskb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) < 0) { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return -1; } @@ -347,7 +347,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, /* Try to get same port: if not, try to change it. */ for (; nated_port != 0; nated_port++) { exp->tuple.dst.u.tcp.port = htons(nated_port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -365,7 +365,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, info->sig_port[dir] = port; info->sig_port[!dir] = htons(nated_port); } else { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return -1; } @@ -433,7 +433,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, /* Try to get same port: if not, try to change it. */ for (; nated_port != 0; nated_port++) { exp->tuple.dst.u.tcp.port = htons(nated_port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -460,7 +460,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, info->sig_port[!dir]); } } else { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return -1; } @@ -517,7 +517,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, /* Try to get same port: if not, try to change it. */ for (nated_port = ntohs(port); nated_port != 0; nated_port++) { exp->tuple.dst.u.tcp.port = htons(nated_port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -531,7 +531,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, if (!set_h225_addr(pskb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return -1; } diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 9b8c0daea744..db7fbf66fec0 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -55,7 +55,7 @@ static unsigned int help(struct sk_buff **pskb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { exp->tuple.dst.u.tcp.port = htons(port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -71,7 +71,7 @@ static unsigned int help(struct sk_buff **pskb, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index a66888749ceb..deb80ae2831e 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -81,10 +81,10 @@ static void pptp_nat_expected(struct nf_conn *ct, DEBUGP("trying to unexpect other dir: "); NF_CT_DUMP_TUPLE(&t); - other_exp = nf_conntrack_expect_find_get(&t); + other_exp = nf_ct_expect_find_get(&t); if (other_exp) { - nf_conntrack_unexpect_related(other_exp); - nf_conntrack_expect_put(other_exp); + nf_ct_unexpect_related(other_exp); + nf_ct_expect_put(other_exp); DEBUGP("success\n"); } else { DEBUGP("not found!\n"); diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index a32d746c4592..940cdfc429de 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -278,7 +278,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { exp->tuple.dst.u.udp.port = htons(port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -286,7 +286,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, return NF_DROP; if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return NF_DROP; } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 2566b79de224..04dfeaefec02 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -30,7 +30,7 @@ static unsigned int help(struct sk_buff **pskb, = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; - if (nf_conntrack_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) return NF_DROP; return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 0568f2e86b59..d21359e6c14c 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -142,23 +142,22 @@ static int amanda_help(struct sk_buff **pskb, if (port == 0 || len > 5) break; - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) { ret = NF_DROP; goto out; } tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - nf_conntrack_expect_init(exp, family, - &tuple->src.u3, &tuple->dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3, + IPPROTO_TCP, NULL, &port); nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); if (nf_nat_amanda && ct->status & IPS_NAT_MASK) ret = nf_nat_amanda(pskb, ctinfo, off - dataoff, len, exp); - else if (nf_conntrack_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } out: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f4c3039728dc..793f12ff168b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -494,7 +494,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, } write_lock_bh(&nf_conntrack_lock); - exp = find_expectation(tuple); + exp = nf_ct_find_expectation(tuple); if (exp) { DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", conntrack, exp); @@ -544,7 +544,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, if (exp) { if (exp->expectfn) exp->expectfn(conntrack, exp); - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; @@ -961,7 +961,7 @@ void nf_conntrack_cleanup(void) rcu_assign_pointer(nf_ct_destroy, NULL); kmem_cache_destroy(nf_conntrack_cachep); - kmem_cache_destroy(nf_conntrack_expect_cachep); + kmem_cache_destroy(nf_ct_expect_cachep); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, nf_conntrack_htable_size); @@ -1088,10 +1088,10 @@ int __init nf_conntrack_init(void) goto err_free_hash; } - nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect", + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", sizeof(struct nf_conntrack_expect), 0, 0, NULL, NULL); - if (!nf_conntrack_expect_cachep) { + if (!nf_ct_expect_cachep) { printk(KERN_ERR "Unable to create nf_expect slab cache\n"); goto err_free_conntrack_slab; } @@ -1119,7 +1119,7 @@ int __init nf_conntrack_init(void) out_fini_proto: nf_conntrack_proto_fini(); out_free_expect_slab: - kmem_cache_destroy(nf_conntrack_expect_cachep); + kmem_cache_destroy(nf_ct_expect_cachep); err_free_conntrack_slab: kmem_cache_destroy(nf_conntrack_cachep); err_free_hash: diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 6bd421df2dbc..83c41ac3505b 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -26,8 +26,8 @@ ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); EXPORT_SYMBOL_GPL(nf_conntrack_chain); -ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain); -EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain); +ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); +EXPORT_SYMBOL_GPL(nf_ct_expect_chain); DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); @@ -103,14 +103,14 @@ int nf_conntrack_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_conntrack_expect_register_notifier(struct notifier_block *nb) +int nf_ct_expect_register_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb); + return atomic_notifier_chain_register(&nf_ct_expect_chain, nb); } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_register_notifier); +EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb) +int nf_ct_expect_unregister_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain, nb); + return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb); } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_unregister_notifier); +EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 504fb6c083f9..4130ea662c48 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -26,11 +26,11 @@ #include #include -LIST_HEAD(nf_conntrack_expect_list); -EXPORT_SYMBOL_GPL(nf_conntrack_expect_list); +LIST_HEAD(nf_ct_expect_list); +EXPORT_SYMBOL_GPL(nf_ct_expect_list); -struct kmem_cache *nf_conntrack_expect_cachep __read_mostly; -static unsigned int nf_conntrack_expect_next_id; +struct kmem_cache *nf_ct_expect_cachep __read_mostly; +static unsigned int nf_ct_expect_next_id; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) @@ -43,57 +43,57 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) list_del(&exp->list); NF_CT_STAT_INC(expect_delete); master_help->expecting--; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); -static void expectation_timed_out(unsigned long ul_expect) +static void nf_ct_expectation_timed_out(unsigned long ul_expect) { struct nf_conntrack_expect *exp = (void *)ul_expect; write_lock_bh(&nf_conntrack_lock); nf_ct_unlink_expect(exp); write_unlock_bh(&nf_conntrack_lock); - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } struct nf_conntrack_expect * -__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; - list_for_each_entry(i, &nf_conntrack_expect_list, list) { + list_for_each_entry(i, &nf_ct_expect_list, list) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } return NULL; } -EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find); +EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; read_lock_bh(&nf_conntrack_lock); - i = __nf_conntrack_expect_find(tuple); + i = __nf_ct_expect_find(tuple); if (i) atomic_inc(&i->use); read_unlock_bh(&nf_conntrack_lock); return i; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -find_expectation(const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *exp; - exp = __nf_conntrack_expect_find(tuple); + exp = __nf_ct_expect_find(tuple); if (!exp) return NULL; @@ -126,10 +126,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct) if (!help || help->expecting == 0) return; - list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { + list_for_each_entry_safe(i, tmp, &nf_ct_expect_list, list) { if (i->master == ct && del_timer(&i->timeout)) { nf_ct_unlink_expect(i); - nf_conntrack_expect_put(i); + nf_ct_expect_put(i); } } } @@ -172,32 +172,32 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, } /* Generally a bad idea to call this: could have matched already. */ -void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) +void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { struct nf_conntrack_expect *i; write_lock_bh(&nf_conntrack_lock); /* choose the oldest expectation to evict */ - list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { + list_for_each_entry_reverse(i, &nf_ct_expect_list, list) { if (expect_matches(i, exp) && del_timer(&i->timeout)) { nf_ct_unlink_expect(i); write_unlock_bh(&nf_conntrack_lock); - nf_conntrack_expect_put(i); + nf_ct_expect_put(i); return; } } write_unlock_bh(&nf_conntrack_lock); } -EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related); +EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); /* We don't increase the master conntrack refcount for non-fulfilled * conntracks. During the conntrack destruction, the expectations are * always killed before the conntrack itself */ -struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) +struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) { struct nf_conntrack_expect *new; - new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); + new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC); if (!new) return NULL; @@ -205,12 +205,12 @@ struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) atomic_set(&new->use, 1); return new; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc); +EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); -void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, - union nf_conntrack_address *saddr, - union nf_conntrack_address *daddr, - u_int8_t proto, __be16 *src, __be16 *dst) +void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, + union nf_conntrack_address *saddr, + union nf_conntrack_address *daddr, + u_int8_t proto, __be16 *src, __be16 *dst) { int len; @@ -273,28 +273,29 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, exp->mask.dst.u.all = 0; } } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_init); +EXPORT_SYMBOL_GPL(nf_ct_expect_init); -void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) +void nf_ct_expect_put(struct nf_conntrack_expect *exp) { if (atomic_dec_and_test(&exp->use)) - kmem_cache_free(nf_conntrack_expect_cachep, exp); + kmem_cache_free(nf_ct_expect_cachep, exp); } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_put); +EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) +static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); atomic_inc(&exp->use); master_help->expecting++; - list_add(&exp->list, &nf_conntrack_expect_list); + list_add(&exp->list, &nf_ct_expect_list); - setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp); + setup_timer(&exp->timeout, nf_ct_expectation_timed_out, + (unsigned long)exp); exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; add_timer(&exp->timeout); - exp->id = ++nf_conntrack_expect_next_id; + exp->id = ++nf_ct_expect_next_id; atomic_inc(&exp->use); NF_CT_STAT_INC(expect_create); } @@ -304,11 +305,11 @@ static void evict_oldest_expect(struct nf_conn *master) { struct nf_conntrack_expect *i; - list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { + list_for_each_entry_reverse(i, &nf_ct_expect_list, list) { if (i->master == master) { if (del_timer(&i->timeout)) { nf_ct_unlink_expect(i); - nf_conntrack_expect_put(i); + nf_ct_expect_put(i); } break; } @@ -327,7 +328,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) return 1; } -int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) +int nf_ct_expect_related(struct nf_conntrack_expect *expect) { struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; @@ -341,7 +342,7 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) ret = -ESHUTDOWN; goto out; } - list_for_each_entry(i, &nf_conntrack_expect_list, list) { + list_for_each_entry(i, &nf_ct_expect_list, list) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { @@ -358,19 +359,19 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) master_help->expecting >= master_help->helper->max_expected) evict_oldest_expect(master); - nf_conntrack_expect_insert(expect); - nf_conntrack_expect_event(IPEXP_NEW, expect); + nf_ct_expect_insert(expect); + nf_ct_expect_event(IPEXP_NEW, expect); ret = 0; out: write_unlock_bh(&nf_conntrack_lock); return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_related); +EXPORT_SYMBOL_GPL(nf_ct_expect_related); #ifdef CONFIG_PROC_FS static void *exp_seq_start(struct seq_file *s, loff_t *pos) { - struct list_head *e = &nf_conntrack_expect_list; + struct list_head *e = &nf_ct_expect_list; loff_t i; /* strange seq_file api calls stop even if we fail, @@ -382,7 +383,7 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos) for (i = 0; i <= *pos; i++) { e = e->next; - if (e == &nf_conntrack_expect_list) + if (e == &nf_ct_expect_list) return NULL; } return e; @@ -395,7 +396,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) ++*pos; e = e->next; - if (e == &nf_conntrack_expect_list) + if (e == &nf_ct_expect_list) return NULL; return e; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 82db2aa53bfc..5efe65d4b3c0 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -445,7 +445,7 @@ static int help(struct sk_buff **pskb, (int)matchlen, fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff); - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) { ret = NF_DROP; goto out; @@ -523,14 +523,14 @@ static int help(struct sk_buff **pskb, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ - if (nf_conntrack_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; else ret = NF_ACCEPT; } out_put_expect: - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a1b95acad297..61ae90fb328a 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -282,22 +282,22 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, rtcp_port = htons(ntohs(port) + 1); /* Create expect for RTP */ - if ((rtp_exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((rtp_exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(rtp_exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_UDP, NULL, &rtp_port); + nf_ct_expect_init(rtp_exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_UDP, NULL, &rtp_port); /* Create expect for RTCP */ - if ((rtcp_exp = nf_conntrack_expect_alloc(ct)) == NULL) { - nf_conntrack_expect_put(rtp_exp); + if ((rtcp_exp = nf_ct_expect_alloc(ct)) == NULL) { + nf_ct_expect_put(rtp_exp); return -1; } - nf_conntrack_expect_init(rtcp_exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_UDP, NULL, &rtcp_port); + nf_ct_expect_init(rtcp_exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_UDP, NULL, &rtcp_port); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, @@ -308,22 +308,22 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(rtp_exp) == 0) { - if (nf_conntrack_expect_related(rtcp_exp) == 0) { + if (nf_ct_expect_related(rtp_exp) == 0) { + if (nf_ct_expect_related(rtcp_exp) == 0) { DEBUGP("nf_ct_h323: expect RTP "); NF_CT_DUMP_TUPLE(&rtp_exp->tuple); DEBUGP("nf_ct_h323: expect RTCP "); NF_CT_DUMP_TUPLE(&rtcp_exp->tuple); } else { - nf_conntrack_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtp_exp); ret = -1; } } else ret = -1; } - nf_conntrack_expect_put(rtp_exp); - nf_conntrack_expect_put(rtcp_exp); + nf_ct_expect_put(rtp_exp); + nf_ct_expect_put(rtcp_exp); return ret; } @@ -349,12 +349,12 @@ static int expect_t120(struct sk_buff **pskb, return 0; /* Create expect for T.120 connections */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple channels */ if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -366,14 +366,14 @@ static int expect_t120(struct sk_buff **pskb, ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp) == 0) { DEBUGP("nf_ct_h323: expect T.120 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -684,12 +684,12 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, return 0; /* Create expect for h245 connection */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); exp->helper = &nf_conntrack_helper_h245; if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -701,14 +701,14 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp) == 0) { DEBUGP("nf_ct_q931: expect H.245 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -796,11 +796,11 @@ static int expect_callforwarding(struct sk_buff **pskb, } /* Create expect for the second call leg */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, &addr, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); exp->helper = nf_conntrack_helper_q931; if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -812,14 +812,14 @@ static int expect_callforwarding(struct sk_buff **pskb, ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp) == 0) { DEBUGP("nf_ct_q931: expect Call Forwarding "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -1225,7 +1225,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_conntrack_expect_find(&tuple); + exp = __nf_ct_expect_find(&tuple); if (exp && exp->master == ct) return exp; return NULL; @@ -1271,14 +1271,13 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, return 0; /* Create expect for Q.931 */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - gkrouted_only ? /* only accept calls from GK? */ - &ct->tuplehash[!dir].tuple.src.u3 : - NULL, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + gkrouted_only ? /* only accept calls from GK? */ + &ct->tuplehash[!dir].tuple.src.u3 : NULL, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); exp->helper = nf_conntrack_helper_q931; exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ @@ -1286,7 +1285,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp) == 0) { DEBUGP("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); @@ -1296,7 +1295,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, ret = -1; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -1343,20 +1342,20 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, return 0; /* Need new expect */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, &addr, - IPPROTO_UDP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_UDP, NULL, &port); exp->helper = nf_conntrack_helper_ras; - if (nf_conntrack_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp) == 0) { DEBUGP("nf_ct_ras: expect RAS "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -1548,21 +1547,21 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, } /* Need new expect */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, &addr, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; exp->helper = nf_conntrack_helper_q931; - if (nf_conntrack_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp) == 0) { DEBUGP("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -1601,21 +1600,21 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, return 0; /* Need new expect for call signal */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, &addr, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; exp->helper = nf_conntrack_helper_q931; - if (nf_conntrack_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp) == 0) { DEBUGP("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); /* Ignore rasAddress */ diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3fc6e9f0de1a..89a5f7333d38 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -123,12 +123,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) list_del(&me->list); /* Get rid of expectations */ - list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { + list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) { struct nf_conn_help *help = nfct_help(exp->master); if ((help->helper == me || exp->helper == me) && del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 43ccd0e2e8ae..79da93e4396b 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -184,16 +184,16 @@ static int help(struct sk_buff **pskb, unsigned int protoff, continue; } - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) { ret = NF_DROP; goto out; } tuple = &ct->tuplehash[!dir].tuple; port = htons(dcc_port); - nf_conntrack_expect_init(exp, tuple->src.l3num, - NULL, &tuple->dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, tuple->src.l3num, + NULL, &tuple->dst.u3, + IPPROTO_TCP, NULL, &port); nf_nat_irc = rcu_dereference(nf_nat_irc_hook); if (nf_nat_irc && ct->status & IPS_NAT_MASK) @@ -201,9 +201,9 @@ static int help(struct sk_buff **pskb, unsigned int protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); - else if (nf_conntrack_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); goto out; } } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 1093478cc007..ea585c789a83 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -74,7 +74,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, if (mask == 0) goto out; - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) goto out; @@ -91,8 +91,8 @@ static int help(struct sk_buff **pskb, unsigned int protoff, exp->flags = NF_CT_EXPECT_PERMANENT; exp->helper = NULL; - nf_conntrack_expect_related(exp); - nf_conntrack_expect_put(exp); + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); nf_ct_refresh(ct, *pskb, timeout * HZ); out: diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d310ec866194..954cc58b9d04 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1239,7 +1239,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) u_int8_t l3proto = nfmsg->nfgen_family; read_lock_bh(&nf_conntrack_lock); - list_for_each_prev(i, &nf_conntrack_expect_list) { + list_for_each_prev(i, &nf_ct_expect_list) { exp = (struct nf_conntrack_expect *) i; if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -1291,14 +1291,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_conntrack_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&tuple); if (!exp) return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return -ENOENT; } } @@ -1314,14 +1314,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err <= 0) goto free; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); free: kfree_skb(skb2); out: - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return err; } @@ -1346,23 +1346,23 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_conntrack_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&tuple); if (!exp) return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return -ENOENT; } } /* after list removal, usage count == 1 */ - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); /* have to put what we 'get' above. * after this line usage count == 0 */ - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } else if (cda[CTA_EXPECT_HELP_NAME-1]) { char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); @@ -1373,24 +1373,22 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, write_unlock_bh(&nf_conntrack_lock); return -EINVAL; } - list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, - list) { + list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) { struct nf_conn_help *m_help = nfct_help(exp->master); if (m_help->helper == h && del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } } write_unlock_bh(&nf_conntrack_lock); } else { /* This basically means we have to flush everything*/ write_lock_bh(&nf_conntrack_lock); - list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, - list) { + list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } } write_unlock_bh(&nf_conntrack_lock); @@ -1438,7 +1436,7 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) goto out; } - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (!exp) { err = -ENOMEM; goto out; @@ -1451,8 +1449,8 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple)); - err = nf_conntrack_expect_related(exp); - nf_conntrack_expect_put(exp); + err = nf_ct_expect_related(exp); + nf_ct_expect_put(exp); out: nf_ct_put(nf_ct_tuplehash_to_ctrack(h)); @@ -1482,7 +1480,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; write_lock_bh(&nf_conntrack_lock); - exp = __nf_conntrack_expect_find(&tuple); + exp = __nf_ct_expect_find(&tuple); if (!exp) { write_unlock_bh(&nf_conntrack_lock); @@ -1572,7 +1570,7 @@ static int __init ctnetlink_init(void) goto err_unreg_exp_subsys; } - ret = nf_conntrack_expect_register_notifier(&ctnl_notifier_exp); + ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp); if (ret < 0) { printk("ctnetlink_init: cannot expect register notifier.\n"); goto err_unreg_notifier; @@ -1598,7 +1596,7 @@ static void __exit ctnetlink_exit(void) printk("ctnetlink: unregistering from nfnetlink.\n"); #ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_conntrack_expect_unregister_notifier(&ctnl_notifier_exp); + nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); nf_conntrack_unregister_notifier(&ctnl_notifier); #endif diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index da36c48177e9..916e106d36bc 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -124,12 +124,12 @@ static void pptp_expectfn(struct nf_conn *ct, DEBUGP("trying to unexpect other dir: "); NF_CT_DUMP_TUPLE(&inv_t); - exp_other = nf_conntrack_expect_find_get(&inv_t); + exp_other = nf_ct_expect_find_get(&inv_t); if (exp_other) { /* delete other expectation. */ DEBUGP("found\n"); - nf_conntrack_unexpect_related(exp_other); - nf_conntrack_expect_put(exp_other); + nf_ct_unexpect_related(exp_other); + nf_ct_expect_put(exp_other); } else { DEBUGP("not found\n"); } @@ -157,11 +157,11 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) nf_ct_put(sibling); return 1; } else { - exp = nf_conntrack_expect_find_get(t); + exp = nf_ct_expect_find_get(t); if (exp) { DEBUGP("unexpect_related of expect %p\n", exp); - nf_conntrack_unexpect_related(exp); - nf_conntrack_expect_put(exp); + nf_ct_unexpect_related(exp); + nf_ct_expect_put(exp); return 1; } } @@ -201,36 +201,36 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) int ret = 1; typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre; - exp_orig = nf_conntrack_expect_alloc(ct); + exp_orig = nf_ct_expect_alloc(ct); if (exp_orig == NULL) goto out; - exp_reply = nf_conntrack_expect_alloc(ct); + exp_reply = nf_ct_expect_alloc(ct); if (exp_reply == NULL) goto out_put_orig; /* original direction, PNS->PAC */ dir = IP_CT_DIR_ORIGINAL; - nf_conntrack_expect_init(exp_orig, ct->tuplehash[dir].tuple.src.l3num, - &ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[dir].tuple.dst.u3, - IPPROTO_GRE, &peer_callid, &callid); + nf_ct_expect_init(exp_orig, ct->tuplehash[dir].tuple.src.l3num, + &ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[dir].tuple.dst.u3, + IPPROTO_GRE, &peer_callid, &callid); exp_orig->expectfn = pptp_expectfn; /* reply direction, PAC->PNS */ dir = IP_CT_DIR_REPLY; - nf_conntrack_expect_init(exp_reply, ct->tuplehash[dir].tuple.src.l3num, - &ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[dir].tuple.dst.u3, - IPPROTO_GRE, &callid, &peer_callid); + nf_ct_expect_init(exp_reply, ct->tuplehash[dir].tuple.src.l3num, + &ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[dir].tuple.dst.u3, + IPPROTO_GRE, &callid, &peer_callid); exp_reply->expectfn = pptp_expectfn; nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre); if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK) nf_nat_pptp_exp_gre(exp_orig, exp_reply); - if (nf_conntrack_expect_related(exp_orig) != 0) + if (nf_ct_expect_related(exp_orig) != 0) goto out_put_both; - if (nf_conntrack_expect_related(exp_reply) != 0) + if (nf_ct_expect_related(exp_reply) != 0) goto out_unexpect_orig; /* Add GRE keymap entries */ @@ -243,16 +243,16 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) ret = 0; out_put_both: - nf_conntrack_expect_put(exp_reply); + nf_ct_expect_put(exp_reply); out_put_orig: - nf_conntrack_expect_put(exp_orig); + nf_ct_expect_put(exp_orig); out: return ret; out_unexpect_both: - nf_conntrack_unexpect_related(exp_reply); + nf_ct_unexpect_related(exp_reply); out_unexpect_orig: - nf_conntrack_unexpect_related(exp_orig); + nf_ct_unexpect_related(exp_orig); goto out_put_both; } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index eb2d1dc46d45..28ed303c565b 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -141,27 +141,25 @@ static int help(struct sk_buff **pskb, if (reply->zero != 0) goto out; - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) { ret = NF_DROP; goto out; } tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - nf_conntrack_expect_init(exp, family, - &tuple->src.u3, &tuple->dst.u3, - IPPROTO_TCP, - NULL, &reply->port); + nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3, + IPPROTO_TCP, NULL, &reply->port); DEBUGP("nf_ct_sane: expect: "); NF_CT_DUMP_TUPLE(&exp->tuple); NF_CT_DUMP_TUPLE(&exp->mask); /* Can't expect this? Best to drop packet now. */ - if (nf_conntrack_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); out: spin_unlock_bh(&nf_sane_lock); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 1b5c6c1055f7..1f17f8040cd2 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -378,23 +378,23 @@ static int set_expected_rtp(struct sk_buff **pskb, int ret; typeof(nf_nat_sdp_hook) nf_nat_sdp; - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) return NF_DROP; - nf_conntrack_expect_init(exp, family, - &ct->tuplehash[!dir].tuple.src.u3, addr, - IPPROTO_UDP, NULL, &port); + nf_ct_expect_init(exp, family, + &ct->tuplehash[!dir].tuple.src.u3, addr, + IPPROTO_UDP, NULL, &port); nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) ret = nf_nat_sdp(pskb, ctinfo, exp, dptr); else { - if (nf_conntrack_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; else ret = NF_ACCEPT; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 37c4542e3112..53d57b4c0de7 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -66,14 +66,12 @@ static int tftp_help(struct sk_buff **pskb, NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) return NF_DROP; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - nf_conntrack_expect_init(exp, family, - &tuple->src.u3, &tuple->dst.u3, - IPPROTO_UDP, - NULL, &tuple->dst.u.udp.port); + nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3, + IPPROTO_UDP, NULL, &tuple->dst.u.udp.port); DEBUGP("expect: "); NF_CT_DUMP_TUPLE(&exp->tuple); @@ -82,9 +80,9 @@ static int tftp_help(struct sk_buff **pskb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) ret = nf_nat_tftp(pskb, ctinfo, exp); - else if (nf_conntrack_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); break; case TFTP_OPCODE_DATA: case TFTP_OPCODE_ACK: -- cgit v1.2.3 From df43b4e7ca46952756b2fc039ed80469b1bff62d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:31:07 -0700 Subject: [NETFILTER]: nf_conntrack_ftp: use nf_ct_expect_init Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_ftp.c | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 5efe65d4b3c0..9ad15191bb44 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -364,6 +364,7 @@ static int help(struct sk_buff **pskb, unsigned int matchlen, matchoff; struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; struct nf_conntrack_expect *exp; + union nf_conntrack_address *daddr; struct nf_conntrack_man cmd = {}; unsigned int i; int found = 0, ends_in_nl; @@ -454,7 +455,7 @@ static int help(struct sk_buff **pskb, /* We refer to the reverse direction ("!dir") tuples here, * because we're expecting something in the other direction. * Doesn't matter unless NAT is happening. */ - exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3; + daddr = &ct->tuplehash[!dir].tuple.dst.u3; /* Update the ftp info */ if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) && @@ -483,37 +484,12 @@ static int help(struct sk_buff **pskb, ret = NF_ACCEPT; goto out_put_expect; } - memcpy(&exp->tuple.dst.u3, &cmd.u3.all, - sizeof(exp->tuple.dst.u3)); + daddr = &cmd.u3; } - exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3; - exp->tuple.src.l3num = cmd.l3num; - exp->tuple.src.u.tcp.port = 0; - exp->tuple.dst.u.tcp.port = cmd.u.tcp.port; - exp->tuple.dst.protonum = IPPROTO_TCP; - - exp->mask = (struct nf_conntrack_tuple) - { .src = { .l3num = 0xFFFF, - .u = { .tcp = { 0 }}, - }, - .dst = { .protonum = 0xFF, - .u = { .tcp = { __constant_htons(0xFFFF) }}, - }, - }; - if (cmd.l3num == PF_INET) { - exp->mask.src.u3.ip = htonl(0xFFFFFFFF); - exp->mask.dst.u3.ip = htonl(0xFFFFFFFF); - } else { - memset(exp->mask.src.u3.ip6, 0xFF, - sizeof(exp->mask.src.u3.ip6)); - memset(exp->mask.dst.u3.ip6, 0xFF, - sizeof(exp->mask.src.u3.ip6)); - } - - exp->expectfn = NULL; - exp->helper = NULL; - exp->flags = 0; + nf_ct_expect_init(exp, cmd.l3num, + &ct->tuplehash[!dir].tuple.src.u3, daddr, + IPPROTO_TCP, NULL, &cmd.u.tcp.port); /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ -- cgit v1.2.3 From d4156e8cd93f5772483928aaf4960120caebd789 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:31:32 -0700 Subject: [NETFILTER]: nf_conntrack: reduce masks to a subset of tuples Since conntrack currently allows to use masks for every bit of both helper and expectation tuples, we can't hash them and have to keep them on two global lists that are searched for every new connection. This patch removes the never used ability to use masks for the destination part of the expectation tuple and completely removes masks from helpers since the only reasonable choice is a full match on l3num, protonum and src.u.all. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_expect.h | 3 +- include/net/netfilter/nf_conntrack_helper.h | 5 +-- include/net/netfilter/nf_conntrack_tuple.h | 65 +++++++++++++++++++---------- net/ipv4/netfilter/nf_nat_snmp_basic.c | 6 --- net/netfilter/nf_conntrack_amanda.c | 6 --- net/netfilter/nf_conntrack_expect.c | 44 ++++--------------- net/netfilter/nf_conntrack_ftp.c | 3 -- net/netfilter/nf_conntrack_h323_main.c | 14 ------- net/netfilter/nf_conntrack_helper.c | 3 +- net/netfilter/nf_conntrack_irc.c | 3 -- net/netfilter/nf_conntrack_netbios_ns.c | 6 --- net/netfilter/nf_conntrack_netlink.c | 18 +++++--- net/netfilter/nf_conntrack_pptp.c | 3 -- net/netfilter/nf_conntrack_sane.c | 2 - net/netfilter/nf_conntrack_sip.c | 3 -- net/netfilter/nf_conntrack_tftp.c | 3 -- 16 files changed, 71 insertions(+), 116 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index c0b1d1fb23e1..13643f7f7422 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -16,7 +16,8 @@ struct nf_conntrack_expect struct list_head list; /* We expect this tuple, with the following mask */ - struct nf_conntrack_tuple tuple, mask; + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple_mask mask; /* Function to call after setup and insertion */ void (*expectfn)(struct nf_conn *new, diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index b43a75ba44ac..d62e6f093af4 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -24,10 +24,9 @@ struct nf_conntrack_helper * expected connections */ unsigned int timeout; /* timeout for expecteds */ - /* Mask of things we will help (compared against server response) */ + /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; - struct nf_conntrack_tuple mask; - + /* Function to call when data passes; return verdict, or -1 to invalidate. */ int (*help)(struct sk_buff **pskb, diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index d02ce876b4ca..99934ab538e6 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -100,6 +100,14 @@ struct nf_conntrack_tuple } dst; }; +struct nf_conntrack_tuple_mask +{ + struct { + union nf_conntrack_address u3; + union nf_conntrack_man_proto u; + } src; +}; + /* This is optimized opposed to a memset of the whole structure. Everything we * really care about is the source/destination unions */ #define NF_CT_TUPLE_U_BLANK(tuple) \ @@ -161,31 +169,44 @@ static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, return nf_ct_tuple_src_equal(t1, t2) && nf_ct_tuple_dst_equal(t1, t2); } +static inline int nf_ct_tuple_mask_equal(const struct nf_conntrack_tuple_mask *m1, + const struct nf_conntrack_tuple_mask *m2) +{ + return (m1->src.u3.all[0] == m2->src.u3.all[0] && + m1->src.u3.all[1] == m2->src.u3.all[1] && + m1->src.u3.all[2] == m2->src.u3.all[2] && + m1->src.u3.all[3] == m2->src.u3.all[3] && + m1->src.u.all == m2->src.u.all); +} + +static inline int nf_ct_tuple_src_mask_cmp(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2, + const struct nf_conntrack_tuple_mask *mask) +{ + int count; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) { + if ((t1->src.u3.all[count] ^ t2->src.u3.all[count]) & + mask->src.u3.all[count]) + return 0; + } + + if ((t1->src.u.all ^ t2->src.u.all) & mask->src.u.all) + return 0; + + if (t1->src.l3num != t2->src.l3num || + t1->dst.protonum != t2->dst.protonum) + return 0; + + return 1; +} + static inline int nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *mask) + const struct nf_conntrack_tuple_mask *mask) { - int count = 0; - - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - if ((t->src.u3.all[count] ^ tuple->src.u3.all[count]) & - mask->src.u3.all[count]) - return 0; - } - - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - if ((t->dst.u3.all[count] ^ tuple->dst.u3.all[count]) & - mask->dst.u3.all[count]) - return 0; - } - - if ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all || - (t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all || - (t->src.l3num ^ tuple->src.l3num) & mask->src.l3num || - (t->dst.protonum ^ tuple->dst.protonum) & mask->dst.protonum) - return 0; - - return 1; + return nf_ct_tuple_src_mask_cmp(t, tuple, mask) && + nf_ct_tuple_dst_equal(t, tuple); } #endif /* _NF_CONNTRACK_TUPLE_H */ diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 6e88505d6162..6bfcd3a90f08 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1276,9 +1276,6 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, }; static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { @@ -1290,9 +1287,6 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, }; /***************************************************************************** diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index d21359e6c14c..e42ab230ad88 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -174,9 +174,6 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(10080), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, }, { .name = "amanda", @@ -187,9 +184,6 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = __constant_htons(10080), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, }, }; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4130ea662c48..83b5ad85e0ee 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -141,25 +141,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, { /* Part covered by intersection of masks must be unequal, otherwise they clash */ - struct nf_conntrack_tuple intersect_mask; + struct nf_conntrack_tuple_mask intersect_mask; int count; - intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; - intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; - intersect_mask.dst.protonum = a->mask.dst.protonum - & b->mask.dst.protonum; for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ intersect_mask.src.u3.all[count] = a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - intersect_mask.dst.u3.all[count] = - a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; - } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); } @@ -168,7 +159,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, { return a->master == b->master && nf_ct_tuple_equal(&a->tuple, &b->tuple) - && nf_ct_tuple_equal(&a->mask, &b->mask); + && nf_ct_tuple_mask_equal(&a->mask, &b->mask); } /* Generally a bad idea to call this: could have matched already. */ @@ -224,8 +215,6 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, exp->helper = NULL; exp->tuple.src.l3num = family; exp->tuple.dst.protonum = proto; - exp->mask.src.l3num = 0xFFFF; - exp->mask.dst.protonum = 0xFF; if (saddr) { memcpy(&exp->tuple.src.u3, saddr, len); @@ -242,21 +231,6 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3)); } - if (daddr) { - memcpy(&exp->tuple.dst.u3, daddr, len); - if (sizeof(exp->tuple.dst.u3) > len) - /* address needs to be cleared for nf_ct_tuple_equal */ - memset((void *)&exp->tuple.dst.u3 + len, 0x00, - sizeof(exp->tuple.dst.u3) - len); - memset(&exp->mask.dst.u3, 0xFF, len); - if (sizeof(exp->mask.dst.u3) > len) - memset((void *)&exp->mask.dst.u3 + len, 0x00, - sizeof(exp->mask.dst.u3) - len); - } else { - memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3)); - memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3)); - } - if (src) { exp->tuple.src.u.all = (__force u16)*src; exp->mask.src.u.all = 0xFFFF; @@ -265,13 +239,13 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, exp->mask.src.u.all = 0; } - if (dst) { - exp->tuple.dst.u.all = (__force u16)*dst; - exp->mask.dst.u.all = 0xFFFF; - } else { - exp->tuple.dst.u.all = 0; - exp->mask.dst.u.all = 0; - } + memcpy(&exp->tuple.dst.u3, daddr, len); + if (sizeof(exp->tuple.dst.u3) > len) + /* address needs to be cleared for nf_ct_tuple_equal */ + memset((void *)&exp->tuple.dst.u3 + len, 0x00, + sizeof(exp->tuple.dst.u3) - len); + + exp->tuple.dst.u.all = (__force u16)*dst; } EXPORT_SYMBOL_GPL(nf_ct_expect_init); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 9ad15191bb44..198330b8ada4 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -560,9 +560,6 @@ static int __init nf_conntrack_ftp_init(void) for (j = 0; j < 2; j++) { ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; - ftp[i][j].mask.src.l3num = 0xFFFF; - ftp[i][j].mask.src.u.tcp.port = htons(0xFFFF); - ftp[i][j].mask.dst.protonum = 0xFF; ftp[i][j].max_expected = 1; ftp[i][j].timeout = 5 * 60; /* 5 Minutes */ ftp[i][j].me = THIS_MODULE; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 61ae90fb328a..8c57b8119bfb 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -626,8 +626,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */, .timeout = 240, .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = h245_help }; @@ -1173,9 +1171,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.tcp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = q931_help }, { @@ -1187,9 +1182,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { .tuple.src.l3num = AF_INET6, .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.tcp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = q931_help }, }; @@ -1751,9 +1743,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = ras_help, }, { @@ -1764,9 +1753,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = __constant_htons(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = ras_help, }, }; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 89a5f7333d38..fdabf823f8cd 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -34,9 +34,10 @@ struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *h; + struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; list_for_each_entry(h, &helpers, list) { - if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + if (nf_ct_tuple_src_mask_cmp(tuple, &h->tuple, &mask)) return h; } return NULL; diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 79da93e4396b..8c7340794bf6 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -239,9 +239,6 @@ static int __init nf_conntrack_irc_init(void) irc[i].tuple.src.l3num = AF_INET; irc[i].tuple.src.u.tcp.port = htons(ports[i]); irc[i].tuple.dst.protonum = IPPROTO_TCP; - irc[i].mask.src.l3num = 0xFFFF; - irc[i].mask.src.u.tcp.port = htons(0xFFFF); - irc[i].mask.dst.protonum = 0xFF; irc[i].max_expected = max_dcc_channels; irc[i].timeout = dcc_timeout; irc[i].me = THIS_MODULE; diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index ea585c789a83..1d59fabeb5f7 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -83,9 +83,6 @@ static int help(struct sk_buff **pskb, unsigned int protoff, exp->mask.src.u3.ip = mask; exp->mask.src.u.udp.port = htons(0xFFFF); - exp->mask.dst.u3.ip = htonl(0xFFFFFFFF); - exp->mask.dst.u.udp.port = htons(0xFFFF); - exp->mask.dst.protonum = 0xFF; exp->expectfn = NULL; exp->flags = NF_CT_EXPECT_PERMANENT; @@ -104,9 +101,6 @@ static struct nf_conntrack_helper helper __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(NMBD_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .max_expected = 1, .me = THIS_MODULE, .help = help, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 954cc58b9d04..206491488f4e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1094,22 +1094,29 @@ nfattr_failure: static inline int ctnetlink_exp_dump_mask(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *mask) + const struct nf_conntrack_tuple_mask *mask) { int ret; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; - struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK); + struct nf_conntrack_tuple m; + struct nfattr *nest_parms; + + memset(&m, 0xFF, sizeof(m)); + m.src.u.all = mask->src.u.all; + memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); + + nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK); l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); - ret = ctnetlink_dump_tuples_ip(skb, mask, l3proto); + ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto); nf_ct_l3proto_put(l3proto); if (unlikely(ret < 0)) goto nfattr_failure; l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); - ret = ctnetlink_dump_tuples_proto(skb, mask, l4proto); + ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); nf_ct_l4proto_put(l4proto); if (unlikely(ret < 0)) goto nfattr_failure; @@ -1447,7 +1454,8 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) exp->master = ct; exp->helper = NULL; memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); - memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple)); + memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3)); + exp->mask.src.u.all = mask.src.u.all; err = nf_ct_expect_related(exp); nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 916e106d36bc..63dac5eb959f 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -585,9 +585,6 @@ static struct nf_conntrack_helper pptp __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = __constant_htons(PPTP_CONTROL_PORT), .tuple.dst.protonum = IPPROTO_TCP, - .mask.src.l3num = 0xffff, - .mask.src.u.tcp.port = __constant_htons(0xffff), - .mask.dst.protonum = 0xff, .help = conntrack_pptp_help, .destroy = pptp_destroy_siblings, }; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 28ed303c565b..edd10df8aa08 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -206,8 +206,6 @@ static int __init nf_conntrack_sane_init(void) for (j = 0; j < 2; j++) { sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); sane[i][j].tuple.dst.protonum = IPPROTO_TCP; - sane[i][j].mask.src.u.tcp.port = 0xFFFF; - sane[i][j].mask.dst.protonum = 0xFF; sane[i][j].max_expected = 1; sane[i][j].timeout = 5 * 60; /* 5 Minutes */ sane[i][j].me = THIS_MODULE; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 1f17f8040cd2..5b78f0e1f63b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -506,9 +506,6 @@ static int __init nf_conntrack_sip_init(void) for (j = 0; j < 2; j++) { sip[i][j].tuple.dst.protonum = IPPROTO_UDP; sip[i][j].tuple.src.u.udp.port = htons(ports[i]); - sip[i][j].mask.src.l3num = 0xFFFF; - sip[i][j].mask.src.u.udp.port = htons(0xFFFF); - sip[i][j].mask.dst.protonum = 0xFF; sip[i][j].max_expected = 2; sip[i][j].timeout = 3 * 60; /* 3 minutes */ sip[i][j].me = THIS_MODULE; diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 53d57b4c0de7..db0387cf9bac 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -126,9 +126,6 @@ static int __init nf_conntrack_tftp_init(void) for (j = 0; j < 2; j++) { tftp[i][j].tuple.dst.protonum = IPPROTO_UDP; tftp[i][j].tuple.src.u.udp.port = htons(ports[i]); - tftp[i][j].mask.src.l3num = 0xFFFF; - tftp[i][j].mask.dst.protonum = 0xFF; - tftp[i][j].mask.src.u.udp.port = htons(0xFFFF); tftp[i][j].max_expected = 1; tftp[i][j].timeout = 5 * 60; /* 5 minutes */ tftp[i][j].me = THIS_MODULE; -- cgit v1.2.3 From 4e1d4e6c5a448bd114e0cef6311c974cb7c7385e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:32:03 -0700 Subject: [NETFILTER]: nf_conntrack_expect: avoid useless list walking Don't walk the list when unexpecting an expectation, we already have a reference and the timer check is enough to guarantee that it still is on the list. This comment suggests that it was copied there by mistake from expectation eviction: /* choose the oldest expectation to evict */ Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_expect.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 83b5ad85e0ee..5c8cb0f243a2 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -165,17 +165,10 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, /* Generally a bad idea to call this: could have matched already. */ void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { - struct nf_conntrack_expect *i; - write_lock_bh(&nf_conntrack_lock); - /* choose the oldest expectation to evict */ - list_for_each_entry_reverse(i, &nf_ct_expect_list, list) { - if (expect_matches(i, exp) && del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - write_unlock_bh(&nf_conntrack_lock); - nf_ct_expect_put(i); - return; - } + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } write_unlock_bh(&nf_conntrack_lock); } -- cgit v1.2.3 From cf6994c2b9812a9f02b99e89df411ffc5db9c779 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:32:34 -0700 Subject: [NETFILTER]: nf_conntrack_netlink: sync expectation dumping with conntrack table dumping Resync expectation table dumping code with conntrack dumping: don't rely on the unique ID anymore since that requires to walk the list backwards, which doesn't work with the upcoming conversion to hlists. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 206491488f4e..65a7ebcf86f6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1235,32 +1235,50 @@ nfattr_failure: return NOTIFY_DONE; } #endif +static int ctnetlink_exp_done(struct netlink_callback *cb) +{ + if (cb->args[0]) + nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[0]); + return 0; +} static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct nf_conntrack_expect *exp = NULL; + struct nf_conntrack_expect *exp, *last; struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[0]; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; read_lock_bh(&nf_conntrack_lock); + last = (struct nf_conntrack_expect *)cb->args[0]; +restart: list_for_each_prev(i, &nf_ct_expect_list) { exp = (struct nf_conntrack_expect *) i; if (l3proto && exp->tuple.src.l3num != l3proto) continue; - if (exp->id <= *id) - continue; + if (cb->args[0]) { + if (exp != last) + continue; + cb->args[0] = 0; + } if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, - 1, exp) < 0) + 1, exp) < 0) { + atomic_inc(&exp->use); + cb->args[0] = (unsigned long)exp; goto out; - *id = exp->id; + } + } + if (cb->args[0]) { + cb->args[0] = 0; + goto restart; } out: read_unlock_bh(&nf_conntrack_lock); + if (last) + nf_ct_expect_put(last); return skb->len; } @@ -1287,7 +1305,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, - ctnetlink_done); + ctnetlink_exp_done); } if (cda[CTA_EXPECT_MASTER-1]) -- cgit v1.2.3 From e9c1b084e17ca225b6be731b819308ee0f9e04b8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:32:53 -0700 Subject: [NETFILTER]: nf_conntrack: move expectaton related init code to nf_conntrack_expect.c Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_expect.h | 4 +-- net/netfilter/nf_conntrack_core.c | 20 +++++------- net/netfilter/nf_conntrack_expect.c | 48 ++++++++++++++++++++++++++++- net/netfilter/nf_conntrack_standalone.c | 11 ++----- 4 files changed, 59 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 13643f7f7422..cf6a619664e8 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -7,8 +7,6 @@ #include extern struct list_head nf_ct_expect_list; -extern struct kmem_cache *nf_ct_expect_cachep; -extern const struct file_operations exp_file_ops; struct nf_conntrack_expect { @@ -53,6 +51,8 @@ struct nf_conntrack_expect #define NF_CT_EXPECT_PERMANENT 0x1 +int nf_conntrack_expect_init(void); +void nf_conntrack_expect_fini(void); struct nf_conntrack_expect * __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 793f12ff168b..ed44a09ae739 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -961,12 +961,12 @@ void nf_conntrack_cleanup(void) rcu_assign_pointer(nf_ct_destroy, NULL); kmem_cache_destroy(nf_conntrack_cachep); - kmem_cache_destroy(nf_ct_expect_cachep); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, nf_conntrack_htable_size); nf_conntrack_proto_fini(); nf_conntrack_helper_fini(); + nf_conntrack_expect_fini(); } struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced) @@ -1088,21 +1088,17 @@ int __init nf_conntrack_init(void) goto err_free_hash; } - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", - sizeof(struct nf_conntrack_expect), - 0, 0, NULL, NULL); - if (!nf_ct_expect_cachep) { - printk(KERN_ERR "Unable to create nf_expect slab cache\n"); + ret = nf_conntrack_proto_init(); + if (ret < 0) goto err_free_conntrack_slab; - } - ret = nf_conntrack_proto_init(); + ret = nf_conntrack_expect_init(); if (ret < 0) - goto out_free_expect_slab; + goto out_fini_proto; ret = nf_conntrack_helper_init(); if (ret < 0) - goto out_fini_proto; + goto out_fini_expect; /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach); @@ -1116,10 +1112,10 @@ int __init nf_conntrack_init(void) return ret; +out_fini_expect: + nf_conntrack_expect_fini(); out_fini_proto: nf_conntrack_proto_fini(); -out_free_expect_slab: - kmem_cache_destroy(nf_ct_expect_cachep); err_free_conntrack_slab: kmem_cache_destroy(nf_conntrack_cachep); err_free_hash: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 5c8cb0f243a2..ad197bccc7ca 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -29,7 +29,7 @@ LIST_HEAD(nf_ct_expect_list); EXPORT_SYMBOL_GPL(nf_ct_expect_list); -struct kmem_cache *nf_ct_expect_cachep __read_mostly; +static struct kmem_cache *nf_ct_expect_cachep __read_mostly; static unsigned int nf_ct_expect_next_id; /* nf_conntrack_expect helper functions */ @@ -413,3 +413,49 @@ const struct file_operations exp_file_ops = { .release = seq_release }; #endif /* CONFIG_PROC_FS */ + +static int __init exp_proc_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc; + + proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops); + if (!proc) + return -ENOMEM; +#endif /* CONFIG_PROC_FS */ + return 0; +} + +static void exp_proc_remove(void) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove("nf_conntrack_expect"); +#endif /* CONFIG_PROC_FS */ +} + +int __init nf_conntrack_expect_init(void) +{ + int err; + + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL, NULL); + if (!nf_ct_expect_cachep) + return -ENOMEM; + + err = exp_proc_init(); + if (err < 0) + goto err1; + + return 0; + +err1: + kmem_cache_destroy(nf_ct_expect_cachep); + return err; +} + +void nf_conntrack_expect_fini(void) +{ + exp_proc_remove(); + kmem_cache_destroy(nf_ct_expect_cachep); +} diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index fe536b20408b..098e7993932a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -411,7 +411,7 @@ EXPORT_SYMBOL_GPL(nf_ct_log_invalid); static int __init nf_conntrack_standalone_init(void) { #ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc, *proc_exp, *proc_stat; + struct proc_dir_entry *proc, *proc_stat; #endif int ret = 0; @@ -423,13 +423,9 @@ static int __init nf_conntrack_standalone_init(void) proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440, - &exp_file_ops); - if (!proc_exp) goto cleanup_proc; - proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); if (!proc_stat) - goto cleanup_proc_exp; + goto cleanup_proc; proc_stat->proc_fops = &ct_cpu_seq_fops; proc_stat->owner = THIS_MODULE; @@ -449,8 +445,6 @@ static int __init nf_conntrack_standalone_init(void) #endif #ifdef CONFIG_PROC_FS remove_proc_entry("nf_conntrack", proc_net_stat); - cleanup_proc_exp: - proc_net_remove("nf_conntrack_expect"); cleanup_proc: proc_net_remove("nf_conntrack"); cleanup_init: @@ -466,7 +460,6 @@ static void __exit nf_conntrack_standalone_fini(void) #endif #ifdef CONFIG_PROC_FS remove_proc_entry("nf_conntrack", proc_net_stat); - proc_net_remove("nf_conntrack_expect"); proc_net_remove("nf_conntrack"); #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); -- cgit v1.2.3 From a71c085562bcc99e8b711cab4222bff1f6e955da Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:33:47 -0700 Subject: [NETFILTER]: nf_conntrack: use hashtable for expectations Currently all expectations are kept on a global list that - needs to be searched for every new conncetion - needs to be walked for evicting expectations when a master connection has reached its limit - needs to be walked on connection destruction for connections that have open expectations This is obviously not good, especially when considering helpers like H.323 that register *lots* of expectations and can set up permanent expectations, but it also allows for an easy DoS against firewalls using connection tracking helpers. Use a hashtable for expectations to avoid incurring the search overhead for every new connection. The default hash size is 1/256 of the conntrack hash table size, this can be overriden using a module parameter. This patch only introduces the hash table for expectation lookups and keeps other users to reduce the noise, the following patches will get rid of it completely. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 1 - include/net/netfilter/nf_conntrack_expect.h | 5 ++ net/netfilter/nf_conntrack_expect.c | 72 ++++++++++++++++++++++++++--- 3 files changed, 71 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index a18f79c80db8..4056f5f08da1 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -84,7 +84,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l4proto *proto); extern struct hlist_head *nf_conntrack_hash; -extern struct list_head nf_ct_expect_list; extern rwlock_t nf_conntrack_lock ; extern struct hlist_head unconfirmed; diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index cf6a619664e8..424d4bdb9848 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -7,12 +7,17 @@ #include extern struct list_head nf_ct_expect_list; +extern struct hlist_head *nf_ct_expect_hash; +extern unsigned int nf_ct_expect_hsize; struct nf_conntrack_expect { /* Internal linked list (global expectation list) */ struct list_head list; + /* Hash member */ + struct hlist_node hnode; + /* We expect this tuple, with the following mask */ struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_mask mask; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index ad197bccc7ca..0696f87aaef1 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,17 @@ LIST_HEAD(nf_ct_expect_list); EXPORT_SYMBOL_GPL(nf_ct_expect_list); +struct hlist_head *nf_ct_expect_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_expect_hash); + +unsigned int nf_ct_expect_hsize __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); + +static unsigned int nf_ct_expect_hash_rnd __read_mostly; +static unsigned int nf_ct_expect_count; +static int nf_ct_expect_hash_rnd_initted __read_mostly; +static int nf_ct_expect_vmalloc; + static struct kmem_cache *nf_ct_expect_cachep __read_mostly; static unsigned int nf_ct_expect_next_id; @@ -41,6 +53,9 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) NF_CT_ASSERT(!timer_pending(&exp->timeout)); list_del(&exp->list); + hlist_del(&exp->hnode); + nf_ct_expect_count--; + NF_CT_STAT_INC(expect_delete); master_help->expecting--; nf_ct_expect_put(exp); @@ -57,12 +72,31 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect) nf_ct_expect_put(exp); } +static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple) +{ + if (unlikely(!nf_ct_expect_hash_rnd_initted)) { + get_random_bytes(&nf_ct_expect_hash_rnd, 4); + nf_ct_expect_hash_rnd_initted = 1; + } + + return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), + (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | + tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) % + nf_ct_expect_hsize; +} + struct nf_conntrack_expect * __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; + struct hlist_node *n; + unsigned int h; + + if (!nf_ct_expect_count) + return NULL; - list_for_each_entry(i, &nf_ct_expect_list, list) { + h = nf_ct_expect_dst_hash(tuple); + hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } @@ -252,10 +286,14 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put); static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); atomic_inc(&exp->use); master_help->expecting++; + list_add(&exp->list, &nf_ct_expect_list); + hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]); + nf_ct_expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); @@ -300,6 +338,8 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct hlist_node *n; + unsigned int h; int ret; NF_CT_ASSERT(master_help); @@ -309,7 +349,8 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) ret = -ESHUTDOWN; goto out; } - list_for_each_entry(i, &nf_ct_expect_list, list) { + h = nf_ct_expect_dst_hash(&expect->tuple); + hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { @@ -433,24 +474,41 @@ static void exp_proc_remove(void) #endif /* CONFIG_PROC_FS */ } +module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); + int __init nf_conntrack_expect_init(void) { - int err; + int err = -ENOMEM; + + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + + nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, + &nf_ct_expect_vmalloc); + if (nf_ct_expect_hash == NULL) + goto err1; nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", sizeof(struct nf_conntrack_expect), 0, 0, NULL, NULL); if (!nf_ct_expect_cachep) - return -ENOMEM; + goto err2; err = exp_proc_init(); if (err < 0) - goto err1; + goto err3; return 0; -err1: +err3: + nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_expect_hsize); +err2: kmem_cache_destroy(nf_ct_expect_cachep); +err1: return err; } @@ -458,4 +516,6 @@ void nf_conntrack_expect_fini(void) { exp_proc_remove(); kmem_cache_destroy(nf_ct_expect_cachep); + nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_expect_hsize); } -- cgit v1.2.3 From 5d08ad440feae11b8d6e7599147a8a20ac60f99a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:34:07 -0700 Subject: [NETFILTER]: nf_conntrack_expect: convert proc functions to hash Convert from the global expectation list to the hash table. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 83 +++++++++++++++------ net/netfilter/nf_conntrack_expect.c | 85 ++++++++++++++++------ 2 files changed, 121 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 12d6a6327b63..ab8e4c607b7a 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -207,47 +207,68 @@ static const struct file_operations ct_file_ops = { }; /* expects */ -static void *exp_seq_start(struct seq_file *s, loff_t *pos) +struct ct_expect_iter_state { + unsigned int bucket; +}; + +static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct list_head *e = &nf_ct_expect_list; - loff_t i; + struct ct_expect_iter_state *st = seq->private; - /* strange seq_file api calls stop even if we fail, - * thus we need to grab lock since stop unlocks */ - read_lock_bh(&nf_conntrack_lock); + for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { + if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) + return nf_ct_expect_hash[st->bucket].first; + } + return NULL; +} - if (list_empty(e)) - return NULL; +static struct hlist_node *ct_expect_get_next(struct seq_file *seq, + struct hlist_node *head) +{ + struct ct_expect_iter_state *st = seq->private; - for (i = 0; i <= *pos; i++) { - e = e->next; - if (e == &nf_ct_expect_list) + head = head->next; + while (head == NULL) { + if (++st->bucket >= nf_ct_expect_hsize) return NULL; + head = nf_ct_expect_hash[st->bucket].first; } - return e; + return head; } -static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *e = v; + struct hlist_node *head = ct_expect_get_first(seq); - ++*pos; - e = e->next; + if (head) + while (pos && (head = ct_expect_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} - if (e == &nf_ct_expect_list) - return NULL; +static void *exp_seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&nf_conntrack_lock); + return ct_expect_get_idx(seq, *pos); +} - return e; +static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + return ct_expect_get_next(seq, v); } -static void exp_seq_stop(struct seq_file *s, void *v) +static void exp_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&nf_conntrack_lock); } static int exp_seq_show(struct seq_file *s, void *v) { - struct nf_conntrack_expect *exp = v; + struct nf_conntrack_expect *exp; + struct hlist_node *n = v; + + exp = hlist_entry(n, struct nf_conntrack_expect, hnode); if (exp->tuple.src.l3num != AF_INET) return 0; @@ -276,7 +297,23 @@ static struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open(file, &exp_seq_ops); + struct seq_file *seq; + struct ct_expect_iter_state *st; + int ret; + + st = kmalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); + if (st == NULL) + return -ENOMEM; + ret = seq_open(file, &exp_seq_ops); + if (ret) + goto out_free; + seq = file->private_data; + seq->private = st; + memset(st, 0, sizeof(struct ct_expect_iter_state)); + return ret; +out_free: + kfree(st); + return ret; } static const struct file_operations ip_exp_file_ops = { @@ -284,7 +321,7 @@ static const struct file_operations ip_exp_file_ops = { .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release + .release = seq_release_private, }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 0696f87aaef1..c5006b0a4e65 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -377,47 +377,68 @@ out: EXPORT_SYMBOL_GPL(nf_ct_expect_related); #ifdef CONFIG_PROC_FS -static void *exp_seq_start(struct seq_file *s, loff_t *pos) +struct ct_expect_iter_state { + unsigned int bucket; +}; + +static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct list_head *e = &nf_ct_expect_list; - loff_t i; + struct ct_expect_iter_state *st = seq->private; - /* strange seq_file api calls stop even if we fail, - * thus we need to grab lock since stop unlocks */ - read_lock_bh(&nf_conntrack_lock); + for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { + if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) + return nf_ct_expect_hash[st->bucket].first; + } + return NULL; +} - if (list_empty(e)) - return NULL; +static struct hlist_node *ct_expect_get_next(struct seq_file *seq, + struct hlist_node *head) +{ + struct ct_expect_iter_state *st = seq->private; - for (i = 0; i <= *pos; i++) { - e = e->next; - if (e == &nf_ct_expect_list) + head = head->next; + while (head == NULL) { + if (++st->bucket >= nf_ct_expect_hsize) return NULL; + head = nf_ct_expect_hash[st->bucket].first; } - return e; + return head; } -static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *e = v; + struct hlist_node *head = ct_expect_get_first(seq); - ++*pos; - e = e->next; + if (head) + while (pos && (head = ct_expect_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} - if (e == &nf_ct_expect_list) - return NULL; +static void *exp_seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&nf_conntrack_lock); + return ct_expect_get_idx(seq, *pos); +} - return e; +static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + return ct_expect_get_next(seq, v); } -static void exp_seq_stop(struct seq_file *s, void *v) +static void exp_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&nf_conntrack_lock); } static int exp_seq_show(struct seq_file *s, void *v) { - struct nf_conntrack_expect *expect = v; + struct nf_conntrack_expect *expect; + struct hlist_node *n = v; + + expect = hlist_entry(n, struct nf_conntrack_expect, hnode); if (expect->timeout.function) seq_printf(s, "%ld ", timer_pending(&expect->timeout) @@ -443,15 +464,31 @@ static struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open(file, &exp_seq_ops); + struct seq_file *seq; + struct ct_expect_iter_state *st; + int ret; + + st = kmalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); + if (st == NULL) + return -ENOMEM; + ret = seq_open(file, &exp_seq_ops); + if (ret) + goto out_free; + seq = file->private_data; + seq->private = st; + memset(st, 0, sizeof(struct ct_expect_iter_state)); + return ret; +out_free: + kfree(st); + return ret; } -const struct file_operations exp_file_ops = { +static const struct file_operations exp_file_ops = { .owner = THIS_MODULE, .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release + .release = seq_release_private, }; #endif /* CONFIG_PROC_FS */ -- cgit v1.2.3 From 31f15875c5ad98a13b528aaf19c839e22b43dc9a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:35:21 -0700 Subject: [NETFILTER]: nf_conntrack_helper/nf_conntrack_netlink: convert to expectation hash Convert from the global expectation list to the hash table. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_helper.c | 21 ++++++---- net/netfilter/nf_conntrack_netlink.c | 79 +++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index fdabf823f8cd..cc8ae7404ac4 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -114,22 +114,25 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { - unsigned int i; struct nf_conntrack_tuple_hash *h; - struct nf_conntrack_expect *exp, *tmp; - struct hlist_node *n; + struct nf_conntrack_expect *exp; + struct hlist_node *n, *next; + unsigned int i; /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); list_del(&me->list); /* Get rid of expectations */ - list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) { - struct nf_conn_help *help = nfct_help(exp->master); - if ((help->helper == me || exp->helper == me) && - del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, n, next, + &nf_ct_expect_hash[i], hnode) { + struct nf_conn_help *help = nfct_help(exp->master); + if ((help->helper == me || exp->helper == me) && + del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } } } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 65a7ebcf86f6..60af9b6b9e00 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1237,8 +1237,8 @@ nfattr_failure: #endif static int ctnetlink_exp_done(struct netlink_callback *cb) { - if (cb->args[0]) - nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[0]); + if (cb->args[1]) + nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[1]); return 0; } @@ -1246,35 +1246,37 @@ static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conntrack_expect *exp, *last; - struct list_head *i; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; read_lock_bh(&nf_conntrack_lock); - last = (struct nf_conntrack_expect *)cb->args[0]; + last = (struct nf_conntrack_expect *)cb->args[1]; + for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - list_for_each_prev(i, &nf_ct_expect_list) { - exp = (struct nf_conntrack_expect *) i; - if (l3proto && exp->tuple.src.l3num != l3proto) - continue; - if (cb->args[0]) { - if (exp != last) + hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]], + hnode) { + if (l3proto && exp->tuple.src.l3num != l3proto) continue; - cb->args[0] = 0; + if (cb->args[1]) { + if (exp != last) + continue; + cb->args[1] = 0; + } + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + 1, exp) < 0) { + atomic_inc(&exp->use); + cb->args[1] = (unsigned long)exp; + goto out; + } } - if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_EXP_NEW, - 1, exp) < 0) { - atomic_inc(&exp->use); - cb->args[0] = (unsigned long)exp; - goto out; + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; } } - if (cb->args[0]) { - cb->args[0] = 0; - goto restart; - } out: read_unlock_bh(&nf_conntrack_lock); if (last) @@ -1354,11 +1356,13 @@ static int ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[]) { - struct nf_conntrack_expect *exp, *tmp; + struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nf_conntrack_helper *h; struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; + unsigned int i; int err; if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) @@ -1390,6 +1394,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, nf_ct_expect_put(exp); } else if (cda[CTA_EXPECT_HELP_NAME-1]) { char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); + struct nf_conn_help *m_help; /* delete all expectations for this helper */ write_lock_bh(&nf_conntrack_lock); @@ -1398,22 +1403,30 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, write_unlock_bh(&nf_conntrack_lock); return -EINVAL; } - list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) { - struct nf_conn_help *m_help = nfct_help(exp->master); - if (m_help->helper == h - && del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, n, next, + &nf_ct_expect_hash[i], + hnode) { + m_help = nfct_help(exp->master); + if (m_help->helper == h + && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } } } write_unlock_bh(&nf_conntrack_lock); } else { /* This basically means we have to flush everything*/ write_lock_bh(&nf_conntrack_lock); - list_for_each_entry_safe(exp, tmp, &nf_ct_expect_list, list) { - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, n, next, + &nf_ct_expect_hash[i], + hnode) { + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } } } write_unlock_bh(&nf_conntrack_lock); -- cgit v1.2.3 From b560580a13b180bc1e3cad7ffbc93388cc39be5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:35:56 -0700 Subject: [NETFILTER]: nf_conntrack_expect: maintain per conntrack expectation list This patch brings back the per-conntrack expectation list that was removed around 2.6.10 to avoid walking all expectations on expectation eviction and conntrack destruction. As these were the last users of the global expectation list, this patch also kills that. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 2 ++ include/net/netfilter/nf_conntrack_expect.h | 5 ++-- include/net/netfilter/nf_conntrack_helper.h | 2 ++ net/netfilter/nf_conntrack_core.c | 18 +++---------- net/netfilter/nf_conntrack_expect.c | 40 ++++++++++++++--------------- net/netfilter/nf_conntrack_helper.c | 13 ++++++++++ net/netfilter/nf_conntrack_netlink.c | 4 +-- 7 files changed, 45 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8f2cbb965f3d..d4f02eb0c66c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -82,6 +82,8 @@ struct nf_conn_help { union nf_conntrack_help help; + struct hlist_head expectations; + /* Current number of expected connections */ unsigned int expecting; }; diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 424d4bdb9848..9d5af4e22c4f 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -6,14 +6,13 @@ #define _NF_CONNTRACK_EXPECT_H #include -extern struct list_head nf_ct_expect_list; extern struct hlist_head *nf_ct_expect_hash; extern unsigned int nf_ct_expect_hsize; struct nf_conntrack_expect { - /* Internal linked list (global expectation list) */ - struct list_head list; + /* Conntrack expectation list member */ + struct hlist_node lnode; /* Hash member */ struct hlist_node hnode; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index d62e6f093af4..2c0e2e0fb7ff 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -52,6 +52,8 @@ extern void nf_ct_helper_put(struct nf_conntrack_helper *helper); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); + static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) { return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ed44a09ae739..d1fc019760a1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -502,12 +502,9 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = exp->master; if (exp->helper) { - help = nf_ct_ext_add(conntrack, NF_CT_EXT_HELPER, - GFP_ATOMIC); + help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, exp->helper); - else - DEBUGP("failed to add helper extension area"); } #ifdef CONFIG_NF_CONNTRACK_MARK @@ -523,14 +520,9 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, helper = __nf_ct_helper_find(&repl_tuple); if (helper) { - help = nf_ct_ext_add(conntrack, NF_CT_EXT_HELPER, - GFP_ATOMIC); + help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); if (help) - /* not in hash table yet, so not strictly - necessary */ rcu_assign_pointer(help->helper, helper); - else - DEBUGP("failed to add helper extension area"); } NF_CT_STAT_INC(new); } @@ -721,11 +713,9 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, } if (help == NULL) { - help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_ATOMIC); - if (help == NULL) { - DEBUGP("failed to add helper extension area"); + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + if (help == NULL) goto out; - } } else { memset(&help->help, 0, sizeof(help->help)); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index c5006b0a4e65..5ef0dd439e76 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -27,9 +27,6 @@ #include #include -LIST_HEAD(nf_ct_expect_list); -EXPORT_SYMBOL_GPL(nf_ct_expect_list); - struct hlist_head *nf_ct_expect_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hash); @@ -52,13 +49,14 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); - list_del(&exp->list); hlist_del(&exp->hnode); nf_ct_expect_count--; - NF_CT_STAT_INC(expect_delete); + hlist_del(&exp->lnode); master_help->expecting--; nf_ct_expect_put(exp); + + NF_CT_STAT_INC(expect_delete); } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); @@ -153,17 +151,18 @@ nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple) /* delete all expectations for this conntrack */ void nf_ct_remove_expectations(struct nf_conn *ct) { - struct nf_conntrack_expect *i, *tmp; struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_expect *exp; + struct hlist_node *n, *next; /* Optimization: most connection never expect any others. */ if (!help || help->expecting == 0) return; - list_for_each_entry_safe(i, tmp, &nf_ct_expect_list, list) { - if (i->master == ct && del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_ct_expect_put(i); + hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } } } @@ -289,9 +288,10 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); atomic_inc(&exp->use); + + hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting++; - list_add(&exp->list, &nf_ct_expect_list); hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]); nf_ct_expect_count++; @@ -308,16 +308,16 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) /* Race with expectations being used means we could have none to find; OK. */ static void evict_oldest_expect(struct nf_conn *master) { - struct nf_conntrack_expect *i; + struct nf_conn_help *master_help = nfct_help(master); + struct nf_conntrack_expect *exp = NULL; + struct hlist_node *n; - list_for_each_entry_reverse(i, &nf_ct_expect_list, list) { - if (i->master == master) { - if (del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_ct_expect_put(i); - } - break; - } + hlist_for_each_entry(exp, n, &master_help->expectations, lnode) + ; /* nothing */ + + if (exp && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index cc8ae7404ac4..66c209d7e09d 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -87,6 +87,19 @@ __nf_conntrack_helper_find_byname(const char *name) } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); +struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) +{ + struct nf_conn_help *help; + + help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); + if (help) + INIT_HLIST_HEAD(&help->expectations); + else + pr_debug("failed to add helper extension area"); + return help; +} +EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); + static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 60af9b6b9e00..6f89b105a205 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -868,7 +868,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) /* need to zero data of old helper */ memset(&help->help, 0, sizeof(help->help)); } else { - help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_KERNEL); + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); if (help == NULL) return -ENOMEM; } @@ -989,7 +989,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], helper = nf_ct_helper_find_get(rtuple); if (helper) { - help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, GFP_KERNEL); + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); if (help == NULL) { nf_ct_helper_put(helper); err = -ENOMEM; -- cgit v1.2.3 From f264a7df08d50bb4a23be6a9aa06940e497ac1c4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:36:24 -0700 Subject: [NETFILTER]: nf_conntrack_expect: introduce nf_conntrack_expect_max sysct As a last step of preventing DoS by creating lots of expectations, this patch introduces a global maximum and a sysctl to control it. The default is initialized to 4 * the expectation hash table size, which results in 1/64 of the default maxmimum of conntracks. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_expect.h | 1 + net/netfilter/nf_conntrack_expect.c | 10 ++++++++++ net/netfilter/nf_conntrack_standalone.c | 9 ++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 9d5af4e22c4f..cae1a0dce365 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -8,6 +8,7 @@ extern struct hlist_head *nf_ct_expect_hash; extern unsigned int nf_ct_expect_hsize; +extern unsigned int nf_ct_expect_max; struct nf_conntrack_expect { diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 5ef0dd439e76..513828fdaa2c 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -35,6 +35,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); static unsigned int nf_ct_expect_hash_rnd __read_mostly; static unsigned int nf_ct_expect_count; +unsigned int nf_ct_expect_max __read_mostly; static int nf_ct_expect_hash_rnd_initted __read_mostly; static int nf_ct_expect_vmalloc; @@ -367,6 +368,14 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) master_help->expecting >= master_help->helper->max_expected) evict_oldest_expect(master); + if (nf_ct_expect_count >= nf_ct_expect_max) { + if (net_ratelimit()) + printk(KERN_WARNING + "nf_conntrack: expectation table full"); + ret = -EMFILE; + goto out; + } + nf_ct_expect_insert(expect); nf_ct_expect_event(IPEXP_NEW, expect); ret = 0; @@ -522,6 +531,7 @@ int __init nf_conntrack_expect_init(void) if (!nf_ct_expect_hsize) nf_ct_expect_hsize = 1; } + nf_ct_expect_max = nf_ct_expect_hsize * 4; nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, &nf_ct_expect_vmalloc); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 098e7993932a..6af96c6e29fb 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -372,7 +372,14 @@ static ctl_table nf_ct_sysctl_table[] = { .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, - + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_expect_max", + .data = &nf_ct_expect_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; -- cgit v1.2.3 From b8a7fe6c10511fce10b20efa163123f4041f2550 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:36:46 -0700 Subject: [NETFILTER]: nf_conntrack_helper: use hashtable for conntrack helpers Eliminate the last global list searched for every new connection. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_helper.h | 4 +- net/netfilter/nf_conntrack_helper.c | 70 +++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 2c0e2e0fb7ff..d04f99964d94 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -15,8 +15,8 @@ struct module; struct nf_conntrack_helper -{ - struct list_head list; /* Internal use. */ +{ + struct hlist_node hnode; /* Internal use. */ const char *name; /* name of the module */ struct module *me; /* pointer to self */ diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 66c209d7e09d..b1179dd3d8c3 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -28,23 +28,41 @@ #include #include -static __read_mostly LIST_HEAD(helpers); +static struct hlist_head *nf_ct_helper_hash __read_mostly; +static unsigned int nf_ct_helper_hsize __read_mostly; +static unsigned int nf_ct_helper_count __read_mostly; +static int nf_ct_helper_vmalloc; + + +/* Stupid hash, but collision free for the default registrations of the + * helpers currently in the kernel. */ +static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) +{ + return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^ + tuple->src.u.all) % nf_ct_helper_hsize; +} struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { - struct nf_conntrack_helper *h; + struct nf_conntrack_helper *helper; struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; + struct hlist_node *n; + unsigned int h; - list_for_each_entry(h, &helpers, list) { - if (nf_ct_tuple_src_mask_cmp(tuple, &h->tuple, &mask)) - return h; + if (!nf_ct_helper_count) + return NULL; + + h = helper_hash(tuple); + hlist_for_each_entry(helper, n, &nf_ct_helper_hash[h], hnode) { + if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) + return helper; } return NULL; } struct nf_conntrack_helper * -nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple) +nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; @@ -77,12 +95,15 @@ struct nf_conntrack_helper * __nf_conntrack_helper_find_byname(const char *name) { struct nf_conntrack_helper *h; + struct hlist_node *n; + unsigned int i; - list_for_each_entry(h, &helpers, list) { - if (!strcmp(h->name, name)) - return h; + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry(h, n, &nf_ct_helper_hash[i], hnode) { + if (!strcmp(h->name, name)) + return h; + } } - return NULL; } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); @@ -115,10 +136,13 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { + unsigned int h = helper_hash(&me->tuple); + BUG_ON(me->timeout == 0); write_lock_bh(&nf_conntrack_lock); - list_add(&me->list, &helpers); + hlist_add_head(&me->hnode, &nf_ct_helper_hash[h]); + nf_ct_helper_count++; write_unlock_bh(&nf_conntrack_lock); return 0; @@ -134,7 +158,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); - list_del(&me->list); + hlist_del(&me->hnode); + nf_ct_helper_count--; /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { @@ -171,10 +196,29 @@ static struct nf_ct_ext_type helper_extend __read_mostly = { int nf_conntrack_helper_init() { - return nf_ct_extend_register(&helper_extend); + int err; + + nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ + nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, + &nf_ct_helper_vmalloc); + if (!nf_ct_helper_hash) + return -ENOMEM; + + err = nf_ct_extend_register(&helper_extend); + if (err < 0) + goto err1; + + return 0; + +err1: + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, + nf_ct_helper_hsize); + return err; } void nf_conntrack_helper_fini() { nf_ct_extend_unregister(&helper_extend); + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, + nf_ct_helper_hsize); } -- cgit v1.2.3 From ec59a1110aee6846adada8979915cacae64042ce Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:37:03 -0700 Subject: [NETFILTER]: nf_conntrack: mark helpers __read_mostly Most are __read_mostly already, this changes the remaining ones. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_ftp.c | 4 ++-- net/netfilter/nf_conntrack_sane.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 198330b8ada4..cd9c2d00cc09 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -518,8 +518,8 @@ out_update_nl: return ret; } -static struct nf_conntrack_helper ftp[MAX_PORTS][2]; -static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")]; +static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; +static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")] __read_mostly; /* don't make this __exit, since it's called from __init ! */ static void nf_conntrack_ftp_fini(void) diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index edd10df8aa08..627eda79d154 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -166,8 +166,8 @@ out: return ret; } -static struct nf_conntrack_helper sane[MAX_PORTS][2]; -static char sane_names[MAX_PORTS][2][sizeof("sane-65535")]; +static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly; +static char sane_names[MAX_PORTS][2][sizeof("sane-65535")] __read_mostly; /* don't make this __exit, since it's called from __init ! */ static void nf_conntrack_sane_fini(void) -- cgit v1.2.3 From 7ae7730fd6d98be1afe8ad9ea77813de607ec970 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:37:38 -0700 Subject: [NETFILTER]: nf_conntrack: early_drop improvement When the maximum number of conntrack entries is reached and a new one needs to be allocated, conntrack tries to drop an unassured connection from the same hash bucket the new conntrack would hash to. Since with a properly sized hash the average number of entries per bucket is 1, the chances of actually finding one are not very good. This patch makes it walk the hash until a minimum number of 8 entries are checked. Based on patch by Vasily Averin . Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d1fc019760a1..472396dac05c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -377,21 +377,30 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); +#define NF_CT_EVICTION_RANGE 8 + /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static int early_drop(struct hlist_head *chain) +static int early_drop(unsigned int hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; struct nf_conn *ct = NULL, *tmp; struct hlist_node *n; + unsigned int i, cnt = 0; int dropped = 0; read_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry(h, n, chain, hnode) { - tmp = nf_ct_tuplehash_to_ctrack(h); - if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) - ct = tmp; + for (i = 0; i < nf_conntrack_htable_size; i++) { + hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) + ct = tmp; + cnt++; + } + if (ct || cnt >= NF_CT_EVICTION_RANGE) + break; + hash = (hash + 1) % nf_conntrack_htable_size; } if (ct) atomic_inc(&ct->ct_general.use); @@ -425,8 +434,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, if (nf_conntrack_max && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { unsigned int hash = hash_conntrack(orig); - /* Try dropping from this hash chain. */ - if (!early_drop(&nf_conntrack_hash[hash])) { + if (!early_drop(hash)) { atomic_dec(&nf_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING -- cgit v1.2.3 From 3569b621ceba0a9cfb80e24c0bd19fd632ccee25 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:38:07 -0700 Subject: [NETFILTER]: ipt_SAME: add to feature-removal-schedule Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- Documentation/feature-removal-schedule.txt | 8 ++++++++ net/ipv4/netfilter/Kconfig | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 484250dcdbe0..062fc2e79c86 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -311,3 +311,11 @@ Who: Tejun Heo --------------------------- +What: iptables SAME target +When: 1.1. 2008 +Files: net/ipv4/netfilter/ipt_SAME.c, include/linux/netfilter_ipv4/ipt_SAME.h +Why: Obsolete for multiple years now, NAT core provides the same behaviour. + Unfixable broken wrt. 32/64 bit cleanness. +Who: Patrick McHardy + +--------------------------- diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 46509fae9fd8..fa97947c6ae1 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -230,7 +230,7 @@ config IP_NF_TARGET_NETMAP To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_SAME - tristate "SAME target support" + tristate "SAME target support (OBSOLETE)" depends on NF_NAT help This option adds a `SAME' target, which works like the standard SNAT -- cgit v1.2.3 From d3c3f4243e135b3d8c41d98be0cb2f54a4141abf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:38:30 -0700 Subject: [NETFILTER]: ipt_CLUSTERIP: add compat code Adjust structure size and don't expect pointers passed in from userspace to be valid. Also replace an enum in an ABI structure by a fixed size type. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 4 +-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 39 ++++++++++++++-------------- 2 files changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h index d9bceedfb3dc..daf50be22c9d 100644 --- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h +++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h @@ -18,13 +18,13 @@ struct clusterip_config; struct ipt_clusterip_tgt_info { u_int32_t flags; - + /* only relevant for new ones */ u_int8_t clustermac[6]; u_int16_t num_total_nodes; u_int16_t num_local_nodes; u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; - enum clusterip_hashmode hash_mode; + u_int32_t hash_mode; u_int32_t hash_initval; struct clusterip_config *config; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1cef3b09c326..1981acedbfe8 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -397,23 +397,7 @@ checkentry(const char *tablename, /* FIXME: further sanity checks */ config = clusterip_config_find_get(e->ip.dst.s_addr, 1); - if (config) { - if (cipinfo->config != NULL) { - /* Case A: This is an entry that gets reloaded, since - * it still has a cipinfo->config pointer. Simply - * increase the entry refcount and return */ - if (cipinfo->config != config) { - printk(KERN_ERR "CLUSTERIP: Reloaded entry " - "has invalid config pointer!\n"); - return false; - } - } else { - /* Case B: This is a new rule referring to an existing - * clusterip config. */ - cipinfo->config = config; - } - } else { - /* Case C: This is a completely new clusterip config */ + if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); return false; @@ -440,8 +424,8 @@ checkentry(const char *tablename, } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } - cipinfo->config = config; } + cipinfo->config = config; if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " @@ -466,13 +450,30 @@ static void destroy(const struct xt_target *target, void *targinfo) nf_ct_l3proto_module_put(target->family); } +#ifdef CONFIG_COMPAT +struct compat_ipt_clusterip_tgt_info +{ + u_int32_t flags; + u_int8_t clustermac[6]; + u_int16_t num_total_nodes; + u_int16_t num_local_nodes; + u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; + u_int32_t hash_mode; + u_int32_t hash_initval; + compat_uptr_t config; +}; +#endif /* CONFIG_COMPAT */ + static struct xt_target clusterip_tgt __read_mostly = { .name = "CLUSTERIP", .family = AF_INET, .target = target, - .targetsize = sizeof(struct ipt_clusterip_tgt_info), .checkentry = checkentry, .destroy = destroy, + .targetsize = sizeof(struct ipt_clusterip_tgt_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), +#endif /* CONFIG_COMPAT */ .me = THIS_MODULE }; -- cgit v1.2.3 From 91e8db80065d655ce1b6d74cadc921671e8d5285 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:38:54 -0700 Subject: [NETFILTER]: nf_conntrack_h323: turn some printks into DEBUGPs Don't spam the ringbuffer with decoding errors. The only printks remaining are for dropped packets when we're certain they are H.323. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_h323_main.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 8c57b8119bfb..aa5ba99b7a08 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -163,9 +163,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, } /* Fragmented TPKT */ - if (net_ratelimit()) - printk("nf_ct_h323: " - "fragmented TPKT\n"); + DEBUGP("nf_ct_h323: fragmented TPKT\n"); goto clear_out; } @@ -596,10 +594,9 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, ret = DecodeMultimediaSystemControlMessage(data, datalen, &mscm); if (ret < 0) { - if (net_ratelimit()) - printk("nf_ct_h245: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + DEBUGP("nf_ct_h245: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); /* We don't drop when decoding error */ break; } @@ -1137,10 +1134,9 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, /* Decode Q.931 signal */ ret = DecodeQ931(data, datalen, &q931); if (ret < 0) { - if (net_ratelimit()) - printk("nf_ct_q931: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + DEBUGP("nf_ct_q931: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); /* We don't drop when decoding error */ break; } @@ -1711,10 +1707,9 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, /* Decode RAS message */ ret = DecodeRasMessage(data, datalen, &ras); if (ret < 0) { - if (net_ratelimit()) - printk("nf_ct_ras: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + DEBUGP("nf_ct_ras: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); goto accept; } -- cgit v1.2.3 From 342b7e3c8a3c84252799c4ac4d9a604b8903d2b4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:39:16 -0700 Subject: [NETFILTER]: xt_helper: use RCU The ->helper pointer is protected by RCU, no need to take nf_conntrack_lock. Also remove excessive debugging. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_helper.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index d03acb032cc8..0a1f4c6bcdef 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -22,11 +22,6 @@ MODULE_DESCRIPTION("iptables helper match module"); MODULE_ALIAS("ipt_helper"); MODULE_ALIAS("ip6t_helper"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif static bool match(const struct sk_buff *skb, @@ -41,38 +36,28 @@ match(const struct sk_buff *skb, const struct xt_helper_info *info = matchinfo; const struct nf_conn *ct; const struct nf_conn_help *master_help; + const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; bool ret = info->invert; ct = nf_ct_get(skb, &ctinfo); - if (!ct) { - DEBUGP("xt_helper: Eek! invalid conntrack?\n"); + if (!ct || !ct->master) return ret; - } - - if (!ct->master) { - DEBUGP("xt_helper: conntrack %p has no master\n", ct); - return ret; - } - read_lock_bh(&nf_conntrack_lock); master_help = nfct_help(ct->master); - if (!master_help || !master_help->helper) { - DEBUGP("xt_helper: master ct %p has no helper\n", - exp->expectant); - goto out_unlock; - } + if (!master_help) + return ret; - DEBUGP("master's name = %s , info->name = %s\n", - ct->master->helper->name, info->name); + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(master_help->helper); + if (!helper) + return ret; if (info->name[0] == '\0') ret = !ret; else ret ^= !strncmp(master_help->helper->name, info->name, strlen(master_help->helper->name)); -out_unlock: - read_unlock_bh(&nf_conntrack_lock); return ret; } -- cgit v1.2.3 From 0d53778e81ac7af266dac8a20cc328328c327112 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:39:38 -0700 Subject: [NETFILTER]: Convert DEBUGP to pr_debug Convert DEBUGP to pr_debug and fix lots of non-compiling debug statements. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_pptp.h | 2 + include/net/netfilter/nf_conntrack_tuple.h | 10 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 27 ++-- net/ipv4/netfilter/ipt_LOG.c | 12 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 10 +- net/ipv4/netfilter/ipt_NETMAP.c | 13 +- net/ipv4/netfilter/ipt_REDIRECT.c | 10 +- net/ipv4/netfilter/ipt_REJECT.c | 10 +- net/ipv4/netfilter/ipt_SAME.c | 53 +++---- net/ipv4/netfilter/ipt_ULOG.c | 35 ++--- net/ipv4/netfilter/ipt_iprange.c | 30 ++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 22 +-- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 6 - net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 22 +-- net/ipv4/netfilter/nf_nat_core.c | 15 +- net/ipv4/netfilter/nf_nat_ftp.c | 14 +- net/ipv4/netfilter/nf_nat_h323.c | 95 ++++++------ net/ipv4/netfilter/nf_nat_helper.c | 40 +++-- net/ipv4/netfilter/nf_nat_irc.c | 13 +- net/ipv4/netfilter/nf_nat_pptp.c | 37 ++--- net/ipv4/netfilter/nf_nat_proto_gre.c | 17 +-- net/ipv4/netfilter/nf_nat_rule.c | 14 +- net/ipv4/netfilter/nf_nat_sip.c | 8 - net/ipv4/netfilter/nf_nat_standalone.c | 16 +- net/ipv6/netfilter/ip6t_LOG.c | 12 +- net/ipv6/netfilter/ip6t_REJECT.c | 23 ++- net/ipv6/netfilter/ip6t_ah.c | 47 +++--- net/ipv6/netfilter/ip6t_frag.c | 76 +++++----- net/ipv6/netfilter/ip6t_hbh.c | 45 +++--- net/ipv6/netfilter/ip6t_rt.c | 82 +++++------ net/ipv6/netfilter/ip6table_mangle.c | 6 - net/ipv6/netfilter/ip6table_raw.c | 6 - net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 10 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 24 ++- net/ipv6/netfilter/nf_conntrack_reasm.c | 52 +++---- net/netfilter/nf_conntrack_core.c | 41 +++--- net/netfilter/nf_conntrack_ftp.c | 94 ++++++------ net/netfilter/nf_conntrack_h323_main.c | 161 ++++++++++----------- net/netfilter/nf_conntrack_irc.c | 24 ++- net/netfilter/nf_conntrack_l3proto_generic.c | 6 - net/netfilter/nf_conntrack_pptp.c | 71 +++++---- net/netfilter/nf_conntrack_proto_gre.c | 28 ++-- net/netfilter/nf_conntrack_proto_sctp.c | 92 ++++-------- net/netfilter/nf_conntrack_proto_tcp.c | 129 ++++++++--------- net/netfilter/nf_conntrack_sane.c | 27 ++-- net/netfilter/nf_conntrack_sip.c | 22 +-- net/netfilter/nf_conntrack_standalone.c | 6 - net/netfilter/nf_conntrack_tftp.c | 17 +-- 48 files changed, 662 insertions(+), 970 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index 9d8144a488cd..c93061f33144 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -4,6 +4,8 @@ #include +extern const char *pptp_msg_name[]; + /* state of the control session */ enum pptp_ctrlsess_state { PPTP_SESSION_NONE, /* no session present */ diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 99934ab538e6..040dae5f0c9e 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -120,11 +120,11 @@ struct nf_conntrack_tuple_mask #ifdef __KERNEL__ -#define NF_CT_DUMP_TUPLE(tp) \ -DEBUGP("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", \ - (tp), (tp)->src.l3num, (tp)->dst.protonum, \ - NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \ - NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all)) +#define NF_CT_DUMP_TUPLE(tp) \ +pr_debug("tuple %p: %u %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", \ + (tp), (tp)->src.l3num, (tp)->dst.protonum, \ + NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \ + NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all)) /* If we're the first tuple, it's the original dir. */ #define NF_CT_DIRECTION(h) \ diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1981acedbfe8..8bacda3f6f6c 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -30,14 +30,6 @@ #define CLUSTERIP_VERSION "0.8" -#define DEBUG_CLUSTERIP - -#ifdef DEBUG_CLUSTERIP -#define DEBUGP printk -#else -#define DEBUGP -#endif - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables target for CLUSTERIP"); @@ -351,15 +343,15 @@ target(struct sk_buff **pskb, break; } -#ifdef DEBUG_CLUSTERP +#ifdef DEBUG DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); + pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); if (!clusterip_responsible(cipinfo->config, hash)) { - DEBUGP("not responsible\n"); + pr_debug("not responsible\n"); return NF_DROP; } - DEBUGP("responsible\n"); + pr_debug("responsible\n"); /* despite being received via linklayer multicast, this is * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ @@ -490,7 +482,7 @@ struct arp_payload { __be32 dst_ip; } __attribute__ ((packed)); -#ifdef CLUSTERIP_DEBUG +#ifdef DEBUG static void arp_print(struct arp_payload *payload) { #define HBUFFERLEN 30 @@ -546,8 +538,9 @@ arp_mangle(unsigned int hook, * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ if (c->dev != out) { - DEBUGP("CLUSTERIP: not mangling arp reply on different " - "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name); + pr_debug("CLUSTERIP: not mangling arp reply on different " + "interface: cip'%s'-skb'%s'\n", + c->dev->name, out->name); clusterip_config_put(c); return NF_ACCEPT; } @@ -555,8 +548,8 @@ arp_mangle(unsigned int hook, /* mangle reply hardware address */ memcpy(payload->src_hw, c->clustermac, arp->ar_hln); -#ifdef CLUSTERIP_DEBUG - DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: "); +#ifdef DEBUG + pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: "); arp_print(payload); #endif diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 9bfce614ec28..5937ad150b9f 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -27,12 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables syslog logging module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Use lock to serialize, so printks don't overlap */ static DEFINE_SPINLOCK(log_lock); @@ -452,12 +446,12 @@ static bool ipt_log_checkentry(const char *tablename, const struct ipt_log_info *loginfo = targinfo; if (loginfo->level >= 8) { - DEBUGP("LOG: level %u >= 8\n", loginfo->level); + pr_debug("LOG: level %u >= 8\n", loginfo->level); return false; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - DEBUGP("LOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix)-1]); + pr_debug("LOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix)-1]); return false; } return true; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index bc033e0f424d..7c4e4be7c8b3 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -27,12 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables MASQUERADE target module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Lock protects masq region inside conntrack */ static DEFINE_RWLOCK(masq_lock); @@ -47,11 +41,11 @@ masquerade_check(const char *tablename, const struct nf_nat_multi_range_compat *mr = targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { - DEBUGP("masquerade_check: bad MAP_IPS.\n"); + pr_debug("masquerade_check: bad MAP_IPS.\n"); return false; } if (mr->rangesize != 1) { - DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize); + pr_debug("masquerade_check: bad rangesize %u\n", mr->rangesize); return false; } return true; diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 0a7ce15bbdd0..41a011d5a065 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -18,17 +18,10 @@ #include #include -#define MODULENAME "NETMAP" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen "); MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static bool check(const char *tablename, const void *e, @@ -39,11 +32,11 @@ check(const char *tablename, const struct nf_nat_multi_range_compat *mr = targinfo; if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { - DEBUGP(MODULENAME":check: bad MAP_IPS.\n"); + pr_debug("NETMAP:check: bad MAP_IPS.\n"); return false; } if (mr->rangesize != 1) { - DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize); + pr_debug("NETMAP:check: bad rangesize %u.\n", mr->rangesize); return false; } return true; @@ -86,7 +79,7 @@ target(struct sk_buff **pskb, } static struct xt_target target_module __read_mostly = { - .name = MODULENAME, + .name = "NETMAP", .family = AF_INET, .target = target, .targetsize = sizeof(struct nf_nat_multi_range_compat), diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 61e1e4772e37..6ac7a2373316 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -25,12 +25,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables REDIRECT target module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* FIXME: Take multiple ranges --RR */ static bool redirect_check(const char *tablename, @@ -42,11 +36,11 @@ redirect_check(const char *tablename, const struct nf_nat_multi_range_compat *mr = targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { - DEBUGP("redirect_check: bad MAP_IPS.\n"); + pr_debug("redirect_check: bad MAP_IPS.\n"); return false; } if (mr->rangesize != 1) { - DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize); + pr_debug("redirect_check: bad rangesize %u.\n", mr->rangesize); return false; } return true; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index dd5432c3f365..cb038c8fbc9d 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -31,12 +31,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables REJECT target module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Send RST reply */ static void send_reset(struct sk_buff *oldskb, int hook) { @@ -227,13 +221,13 @@ static bool check(const char *tablename, const struct ipt_entry *e = e_void; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { - printk("REJECT: ECHOREPLY no longer supported.\n"); + printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); return false; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { - DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n"); + printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n"); return false; } } diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 3a0d7dac0af8..97641f1a97f6 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -27,12 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Josefsson "); MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static bool same_check(const char *tablename, const void *e, @@ -46,54 +40,52 @@ same_check(const char *tablename, mr->ipnum = 0; if (mr->rangesize < 1) { - DEBUGP("same_check: need at least one dest range.\n"); + pr_debug("same_check: need at least one dest range.\n"); return false; } if (mr->rangesize > IPT_SAME_MAX_RANGE) { - DEBUGP("same_check: too many ranges specified, maximum " - "is %u ranges\n", - IPT_SAME_MAX_RANGE); + pr_debug("same_check: too many ranges specified, maximum " + "is %u ranges\n", IPT_SAME_MAX_RANGE); return false; } for (count = 0; count < mr->rangesize; count++) { if (ntohl(mr->range[count].min_ip) > ntohl(mr->range[count].max_ip)) { - DEBUGP("same_check: min_ip is larger than max_ip in " - "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n", - NIPQUAD(mr->range[count].min_ip), - NIPQUAD(mr->range[count].max_ip)); + pr_debug("same_check: min_ip is larger than max_ip in " + "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n", + NIPQUAD(mr->range[count].min_ip), + NIPQUAD(mr->range[count].max_ip)); return false; } if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) { - DEBUGP("same_check: bad MAP_IPS.\n"); + pr_debug("same_check: bad MAP_IPS.\n"); return false; } rangeip = (ntohl(mr->range[count].max_ip) - ntohl(mr->range[count].min_ip) + 1); mr->ipnum += rangeip; - DEBUGP("same_check: range %u, ipnum = %u\n", count, rangeip); + pr_debug("same_check: range %u, ipnum = %u\n", count, rangeip); } - DEBUGP("same_check: total ipaddresses = %u\n", mr->ipnum); + pr_debug("same_check: total ipaddresses = %u\n", mr->ipnum); mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL); if (!mr->iparray) { - DEBUGP("same_check: Couldn't allocate %u bytes " - "for %u ipaddresses!\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); + pr_debug("same_check: Couldn't allocate %Zu bytes " + "for %u ipaddresses!\n", + (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); return false; } - DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); + pr_debug("same_check: Allocated %Zu bytes for %u ipaddresses.\n", + (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); for (count = 0; count < mr->rangesize; count++) { for (countess = ntohl(mr->range[count].min_ip); countess <= ntohl(mr->range[count].max_ip); countess++) { mr->iparray[index] = countess; - DEBUGP("same_check: Added ipaddress `%u.%u.%u.%u' " - "in index %u.\n", - HIPQUAD(countess), index); + pr_debug("same_check: Added ipaddress `%u.%u.%u.%u' " + "in index %u.\n", HIPQUAD(countess), index); index++; } } @@ -107,8 +99,8 @@ same_destroy(const struct xt_target *target, void *targinfo) kfree(mr->iparray); - DEBUGP("same_destroy: Deallocated %u bytes for %u ipaddresses.\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); + pr_debug("same_destroy: Deallocated %Zu bytes for %u ipaddresses.\n", + (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); } static unsigned int @@ -146,10 +138,9 @@ same_target(struct sk_buff **pskb, new_ip = htonl(same->iparray[aindex]); - DEBUGP("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, " - "new src=%u.%u.%u.%u\n", - NIPQUAD(t->src.ip), NIPQUAD(t->dst.ip), - NIPQUAD(new_ip)); + pr_debug("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, " + "new src=%u.%u.%u.%u\n", + NIPQUAD(t->src.u3.ip), NIPQUAD(t->dst.u3.ip), NIPQUAD(new_ip)); /* Transfer from original range. */ newrange = ((struct nf_nat_range) diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 226750d1f89b..6ca43e4ca7e3 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -55,13 +55,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ -#if 0 -#define DEBUGP(format, args...) printk("%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) static unsigned int nlbufsiz = NLMSG_GOODSIZE; @@ -96,12 +89,12 @@ static void ulog_send(unsigned int nlgroupnum) ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; if (timer_pending(&ub->timer)) { - DEBUGP("ipt_ULOG: ulog_send: timer was pending, deleting\n"); + pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n"); del_timer(&ub->timer); } if (!ub->skb) { - DEBUGP("ipt_ULOG: ulog_send: nothing to send\n"); + pr_debug("ipt_ULOG: ulog_send: nothing to send\n"); return; } @@ -110,8 +103,8 @@ static void ulog_send(unsigned int nlgroupnum) ub->lastnlh->nlmsg_type = NLMSG_DONE; NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; - DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n", - ub->qlen, nlgroupnum + 1); + pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n", + ub->qlen, nlgroupnum + 1); netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); ub->qlen = 0; @@ -123,7 +116,7 @@ static void ulog_send(unsigned int nlgroupnum) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { - DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n"); + pr_debug("ipt_ULOG: timer function called, calling ulog_send\n"); /* lock to protect against somebody modifying our structure * from ipt_ulog_target at the same time */ @@ -204,8 +197,8 @@ static void ipt_ulog_packet(unsigned int hooknum, goto alloc_failure; } - DEBUGP("ipt_ULOG: qlen %d, qthreshold %d\n", ub->qlen, - loginfo->qthreshold); + pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen, + loginfo->qthreshold); /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */ nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, @@ -334,13 +327,13 @@ static bool ipt_ulog_checkentry(const char *tablename, const struct ipt_ulog_info *loginfo = targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { - DEBUGP("ipt_ULOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix) - 1]); + pr_debug("ipt_ULOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix) - 1]); return false; } if (loginfo->qthreshold > ULOG_MAX_QLEN) { - DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n", - loginfo->qthreshold); + pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n", + loginfo->qthreshold); return false; } return true; @@ -405,7 +398,7 @@ static int __init ipt_ulog_init(void) { int ret, i; - DEBUGP("ipt_ULOG: init module\n"); + pr_debug("ipt_ULOG: init module\n"); if (nlbufsiz > 128*1024) { printk("Netlink buffer has to be <= 128kB\n"); @@ -437,7 +430,7 @@ static void __exit ipt_ulog_fini(void) ulog_buff_t *ub; int i; - DEBUGP("ipt_ULOG: cleanup_module\n"); + pr_debug("ipt_ULOG: cleanup_module\n"); if (nflog) nf_log_unregister(&ipt_ulog_logger); @@ -448,7 +441,7 @@ static void __exit ipt_ulog_fini(void) for (i = 0; i < ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; if (timer_pending(&ub->timer)) { - DEBUGP("timer was pending, deleting\n"); + pr_debug("timer was pending, deleting\n"); del_timer(&ub->timer); } diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index 6a3a033a6808..0106dc955a69 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c @@ -17,12 +17,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); MODULE_DESCRIPTION("iptables arbitrary IP range match module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static bool match(const struct sk_buff *skb, const struct net_device *in, @@ -38,12 +32,12 @@ match(const struct sk_buff *skb, if ((ntohl(iph->saddr) < ntohl(info->src.min_ip) || ntohl(iph->saddr) > ntohl(info->src.max_ip)) ^ !!(info->flags & IPRANGE_SRC_INV)) { - DEBUGP("src IP %u.%u.%u.%u NOT in range %s" - "%u.%u.%u.%u-%u.%u.%u.%u\n", - NIPQUAD(iph->saddr), - info->flags & IPRANGE_SRC_INV ? "(INV) " : "", - NIPQUAD(info->src.min_ip), - NIPQUAD(info->src.max_ip)); + pr_debug("src IP %u.%u.%u.%u NOT in range %s" + "%u.%u.%u.%u-%u.%u.%u.%u\n", + NIPQUAD(iph->saddr), + info->flags & IPRANGE_SRC_INV ? "(INV) " : "", + NIPQUAD(info->src.min_ip), + NIPQUAD(info->src.max_ip)); return false; } } @@ -51,12 +45,12 @@ match(const struct sk_buff *skb, if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip) || ntohl(iph->daddr) > ntohl(info->dst.max_ip)) ^ !!(info->flags & IPRANGE_DST_INV)) { - DEBUGP("dst IP %u.%u.%u.%u NOT in range %s" - "%u.%u.%u.%u-%u.%u.%u.%u\n", - NIPQUAD(iph->daddr), - info->flags & IPRANGE_DST_INV ? "(INV) " : "", - NIPQUAD(info->dst.min_ip), - NIPQUAD(info->dst.max_ip)); + pr_debug("dst IP %u.%u.%u.%u NOT in range %s" + "%u.%u.%u.%u-%u.%u.%u.%u\n", + NIPQUAD(iph->daddr), + info->flags & IPRANGE_DST_INV ? "(INV) " : "", + NIPQUAD(info->dst.min_ip), + NIPQUAD(info->dst.max_ip)); return false; } } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index a103f597d446..3c5629938487 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -24,12 +24,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -324,13 +318,13 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) /* We only do TCP at the moment: is there a better way? */ if (strcmp(sk->sk_prot->name, "TCP")) { - DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); + pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n"); return -ENOPROTOOPT; } if ((unsigned int) *len < sizeof(struct sockaddr_in)) { - DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", - *len, sizeof(struct sockaddr_in)); + pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n", + *len, sizeof(struct sockaddr_in)); return -EINVAL; } @@ -346,17 +340,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) .tuple.dst.u3.ip; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); - DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", - NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); nf_ct_put(ct); if (copy_to_user(user, &sin, sizeof(sin)) != 0) return -EFAULT; else return 0; } - DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", - NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), - NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); + pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", + NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), + NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); return -ENOENT; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index ab8e4c607b7a..434e08410879 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -18,12 +18,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - #ifdef CONFIG_NF_CT_ACCT static unsigned int seq_print_counters(struct seq_file *s, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 91fb277045ef..0fe8fb0466ef 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -21,12 +21,6 @@ static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -125,8 +119,8 @@ static int icmp_new(struct nf_conn *conntrack, if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ - DEBUGP("icmp: can't create new conn with type %u\n", - conntrack->tuplehash[0].tuple.dst.u.icmp.type); + pr_debug("icmp: can't create new conn with type %u\n", + conntrack->tuplehash[0].tuple.dst.u.icmp.type); NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); return 0; } @@ -159,8 +153,8 @@ icmp_error_message(struct sk_buff *skb, /* Ignore ICMP's containing fragments (shouldn't happen) */ if (inside->ip.frag_off & htons(IP_OFFSET)) { - DEBUGP("icmp_error_message: fragment of proto %u\n", - inside->ip.protocol); + pr_debug("icmp_error_message: fragment of proto %u\n", + inside->ip.protocol); return -NF_ACCEPT; } @@ -172,8 +166,8 @@ icmp_error_message(struct sk_buff *skb, if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, inside->ip.protocol, &origtuple, &nf_conntrack_l3proto_ipv4, innerproto)) { - DEBUGP("icmp_error_message: ! get_tuple p=%u", - inside->ip.protocol); + pr_debug("icmp_error_message: ! get_tuple p=%u", + inside->ip.protocol); return -NF_ACCEPT; } @@ -181,7 +175,7 @@ icmp_error_message(struct sk_buff *skb, been preserved inside the ICMP. */ if (!nf_ct_invert_tuple(&innertuple, &origtuple, &nf_conntrack_l3proto_ipv4, innerproto)) { - DEBUGP("icmp_error_message: no match\n"); + pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; } @@ -196,7 +190,7 @@ icmp_error_message(struct sk_buff *skb, h = nf_conntrack_find_get(&origtuple); if (!h) { - DEBUGP("icmp_error_message: no match\n"); + pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index f242ac61b3eb..e848d8d6292f 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -31,12 +31,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static DEFINE_RWLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto = NULL; @@ -242,7 +236,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC) { if (find_appropriate_src(orig_tuple, tuple, range)) { - DEBUGP("get_unique_tuple: Found current src map\n"); + pr_debug("get_unique_tuple: Found current src map\n"); if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) if (!nf_nat_used_tuple(tuple, ct)) return; @@ -293,7 +287,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (!nat) { nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); if (nat == NULL) { - DEBUGP("failed to add NAT extension\n"); + pr_debug("failed to add NAT extension\n"); return NF_ACCEPT; } } @@ -462,8 +456,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, return 0; } - DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", - *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); + pr_debug("icmp_reply_translation: translating error %p manip %u " + "dir %s\n", *pskb, manip, + dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index cae4b460aee1..3663bd879c39 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -25,12 +25,6 @@ MODULE_AUTHOR("Rusty Russell "); MODULE_DESCRIPTION("ftp NAT helper"); MODULE_ALIAS("ip_nat_ftp"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* FIXME: Time out? --RR */ static int @@ -47,7 +41,7 @@ mangle_rfc959_packet(struct sk_buff **pskb, sprintf(buffer, "%u,%u,%u,%u,%u,%u", NIPQUAD(newip), port>>8, port&0xFF); - DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + pr_debug("calling nf_nat_mangle_tcp_packet\n"); return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); @@ -67,7 +61,7 @@ mangle_eprt_packet(struct sk_buff **pskb, sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); - DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + pr_debug("calling nf_nat_mangle_tcp_packet\n"); return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); @@ -87,7 +81,7 @@ mangle_epsv_packet(struct sk_buff **pskb, sprintf(buffer, "|||%u|", port); - DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + pr_debug("calling nf_nat_mangle_tcp_packet\n"); return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); @@ -117,7 +111,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, int dir = CTINFO2DIR(ctinfo); struct nf_conn *ct = exp->master; - DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); + pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); /* Connection will come from wherever this packet goes, hence !dir */ newip = ct->tuplehash[!dir].tuple.dst.u3.ip; diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 3d760dd657c7..c1b059a73708 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -21,12 +21,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /****************************************************************************/ static int set_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, @@ -126,12 +120,11 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, (ntohl(addr.ip) & 0xff000000) == 0x7f000000) i = 0; - DEBUGP - ("nf_nat_ras: set signal address " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(ip), port, - NIPQUAD(ct->tuplehash[!dir].tuple.dst. - ip), info->sig_port[!dir]); + pr_debug("nf_nat_ras: set signal address " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(addr.ip), port, + NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + info->sig_port[!dir]); return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.dst.u3, @@ -139,12 +132,11 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && port == info->sig_port[dir]) { /* GK->GW */ - DEBUGP - ("nf_nat_ras: set signal address " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(ip), port, - NIPQUAD(ct->tuplehash[!dir].tuple.src. - ip), info->sig_port[!dir]); + pr_debug("nf_nat_ras: set signal address " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(addr.ip), port, + NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), + info->sig_port[!dir]); return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.src.u3, @@ -171,12 +163,11 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && port == ct->tuplehash[dir].tuple.src.u.udp.port) { - DEBUGP("nf_nat_ras: set rasAddress " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(ip), ntohs(port), - NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), - ntohs(ct->tuplehash[!dir].tuple.dst.u.udp. - port)); + pr_debug("nf_nat_ras: set rasAddress " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(addr.ip), ntohs(port), + NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple. @@ -267,16 +258,16 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /* Success */ - DEBUGP("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(rtp_exp->tuple.src.ip), - ntohs(rtp_exp->tuple.src.u.udp.port), - NIPQUAD(rtp_exp->tuple.dst.ip), - ntohs(rtp_exp->tuple.dst.u.udp.port)); - DEBUGP("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(rtcp_exp->tuple.src.ip), - ntohs(rtcp_exp->tuple.src.u.udp.port), - NIPQUAD(rtcp_exp->tuple.dst.ip), - ntohs(rtcp_exp->tuple.dst.u.udp.port)); + pr_debug("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(rtp_exp->tuple.src.u3.ip), + ntohs(rtp_exp->tuple.src.u.udp.port), + NIPQUAD(rtp_exp->tuple.dst.u3.ip), + ntohs(rtp_exp->tuple.dst.u.udp.port)); + pr_debug("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(rtcp_exp->tuple.src.u3.ip), + ntohs(rtcp_exp->tuple.src.u.udp.port), + NIPQUAD(rtcp_exp->tuple.dst.u3.ip), + ntohs(rtcp_exp->tuple.dst.u.udp.port)); return 0; } @@ -317,9 +308,11 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, return -1; } - DEBUGP("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + pr_debug("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.tcp.port)); return 0; } @@ -369,9 +362,11 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, return -1; } - DEBUGP("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + pr_debug("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.tcp.port)); return 0; } @@ -465,9 +460,11 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, } /* Success */ - DEBUGP("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + pr_debug("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.tcp.port)); return 0; } @@ -536,10 +533,12 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, } /* Success */ - DEBUGP("nf_nat_q931: expect Call Forwarding " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + pr_debug("nf_nat_q931: expect Call Forwarding " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.tcp.port)); return 0; } @@ -566,8 +565,6 @@ static int __init init(void) rcu_assign_pointer(nat_h245_hook, nat_h245); rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); rcu_assign_pointer(nat_q931_hook, nat_q931); - - DEBUGP("nf_nat_h323: init success\n"); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index f3383fc14e1c..93d8a0a8f035 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -26,13 +26,9 @@ #include #include -#if 0 -#define DEBUGP printk -#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos); -#else -#define DEBUGP(format, args...) -#define DUMP_OFFSET(x) -#endif +#define DUMP_OFFSET(x) \ + pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ + x->offset_before, x->offset_after, x->correction_pos); static DEFINE_SPINLOCK(nf_nat_seqofs_lock); @@ -47,15 +43,15 @@ adjust_tcp_sequence(u32 seq, struct nf_nat_seq *this_way, *other_way; struct nf_conn_nat *nat = nfct_nat(ct); - DEBUGP("nf_nat_resize_packet: old_size = %u, new_size = %u\n", - (*skb)->len, new_size); + pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", + ntohl(seq), seq); dir = CTINFO2DIR(ctinfo); this_way = &nat->seq[dir]; other_way = &nat->seq[!dir]; - DEBUGP("nf_nat_resize_packet: Seq_offset before: "); + pr_debug("nf_nat_resize_packet: Seq_offset before: "); DUMP_OFFSET(this_way); spin_lock_bh(&nf_nat_seqofs_lock); @@ -72,7 +68,7 @@ adjust_tcp_sequence(u32 seq, } spin_unlock_bh(&nf_nat_seqofs_lock); - DEBUGP("nf_nat_resize_packet: Seq_offset after: "); + pr_debug("nf_nat_resize_packet: Seq_offset after: "); DUMP_OFFSET(this_way); } @@ -100,14 +96,12 @@ static void mangle_contents(struct sk_buff *skb, /* update skb info */ if (rep_len > match_len) { - DEBUGP("nf_nat_mangle_packet: Extending packet by " - "%u from %u bytes\n", rep_len - match_len, - skb->len); + pr_debug("nf_nat_mangle_packet: Extending packet by " + "%u from %u bytes\n", rep_len - match_len, skb->len); skb_put(skb, rep_len - match_len); } else { - DEBUGP("nf_nat_mangle_packet: Shrinking packet from " - "%u from %u bytes\n", match_len - rep_len, - skb->len); + pr_debug("nf_nat_mangle_packet: Shrinking packet from " + "%u from %u bytes\n", match_len - rep_len, skb->len); __skb_trim(skb, skb->len + rep_len - match_len); } @@ -320,9 +314,9 @@ sack_adjust(struct sk_buff *skb, new_end_seq = htonl(ntohl(sack->end_seq) - natseq->offset_before); - DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); + pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", + ntohl(sack->start_seq), new_start_seq, + ntohl(sack->end_seq), new_end_seq); nf_proto_csum_replace4(&tcph->check, skb, sack->start_seq, new_start_seq, 0); @@ -414,9 +408,9 @@ nf_nat_seq_adjust(struct sk_buff **pskb, nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); - DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", - ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), - ntohl(newack)); + pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), + ntohl(newack)); tcph->seq = newseq; tcph->ack_seq = newack; diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index db7fbf66fec0..bcf274bba602 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -22,12 +22,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); @@ -44,9 +38,6 @@ static unsigned int help(struct sk_buff **pskb, u_int16_t port; unsigned int ret; - DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n", - expect->seq, exp_irc_info->len, ntohl(tcph->seq)); - /* Reply comes from server. */ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; exp->dir = IP_CT_DIR_REPLY; @@ -64,8 +55,8 @@ static unsigned int help(struct sk_buff **pskb, ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); sprintf(buffer, "%u %u", ip, port); - DEBUGP("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", - buffer, NIPQUAD(ip), port); + pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", + buffer, NIPQUAD(ip), port); ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, matchoff, matchlen, buffer, diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index deb80ae2831e..984ec8308b2e 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -37,14 +37,6 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); MODULE_ALIAS("ip_nat_pptp"); -#if 0 -extern const char *pptp_msg_name[]; -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ - __FUNCTION__, ## args) -#else -#define DEBUGP(format, args...) -#endif - static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { @@ -60,7 +52,7 @@ static void pptp_nat_expected(struct nf_conn *ct, /* And here goes the grand finale of corrosion... */ if (exp->dir == IP_CT_DIR_ORIGINAL) { - DEBUGP("we are PNS->PAC\n"); + pr_debug("we are PNS->PAC\n"); /* therefore, build tuple for PAC->PNS */ t.src.l3num = AF_INET; t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; @@ -69,7 +61,7 @@ static void pptp_nat_expected(struct nf_conn *ct, t.dst.u.gre.key = ct_pptp_info->pns_call_id; t.dst.protonum = IPPROTO_GRE; } else { - DEBUGP("we are PAC->PNS\n"); + pr_debug("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.l3num = AF_INET; t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; @@ -79,15 +71,15 @@ static void pptp_nat_expected(struct nf_conn *ct, t.dst.protonum = IPPROTO_GRE; } - DEBUGP("trying to unexpect other dir: "); + pr_debug("trying to unexpect other dir: "); NF_CT_DUMP_TUPLE(&t); other_exp = nf_ct_expect_find_get(&t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); - DEBUGP("success\n"); + pr_debug("success\n"); } else { - DEBUGP("not found!\n"); + pr_debug("not found!\n"); } /* This must be a fresh one. */ @@ -161,9 +153,9 @@ pptp_outbound_pkt(struct sk_buff **pskb, cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); break; default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); + pr_debug("unknown outbound packet 0x%04x:%s\n", msg, + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : + pptp_msg_name[0]); /* fall through */ case PPTP_SET_LINK_INFO: /* only need to NAT in case PAC is behind NAT box */ @@ -179,8 +171,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass * down to here */ - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); + pr_debug("altering call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, @@ -255,8 +247,9 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); break; default: - DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); + pr_debug("unknown inbound packet %s\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : + pptp_msg_name[0]); /* fall through */ case PPTP_START_SESSION_REQUEST: case PPTP_START_SESSION_REPLY: @@ -272,8 +265,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ /* mangle packet */ - DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", - ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); + pr_debug("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, pcid_off + sizeof(struct pptp_pkt_hdr) + diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index c3908bc5a709..2e40cc83526a 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -36,13 +36,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ - __FUNCTION__, ## args) -#else -#define DEBUGP(x, args...) -#endif - /* is key in given range between min and max */ static int gre_in_range(const struct nf_conntrack_tuple *tuple, @@ -83,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, keyptr = &tuple->dst.u.gre.key; if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { - DEBUGP("%p: NATing GRE PPTP\n", conntrack); + pr_debug("%p: NATing GRE PPTP\n", conntrack); min = 1; range_size = 0xffff; } else { @@ -91,7 +84,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.gre.key) - min + 1; } - DEBUGP("min = %u, range_size = %u\n", min, range_size); + pr_debug("min = %u, range_size = %u\n", min, range_size); for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); @@ -99,7 +92,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, return 1; } - DEBUGP("%p: no NAT mapping\n", conntrack); + pr_debug("%p: no NAT mapping\n", conntrack); return 0; } @@ -132,11 +125,11 @@ gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, * Try to behave like "nf_nat_proto_unknown" */ break; case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); + pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; default: - DEBUGP("can't nat unknown GRE version\n"); + pr_debug("can't nat unknown GRE version\n"); return 0; } return 1; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 080393a143d7..0f45427e5fdc 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -24,12 +24,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - #define NAT_VALID_HOOKS ((1<"); MODULE_DESCRIPTION("SIP NAT helper"); MODULE_ALIAS("ip_nat_sip"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - struct addr_map { struct { char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; @@ -257,8 +251,6 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, __be32 newip; u_int16_t port; - DEBUGP("ip_nat_sdp():\n"); - /* Connection will come from reply */ if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 30eeaa4c645c..332814dac503 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -27,12 +27,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - #ifdef CONFIG_XFRM static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) { @@ -117,7 +111,7 @@ nf_nat_fn(unsigned int hooknum, if (!nat) { nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); if (nat == NULL) { - DEBUGP("failed to add NAT extension\n"); + pr_debug("failed to add NAT extension\n"); return NF_ACCEPT; } } @@ -154,9 +148,9 @@ nf_nat_fn(unsigned int hooknum, return ret; } } else - DEBUGP("Already setup manip %s for ct %p\n", - maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", - ct); + pr_debug("Already setup manip %s for ct %p\n", + maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", + ct); break; default: @@ -270,7 +264,7 @@ nf_nat_adjust(unsigned int hooknum, ct = nf_ct_get(*pskb, &ctinfo); if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { - DEBUGP("nf_nat_standalone: adjusting sequence number\n"); + pr_debug("nf_nat_standalone: adjusting sequence number\n"); if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) return NF_DROP; } diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 540bf14b851c..b05327ebd332 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -32,12 +32,6 @@ struct in_device; #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Use lock to serialize, so printks don't overlap */ static DEFINE_SPINLOCK(log_lock); @@ -466,12 +460,12 @@ static bool ip6t_log_checkentry(const char *tablename, const struct ip6t_log_info *loginfo = targinfo; if (loginfo->level >= 8) { - DEBUGP("LOG: level %u >= 8\n", loginfo->level); + pr_debug("LOG: level %u >= 8\n", loginfo->level); return false; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - DEBUGP("LOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix)-1]); + pr_debug("LOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix)-1]); return false; } return true; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 14008dc6a197..2f487cda3b6b 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -34,12 +34,6 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI "); MODULE_DESCRIPTION("IP6 tables REJECT target module"); MODULE_LICENSE("GPL"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Send RST reply */ static void send_reset(struct sk_buff *oldskb) { @@ -54,7 +48,7 @@ static void send_reset(struct sk_buff *oldskb) if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - DEBUGP("ip6t_REJECT: addr is not unicast.\n"); + pr_debug("ip6t_REJECT: addr is not unicast.\n"); return; } @@ -62,7 +56,7 @@ static void send_reset(struct sk_buff *oldskb) tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { - DEBUGP("ip6t_REJECT: Can't get TCP header.\n"); + pr_debug("ip6t_REJECT: Can't get TCP header.\n"); return; } @@ -70,8 +64,9 @@ static void send_reset(struct sk_buff *oldskb) /* IP header checks: fragment, too short. */ if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n", - proto, otcplen); + pr_debug("ip6t_REJECT: proto(%d) != IPPROTO_TCP, " + "or too short. otcplen = %d\n", + proto, otcplen); return; } @@ -80,14 +75,14 @@ static void send_reset(struct sk_buff *oldskb) /* No RST for RST. */ if (otcph.rst) { - DEBUGP("ip6t_REJECT: RST is set\n"); + pr_debug("ip6t_REJECT: RST is set\n"); return; } /* Check checksum. */ if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, skb_checksum(oldskb, tcphoff, otcplen, 0))) { - DEBUGP("ip6t_REJECT: TCP checksum is invalid\n"); + pr_debug("ip6t_REJECT: TCP checksum is invalid\n"); return; } @@ -186,7 +181,7 @@ static unsigned int reject6_target(struct sk_buff **pskb, { const struct ip6t_reject_info *reject = targinfo; - DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__); + pr_debug("%s: medium point\n", __FUNCTION__); /* WARNING: This code causes reentry within ip6tables. This means that the ip6tables jump stack is now crap. We must return an absolute verdict. --RR */ @@ -237,7 +232,7 @@ static bool check(const char *tablename, /* Must specify that it's a TCP packet */ if (e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { - DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); + printk("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); return false; } } diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index a9fe2aa97072..2a25fe25e0e0 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -23,21 +23,16 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 AH match"); MODULE_AUTHOR("Andras Kis-Szabo "); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; - DEBUGP("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', - min,spi,max); + + pr_debug("ah spi_match:%c 0x%x <= 0x%x <= 0x%x", + invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; - DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } @@ -73,22 +68,22 @@ match(const struct sk_buff *skb, hdrlen = (ah->hdrlen + 2) << 2; - DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); - DEBUGP("RES %04X ", ah->reserved); - DEBUGP("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi)); - - DEBUGP("IPv6 AH spi %02X ", - spi_match(ahinfo->spis[0], ahinfo->spis[1], - ntohl(ah->spi), - !!(ahinfo->invflags & IP6T_AH_INV_SPI))); - DEBUGP("len %02X %04X %02X ", - ahinfo->hdrlen, hdrlen, - (!ahinfo->hdrlen || - (ahinfo->hdrlen == hdrlen) ^ - !!(ahinfo->invflags & IP6T_AH_INV_LEN))); - DEBUGP("res %02X %04X %02X\n", - ahinfo->hdrres, ah->reserved, - !(ahinfo->hdrres && ah->reserved)); + pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); + pr_debug("RES %04X ", ah->reserved); + pr_debug("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi)); + + pr_debug("IPv6 AH spi %02X ", + spi_match(ahinfo->spis[0], ahinfo->spis[1], + ntohl(ah->spi), + !!(ahinfo->invflags & IP6T_AH_INV_SPI))); + pr_debug("len %02X %04X %02X ", + ahinfo->hdrlen, hdrlen, + (!ahinfo->hdrlen || + (ahinfo->hdrlen == hdrlen) ^ + !!(ahinfo->invflags & IP6T_AH_INV_LEN))); + pr_debug("res %02X %04X %02X\n", + ahinfo->hdrres, ah->reserved, + !(ahinfo->hdrres && ah->reserved)); return (ah != NULL) && @@ -114,7 +109,7 @@ checkentry(const char *tablename, const struct ip6t_ah *ahinfo = matchinfo; if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { - DEBUGP("ip6t_ah: unknown flags %X\n", ahinfo->invflags); + pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags); return false; } return true; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index bb1cfa82b47c..968aeba02073 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -22,21 +22,15 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 FRAG match"); MODULE_AUTHOR("Andras Kis-Szabo "); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Returns 1 if the id is matched by the range, 0 otherwise */ static inline bool id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { bool r; - DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', - min, id, max); + pr_debug("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', + min, id, max); r = (id >= min && id <= max) ^ invert; - DEBUGP(" result %s\n", r ? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } @@ -69,37 +63,37 @@ match(const struct sk_buff *skb, return false; } - DEBUGP("INFO %04X ", fh->frag_off); - DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); - DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6); - DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF)); - DEBUGP("ID %u %08X\n", ntohl(fh->identification), - ntohl(fh->identification)); - - DEBUGP("IPv6 FRAG id %02X ", - id_match(fraginfo->ids[0], fraginfo->ids[1], - ntohl(fh->identification), - !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))); - DEBUGP("res %02X %02X%04X %02X ", - fraginfo->flags & IP6T_FRAG_RES, fh->reserved, - ntohs(fh->frag_off) & 0x6, - !((fraginfo->flags & IP6T_FRAG_RES) - && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); - DEBUGP("first %02X %02X %02X ", - fraginfo->flags & IP6T_FRAG_FST, - ntohs(fh->frag_off) & ~0x7, - !((fraginfo->flags & IP6T_FRAG_FST) - && (ntohs(fh->frag_off) & ~0x7))); - DEBUGP("mf %02X %02X %02X ", - fraginfo->flags & IP6T_FRAG_MF, - ntohs(fh->frag_off) & IP6_MF, - !((fraginfo->flags & IP6T_FRAG_MF) - && !((ntohs(fh->frag_off) & IP6_MF)))); - DEBUGP("last %02X %02X %02X\n", - fraginfo->flags & IP6T_FRAG_NMF, - ntohs(fh->frag_off) & IP6_MF, - !((fraginfo->flags & IP6T_FRAG_NMF) - && (ntohs(fh->frag_off) & IP6_MF))); + pr_debug("INFO %04X ", fh->frag_off); + pr_debug("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); + pr_debug("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6); + pr_debug("MF %04X ", fh->frag_off & htons(IP6_MF)); + pr_debug("ID %u %08X\n", ntohl(fh->identification), + ntohl(fh->identification)); + + pr_debug("IPv6 FRAG id %02X ", + id_match(fraginfo->ids[0], fraginfo->ids[1], + ntohl(fh->identification), + !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))); + pr_debug("res %02X %02X%04X %02X ", + fraginfo->flags & IP6T_FRAG_RES, fh->reserved, + ntohs(fh->frag_off) & 0x6, + !((fraginfo->flags & IP6T_FRAG_RES) + && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); + pr_debug("first %02X %02X %02X ", + fraginfo->flags & IP6T_FRAG_FST, + ntohs(fh->frag_off) & ~0x7, + !((fraginfo->flags & IP6T_FRAG_FST) + && (ntohs(fh->frag_off) & ~0x7))); + pr_debug("mf %02X %02X %02X ", + fraginfo->flags & IP6T_FRAG_MF, + ntohs(fh->frag_off) & IP6_MF, + !((fraginfo->flags & IP6T_FRAG_MF) + && !((ntohs(fh->frag_off) & IP6_MF)))); + pr_debug("last %02X %02X %02X\n", + fraginfo->flags & IP6T_FRAG_NMF, + ntohs(fh->frag_off) & IP6_MF, + !((fraginfo->flags & IP6T_FRAG_NMF) + && (ntohs(fh->frag_off) & IP6_MF))); return (fh != NULL) && @@ -131,7 +125,7 @@ checkentry(const char *tablename, const struct ip6t_frag *fraginfo = matchinfo; if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { - DEBUGP("ip6t_frag: unknown flags %X\n", fraginfo->invflags); + pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags); return false; } return true; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 6247d4cdad99..e6ca6018b1ea 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -25,12 +25,6 @@ MODULE_DESCRIPTION("IPv6 opts match"); MODULE_AUTHOR("Andras Kis-Szabo "); MODULE_ALIAS("ip6t_dst"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* * (Type & 0xC0) >> 6 * 0 -> ignorable @@ -90,13 +84,13 @@ match(const struct sk_buff *skb, return false; } - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); + pr_debug("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); - DEBUGP("len %02X %04X %02X ", - optinfo->hdrlen, hdrlen, - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); + pr_debug("len %02X %04X %02X ", + optinfo->hdrlen, hdrlen, + (!(optinfo->flags & IP6T_OPTS_LEN) || + ((optinfo->hdrlen == hdrlen) ^ + !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); ret = (oh != NULL) && (!(optinfo->flags & IP6T_OPTS_LEN) || @@ -108,10 +102,10 @@ match(const struct sk_buff *skb, if (!(optinfo->flags & IP6T_OPTS_OPTS)) { return ret; } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { - DEBUGP("Not strict - not implemented"); + pr_debug("Not strict - not implemented"); } else { - DEBUGP("Strict "); - DEBUGP("#%d ", optinfo->optsnr); + pr_debug("Strict "); + pr_debug("#%d ", optinfo->optsnr); for (temp = 0; temp < optinfo->optsnr; temp++) { /* type field exists ? */ if (hdrlen < 1) @@ -123,12 +117,11 @@ match(const struct sk_buff *skb, /* Type check */ if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { - DEBUGP("Tbad %02X %02X\n", - *tp, - (optinfo->opts[temp] & 0xFF00) >> 8); + pr_debug("Tbad %02X %02X\n", *tp, + (optinfo->opts[temp] & 0xFF00) >> 8); return false; } else { - DEBUGP("Tok "); + pr_debug("Tok "); } /* Length check */ if (*tp) { @@ -145,23 +138,23 @@ match(const struct sk_buff *skb, spec_len = optinfo->opts[temp] & 0x00FF; if (spec_len != 0x00FF && spec_len != *lp) { - DEBUGP("Lbad %02X %04X\n", *lp, - spec_len); + pr_debug("Lbad %02X %04X\n", *lp, + spec_len); return false; } - DEBUGP("Lok "); + pr_debug("Lok "); optlen = *lp + 2; } else { - DEBUGP("Pad1\n"); + pr_debug("Pad1\n"); optlen = 1; } /* Step to the next */ - DEBUGP("len%04X \n", optlen); + pr_debug("len%04X \n", optlen); if ((ptr > skb->len - optlen || hdrlen < optlen) && temp < optinfo->optsnr - 1) { - DEBUGP("new pointer is too large! \n"); + pr_debug("new pointer is too large! \n"); break; } ptr += optlen; @@ -187,7 +180,7 @@ checkentry(const char *tablename, const struct ip6t_opts *optsinfo = matchinfo; if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { - DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); + pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags); return false; } return true; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 549deea26418..357cea703bd9 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -24,21 +24,15 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 RT match"); MODULE_AUTHOR("Andras Kis-Szabo "); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Returns 1 if the id is matched by the range, 0 otherwise */ static inline bool segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { bool r; - DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x", - invert ? '!' : ' ', min, id, max); + pr_debug("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x", + invert ? '!' : ' ', min, id, max); r = (id >= min && id <= max) ^ invert; - DEBUGP(" result %s\n", r ? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } @@ -82,29 +76,29 @@ match(const struct sk_buff *skb, return false; } - DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); - DEBUGP("TYPE %04X ", rh->type); - DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); - - DEBUGP("IPv6 RT segsleft %02X ", - segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS))); - DEBUGP("type %02X %02X %02X ", - rtinfo->rt_type, rh->type, - (!(rtinfo->flags & IP6T_RT_TYP) || - ((rtinfo->rt_type == rh->type) ^ - !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); - DEBUGP("len %02X %04X %02X ", - rtinfo->hdrlen, hdrlen, - !(rtinfo->flags & IP6T_RT_LEN) || - ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN))); - DEBUGP("res %02X %02X %02X ", - rtinfo->flags & IP6T_RT_RES, - ((const struct rt0_hdr *)rh)->reserved, - !((rtinfo->flags & IP6T_RT_RES) && - (((const struct rt0_hdr *)rh)->reserved))); + pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); + pr_debug("TYPE %04X ", rh->type); + pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); + + pr_debug("IPv6 RT segsleft %02X ", + segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], + rh->segments_left, + !!(rtinfo->invflags & IP6T_RT_INV_SGS))); + pr_debug("type %02X %02X %02X ", + rtinfo->rt_type, rh->type, + (!(rtinfo->flags & IP6T_RT_TYP) || + ((rtinfo->rt_type == rh->type) ^ + !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); + pr_debug("len %02X %04X %02X ", + rtinfo->hdrlen, hdrlen, + !(rtinfo->flags & IP6T_RT_LEN) || + ((rtinfo->hdrlen == hdrlen) ^ + !!(rtinfo->invflags & IP6T_RT_INV_LEN))); + pr_debug("res %02X %02X %02X ", + rtinfo->flags & IP6T_RT_RES, + ((const struct rt0_hdr *)rh)->reserved, + !((rtinfo->flags & IP6T_RT_RES) && + (((const struct rt0_hdr *)rh)->reserved))); ret = (rh != NULL) && @@ -131,18 +125,18 @@ match(const struct sk_buff *skb, ret = (*rp == 0); } - DEBUGP("#%d ", rtinfo->addrnr); + pr_debug("#%d ", rtinfo->addrnr); if (!(rtinfo->flags & IP6T_RT_FST)) { return ret; } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) { - DEBUGP("Not strict "); + pr_debug("Not strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - DEBUGP("There isn't enough space\n"); + pr_debug("There isn't enough space\n"); return false; } else { unsigned int i = 0; - DEBUGP("#%d ", rtinfo->addrnr); + pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < (unsigned int)((hdrlen - 8) / 16); temp++) { @@ -156,25 +150,25 @@ match(const struct sk_buff *skb, BUG_ON(ap == NULL); if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) { - DEBUGP("i=%d temp=%d;\n", i, temp); + pr_debug("i=%d temp=%d;\n", i, temp); i++; } if (i == rtinfo->addrnr) break; } - DEBUGP("i=%d #%d\n", i, rtinfo->addrnr); + pr_debug("i=%d #%d\n", i, rtinfo->addrnr); if (i == rtinfo->addrnr) return ret; else return false; } } else { - DEBUGP("Strict "); + pr_debug("Strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - DEBUGP("There isn't enough space\n"); + pr_debug("There isn't enough space\n"); return false; } else { - DEBUGP("#%d ", rtinfo->addrnr); + pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < rtinfo->addrnr; temp++) { ap = skb_header_pointer(skb, ptr @@ -187,7 +181,7 @@ match(const struct sk_buff *skb, if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp])) break; } - DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr); + pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr); if (temp == rtinfo->addrnr && temp == (unsigned int)((hdrlen - 8) / 16)) return ret; @@ -210,14 +204,14 @@ checkentry(const char *tablename, const struct ip6t_rt *rtinfo = matchinfo; if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { - DEBUGP("ip6t_rt: unknown flags %X\n", rtinfo->invflags); + pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags); return false; } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) || (rtinfo->invflags & IP6T_RT_INV_TYP))) { - DEBUGP("`--rt-type 0' required before `--rt-0-*'"); + pr_debug("`--rt-type 0' required before `--rt-0-*'"); return false; } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index f2d26495f413..f0a9efa67fb5 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -21,12 +21,6 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_IP6_LOCAL_OUT) | \ (1 << NF_IP6_POST_ROUTING)) -#if 0 -#define DEBUGP(x, args...) printk(KERN_DEBUG x, ## args) -#else -#define DEBUGP(x, args...) -#endif - static struct { struct ip6t_replace repl; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 0acda45d455d..ec290e4ebdd8 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -8,12 +8,6 @@ #define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT)) -#if 0 -#define DEBUGP(x, args...) printk(KERN_DEBUG x, ## args) -#else -#define DEBUGP(x, args...) -#endif - static struct { struct ip6t_replace repl; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 747b01e53132..89e20ab494b8 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -26,12 +26,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -136,7 +130,7 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, * except of IPv6 & ext headers. but it's tracked anyway. - YK */ if ((protoff < 0) || (protoff > (*pskb)->len)) { - DEBUGP("ip6_conntrack_core: can't find proto in pkt\n"); + pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); NF_CT_STAT_INC_ATOMIC(error); NF_CT_STAT_INC_ATOMIC(invalid); return -NF_ACCEPT; @@ -178,7 +172,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, (*pskb)->len - extoff); if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) { - DEBUGP("proto header not found\n"); + pr_debug("proto header not found\n"); return NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index a514661d25dd..9defc7e14554 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -27,12 +27,6 @@ static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -125,8 +119,8 @@ static int icmpv6_new(struct nf_conn *conntrack, if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { /* Can't create a new ICMPv6 `conn' with this. */ - DEBUGP("icmpv6: can't create new conn with type %u\n", - type + 128); + pr_debug("icmpv6: can't create new conn with type %u\n", + type + 128); NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); return 0; } @@ -152,14 +146,15 @@ icmpv6_error_message(struct sk_buff *skb, hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr); if (hp == NULL) { - DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n"); + pr_debug("icmpv6_error: Can't get ICMPv6 hdr.\n"); return -NF_ACCEPT; } inip6off = icmp6off + sizeof(_hdr); if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr), &inprotonum, sizeof(inprotonum)) != 0) { - DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n"); + pr_debug("icmpv6_error: Can't get nexthdr in inner IPv6 " + "header.\n"); return -NF_ACCEPT; } inprotoff = nf_ct_ipv6_skip_exthdr(skb, @@ -169,7 +164,8 @@ icmpv6_error_message(struct sk_buff *skb, - sizeof(struct ipv6hdr)); if ((inprotoff > skb->len) || (inprotonum == NEXTHDR_FRAGMENT)) { - DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n"); + pr_debug("icmpv6_error: Can't get protocol header in ICMPv6 " + "payload.\n"); return -NF_ACCEPT; } @@ -179,7 +175,7 @@ icmpv6_error_message(struct sk_buff *skb, /* Are they talking about one of our connections? */ if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { - DEBUGP("icmpv6_error: Can't get tuple\n"); + pr_debug("icmpv6_error: Can't get tuple\n"); return -NF_ACCEPT; } @@ -187,7 +183,7 @@ icmpv6_error_message(struct sk_buff *skb, been preserved inside the ICMP. */ if (!nf_ct_invert_tuple(&intuple, &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { - DEBUGP("icmpv6_error: Can't invert tuple\n"); + pr_debug("icmpv6_error: Can't invert tuple\n"); return -NF_ACCEPT; } @@ -195,7 +191,7 @@ icmpv6_error_message(struct sk_buff *skb, h = nf_conntrack_find_get(&intuple); if (!h) { - DEBUGP("icmpv6_error: no match\n"); + pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; } else { if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 347ab7608231..25442a8c1ba8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -44,12 +44,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - #define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT @@ -343,7 +337,7 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str struct nf_ct_frag6_queue *fq; if ((fq = frag_alloc_queue()) == NULL) { - DEBUGP("Can't alloc new queue\n"); + pr_debug("Can't alloc new queue\n"); goto oom; } @@ -393,7 +387,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, int offset, end; if (fq->last_in & COMPLETE) { - DEBUGP("Allready completed\n"); + pr_debug("Allready completed\n"); goto err; } @@ -402,7 +396,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - DEBUGP("offset is too large.\n"); + pr_debug("offset is too large.\n"); return -1; } @@ -420,7 +414,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, */ if (end < fq->len || ((fq->last_in & LAST_IN) && end != fq->len)) { - DEBUGP("already received last fragment\n"); + pr_debug("already received last fragment\n"); goto err; } fq->last_in |= LAST_IN; @@ -433,13 +427,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - DEBUGP("the end of this fragment is not rounded to 8 bytes.\n"); + pr_debug("end of fragment not rounded to 8 bytes.\n"); return -1; } if (end > fq->len) { /* Some bits beyond end -> corruption. */ if (fq->last_in & LAST_IN) { - DEBUGP("last packet already reached.\n"); + pr_debug("last packet already reached.\n"); goto err; } fq->len = end; @@ -451,11 +445,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { - DEBUGP("queue: message is too short.\n"); + pr_debug("queue: message is too short.\n"); goto err; } if (pskb_trim_rcsum(skb, end - offset)) { - DEBUGP("Can't trim\n"); + pr_debug("Can't trim\n"); goto err; } @@ -480,11 +474,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (i > 0) { offset += i; if (end <= offset) { - DEBUGP("overlap\n"); + pr_debug("overlap\n"); goto err; } if (!pskb_pull(skb, i)) { - DEBUGP("Can't pull\n"); + pr_debug("Can't pull\n"); goto err; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -503,7 +497,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* Eat head of the next overlapped fragment * and leave the loop. The next ones cannot overlap. */ - DEBUGP("Eat head of the overlapped parts.: %d", i); + pr_debug("Eat head of the overlapped parts.: %d", i); if (!pskb_pull(next, i)) goto err; @@ -586,13 +580,13 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { - DEBUGP("payload len is too large.\n"); + pr_debug("payload len is too large.\n"); goto out_oversize; } /* Head of list must not be cloned. */ if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { - DEBUGP("skb is cloned but can't expand head"); + pr_debug("skb is cloned but can't expand head"); goto out_oom; } @@ -604,7 +598,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) int i, plen = 0; if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) { - DEBUGP("Can't alloc skb\n"); + pr_debug("Can't alloc skb\n"); goto out_oom; } clone->next = head->next; @@ -719,11 +713,11 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return -1; } if (len < (int)sizeof(struct ipv6_opt_hdr)) { - DEBUGP("too short\n"); + pr_debug("too short\n"); return -1; } if (nexthdr == NEXTHDR_NONE) { - DEBUGP("next header is none\n"); + pr_debug("next header is none\n"); return -1; } if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) @@ -764,7 +758,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { - DEBUGP("payload len = 0\n"); + pr_debug("payload len = 0\n"); return skb; } @@ -773,14 +767,14 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) clone = skb_clone(skb, GFP_ATOMIC); if (clone == NULL) { - DEBUGP("Can't clone skb\n"); + pr_debug("Can't clone skb\n"); return skb; } NFCT_FRAG6_CB(clone)->orig = skb; if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { - DEBUGP("message is too short.\n"); + pr_debug("message is too short.\n"); goto ret_orig; } @@ -789,7 +783,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) fhdr = (struct frag_hdr *)skb_transport_header(clone); if (!(fhdr->frag_off & htons(0xFFF9))) { - DEBUGP("Invalid fragment offset\n"); + pr_debug("Invalid fragment offset\n"); /* It is not a fragmented frame */ goto ret_orig; } @@ -799,7 +793,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); if (fq == NULL) { - DEBUGP("Can't find and can't create new queue\n"); + pr_debug("Can't find and can't create new queue\n"); goto ret_orig; } @@ -807,7 +801,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { spin_unlock(&fq->lock); - DEBUGP("Can't insert skb to queue\n"); + pr_debug("Can't insert skb to queue\n"); fq_put(fq, NULL); goto ret_orig; } @@ -815,7 +809,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) - DEBUGP("Can't reassemble fragmented packets\n"); + pr_debug("Can't reassemble fragmented packets\n"); } spin_unlock(&fq->lock); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 472396dac05c..3d1411012a2c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -40,12 +40,6 @@ #define NF_CONNTRACK_VERSION "0.5.0" -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - DEFINE_RWLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -141,7 +135,7 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); static void clean_from_lists(struct nf_conn *ct) { - DEBUGP("clean_from_lists(%p)\n", ct); + pr_debug("clean_from_lists(%p)\n", ct); hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); hlist_del(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); @@ -155,7 +149,7 @@ destroy_conntrack(struct nf_conntrack *nfct) struct nf_conn *ct = (struct nf_conn *)nfct; struct nf_conntrack_l4proto *l4proto; - DEBUGP("destroy_conntrack(%p)\n", ct); + pr_debug("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); @@ -194,7 +188,7 @@ destroy_conntrack(struct nf_conntrack *nfct) if (ct->master) nf_ct_put(ct->master); - DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); + pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); nf_conntrack_free(ct); } @@ -313,7 +307,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* No external references means noone else could have confirmed us. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - DEBUGP("Confirming conntrack %p\n", ct); + pr_debug("Confirming conntrack %p\n", ct); write_lock_bh(&nf_conntrack_lock); @@ -446,7 +440,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); if (conntrack == NULL) { - DEBUGP("nf_conntrack_alloc: Can't alloc conntrack.\n"); + pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&nf_conntrack_count); return ERR_PTR(-ENOMEM); } @@ -485,27 +479,27 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, struct nf_conntrack_expect *exp; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { - DEBUGP("Can't invert tuple.\n"); + pr_debug("Can't invert tuple.\n"); return NULL; } conntrack = nf_conntrack_alloc(tuple, &repl_tuple); if (conntrack == NULL || IS_ERR(conntrack)) { - DEBUGP("Can't allocate conntrack.\n"); + pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)conntrack; } if (!l4proto->new(conntrack, skb, dataoff)) { nf_conntrack_free(conntrack); - DEBUGP("init conntrack: can't track with proto module\n"); + pr_debug("init conntrack: can't track with proto module\n"); return NULL; } write_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(tuple); if (exp) { - DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", - conntrack, exp); + pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", + conntrack, exp); /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = exp->master; @@ -568,7 +562,7 @@ resolve_normal_ct(struct sk_buff *skb, if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, &tuple, l3proto, l4proto)) { - DEBUGP("resolve_normal_ct: Can't get tuple\n"); + pr_debug("resolve_normal_ct: Can't get tuple\n"); return NULL; } @@ -591,13 +585,14 @@ resolve_normal_ct(struct sk_buff *skb, } else { /* Once we've had two way comms, always ESTABLISHED. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - DEBUGP("nf_conntrack_in: normal packet for %p\n", ct); + pr_debug("nf_conntrack_in: normal packet for %p\n", ct); *ctinfo = IP_CT_ESTABLISHED; } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { - DEBUGP("nf_conntrack_in: related packet for %p\n", ct); + pr_debug("nf_conntrack_in: related packet for %p\n", + ct); *ctinfo = IP_CT_RELATED; } else { - DEBUGP("nf_conntrack_in: new packet for %p\n", ct); + pr_debug("nf_conntrack_in: new packet for %p\n", ct); *ctinfo = IP_CT_NEW; } *set_reply = 0; @@ -629,7 +624,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) l3proto = __nf_ct_l3proto_find((u_int16_t)pf); if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { - DEBUGP("not prepared to track yet or error occured\n"); + pr_debug("not prepared to track yet or error occured\n"); return -ret; } @@ -665,7 +660,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) if (ret < 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ - DEBUGP("nf_conntrack_in: Can't track with proto module\n"); + pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; NF_CT_STAT_INC_ATOMIC(invalid); @@ -706,7 +701,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, /* Should be unconfirmed, so not in hash table yet */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - DEBUGP("Altering reply tuple of %p to ", ct); + pr_debug("Altering reply tuple of %p to ", ct); NF_CT_DUMP_TUPLE(newreply); ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index cd9c2d00cc09..c763ee74ea02 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -51,12 +51,6 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, struct nf_conntrack_expect *exp); EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, @@ -138,13 +132,13 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], if (*data == term && i == array_size - 1) return len; - DEBUGP("Char %u (got %u nums) `%u' unexpected\n", - len, i, *data); + pr_debug("Char %u (got %u nums) `%u' unexpected\n", + len, i, *data); return 0; } } - DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep); - + pr_debug("Failed to fill %u numbers separated by %c\n", + array_size, sep); return 0; } @@ -178,13 +172,13 @@ static int get_port(const char *data, int start, size_t dlen, char delim, if (tmp_port == 0) break; *port = htons(tmp_port); - DEBUGP("get_port: return %d\n", tmp_port); + pr_debug("get_port: return %d\n", tmp_port); return i + 1; } else if (data[i] >= '0' && data[i] <= '9') tmp_port = tmp_port*10 + data[i] - '0'; else { /* Some other crap */ - DEBUGP("get_port: invalid char.\n"); + pr_debug("get_port: invalid char.\n"); break; } } @@ -201,22 +195,22 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, /* First character is delimiter, then "1" for IPv4 or "2" for IPv6, then delimiter again. */ if (dlen <= 3) { - DEBUGP("EPRT: too short\n"); + pr_debug("EPRT: too short\n"); return 0; } delim = data[0]; if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { - DEBUGP("try_eprt: invalid delimitter.\n"); + pr_debug("try_eprt: invalid delimitter.\n"); return 0; } if ((cmd->l3num == PF_INET && data[1] != '1') || (cmd->l3num == PF_INET6 && data[1] != '2')) { - DEBUGP("EPRT: invalid protocol number.\n"); + pr_debug("EPRT: invalid protocol number.\n"); return 0; } - DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim); + pr_debug("EPRT: Got %c%c%c\n", delim, data[1], delim); if (data[1] == '1') { u_int32_t array[4]; @@ -234,7 +228,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, if (length == 0) return 0; - DEBUGP("EPRT: Got IP address!\n"); + pr_debug("EPRT: Got IP address!\n"); /* Start offset includes initial "|1|", and trailing delimiter */ return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port); } @@ -267,7 +261,7 @@ static int find_pattern(const char *data, size_t dlen, { size_t i; - DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen); + pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); if (dlen == 0) return 0; @@ -282,17 +276,17 @@ static int find_pattern(const char *data, size_t dlen, #if 0 size_t i; - DEBUGP("ftp: string mismatch\n"); + pr_debug("ftp: string mismatch\n"); for (i = 0; i < plen; i++) { - DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", - i, data[i], data[i], - pattern[i], pattern[i]); + pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n", + i, data[i], data[i], + pattern[i], pattern[i]); } #endif return 0; } - DEBUGP("Pattern matches!\n"); + pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip to the 'skip' character */ for (i = plen; data[i] != skip; i++) @@ -301,14 +295,14 @@ static int find_pattern(const char *data, size_t dlen, /* Skip over the last character */ i++; - DEBUGP("Skipped up to `%c'!\n", skip); + pr_debug("Skipped up to `%c'!\n", skip); *numoff = i; *numlen = getnum(data + i, dlen - i, cmd, term); if (!*numlen) return -1; - DEBUGP("Match succeeded!\n"); + pr_debug("Match succeeded!\n"); return 1; } @@ -373,7 +367,7 @@ static int help(struct sk_buff **pskb, /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { - DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); + pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } @@ -384,8 +378,8 @@ static int help(struct sk_buff **pskb, dataoff = protoff + th->doff * 4; /* No data? */ if (dataoff >= (*pskb)->len) { - DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, - (*pskb)->len); + pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, + (*pskb)->len); return NF_ACCEPT; } datalen = (*pskb)->len - dataoff; @@ -400,11 +394,11 @@ static int help(struct sk_buff **pskb, /* Look up to see if we're just after a \n. */ if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { /* Now if this ends in \n, update ftp info. */ - DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", - ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", - ct_ftp_info->seq_aft_nl[dir][0], - ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)", - ct_ftp_info->seq_aft_nl[dir][1]); + pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n", + ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", + ct_ftp_info->seq_aft_nl[dir][0], + ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)", + ct_ftp_info->seq_aft_nl[dir][1]); ret = NF_ACCEPT; goto out_update_nl; } @@ -442,9 +436,9 @@ static int help(struct sk_buff **pskb, goto out_update_nl; } - DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", - (int)matchlen, fb_ptr + matchoff, - matchlen, ntohl(th->seq) + matchoff); + pr_debug("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", + matchlen, fb_ptr + matchoff, + matchlen, ntohl(th->seq) + matchoff); exp = nf_ct_expect_alloc(ct); if (exp == NULL) { @@ -466,14 +460,16 @@ static int help(struct sk_buff **pskb, different IP address. Simply don't record it for NAT. */ if (cmd.l3num == PF_INET) { - DEBUGP("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT " != " NIPQUAD_FMT "\n", - NIPQUAD(cmd.u3.ip), - NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); + pr_debug("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT + " != " NIPQUAD_FMT "\n", + NIPQUAD(cmd.u3.ip), + NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); } else { - DEBUGP("conntrack_ftp: NOT RECORDING: " NIP6_FMT " != " NIP6_FMT "\n", - NIP6(*((struct in6_addr *)cmd.u3.ip6)), - NIP6(*((struct in6_addr *)ct->tuplehash[dir] - .tuple.src.u3.ip6))); + pr_debug("conntrack_ftp: NOT RECORDING: " NIP6_FMT + " != " NIP6_FMT "\n", + NIP6(*((struct in6_addr *)cmd.u3.ip6)), + NIP6(*((struct in6_addr *) + ct->tuplehash[dir].tuple.src.u3.ip6))); } /* Thanks to Cristiano Lincoln Mattos @@ -530,9 +526,9 @@ static void nf_conntrack_ftp_fini(void) if (ftp[i][j].me == NULL) continue; - DEBUGP("nf_ct_ftp: unregistering helper for pf: %d " - "port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); + pr_debug("nf_ct_ftp: unregistering helper for pf: %d " + "port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_helper_unregister(&ftp[i][j]); } } @@ -571,9 +567,9 @@ static int __init nf_conntrack_ftp_init(void) sprintf(tmpname, "ftp-%d", ports[i]); ftp[i][j].name = tmpname; - DEBUGP("nf_ct_ftp: registering helper for pf: %d " - "port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); + pr_debug("nf_ct_ftp: registering helper for pf: %d " + "port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&ftp[i][j]); if (ret) { printk("nf_ct_ftp: failed to register helper " diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index aa5ba99b7a08..a8a9dfbe7a67 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -31,12 +31,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Parameters */ static unsigned int default_rrq_ttl __read_mostly = 300; module_param(default_rrq_ttl, uint, 0600); @@ -150,9 +144,9 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) { /* Netmeeting sends TPKT header and data separately */ if (info->tpkt_len[dir] > 0) { - DEBUGP("nf_ct_h323: previous packet " - "indicated separate TPKT data of %hu " - "bytes\n", info->tpkt_len[dir]); + pr_debug("nf_ct_h323: previous packet " + "indicated separate TPKT data of %hu " + "bytes\n", info->tpkt_len[dir]); if (info->tpkt_len[dir] <= tcpdatalen) { /* Yes, there was a TPKT header * received */ @@ -163,7 +157,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, } /* Fragmented TPKT */ - DEBUGP("nf_ct_h323: fragmented TPKT\n"); + pr_debug("nf_ct_h323: fragmented TPKT\n"); goto clear_out; } @@ -190,9 +184,9 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, if (tpktlen > tcpdatalen) { if (tcpdatalen == 4) { /* Separate TPKT header */ /* Netmeeting sends TPKT header and data separately */ - DEBUGP("nf_ct_h323: separate TPKT header indicates " - "there will be TPKT data of %hu bytes\n", - tpktlen - 4); + pr_debug("nf_ct_h323: separate TPKT header indicates " + "there will be TPKT data of %hu bytes\n", + tpktlen - 4); info->tpkt_len[dir] = tpktlen - 4; return 0; } @@ -308,9 +302,9 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } else { /* Conntrack only */ if (nf_ct_expect_related(rtp_exp) == 0) { if (nf_ct_expect_related(rtcp_exp) == 0) { - DEBUGP("nf_ct_h323: expect RTP "); + pr_debug("nf_ct_h323: expect RTP "); NF_CT_DUMP_TUPLE(&rtp_exp->tuple); - DEBUGP("nf_ct_h323: expect RTCP "); + pr_debug("nf_ct_h323: expect RTCP "); NF_CT_DUMP_TUPLE(&rtcp_exp->tuple); } else { nf_ct_unexpect_related(rtp_exp); @@ -365,7 +359,7 @@ static int expect_t120(struct sk_buff **pskb, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { - DEBUGP("nf_ct_h323: expect T.120 "); + pr_debug("nf_ct_h323: expect T.120 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; @@ -413,7 +407,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, { int ret; - DEBUGP("nf_ct_h323: OpenLogicalChannel\n"); + pr_debug("nf_ct_h323: OpenLogicalChannel\n"); if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) @@ -473,7 +467,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, H2250LogicalChannelAckParameters *ack; int ret; - DEBUGP("nf_ct_h323: OpenLogicalChannelAck\n"); + pr_debug("nf_ct_h323: OpenLogicalChannelAck\n"); if ((olca->options & eOpenLogicalChannelAck_reverseLogicalChannelParameters) && @@ -544,8 +538,8 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, return process_olc(pskb, ct, ctinfo, data, dataoff, &mscm->request.openLogicalChannel); } - DEBUGP("nf_ct_h323: H.245 Request %d\n", - mscm->request.choice); + pr_debug("nf_ct_h323: H.245 Request %d\n", + mscm->request.choice); break; case eMultimediaSystemControlMessage_response: if (mscm->response.choice == @@ -554,11 +548,11 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, &mscm->response. openLogicalChannelAck); } - DEBUGP("nf_ct_h323: H.245 Response %d\n", - mscm->response.choice); + pr_debug("nf_ct_h323: H.245 Response %d\n", + mscm->response.choice); break; default: - DEBUGP("nf_ct_h323: H.245 signal %d\n", mscm->choice); + pr_debug("nf_ct_h323: H.245 signal %d\n", mscm->choice); break; } @@ -580,23 +574,23 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - DEBUGP("nf_ct_h245: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ while (get_tpkt_data(pskb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { - DEBUGP("nf_ct_h245: TPKT len=%d ", datalen); + pr_debug("nf_ct_h245: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); /* Decode H.245 signal */ ret = DecodeMultimediaSystemControlMessage(data, datalen, &mscm); if (ret < 0) { - DEBUGP("nf_ct_h245: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + pr_debug("nf_ct_h245: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); /* We don't drop when decoding error */ break; } @@ -697,7 +691,7 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { - DEBUGP("nf_ct_q931: expect H.245 "); + pr_debug("nf_ct_q931: expect H.245 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; @@ -786,7 +780,7 @@ static int expect_callforwarding(struct sk_buff **pskb, if (callforward_filter && callforward_do_filter(&addr, &ct->tuplehash[!dir].tuple.src.u3, ct->tuplehash[!dir].tuple.src.l3num)) { - DEBUGP("nf_ct_q931: Call Forwarding not tracked\n"); + pr_debug("nf_ct_q931: Call Forwarding not tracked\n"); return 0; } @@ -808,7 +802,7 @@ static int expect_callforwarding(struct sk_buff **pskb, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { - DEBUGP("nf_ct_q931: expect Call Forwarding "); + pr_debug("nf_ct_q931: expect Call Forwarding "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; @@ -832,7 +826,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, union nf_conntrack_address addr; typeof(set_h225_addr_hook) set_h225_addr; - DEBUGP("nf_ct_q931: Setup\n"); + pr_debug("nf_ct_q931: Setup\n"); if (setup->options & eSetup_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -847,11 +841,11 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { - DEBUGP("nf_ct_q931: set destCallSignalAddress " - NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", - NIP6(*(struct in6_addr *)&addr), ntohs(port), - NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), - ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); + pr_debug("nf_ct_q931: set destCallSignalAddress " + NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", + NIP6(*(struct in6_addr *)&addr), ntohs(port), + NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), + ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); ret = set_h225_addr(pskb, data, dataoff, &setup->destCallSignalAddress, &ct->tuplehash[!dir].tuple.src.u3, @@ -865,11 +859,11 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) { - DEBUGP("nf_ct_q931: set sourceCallSignalAddress " - NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", - NIP6(*(struct in6_addr *)&addr), ntohs(port), - NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), - ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); + pr_debug("nf_ct_q931: set sourceCallSignalAddress " + NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", + NIP6(*(struct in6_addr *)&addr), ntohs(port), + NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), + ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); ret = set_h225_addr(pskb, data, dataoff, &setup->sourceCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, @@ -900,7 +894,7 @@ static int process_callproceeding(struct sk_buff **pskb, int ret; int i; - DEBUGP("nf_ct_q931: CallProceeding\n"); + pr_debug("nf_ct_q931: CallProceeding\n"); if (callproc->options & eCallProceeding_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -930,7 +924,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, int ret; int i; - DEBUGP("nf_ct_q931: Connect\n"); + pr_debug("nf_ct_q931: Connect\n"); if (connect->options & eConnect_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -960,7 +954,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, int ret; int i; - DEBUGP("nf_ct_q931: Alerting\n"); + pr_debug("nf_ct_q931: Alerting\n"); if (alert->options & eAlerting_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -990,7 +984,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, int ret; int i; - DEBUGP("nf_ct_q931: Facility\n"); + pr_debug("nf_ct_q931: Facility\n"); if (facility->reason.choice == eFacilityReason_callForwarded) { if (facility->options & eFacility_UUIE_alternativeAddress) @@ -1029,7 +1023,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, int ret; int i; - DEBUGP("nf_ct_q931: Progress\n"); + pr_debug("nf_ct_q931: Progress\n"); if (progress->options & eProgress_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -1086,8 +1080,8 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, &pdu->h323_message_body.progress); break; default: - DEBUGP("nf_ct_q931: Q.931 signal %d\n", - pdu->h323_message_body.choice); + pr_debug("nf_ct_q931: Q.931 signal %d\n", + pdu->h323_message_body.choice); break; } @@ -1121,22 +1115,22 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - DEBUGP("nf_ct_q931: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ while (get_tpkt_data(pskb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { - DEBUGP("nf_ct_q931: TPKT len=%d ", datalen); + pr_debug("nf_ct_q931: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); /* Decode Q.931 signal */ ret = DecodeQ931(data, datalen, &q931); if (ret < 0) { - DEBUGP("nf_ct_q931: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + pr_debug("nf_ct_q931: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); /* We don't drop when decoding error */ break; } @@ -1274,7 +1268,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { - DEBUGP("nf_ct_ras: expect Q.931 "); + pr_debug("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); /* Save port for looking up expect in processing RCF */ @@ -1295,7 +1289,7 @@ static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, { typeof(set_ras_addr_hook) set_ras_addr; - DEBUGP("nf_ct_ras: GRQ\n"); + pr_debug("nf_ct_ras: GRQ\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ @@ -1315,7 +1309,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, union nf_conntrack_address addr; struct nf_conntrack_expect *exp; - DEBUGP("nf_ct_ras: GCF\n"); + pr_debug("nf_ct_ras: GCF\n"); if (!get_h225_addr(ct, *data, &gcf->rasAddress, &addr, &port)) return 0; @@ -1338,7 +1332,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, exp->helper = nf_conntrack_helper_ras; if (nf_ct_expect_related(exp) == 0) { - DEBUGP("nf_ct_ras: expect RAS "); + pr_debug("nf_ct_ras: expect RAS "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; @@ -1357,7 +1351,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, int ret; typeof(set_ras_addr_hook) set_ras_addr; - DEBUGP("nf_ct_ras: RRQ\n"); + pr_debug("nf_ct_ras: RRQ\n"); ret = expect_q931(pskb, ct, ctinfo, data, rrq->callSignalAddress.item, @@ -1375,7 +1369,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, } if (rrq->options & eRegistrationRequest_timeToLive) { - DEBUGP("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive); + pr_debug("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive); info->timeout = rrq->timeToLive; } else info->timeout = default_rrq_ttl; @@ -1394,7 +1388,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, struct nf_conntrack_expect *exp; typeof(set_sig_addr_hook) set_sig_addr; - DEBUGP("nf_ct_ras: RCF\n"); + pr_debug("nf_ct_ras: RCF\n"); set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { @@ -1406,14 +1400,13 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, } if (rcf->options & eRegistrationConfirm_timeToLive) { - DEBUGP("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive); + pr_debug("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive); info->timeout = rcf->timeToLive; } if (info->timeout > 0) { - DEBUGP - ("nf_ct_ras: set RAS connection timeout to %u seconds\n", - info->timeout); + pr_debug("nf_ct_ras: set RAS connection timeout to " + "%u seconds\n", info->timeout); nf_ct_refresh(ct, *pskb, info->timeout * HZ); /* Set expect timeout */ @@ -1421,9 +1414,9 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, info->sig_port[!dir]); if (exp) { - DEBUGP("nf_ct_ras: set Q.931 expect " - "timeout to %u seconds for", - info->timeout); + pr_debug("nf_ct_ras: set Q.931 expect " + "timeout to %u seconds for", + info->timeout); NF_CT_DUMP_TUPLE(&exp->tuple); set_expect_timeout(exp, info->timeout); } @@ -1443,7 +1436,7 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, int ret; typeof(set_sig_addr_hook) set_sig_addr; - DEBUGP("nf_ct_ras: URQ\n"); + pr_debug("nf_ct_ras: URQ\n"); set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { @@ -1476,7 +1469,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, union nf_conntrack_address addr; typeof(set_h225_addr_hook) set_h225_addr; - DEBUGP("nf_ct_ras: ARQ\n"); + pr_debug("nf_ct_ras: ARQ\n"); set_h225_addr = rcu_dereference(set_h225_addr_hook); if ((arq->options & eAdmissionRequest_destCallSignalAddress) && @@ -1519,7 +1512,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, struct nf_conntrack_expect *exp; typeof(set_sig_addr_hook) set_sig_addr; - DEBUGP("nf_ct_ras: ACF\n"); + pr_debug("nf_ct_ras: ACF\n"); if (!get_h225_addr(ct, *data, &acf->destCallSignalAddress, &addr, &port)) @@ -1544,7 +1537,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, exp->helper = nf_conntrack_helper_q931; if (nf_ct_expect_related(exp) == 0) { - DEBUGP("nf_ct_ras: expect Q.931 "); + pr_debug("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; @@ -1561,7 +1554,7 @@ static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, { typeof(set_ras_addr_hook) set_ras_addr; - DEBUGP("nf_ct_ras: LRQ\n"); + pr_debug("nf_ct_ras: LRQ\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) @@ -1581,7 +1574,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, union nf_conntrack_address addr; struct nf_conntrack_expect *exp; - DEBUGP("nf_ct_ras: LCF\n"); + pr_debug("nf_ct_ras: LCF\n"); if (!get_h225_addr(ct, *data, &lcf->callSignalAddress, &addr, &port)) @@ -1597,7 +1590,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, exp->helper = nf_conntrack_helper_q931; if (nf_ct_expect_related(exp) == 0) { - DEBUGP("nf_ct_ras: expect Q.931 "); + pr_debug("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; @@ -1618,7 +1611,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, typeof(set_ras_addr_hook) set_ras_addr; typeof(set_sig_addr_hook) set_sig_addr; - DEBUGP("nf_ct_ras: IRR\n"); + pr_debug("nf_ct_ras: IRR\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { @@ -1677,7 +1670,7 @@ static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, return process_irr(pskb, ct, ctinfo, data, &ras->infoRequestResponse); default: - DEBUGP("nf_ct_ras: RAS message %d\n", ras->choice); + pr_debug("nf_ct_ras: RAS message %d\n", ras->choice); break; } @@ -1693,7 +1686,7 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, int datalen = 0; int ret; - DEBUGP("nf_ct_ras: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); @@ -1701,15 +1694,15 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, data = get_udp_data(pskb, protoff, &datalen); if (data == NULL) goto accept; - DEBUGP("nf_ct_ras: RAS message len=%d ", datalen); + pr_debug("nf_ct_ras: RAS message len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); /* Decode RAS message */ ret = DecodeRasMessage(data, datalen, &ras); if (ret < 0) { - DEBUGP("nf_ct_ras: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + pr_debug("nf_ct_ras: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); goto accept; } @@ -1760,7 +1753,7 @@ static void __exit nf_conntrack_h323_fini(void) nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); kfree(h323_buffer); - DEBUGP("nf_ct_h323: fini\n"); + pr_debug("nf_ct_h323: fini\n"); } /****************************************************************************/ @@ -1783,7 +1776,7 @@ static int __init nf_conntrack_h323_init(void) ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); if (ret < 0) goto err4; - DEBUGP("nf_ct_h323: init success\n"); + pr_debug("nf_ct_h323: init success\n"); return 0; err4: diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 8c7340794bf6..1562ca97a349 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -55,13 +56,6 @@ static const char *dccprotos[] = { #define MINMATCHLEN 5 -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - /* tries to get the ip_addr and port out of a dcc command * return value: -1 on failure, 0 on success * data pointer to first byte of DCC command data @@ -99,6 +93,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff; + struct iphdr *iph; struct tcphdr _tcph, *th; char *data, *data_limit, *ib_ptr; int dir = CTINFO2DIR(ctinfo); @@ -148,9 +143,10 @@ static int help(struct sk_buff **pskb, unsigned int protoff, data += 5; /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ - DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + iph = ip_hdr(*pskb); + pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n", + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest)); for (i = 0; i < ARRAY_SIZE(dccprotos); i++) { if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) { @@ -158,18 +154,18 @@ static int help(struct sk_buff **pskb, unsigned int protoff, continue; } data += strlen(dccprotos[i]); - DEBUGP("DCC %s detected\n", dccprotos[i]); + pr_debug("DCC %s detected\n", dccprotos[i]); /* we have at least * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid * data left (== 14/13 bytes) */ if (parse_dcc((char *)data, data_limit, &dcc_ip, &dcc_port, &addr_beg_p, &addr_end_p)) { - DEBUGP("unable to parse dcc command\n"); + pr_debug("unable to parse dcc command\n"); continue; } - DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n", - HIPQUAD(dcc_ip), dcc_port); + pr_debug("DCC bound ip/port: %u.%u.%u.%u:%u\n", + HIPQUAD(dcc_ip), dcc_port); /* dcc_ip can be the internal OR external (NAT'ed) IP */ tuple = &ct->tuplehash[dir].tuple; diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index 2fd0f11b8fb2..b1bfa207a850 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -31,12 +31,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 63dac5eb959f..b0804199ab59 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -65,7 +65,7 @@ void struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); -#if 0 +#ifdef DEBUG /* PptpControlMessageType names */ const char *pptp_msg_name[] = { "UNKNOWN_MESSAGE", @@ -86,9 +86,6 @@ const char *pptp_msg_name[] = { "SET_LINK_INFO" }; EXPORT_SYMBOL(pptp_msg_name); -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) -#else -#define DEBUGP(format, args...) #endif #define SECS *HZ @@ -102,7 +99,7 @@ static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp) { typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn; - DEBUGP("increasing timeouts\n"); + pr_debug("increasing timeouts\n"); /* increase timeout of GRE data channel conntrack entry */ ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; @@ -121,17 +118,17 @@ static void pptp_expectfn(struct nf_conn *ct, /* obviously this tuple inversion only works until you do NAT */ nf_ct_invert_tuplepr(&inv_t, &exp->tuple); - DEBUGP("trying to unexpect other dir: "); + pr_debug("trying to unexpect other dir: "); NF_CT_DUMP_TUPLE(&inv_t); exp_other = nf_ct_expect_find_get(&inv_t); if (exp_other) { /* delete other expectation. */ - DEBUGP("found\n"); + pr_debug("found\n"); nf_ct_unexpect_related(exp_other); nf_ct_expect_put(exp_other); } else { - DEBUGP("not found\n"); + pr_debug("not found\n"); } } rcu_read_unlock(); @@ -143,13 +140,13 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) struct nf_conntrack_expect *exp; struct nf_conn *sibling; - DEBUGP("trying to timeout ct or exp for tuple "); + pr_debug("trying to timeout ct or exp for tuple "); NF_CT_DUMP_TUPLE(t); h = nf_conntrack_find_get(t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); - DEBUGP("setting timeout of conntrack %p to 0\n", sibling); + pr_debug("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; if (del_timer(&sibling->timeout)) @@ -159,7 +156,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) } else { exp = nf_ct_expect_find_get(t); if (exp) { - DEBUGP("unexpect_related of expect %p\n", exp); + pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); nf_ct_expect_put(exp); return 1; @@ -182,7 +179,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; if (!destroy_sibling_or_exp(&t)) - DEBUGP("failed to timeout original pns->pac ct/exp\n"); + pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); @@ -190,7 +187,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; if (!destroy_sibling_or_exp(&t)) - DEBUGP("failed to timeout reply pac->pns ct/exp\n"); + pr_debug("failed to timeout reply pac->pns ct/exp\n"); } /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ @@ -270,7 +267,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound; msg = ntohs(ctlh->messageType); - DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + pr_debug("inbound control message %s\n", pptp_msg_name[msg]); switch (msg) { case PPTP_START_SESSION_REPLY: @@ -305,8 +302,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid = pptpReq->ocack.peersCallID; if (info->pns_call_id != pcid) goto invalid; - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], - ntohs(cid), ntohs(pcid)); + pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { info->cstate = PPTP_CALL_OUT_CONF; @@ -322,7 +319,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, goto invalid; cid = pptpReq->icreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_IN_REQ; info->pac_call_id = cid; break; @@ -341,7 +338,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, if (info->pns_call_id != pcid) goto invalid; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); + pr_debug("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); info->cstate = PPTP_CALL_IN_CONF; /* we expect a GRE connection from PAC to PNS */ @@ -351,7 +348,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, case PPTP_CALL_DISCONNECT_NOTIFY: /* server confirms disconnect */ cid = pptpReq->disc.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_NONE; /* untrack this call id, unexpect GRE packets */ @@ -374,11 +371,11 @@ pptp_inbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; invalid: - DEBUGP("invalid %s: type=%d cid=%u pcid=%u " - "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], - msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, - ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + pr_debug("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; } @@ -396,7 +393,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound; msg = ntohs(ctlh->messageType); - DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + pr_debug("outbound control message %s\n", pptp_msg_name[msg]); switch (msg) { case PPTP_START_SESSION_REQUEST: @@ -418,7 +415,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, info->cstate = PPTP_CALL_OUT_REQ; /* track PNS call id */ cid = pptpReq->ocreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->pns_call_id = cid; break; @@ -432,8 +429,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, pcid = pptpReq->icack.peersCallID; if (info->pac_call_id != pcid) goto invalid; - DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], - ntohs(cid), ntohs(pcid)); + pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { /* part two of the three-way handshake */ @@ -469,11 +466,11 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; invalid: - DEBUGP("invalid %s: type=%d cid=%u pcid=%u " - "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], - msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, - ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + pr_debug("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; } @@ -524,7 +521,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { - DEBUGP("no full PPTP header, can't track\n"); + pr_debug("no full PPTP header, can't track\n"); return NF_ACCEPT; } nexthdr_off += sizeof(_pptph); @@ -533,7 +530,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, /* if it's not a control message we can't do anything with it */ if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { - DEBUGP("not a control packet\n"); + pr_debug("not a control packet\n"); return NF_ACCEPT; } @@ -569,8 +566,8 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, /* server -> client (PAC -> PNS) */ ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); - DEBUGP("sstate: %d->%d, cstate: %d->%d\n", - oldsstate, info->sstate, oldcstate, info->cstate); + pr_debug("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); spin_unlock_bh(&nf_pptp_lock); return ret; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 339c397d1b5f..771c4c29936e 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -40,12 +40,6 @@ #define GRE_TIMEOUT (30 * HZ) #define GRE_STREAM_TIMEOUT (180 * HZ) -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) -#else -#define DEBUGP(x, args...) -#endif - static DEFINE_RWLOCK(nf_ct_gre_lock); static LIST_HEAD(gre_keymap_list); @@ -87,7 +81,7 @@ static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t) } read_unlock_bh(&nf_ct_gre_lock); - DEBUGP("lookup src key 0x%x for ", key); + pr_debug("lookup src key 0x%x for ", key); NF_CT_DUMP_TUPLE(t); return key; @@ -107,8 +101,8 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, if (gre_key_cmpfn(km, t) && km == *kmp) return 0; } - DEBUGP("trying to override keymap_%s for ct %p\n", - dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); + pr_debug("trying to override keymap_%s for ct %p\n", + dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; } @@ -118,7 +112,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, memcpy(&km->tuple, t, sizeof(*t)); *kmp = km; - DEBUGP("adding new entry %p: ", km); + pr_debug("adding new entry %p: ", km); NF_CT_DUMP_TUPLE(&km->tuple); write_lock_bh(&nf_ct_gre_lock); @@ -135,13 +129,13 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) struct nf_conn_help *help = nfct_help(ct); enum ip_conntrack_dir dir; - DEBUGP("entering for ct %p\n", ct); + pr_debug("entering for ct %p\n", ct); write_lock_bh(&nf_ct_gre_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { if (help->help.ct_pptp_info.keymap[dir]) { - DEBUGP("removing %p from list\n", - help->help.ct_pptp_info.keymap[dir]); + pr_debug("removing %p from list\n", + help->help.ct_pptp_info.keymap[dir]); list_del(&help->help.ct_pptp_info.keymap[dir]->list); kfree(help->help.ct_pptp_info.keymap[dir]); help->help.ct_pptp_info.keymap[dir] = NULL; @@ -186,7 +180,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, return 1; if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { - DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + pr_debug("GRE_VERSION_PPTP but unknown proto\n"); return 0; } @@ -242,7 +236,7 @@ static int gre_packet(struct nf_conn *ct, static int gre_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { - DEBUGP(": "); + pr_debug(": "); NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); /* initialize to sane value. Ideally a conntrack helper @@ -258,10 +252,10 @@ static int gre_new(struct nf_conn *ct, const struct sk_buff *skb, static void gre_destroy(struct nf_conn *ct) { struct nf_conn *master = ct->master; - DEBUGP(" entering\n"); + pr_debug(" entering\n"); if (!master) - DEBUGP("no master !?!\n"); + pr_debug("no master !?!\n"); else nf_ct_gre_keymap_destroy(master); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 0d3254b974c5..265769e5002b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -25,12 +25,6 @@ #include #include -#if 0 -#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__) -#else -#define DEBUGP(format, args...) -#endif - /* Protects conntrack->proto.sctp */ static DEFINE_RWLOCK(sctp_lock); @@ -151,9 +145,6 @@ static int sctp_pkt_to_tuple(const struct sk_buff *skb, { sctp_sctphdr_t _hdr, *hp; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - /* Actually only need first 8 bytes. */ hp = skb_header_pointer(skb, dataoff, 8, &_hdr); if (hp == NULL) @@ -167,9 +158,6 @@ static int sctp_pkt_to_tuple(const struct sk_buff *skb, static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - tuple->src.u.sctp.port = orig->dst.u.sctp.port; tuple->dst.u.sctp.port = orig->src.u.sctp.port; return 1; @@ -179,9 +167,6 @@ static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple, static int sctp_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - return seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.sctp.port), ntohs(tuple->dst.u.sctp.port)); @@ -193,9 +178,6 @@ static int sctp_print_conntrack(struct seq_file *s, { enum sctp_conntrack state; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - read_lock_bh(&sctp_lock); state = conntrack->proto.sctp.state; read_unlock_bh(&sctp_lock); @@ -219,13 +201,10 @@ static int do_basic_checks(struct nf_conn *conntrack, sctp_chunkhdr_t _sch, *sch; int flag; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - flag = 0; for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { - DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type); + pr_debug("Chunk Num: %d Type: %d\n", count, sch->type); if (sch->type == SCTP_CID_INIT || sch->type == SCTP_CID_INIT_ACK @@ -242,7 +221,7 @@ static int do_basic_checks(struct nf_conn *conntrack, || sch->type == SCTP_CID_COOKIE_ECHO || flag) && count !=0) || !sch->length) { - DEBUGP("Basic checks failed\n"); + pr_debug("Basic checks failed\n"); return 1; } @@ -251,7 +230,7 @@ static int do_basic_checks(struct nf_conn *conntrack, } } - DEBUGP("Basic checks passed\n"); + pr_debug("Basic checks passed\n"); return count == 0; } @@ -261,50 +240,47 @@ static int new_state(enum ip_conntrack_dir dir, { int i; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - DEBUGP("Chunk type: %d\n", chunk_type); + pr_debug("Chunk type: %d\n", chunk_type); switch (chunk_type) { case SCTP_CID_INIT: - DEBUGP("SCTP_CID_INIT\n"); + pr_debug("SCTP_CID_INIT\n"); i = 0; break; case SCTP_CID_INIT_ACK: - DEBUGP("SCTP_CID_INIT_ACK\n"); + pr_debug("SCTP_CID_INIT_ACK\n"); i = 1; break; case SCTP_CID_ABORT: - DEBUGP("SCTP_CID_ABORT\n"); + pr_debug("SCTP_CID_ABORT\n"); i = 2; break; case SCTP_CID_SHUTDOWN: - DEBUGP("SCTP_CID_SHUTDOWN\n"); + pr_debug("SCTP_CID_SHUTDOWN\n"); i = 3; break; case SCTP_CID_SHUTDOWN_ACK: - DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); + pr_debug("SCTP_CID_SHUTDOWN_ACK\n"); i = 4; break; case SCTP_CID_ERROR: - DEBUGP("SCTP_CID_ERROR\n"); + pr_debug("SCTP_CID_ERROR\n"); i = 5; break; case SCTP_CID_COOKIE_ECHO: - DEBUGP("SCTP_CID_COOKIE_ECHO\n"); + pr_debug("SCTP_CID_COOKIE_ECHO\n"); i = 6; break; case SCTP_CID_COOKIE_ACK: - DEBUGP("SCTP_CID_COOKIE_ACK\n"); + pr_debug("SCTP_CID_COOKIE_ACK\n"); i = 7; break; case SCTP_CID_SHUTDOWN_COMPLETE: - DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); + pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); i = 8; break; default: /* Other chunks like DATA, SACK, HEARTBEAT and its ACK do not cause a change in state */ - DEBUGP("Unknown chunk type, Will stay in %s\n", - sctp_conntrack_names[cur_state]); + pr_debug("Unknown chunk type, Will stay in %s\n", + sctp_conntrack_names[cur_state]); return cur_state; } - DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", - dir, sctp_conntrack_names[cur_state], chunk_type, - sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); + pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", + dir, sctp_conntrack_names[cur_state], chunk_type, + sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); return sctp_conntracks[dir][i][cur_state]; } @@ -323,9 +299,6 @@ static int sctp_packet(struct nf_conn *conntrack, u_int32_t offset, count; char map[256 / sizeof (char)] = {0}; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); if (sh == NULL) return -1; @@ -340,7 +313,7 @@ static int sctp_packet(struct nf_conn *conntrack, && !test_bit(SCTP_CID_ABORT, (void *)map) && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { - DEBUGP("Verification tag check failed\n"); + pr_debug("Verification tag check failed\n"); return -1; } @@ -385,8 +358,9 @@ static int sctp_packet(struct nf_conn *conntrack, /* Invalid */ if (newconntrack == SCTP_CONNTRACK_MAX) { - DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", - CTINFO2DIR(ctinfo), sch->type, oldsctpstate); + pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u " + "conntrack=%u\n", + CTINFO2DIR(ctinfo), sch->type, oldsctpstate); write_unlock_bh(&sctp_lock); return -1; } @@ -402,8 +376,8 @@ static int sctp_packet(struct nf_conn *conntrack, write_unlock_bh(&sctp_lock); return -1; } - DEBUGP("Setting vtag %x for dir %d\n", - ih->init_tag, !CTINFO2DIR(ctinfo)); + pr_debug("Setting vtag %x for dir %d\n", + ih->init_tag, !CTINFO2DIR(ctinfo)); conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag; } @@ -418,7 +392,7 @@ static int sctp_packet(struct nf_conn *conntrack, if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { - DEBUGP("Setting assured bit\n"); + pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &conntrack->status); nf_conntrack_event_cache(IPCT_STATUS, skb); } @@ -436,9 +410,6 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, u_int32_t offset, count; char map[256 / sizeof (char)] = {0}; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); if (sh == NULL) return 0; @@ -461,7 +432,7 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, /* Invalid: delete conntrack */ if (newconntrack == SCTP_CONNTRACK_MAX) { - DEBUGP("nf_conntrack_sctp: invalid new deleting.\n"); + pr_debug("nf_conntrack_sctp: invalid new deleting.\n"); return 0; } @@ -475,8 +446,8 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, if (ih == NULL) return 0; - DEBUGP("Setting vtag %x for new conn\n", - ih->init_tag); + pr_debug("Setting vtag %x for new conn\n", + ih->init_tag); conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag; @@ -488,8 +459,8 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ else { - DEBUGP("Setting vtag %x for new conn OOTB\n", - sh->vtag); + pr_debug("Setting vtag %x for new conn OOTB\n", + sh->vtag); conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; } @@ -688,8 +659,6 @@ int __init nf_conntrack_proto_sctp_init(void) cleanup_sctp4: nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); out: - DEBUGP("SCTP conntrack module loading %s\n", - ret ? "failed": "succeeded"); return ret; } @@ -697,7 +666,6 @@ void __exit nf_conntrack_proto_sctp_fini(void) { nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6); nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); - DEBUGP("SCTP conntrack module unloaded\n"); } module_init(nf_conntrack_proto_sctp_init); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ccdd5d231e0d..1c8206e6560a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -26,13 +26,6 @@ #include #include -#if 0 -#define DEBUGP printk -#define DEBUGP_VARS -#else -#define DEBUGP(format, args...) -#endif - /* Protects conntrack->proto.tcp */ static DEFINE_RWLOCK(tcp_lock); @@ -496,7 +489,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } -static int tcp_in_window(struct ip_ct_tcp *state, +static int tcp_in_window(struct nf_conn *ct, + struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, @@ -506,6 +500,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, { struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; + struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; int res; @@ -520,18 +515,17 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); - DEBUGP("tcp_in_window: START\n"); - DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "seq=%u ack=%u sack=%u win=%u end=%u\n", - NIPQUAD(iph->saddr), ntohs(tcph->source), - NIPQUAD(iph->daddr), ntohs(tcph->dest), - seq, ack, sack, win, end); - DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); + pr_debug("tcp_in_window: START\n"); + pr_debug("tcp_in_window: "); + NF_CT_DUMP_TUPLE(tuple); + pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n", + seq, ack, sack, win, end); + pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); if (sender->td_end == 0) { /* @@ -609,23 +603,22 @@ static int tcp_in_window(struct ip_ct_tcp *state, */ seq = end = sender->td_end; - DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "seq=%u ack=%u sack =%u win=%u end=%u\n", - NIPQUAD(iph->saddr), ntohs(tcph->source), - NIPQUAD(iph->daddr), ntohs(tcph->dest), - seq, ack, sack, win, end); - DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - - DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", - before(seq, sender->td_maxend + 1), - after(end, sender->td_end - receiver->td_maxwin - 1), - before(sack, receiver->td_end + 1), - after(ack, receiver->td_end - MAXACKWINDOW(sender))); + pr_debug("tcp_in_window: "); + NF_CT_DUMP_TUPLE(tuple); + pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n", + seq, ack, sack, win, end); + pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", + before(seq, sender->td_maxend + 1), + after(end, sender->td_end - receiver->td_maxwin - 1), + before(sack, receiver->td_end + 1), + after(ack, receiver->td_end - MAXACKWINDOW(sender))); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && @@ -694,10 +687,10 @@ static int tcp_in_window(struct ip_ct_tcp *state, : "SEQ is over the upper bound (over the window of the receiver)"); } - DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " - "receiver end=%u maxend=%u maxwin=%u\n", - res, sender->td_end, sender->td_maxend, sender->td_maxwin, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin); + pr_debug("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " + "receiver end=%u maxend=%u maxwin=%u\n", + res, sender->td_end, sender->td_maxend, sender->td_maxwin, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin); return res; } @@ -711,11 +704,9 @@ void nf_conntrack_tcp_update(struct sk_buff *skb, int dir) { struct tcphdr *tcph = (void *)skb->data + dataoff; - __u32 end; -#ifdef DEBUGP_VARS struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; -#endif + __u32 end; end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); @@ -727,12 +718,12 @@ void nf_conntrack_tcp_update(struct sk_buff *skb, conntrack->proto.tcp.seen[dir].td_end = end; conntrack->proto.tcp.last_end = end; write_unlock_bh(&tcp_lock); - DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); + pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); } EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); #endif @@ -823,6 +814,7 @@ static int tcp_packet(struct nf_conn *conntrack, int pf, unsigned int hooknum) { + struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; struct tcphdr *th, _tcph; @@ -837,6 +829,7 @@ static int tcp_packet(struct nf_conn *conntrack, dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; + tuple = &conntrack->tuplehash[dir].tuple; switch (new_state) { case TCP_CONNTRACK_IGNORE: @@ -880,9 +873,8 @@ static int tcp_packet(struct nf_conn *conntrack, return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ - DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", - dir, get_conntrack_index(th), - old_state); + pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", + dir, get_conntrack_index(th), old_state); write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, @@ -933,7 +925,7 @@ static int tcp_packet(struct nf_conn *conntrack, break; } - if (!tcp_in_window(&conntrack->proto.tcp, dir, index, + if (!tcp_in_window(conntrack, &conntrack->proto.tcp, dir, index, skb, dataoff, th, pf)) { write_unlock_bh(&tcp_lock); return -NF_ACCEPT; @@ -942,13 +934,12 @@ static int tcp_packet(struct nf_conn *conntrack, /* From now on we have got in-window packets */ conntrack->proto.tcp.last_index = index; - DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest), - (th->syn ? 1 : 0), (th->ack ? 1 : 0), - (th->fin ? 1 : 0), (th->rst ? 1 : 0), - old_state, new_state); + pr_debug("tcp_conntracks: "); + NF_CT_DUMP_TUPLE(tuple); + pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", + (th->syn ? 1 : 0), (th->ack ? 1 : 0), + (th->fin ? 1 : 0), (th->rst ? 1 : 0), + old_state, new_state); conntrack->proto.tcp.state = new_state; if (old_state != new_state @@ -997,10 +988,8 @@ static int tcp_new(struct nf_conn *conntrack, { enum tcp_conntrack new_state; struct tcphdr *th, _tcph; -#ifdef DEBUGP_VARS struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; -#endif th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); @@ -1012,7 +1001,7 @@ static int tcp_new(struct nf_conn *conntrack, /* Invalid: delete conntrack */ if (new_state >= TCP_CONNTRACK_MAX) { - DEBUGP("nf_ct_tcp: invalid new deleting.\n"); + pr_debug("nf_ct_tcp: invalid new deleting.\n"); return 0; } @@ -1065,12 +1054,12 @@ static int tcp_new(struct nf_conn *conntrack, conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; conntrack->proto.tcp.last_index = TCP_NONE_SET; - DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); + pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); return 1; } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 627eda79d154..355d371bac93 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -40,12 +40,6 @@ static u_int16_t ports[MAX_PORTS]; static unsigned int ports_c; module_param_array(ports, ushort, &ports_c, 0400); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - struct sane_request { __be32 RPC_code; #define SANE_NET_START 7 /* RPC code */ @@ -125,15 +119,15 @@ static int help(struct sk_buff **pskb, ct_sane_info->state = SANE_STATE_NORMAL; if (datalen < sizeof(struct sane_reply_net_start)) { - DEBUGP("nf_ct_sane: NET_START reply too short\n"); + pr_debug("nf_ct_sane: NET_START reply too short\n"); goto out; } reply = (struct sane_reply_net_start *)sb_ptr; if (reply->status != htonl(SANE_STATUS_SUCCESS)) { /* saned refused the command */ - DEBUGP("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", - ntohl(reply->status)); + pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", + ntohl(reply->status)); goto out; } @@ -151,9 +145,8 @@ static int help(struct sk_buff **pskb, nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3, IPPROTO_TCP, NULL, &reply->port); - DEBUGP("nf_ct_sane: expect: "); + pr_debug("nf_ct_sane: expect: "); NF_CT_DUMP_TUPLE(&exp->tuple); - NF_CT_DUMP_TUPLE(&exp->mask); /* Can't expect this? Best to drop packet now. */ if (nf_ct_expect_related(exp) != 0) @@ -176,9 +169,9 @@ static void nf_conntrack_sane_fini(void) for (i = 0; i < ports_c; i++) { for (j = 0; j < 2; j++) { - DEBUGP("nf_ct_sane: unregistering helper for pf: %d " - "port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); + pr_debug("nf_ct_sane: unregistering helper for pf: %d " + "port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); nf_conntrack_helper_unregister(&sane[i][j]); } } @@ -217,9 +210,9 @@ static int __init nf_conntrack_sane_init(void) sprintf(tmpname, "sane-%d", ports[i]); sane[i][j].name = tmpname; - DEBUGP("nf_ct_sane: registering helper for pf: %d " - "port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); + pr_debug("nf_ct_sane: registering helper for pf: %d " + "port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&sane[i][j]); if (ret) { printk(KERN_ERR "nf_ct_sane: failed to " diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 5b78f0e1f63b..1276a442f10c 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -21,12 +21,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hentschel "); MODULE_DESCRIPTION("SIP connection tracking helper"); @@ -285,7 +279,7 @@ static int epaddr_len(struct nf_conn *ct, const char *dptr, const char *aux = dptr; if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { - DEBUGP("ip: %s parse failed.!\n", dptr); + pr_debug("ip: %s parse failed.!\n", dptr); return 0; } @@ -344,8 +338,8 @@ int ct_sip_get_info(struct nf_conn *ct, ct_sip_lnlen(dptr, limit), hnfo->case_sensitive); if (!aux) { - DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str, - hnfo->lname); + pr_debug("'%s' not found in '%s'.\n", hnfo->ln_str, + hnfo->lname); return -1; } aux += hnfo->ln_strlen; @@ -356,11 +350,11 @@ int ct_sip_get_info(struct nf_conn *ct, *matchoff = (aux - k) + shift; - DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname, - *matchlen); + pr_debug("%s match succeeded! - len: %u\n", hnfo->lname, + *matchlen); return 1; } - DEBUGP("%s header not found.\n", hnfo->lname); + pr_debug("%s header not found.\n", hnfo->lname); return 0; } EXPORT_SYMBOL_GPL(ct_sip_get_info); @@ -424,7 +418,7 @@ static int sip_help(struct sk_buff **pskb, if (!skb_is_nonlinear(*pskb)) dptr = (*pskb)->data + dataoff; else { - DEBUGP("Copy of skbuff not supported yet.\n"); + pr_debug("Copy of skbuff not supported yet.\n"); goto out; } @@ -518,7 +512,7 @@ static int __init nf_conntrack_sip_init(void) sprintf(tmpname, "sip-%u", i); sip[i][j].name = tmpname; - DEBUGP("port #%u: %u\n", i, ports[i]); + pr_debug("port #%u: %u\n", i, ports[i]); ret = nf_conntrack_helper_register(&sip[i][j]); if (ret) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6af96c6e29fb..54498bcfa862 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -25,12 +25,6 @@ #include #include -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - MODULE_LICENSE("GPL"); #ifdef CONFIG_PROC_FS diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index db0387cf9bac..cc19506cf2f8 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -29,13 +29,6 @@ static int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -#if 0 -#define DEBUGP(format, args...) printk("%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) __read_mostly; @@ -62,7 +55,6 @@ static int tftp_help(struct sk_buff **pskb, case TFTP_OPCODE_READ: case TFTP_OPCODE_WRITE: /* RRQ and WRQ works the same way */ - DEBUGP(""); NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -73,9 +65,8 @@ static int tftp_help(struct sk_buff **pskb, nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3, IPPROTO_UDP, NULL, &tuple->dst.u.udp.port); - DEBUGP("expect: "); + pr_debug("expect: "); NF_CT_DUMP_TUPLE(&exp->tuple); - NF_CT_DUMP_TUPLE(&exp->mask); nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) @@ -86,13 +77,13 @@ static int tftp_help(struct sk_buff **pskb, break; case TFTP_OPCODE_DATA: case TFTP_OPCODE_ACK: - DEBUGP("Data/ACK opcode\n"); + pr_debug("Data/ACK opcode\n"); break; case TFTP_OPCODE_ERROR: - DEBUGP("Error opcode\n"); + pr_debug("Error opcode\n"); break; default: - DEBUGP("Unknown opcode\n"); + pr_debug("Unknown opcode\n"); } return ret; } -- cgit v1.2.3 From ce7663d84a87bb4e1743f62950bf7dceed723a13 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:40:08 -0700 Subject: [NETFILTER]: nfnetlink_queue: don't unregister handler of other subsystem The queue handlers registered by ip[6]_queue.ko at initialization should not be unregistered according to requests from userland program using nfnetlink_queue. If we allow that, there is no way to register the handlers of built-in ip[6]_queue again. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 3 ++- net/netfilter/nf_queue.c | 7 ++++++- net/netfilter/nfnetlink_queue.c | 4 +--- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 10b5c6275706..0eed0b7ab2df 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -275,7 +275,8 @@ struct nf_queue_handler { }; extern int nf_register_queue_handler(int pf, struct nf_queue_handler *qh); -extern int nf_unregister_queue_handler(int pf); +extern int nf_unregister_queue_handler(int pf, + struct nf_queue_handler *qh); extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b1f2ace96f6d..f402894d7e72 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -44,12 +44,17 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -int nf_unregister_queue_handler(int pf) +int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh) { if (pf >= NPROTO) return -EINVAL; write_lock_bh(&queue_handler_lock); + if (queue_handler[pf] != qh) { + write_unlock_bh(&queue_handler_lock); + return -EINVAL; + } + queue_handler[pf] = NULL; write_unlock_bh(&queue_handler_lock); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7a97bec67729..7d47fc4b19c6 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -913,9 +913,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, case NFQNL_CFG_CMD_PF_UNBIND: QDEBUG("unregistering queue handler for pf=%u\n", ntohs(cmd->pf)); - /* This is a bug and a feature. We can unregister - * other handlers(!) */ - ret = nf_unregister_queue_handler(ntohs(cmd->pf)); + ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh); break; default: ret = -EINVAL; -- cgit v1.2.3 From 585426fdc5b4cccaacf0afc8cf821ff763750ae8 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:40:26 -0700 Subject: [NETFILTER]: nf_queue: Use RCU and mutex for queue handlers Queue handlers are registered/unregistered in only process context. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_queue.c | 52 +++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index f402894d7e72..823fbf404566 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -17,7 +17,7 @@ */ static struct nf_queue_handler *queue_handler[NPROTO]; -static DEFINE_RWLOCK(queue_handler_lock); +static DEFINE_MUTEX(queue_handler_mutex); /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ @@ -28,16 +28,16 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) if (pf >= NPROTO) return -EINVAL; - write_lock_bh(&queue_handler_lock); + mutex_lock(&queue_handler_mutex); if (queue_handler[pf] == qh) ret = -EEXIST; else if (queue_handler[pf]) ret = -EBUSY; else { - queue_handler[pf] = qh; + rcu_assign_pointer(queue_handler[pf], qh); ret = 0; } - write_unlock_bh(&queue_handler_lock); + mutex_unlock(&queue_handler_mutex); return ret; } @@ -49,14 +49,16 @@ int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh) if (pf >= NPROTO) return -EINVAL; - write_lock_bh(&queue_handler_lock); + mutex_lock(&queue_handler_mutex); if (queue_handler[pf] != qh) { - write_unlock_bh(&queue_handler_lock); + mutex_unlock(&queue_handler_mutex); return -EINVAL; } - queue_handler[pf] = NULL; - write_unlock_bh(&queue_handler_lock); + rcu_assign_pointer(queue_handler[pf], NULL); + mutex_unlock(&queue_handler_mutex); + + synchronize_rcu(); return 0; } @@ -66,12 +68,14 @@ void nf_unregister_queue_handlers(struct nf_queue_handler *qh) { int pf; - write_lock_bh(&queue_handler_lock); + mutex_lock(&queue_handler_mutex); for (pf = 0; pf < NPROTO; pf++) { if (queue_handler[pf] == qh) - queue_handler[pf] = NULL; + rcu_assign_pointer(queue_handler[pf], NULL); } - write_unlock_bh(&queue_handler_lock); + mutex_unlock(&queue_handler_mutex); + + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); @@ -94,18 +98,21 @@ static int __nf_queue(struct sk_buff *skb, struct net_device *physoutdev = NULL; #endif struct nf_afinfo *afinfo; + struct nf_queue_handler *qh; /* QUEUE == DROP if noone is waiting, to be safe. */ - read_lock(&queue_handler_lock); - if (!queue_handler[pf]) { - read_unlock(&queue_handler_lock); + rcu_read_lock(); + + qh = rcu_dereference(queue_handler[pf]); + if (!qh) { + rcu_read_unlock(); kfree_skb(skb); return 1; } afinfo = nf_get_afinfo(pf); if (!afinfo) { - read_unlock(&queue_handler_lock); + rcu_read_unlock(); kfree_skb(skb); return 1; } @@ -115,7 +122,7 @@ static int __nf_queue(struct sk_buff *skb, if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", skb); - read_unlock(&queue_handler_lock); + rcu_read_unlock(); kfree_skb(skb); return 1; } @@ -125,7 +132,7 @@ static int __nf_queue(struct sk_buff *skb, /* If it's going away, ignore hook. */ if (!try_module_get(info->elem->owner)) { - read_unlock(&queue_handler_lock); + rcu_read_unlock(); kfree(info); return 0; } @@ -143,10 +150,9 @@ static int __nf_queue(struct sk_buff *skb, } #endif afinfo->saveroute(skb, info); - status = queue_handler[pf]->outfn(skb, info, queuenum, - queue_handler[pf]->data); + status = qh->outfn(skb, info, queuenum, qh->data); - read_unlock(&queue_handler_lock); + rcu_read_unlock(); if (status < 0) { /* James M doesn't say fuck enough. */ @@ -313,13 +319,13 @@ static int seq_show(struct seq_file *s, void *v) loff_t *pos = v; struct nf_queue_handler *qh; - read_lock_bh(&queue_handler_lock); - qh = queue_handler[*pos]; + rcu_read_lock(); + qh = rcu_dereference(queue_handler[*pos]); if (!qh) ret = seq_printf(s, "%2lld NONE\n", *pos); else ret = seq_printf(s, "%2lld %s\n", *pos, qh->name); - read_unlock_bh(&queue_handler_lock); + rcu_read_unlock(); return ret; } -- cgit v1.2.3 From 5faf41535214b68c989a22302c8870f8758cbb8c Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sat, 7 Jul 2007 22:41:01 -0700 Subject: [NETFILTER]: x_tables: add more detail to error message about match/target mask mismatch Signed-off-by: Balazs Scheidler Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/x_tables.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0eb2504b89b5..520eddf4d61b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -320,8 +320,8 @@ int xt_check_match(const struct xt_match *match, unsigned short family, return -EINVAL; } if (match->hooks && (hook_mask & ~match->hooks) != 0) { - printk("%s_tables: %s match: bad hook_mask %u\n", - xt_prefix[family], match->name, hook_mask); + printk("%s_tables: %s match: bad hook_mask %u/%u\n", + xt_prefix[family], match->name, hook_mask, match->hooks); return -EINVAL; } if (match->proto && (match->proto != proto || inv_proto)) { @@ -410,8 +410,9 @@ int xt_check_target(const struct xt_target *target, unsigned short family, return -EINVAL; } if (target->hooks && (hook_mask & ~target->hooks) != 0) { - printk("%s_tables: %s target: bad hook_mask %u\n", - xt_prefix[family], target->name, hook_mask); + printk("%s_tables: %s target: bad hook_mask %u/%u\n", + xt_prefix[family], target->name, hook_mask, + target->hooks); return -EINVAL; } if (target->proto && (target->proto != proto || inv_proto)) { -- cgit v1.2.3 From 9af97186fcc9a1d9bbf195eb4bc2399d0dd66223 Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Mon, 9 Jul 2007 13:12:24 -0700 Subject: [ATM] br2684: Use seq_list_xxx helpers The .show callback receives the list_head pointer now, not the struct br2684_dev one. Signed-off-by: Pavel Emelianov Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/atm/br2684.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 0e9f00c5c899..3e2643893178 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -699,28 +699,13 @@ static struct atm_ioctl br2684_ioctl_ops = { #ifdef CONFIG_PROC_FS static void *br2684_seq_start(struct seq_file *seq, loff_t *pos) { - loff_t offs = 0; - struct br2684_dev *brd; - read_lock(&devs_lock); - - list_for_each_entry(brd, &br2684_devs, br2684_devs) { - if (offs == *pos) - return brd; - ++offs; - } - return NULL; + return seq_list_start(&br2684_devs, *pos); } static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct br2684_dev *brd = v; - - ++*pos; - - brd = list_entry(brd->br2684_devs.next, - struct br2684_dev, br2684_devs); - return (&brd->br2684_devs != &br2684_devs) ? brd : NULL; + return seq_list_next(v, &br2684_devs, pos); } static void br2684_seq_stop(struct seq_file *seq, void *v) @@ -730,7 +715,8 @@ static void br2684_seq_stop(struct seq_file *seq, void *v) static int br2684_seq_show(struct seq_file *seq, void *v) { - const struct br2684_dev *brdev = v; + const struct br2684_dev *brdev = list_entry(v, struct br2684_dev, + br2684_devs); const struct net_device *net_dev = brdev->net_dev; const struct br2684_vcc *brvcc; -- cgit v1.2.3 From 60f0438a87cfd9f5faa439ca419497cd64e4c59e Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Mon, 9 Jul 2007 13:15:14 -0700 Subject: [NET]: Make some network-related proc files use seq_list_xxx helpers This includes /proc/net/protocols, /proc/net/rxrpc_calls and /proc/net/rxrpc_connections files. All three need seq_list_start_head to show some header. Signed-off-by: Pavel Emelianov Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/sock.c | 39 ++++----------------------------------- net/rxrpc/ar-proc.c | 48 ++++++------------------------------------------ 2 files changed, 10 insertions(+), 77 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index c14ce0198d25..252d21a1bb04 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1851,46 +1851,15 @@ void proto_unregister(struct proto *prot) EXPORT_SYMBOL(proto_unregister); #ifdef CONFIG_PROC_FS -static inline struct proto *__proto_head(void) -{ - return list_entry(proto_list.next, struct proto, node); -} - -static inline struct proto *proto_head(void) -{ - return list_empty(&proto_list) ? NULL : __proto_head(); -} - -static inline struct proto *proto_next(struct proto *proto) -{ - return proto->node.next == &proto_list ? NULL : - list_entry(proto->node.next, struct proto, node); -} - -static inline struct proto *proto_get_idx(loff_t pos) -{ - struct proto *proto; - loff_t i = 0; - - list_for_each_entry(proto, &proto_list, node) - if (i++ == pos) - goto out; - - proto = NULL; -out: - return proto; -} - static void *proto_seq_start(struct seq_file *seq, loff_t *pos) { read_lock(&proto_list_lock); - return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN; + return seq_list_start_head(&proto_list, *pos); } static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - ++*pos; - return v == SEQ_START_TOKEN ? proto_head() : proto_next(v); + return seq_list_next(v, &proto_list, pos); } static void proto_seq_stop(struct seq_file *seq, void *v) @@ -1938,7 +1907,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) static int proto_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) + if (v == &proto_list) seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", "protocol", "size", @@ -1950,7 +1919,7 @@ static int proto_seq_show(struct seq_file *seq, void *v) "module", "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); else - proto_seq_printf(seq, v); + proto_seq_printf(seq, list_entry(v, struct proto, node)); return 0; } diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c index 1c0be0e77b16..77cc5fb17dcd 100644 --- a/net/rxrpc/ar-proc.c +++ b/net/rxrpc/ar-proc.c @@ -30,31 +30,13 @@ static const char *rxrpc_conn_states[] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) { - struct list_head *_p; - loff_t pos = *_pos; - read_lock(&rxrpc_call_lock); - if (!pos) - return SEQ_START_TOKEN; - pos--; - - list_for_each(_p, &rxrpc_calls) - if (!pos--) - break; - - return _p != &rxrpc_calls ? _p : NULL; + return seq_list_start_head(&rxrpc_calls, *_pos); } static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *_p; - - (*pos)++; - - _p = v; - _p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next; - - return _p != &rxrpc_calls ? _p : NULL; + return seq_list_next(v, &rxrpc_calls, pos); } static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) @@ -68,7 +50,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_call *call; char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; - if (v == SEQ_START_TOKEN) { + if (v == &rxrpc_calls) { seq_puts(seq, "Proto Local Remote " " SvID ConnID CallID End Use State Abort " @@ -129,32 +111,14 @@ struct file_operations rxrpc_call_seq_fops = { */ static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) { - struct list_head *_p; - loff_t pos = *_pos; - read_lock(&rxrpc_connection_lock); - if (!pos) - return SEQ_START_TOKEN; - pos--; - - list_for_each(_p, &rxrpc_connections) - if (!pos--) - break; - - return _p != &rxrpc_connections ? _p : NULL; + return seq_list_start_head(&rxrpc_connections, *_pos); } static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *_p; - - (*pos)++; - - _p = v; - _p = (v == SEQ_START_TOKEN) ? rxrpc_connections.next : _p->next; - - return _p != &rxrpc_connections ? _p : NULL; + return seq_list_next(v, &rxrpc_connections, pos); } static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) @@ -168,7 +132,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) struct rxrpc_transport *trans; char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; - if (v == SEQ_START_TOKEN) { + if (v == &rxrpc_connections) { seq_puts(seq, "Proto Local Remote " " SvID ConnID Calls End Use State Key " -- cgit v1.2.3 From 6f11df8355e8f59f7572bf6ac1f63d692483b0c6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 9 Jul 2007 13:16:00 -0700 Subject: [NET]: "wrong timeout value in sk_wait_data()": cleanups - save 4 bytes - it's read-mostly. Signed-off-by: Andrew Morton Acked-by: Vasily Averin Signed-off-by: David S. Miller --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 252d21a1bb04..091032a250c7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -210,7 +210,8 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) return -EDOM; if (tv.tv_sec < 0) { - static int warned = 0; + static int warned __read_mostly; + *timeo_p = 0; if (warned < 10 && net_ratelimit()) warned++; -- cgit v1.2.3 From 4fda25a2cd7a18e0ef9f29ba3dd6f6cd9b7ca43f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 9 Jul 2007 13:18:57 -0700 Subject: [DCCP]: Make struct dccp_li_cachep static. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/dccp/ccids/lib/loss_interval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 01c1edb3e349..515225f3a464 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -27,7 +27,7 @@ struct dccp_li_hist_entry { u32 dccplih_interval; }; -struct kmem_cache *dccp_li_cachep __read_mostly; +static struct kmem_cache *dccp_li_cachep __read_mostly; static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) { -- cgit v1.2.3 From 0236e667e188af0336cd776e5b54c1f3fd19a03c Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Mon, 9 Jul 2007 13:20:12 -0700 Subject: [NETFILTER] net/ipv4/netfilter/ip_tables.c: lower printk severity Signed-off-by: Dan Aloni Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2ba5bd9c8c1b..e1b402c6b855 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2331,7 +2331,7 @@ static int __init ip_tables_init(void) if (ret < 0) goto err5; - printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n"); + printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; err5: -- cgit v1.2.3 From 5f1de3ec661e7b08348f565b7ca17586e7e94fc5 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Mon, 9 Jul 2007 13:20:54 -0700 Subject: [RXRPC]: Remove Makefile reference to obsolete RXRPC config variable Since there is no Kconfig variable RXRPC anywhere in the tree, and the variable AF_RXRPC performs exactly the same function, remove the reference to CONFIG_RXRPC from net/Makefile. Signed-off-by: Robert P. J. Day Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/Makefile b/net/Makefile index 34e5b2d7f877..a87a88963432 100644 --- a/net/Makefile +++ b/net/Makefile @@ -37,7 +37,6 @@ obj-$(CONFIG_AX25) += ax25/ obj-$(CONFIG_IRDA) += irda/ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ -obj-$(CONFIG_RXRPC) += rxrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ -- cgit v1.2.3 From 1498b3f1952ae539a7d5c356acf942d5f4c1aece Mon Sep 17 00:00:00 2001 From: Satyam Sharma Date: Mon, 9 Jul 2007 15:22:23 -0700 Subject: [NETPOLL]: Fix a leak-n-bug in netpoll_cleanup() 93ec2c723e3f8a216dde2899aeb85c648672bc6b applied excessive duct tape to the netpoll beast's netpoll_cleanup(), thus substituting one leak with another, and opening up a little buglet :-) net_device->npinfo (netpoll_info) is a shared and refcounted object and cannot simply be set NULL the first time netpoll_cleanup() is called. Otherwise, further netpoll_cleanup()'s see np->dev->npinfo == NULL and become no-ops, thus leaking. And it's a bug too: the first call to netpoll_cleanup() would thus (annoyingly) "disable" other (still alive) netpolls too. Maybe nobody noticed this because netconsole (only user of netpoll) never supported multiple netpoll objects earlier. This is a trivial and obvious one-line fixlet. Signed-off-by: Satyam Sharma Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4b06d1936375..de1b26aa5720 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -783,7 +783,6 @@ void netpoll_cleanup(struct netpoll *np) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } - np->dev->npinfo = NULL; if (atomic_dec_and_test(&npinfo->refcnt)) { skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); @@ -796,6 +795,7 @@ void netpoll_cleanup(struct netpoll *np) kfree_skb(skb); } kfree(npinfo); + np->dev->npinfo = NULL; } } -- cgit v1.2.3 From 6b25d30bf112370a12d05c3c0fd43732985dab01 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 9 Jul 2007 15:30:19 -0700 Subject: [NET]: Fix gen_estimator timer removal race As noticed by Jarek Poplawski , the timer removal in gen_kill_estimator races with the timer function rearming the timer. Check whether the timer list is empty before rearming the timer in the timer function to fix this. Signed-off-by: Patrick McHardy Acked-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 17daf4c9f793..cc84d8d8a3c7 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -128,7 +128,8 @@ static void est_timer(unsigned long arg) spin_unlock(e->stats_lock); } - mod_timer(&elist[idx].timer, jiffies + ((HZ< Date: Mon, 9 Jul 2007 15:32:57 -0700 Subject: [IPV4]: Make ip_tos2prio const. Signed-off-by: Philippe De Muyter Signed-off-by: David S. Miller --- include/net/route.h | 2 +- net/ipv4/route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/route.h b/include/net/route.h index 188b8936bb27..f7ce6259f86f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -135,7 +135,7 @@ static inline void ip_rt_put(struct rtable * rt) #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) -extern __u8 ip_tos2prio[16]; +extern const __u8 ip_tos2prio[16]; static inline char rt_tos2priority(u8 tos) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 85285021518b..88fa648d7ba3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -167,7 +167,7 @@ static struct dst_ops ipv4_dst_ops = { #define ECN_OR_COST(class) TC_PRIO_##class -__u8 ip_tos2prio[16] = { +const __u8 ip_tos2prio[16] = { TC_PRIO_BESTEFFORT, ECN_OR_COST(FILLER), TC_PRIO_BESTEFFORT, -- cgit v1.2.3 From cfbba49d80be6cf8d3872b66fc5421f119843b36 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 9 Jul 2007 15:33:40 -0700 Subject: [NET]: Avoid copying writable clones in tunnel drivers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 3 ++- net/ipv4/ipip.c | 3 ++- net/ipv6/ip6_tunnel.c | 4 ++-- net/ipv6/sit.c | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 63282934725e..5c14ed63e56c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -809,7 +809,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; - if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ebd2f2d532f6..396437242a1b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -595,7 +595,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) */ max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); - if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a0902fbdb4e1..281aee42d3f0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -883,8 +883,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, */ max_headroom += LL_RESERVED_SPACE(tdev); - if (skb_headroom(skb) < max_headroom || - skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb; if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1efa95a99f45..eb20bb690abd 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -532,7 +532,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) */ max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); - if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); -- cgit v1.2.3 From 5b7f990927fe87ad3bec762a33c0e72bcbf6841e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 11 Jul 2007 09:51:55 +0200 Subject: [Bluetooth] Add basics to better support and handle eSCO links To better support and handle eSCO links in the future a bunch of constants needs to be added and some basic routines need to be updated. This is the initial step. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 18 ++++++++++++-- include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_conn.c | 51 ++++++++++++++++++++-------------------- net/bluetooth/hci_core.c | 3 ++- net/bluetooth/hci_event.c | 29 ++++++++++++++++------- 5 files changed, 66 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 93ce272a5d27..ebfb96b41106 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -107,14 +107,14 @@ enum { #define HCI_IDLE_TIMEOUT (6000) /* 6 seconds */ #define HCI_INIT_TIMEOUT (10000) /* 10 seconds */ -/* HCI Packet types */ +/* HCI data types */ #define HCI_COMMAND_PKT 0x01 #define HCI_ACLDATA_PKT 0x02 #define HCI_SCODATA_PKT 0x03 #define HCI_EVENT_PKT 0x04 #define HCI_VENDOR_PKT 0xff -/* HCI Packet types */ +/* HCI packet types */ #define HCI_DM1 0x0008 #define HCI_DM3 0x0400 #define HCI_DM5 0x4000 @@ -129,6 +129,14 @@ enum { #define SCO_PTYPE_MASK (HCI_HV1 | HCI_HV2 | HCI_HV3) #define ACL_PTYPE_MASK (~SCO_PTYPE_MASK) +/* eSCO packet types */ +#define ESCO_HV1 0x0001 +#define ESCO_HV2 0x0002 +#define ESCO_HV3 0x0004 +#define ESCO_EV3 0x0008 +#define ESCO_EV4 0x0010 +#define ESCO_EV5 0x0020 + /* ACL flags */ #define ACL_CONT 0x01 #define ACL_START 0x02 @@ -138,6 +146,7 @@ enum { /* Baseband links */ #define SCO_LINK 0x00 #define ACL_LINK 0x01 +#define ESCO_LINK 0x02 /* LMP features */ #define LMP_3SLOT 0x01 @@ -162,6 +171,11 @@ enum { #define LMP_PSCHEME 0x02 #define LMP_PCONTROL 0x04 +#define LMP_ESCO 0x80 + +#define LMP_EV4 0x01 +#define LMP_EV5 0x02 + #define LMP_SNIFF_SUBR 0x02 /* Connection modes */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7c78744ec0fd..8f67c8a7169b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -78,6 +78,7 @@ struct hci_dev { __u16 voice_setting; __u16 pkt_type; + __u16 esco_type; __u16 link_policy; __u16 link_mode; @@ -452,6 +453,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define lmp_encrypt_capable(dev) ((dev)->features[0] & LMP_ENCRYPT) #define lmp_sniff_capable(dev) ((dev)->features[0] & LMP_SNIFF) #define lmp_sniffsubr_capable(dev) ((dev)->features[5] & LMP_SNIFF_SUBR) +#define lmp_esco_capable(dev) ((dev)->features[3] & LMP_ESCO) /* ----- HCI protocols ----- */ struct hci_proto { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 63980bd6b5f2..5fdfc9a67d39 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -123,8 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) conn->state = BT_CONNECT; conn->out = 1; - cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); cp.handle = cpu_to_le16(handle); + cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp); } @@ -220,19 +220,19 @@ int hci_conn_del(struct hci_conn *conn) del_timer(&conn->disc_timer); - if (conn->type == SCO_LINK) { - struct hci_conn *acl = conn->link; - if (acl) { - acl->link = NULL; - hci_conn_put(acl); - } - } else { + if (conn->type == ACL_LINK) { struct hci_conn *sco = conn->link; if (sco) sco->link = NULL; /* Unacked frames */ hdev->acl_cnt += conn->sent; + } else { + struct hci_conn *acl = conn->link; + if (acl) { + acl->link = NULL; + hci_conn_put(acl); + } } tasklet_disable(&hdev->tx_task); @@ -297,9 +297,10 @@ EXPORT_SYMBOL(hci_get_route); /* Create SCO or ACL connection. * Device _must_ be locked */ -struct hci_conn * hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) { struct hci_conn *acl; + struct hci_conn *sco; BT_DBG("%s dst %s", hdev->name, batostr(dst)); @@ -313,28 +314,26 @@ struct hci_conn * hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) if (acl->state == BT_OPEN || acl->state == BT_CLOSED) hci_acl_connect(acl); - if (type == SCO_LINK) { - struct hci_conn *sco; + if (type == ACL_LINK) + return acl; - if (!(sco = hci_conn_hash_lookup_ba(hdev, SCO_LINK, dst))) { - if (!(sco = hci_conn_add(hdev, SCO_LINK, dst))) { - hci_conn_put(acl); - return NULL; - } + if (!(sco = hci_conn_hash_lookup_ba(hdev, type, dst))) { + if (!(sco = hci_conn_add(hdev, type, dst))) { + hci_conn_put(acl); + return NULL; } - acl->link = sco; - sco->link = acl; + } - hci_conn_hold(sco); + acl->link = sco; + sco->link = acl; - if (acl->state == BT_CONNECTED && - (sco->state == BT_OPEN || sco->state == BT_CLOSED)) - hci_add_sco(sco, acl->handle); + hci_conn_hold(sco); - return sco; - } else { - return acl; - } + if (acl->state == BT_CONNECTED && + (sco->state == BT_OPEN || sco->state == BT_CLOSED)) + hci_add_sco(sco, acl->handle); + + return sco; } EXPORT_SYMBOL(hci_connect); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9c71cffbc6b0..f6d867e0179f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -851,6 +851,7 @@ int hci_register_dev(struct hci_dev *hdev) hdev->flags = 0; hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); + hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); hdev->idle_timeout = 0; @@ -1254,7 +1255,7 @@ EXPORT_SYMBOL(hci_send_sco); static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; - struct hci_conn *conn = NULL; + struct hci_conn *conn = NULL; int num = 0, min = ~0; struct list_head *p; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 447ba7131220..4baea1e38652 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -350,11 +350,24 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s if (hdev->features[0] & LMP_5SLOT) hdev->pkt_type |= (HCI_DM5 | HCI_DH5); - if (hdev->features[1] & LMP_HV2) - hdev->pkt_type |= (HCI_HV2); + if (hdev->features[1] & LMP_HV2) { + hdev->pkt_type |= (HCI_HV2); + hdev->esco_type |= (ESCO_HV2); + } + + if (hdev->features[1] & LMP_HV3) { + hdev->pkt_type |= (HCI_HV3); + hdev->esco_type |= (ESCO_HV3); + } - if (hdev->features[1] & LMP_HV3) - hdev->pkt_type |= (HCI_HV3); + if (hdev->features[3] & LMP_ESCO) + hdev->esco_type |= (ESCO_EV3); + + if (hdev->features[4] & LMP_EV4) + hdev->esco_type |= (ESCO_EV4); + + if (hdev->features[4] & LMP_EV5) + hdev->esco_type |= (ESCO_EV5); BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, lf->features[0], lf->features[1], lf->features[2]); @@ -881,12 +894,12 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s if (conn) { conn->sent -= count; - if (conn->type == SCO_LINK) { - if ((hdev->sco_cnt += count) > hdev->sco_pkts) - hdev->sco_cnt = hdev->sco_pkts; - } else { + if (conn->type == ACL_LINK) { if ((hdev->acl_cnt += count) > hdev->acl_pkts) hdev->acl_cnt = hdev->acl_pkts; + } else { + if ((hdev->sco_cnt += count) > hdev->sco_pkts) + hdev->sco_cnt = hdev->sco_pkts; } } } -- cgit v1.2.3 From c9726d6890f7f3a892c879e067c3ed839f61e745 Mon Sep 17 00:00:00 2001 From: Ranjit Manomohan Date: Tue, 10 Jul 2007 22:43:16 -0700 Subject: [NET_SCHED]: Make HTB scheduler work with TSO. Currently the HTB scheduler does not correctly account for TSO packets which causes large inaccuracies in the bandwidth control when using TSO. This patch allows the HTB scheduler to work with TSO enabled devices. Signed-off-by: Ranjit Manomohan Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c031486b675f..b417a95df322 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -129,15 +129,12 @@ struct htb_class { /* of un.leaf originals should be done. */ }; -/* TODO: maybe compute rate when size is too large .. or drop ? */ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, int size) { int slot = size >> rate->rate.cell_log; - if (slot > 255) { - cl->xstats.giants++; - slot = 255; - } + if (slot > 255) + return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]); return rate->data[slot]; } @@ -606,13 +603,14 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->qstats.drops++; return NET_XMIT_DROP; } else { - cl->bstats.packets++; + cl->bstats.packets += + skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; cl->bstats.bytes += skb->len; htb_activate(q, cl); } sch->q.qlen++; - sch->bstats.packets++; + sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; sch->bstats.bytes += skb->len; return NET_XMIT_SUCCESS; } @@ -661,8 +659,9 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) * In such case we remove class from event queue first. */ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, - int level, int bytes) + int level, struct sk_buff *skb) { + int bytes = skb->len; long toks, diff; enum htb_cmode old_mode; @@ -698,7 +697,8 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, /* update byte stats except for leaves which are already updated */ if (cl->level) { cl->bstats.bytes += bytes; - cl->bstats.packets++; + cl->bstats.packets += skb_is_gso(skb)? + skb_shinfo(skb)->gso_segs:1; } cl = cl->parent; } @@ -882,7 +882,7 @@ next: gives us slightly better performance */ if (!cl->un.leaf.q->q.qlen) htb_deactivate(q, cl); - htb_charge_class(q, cl, level, skb->len); + htb_charge_class(q, cl, level, skb); } return skb; } -- cgit v1.2.3 From c382bb9d32a55029fb13b118858e25908fab4617 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 10 Jul 2007 22:47:58 -0700 Subject: [IPV6]: Restore semantics of Routing Header processing. The "fix" for emerging security threat was overkill and it broke basic semantic of IPv6 routing header processing. We should assume RT0 (or even RT2, depends on configuration) as "unknown" RH type so that we - silently ignore the routing header if segleft == 0 - send ICMPv6 Parameter Problem message back to the sender, otherwise. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 173a4bb52255..fc3a961fc5ba 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -372,22 +372,13 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) struct rt0_hdr *rthdr; int accept_source_route = ipv6_devconf.accept_source_route; - if (accept_source_route < 0 || - ((idev = in6_dev_get(skb->dev)) == NULL)) { - kfree_skb(skb); - return -1; - } - if (idev->cnf.accept_source_route < 0) { + idev = in6_dev_get(skb->dev); + if (idev) { + if (accept_source_route > idev->cnf.accept_source_route) + accept_source_route = idev->cnf.accept_source_route; in6_dev_put(idev); - kfree_skb(skb); - return -1; } - if (accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; - - in6_dev_put(idev); - if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { @@ -399,24 +390,6 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); - switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 - case IPV6_SRCRT_TYPE_2: - break; -#endif - case IPV6_SRCRT_TYPE_0: - if (accept_source_route > 0) - break; - kfree_skb(skb); - return -1; - default: - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - (&hdr->type) - skb_network_header(skb)); - return -1; - } - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), @@ -455,6 +428,8 @@ looped_back: switch (hdr->type) { case IPV6_SRCRT_TYPE_0: + if (accept_source_route <= 0) + goto unknown_rh; if (hdr->hdrlen & 0x01) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); @@ -466,6 +441,8 @@ looped_back: break; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: + if (accept_source_route < 0) + goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), @@ -475,6 +452,8 @@ looped_back: } break; #endif + default: + goto unknown_rh; } /* @@ -578,6 +557,12 @@ looped_back: skb_push(skb, skb->data - skb_network_header(skb)); dst_input(skb); return -1; + +unknown_rh: + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + (&hdr->type) - skb_network_header(skb)); + return -1; } static struct inet6_protocol rthdr_protocol = { -- cgit v1.2.3 From bb4dbf9e61d0801927e7df2569bb3dd8287ea301 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 10 Jul 2007 22:55:49 -0700 Subject: [IPV6]: Do not send RH0 anymore. Based on . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 3 +- include/linux/ipv6.h | 4 +- include/net/ipv6.h | 4 -- net/dccp/ipv6.c | 20 --------- net/ipv6/datagram.c | 3 +- net/ipv6/exthdrs.c | 78 ---------------------------------- net/ipv6/ipv6_sockglue.c | 3 +- net/ipv6/tcp_ipv6.c | 20 --------- 8 files changed, 5 insertions(+), 130 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index af6a63ab9026..09c184e41cf8 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -874,8 +874,7 @@ accept_redirects - BOOLEAN accept_source_route - INTEGER Accept source routing (routing extension header). - > 0: Accept routing header. - = 0: Accept only routing header type 2. + >= 0: Accept only routing header type 2. < 0: Do not accept routing header. Default: 0 diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 213b63be3c8f..cb3118cf277c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -27,8 +27,8 @@ struct in6_ifreq { int ifr6_ifindex; }; -#define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */ -#define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */ +#define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ +#define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ /* diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 78a0d06d98d5..46b9dce82f6e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -512,10 +512,6 @@ extern int ipv6_ext_hdr(u8 nexthdr); extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type); -extern struct ipv6_txoptions * ipv6_invert_rthdr(struct sock *sk, - struct ipv6_rt_hdr *hdr); - - /* * socket options (ipv6_sockglue.c) */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 31737cdf156a..b158c661867b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -253,17 +253,6 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, if (dst == NULL) { opt = np->opt; - if (opt == NULL && - np->rxopt.bits.osrcrt == 2 && - ireq6->pktopts) { - struct sk_buff *pktopts = ireq6->pktopts; - struct inet6_skb_parm *rxopt = IP6CB(pktopts); - - if (rxopt->srcrt) - opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(skb_network_header(pktopts) + - rxopt->srcrt)); - } if (opt != NULL && opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; @@ -570,15 +559,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (np->rxopt.bits.osrcrt == 2 && opt == NULL && ireq6->pktopts) { - const struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts); - - if (rxopt->srcrt) - opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) + - rxopt->srcrt)); - } - if (dst == NULL) { struct in6_addr *final_p = NULL, final; struct flowi fl; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ba1386dd4168..fe0f49024a0a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -657,11 +657,10 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); switch (rthdr->type) { - case IPV6_SRCRT_TYPE_0: #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: -#endif break; +#endif default: err = -EINVAL; goto exit_f; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index fc3a961fc5ba..c82d4d49f71f 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -427,18 +427,6 @@ looped_back: } switch (hdr->type) { - case IPV6_SRCRT_TYPE_0: - if (accept_source_route <= 0) - goto unknown_rh; - if (hdr->hdrlen & 0x01) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - ((&hdr->hdrlen) - - skb_network_header(skb))); - return -1; - } - break; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: if (accept_source_route < 0) @@ -576,72 +564,6 @@ void __init ipv6_rthdr_init(void) printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n"); }; -/* - This function inverts received rthdr. - NOTE: specs allow to make it automatically only if - packet authenticated. - - I will not discuss it here (though, I am really pissed off at - this stupid requirement making rthdr idea useless) - - Actually, it creates severe problems for us. - Embryonic requests has no associated sockets, - so that user have no control over it and - cannot not only to set reply options, but - even to know, that someone wants to connect - without success. :-( - - For now we need to test the engine, so that I created - temporary (or permanent) backdoor. - If listening socket set IPV6_RTHDR to 2, then we invert header. - --ANK (980729) - */ - -struct ipv6_txoptions * -ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr) -{ - /* Received rthdr: - - [ H1 -> H2 -> ... H_prev ] daddr=ME - - Inverted result: - [ H_prev -> ... -> H1 ] daddr =sender - - Note, that IP output engine will rewrite this rthdr - by rotating it left by one addr. - */ - - int n, i; - struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr; - struct rt0_hdr *irthdr; - struct ipv6_txoptions *opt; - int hdrlen = ipv6_optlen(hdr); - - if (hdr->segments_left || - hdr->type != IPV6_SRCRT_TYPE_0 || - hdr->hdrlen & 0x01) - return NULL; - - n = hdr->hdrlen >> 1; - opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC); - if (opt == NULL) - return NULL; - memset(opt, 0, sizeof(*opt)); - opt->tot_len = sizeof(*opt) + hdrlen; - opt->srcrt = (void*)(opt+1); - opt->opt_nflen = hdrlen; - - memcpy(opt->srcrt, hdr, sizeof(*hdr)); - irthdr = (struct rt0_hdr*)opt->srcrt; - irthdr->reserved = 0; - opt->srcrt->segments_left = n; - for (i=0; iaddr+i, rthdr->addr+(n-1-i), 16); - return opt; -} - -EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); - /********************************** Hop-by-hop options. **********************************/ diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1c3506696cb5..1841714ac419 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -416,11 +416,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { - case IPV6_SRCRT_TYPE_0: #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: -#endif break; +#endif default: goto sticky_done; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 193d9d60bb7a..d67fb1ef751e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -484,17 +484,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, if (dst == NULL) { opt = np->opt; - if (opt == NULL && - np->rxopt.bits.osrcrt == 2 && - treq->pktopts) { - struct sk_buff *pktopts = treq->pktopts; - struct inet6_skb_parm *rxopt = IP6CB(pktopts); - if (rxopt->srcrt) - opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(skb_network_header(pktopts) + - rxopt->srcrt)); - } - if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; ipv6_addr_copy(&final, &fl.fl6_dst); @@ -1391,15 +1380,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (np->rxopt.bits.osrcrt == 2 && - opt == NULL && treq->pktopts) { - struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts); - if (rxopt->srcrt) - opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) + - rxopt->srcrt)); - } - if (dst == NULL) { struct in6_addr *final_p = NULL, final; struct flowi fl; -- cgit v1.2.3 From 4c752098f529f41abfc985426a3eca0f2cb96676 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 May 2007 13:28:48 +0900 Subject: [IPV6]: Make IPV6_{RECV,2292}RTHDR boolean options. Because reversing RH0 is no longer supported by deprecation of RH0, let's make IPV6_{RECV,2292}RTHDR boolean options. Boolean are more appropriate from standard POV. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- net/ipv6/ipv6_sockglue.c | 8 ++------ 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index cb3118cf277c..97983dc9df13 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -299,8 +299,8 @@ struct ipv6_pinfo { /* pktoption flags */ union { struct { - __u16 srcrt:2, - osrcrt:2, + __u16 srcrt:1, + osrcrt:1, rxinfo:1, rxoinfo:1, rxhlim:1, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1841714ac419..d6846393182d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -336,16 +336,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; case IPV6_RECVRTHDR: - if (val < 0 || val > 2) - goto e_inval; - np->rxopt.bits.srcrt = val; + np->rxopt.bits.srcrt = valbool; retv = 0; break; case IPV6_2292RTHDR: - if (val < 0 || val > 2) - goto e_inval; - np->rxopt.bits.osrcrt = val; + np->rxopt.bits.osrcrt = valbool; retv = 0; break; -- cgit v1.2.3 From dffe4f048b420f1af0b10a6090add0c5ea69e585 Mon Sep 17 00:00:00 2001 From: Micah Gruber Date: Tue, 10 Jul 2007 23:04:19 -0700 Subject: [IPV6]: Remove unneeded pointer idev from addrconf_cleanup(). This trivial patch removes the unneeded pointer idev returned from __in6_dev_get(), which is never used. The check for NULL can be simply done by if (__in6_dev_get(dev) == NULL). Signed-off-by: Micah Gruber Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 11c0028863b0..95737ab040ec 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4243,7 +4243,6 @@ errout: void __exit addrconf_cleanup(void) { struct net_device *dev; - struct inet6_dev *idev; struct inet6_ifaddr *ifa; int i; @@ -4261,7 +4260,7 @@ void __exit addrconf_cleanup(void) */ for_each_netdev(dev) { - if ((idev = __in6_dev_get(dev)) == NULL) + if (__in6_dev_get(dev) == NULL) continue; addrconf_ifdown(dev, 1); } -- cgit v1.2.3 From 3be550f34b03e5eb762f74d447ebbeba97efbd6d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 10 Jul 2007 23:06:43 -0700 Subject: [UDP]: Fix length check. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rémi Denis-Courmont wrote: > Right. By the way, shouldn't "len" rather be signed in there? > > unsigned int len; > > /* if we're overly short, let UDP handle it */ > len = skb->len - sizeof(struct udphdr); > if (len <= 0) > goto udp; It should, but the < 0 case can't happen since __udp4_lib_rcv already makes sure that we have at least a complete UDP header. Anyways, this patch fixes it. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/udp.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4ec4a25a8d0c..28355350fb62 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,14 +951,10 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) * >0 if skb should be passed on to UDP. * <0 if skb should be resubmitted as proto -N */ - unsigned int len; /* if we're overly short, let UDP handle it */ - len = skb->len - sizeof(struct udphdr); - if (len <= 0) - goto udp; - - if (up->encap_rcv != NULL) { + if (skb->len > sizeof(struct udphdr) && + up->encap_rcv != NULL) { int ret; ret = (*up->encap_rcv)(sk, skb); @@ -971,7 +967,6 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) /* FALLTHROUGH -- it's a UDP Packet */ } -udp: /* * UDP-Lite specific tests, ignored on UDP sockets */ -- cgit v1.2.3 From 56b3d975bbce65f655c5612b4822da671f9fd9b2 Mon Sep 17 00:00:00 2001 From: Philippe De Muyter Date: Tue, 10 Jul 2007 23:07:31 -0700 Subject: [NET]: Make all initialized struct seq_operations const. Make all initialized struct seq_operations in net/ const Signed-off-by: Philippe De Muyter Signed-off-by: David S. Miller --- net/802/tr.c | 2 +- net/8021q/vlanproc.c | 2 +- net/appletalk/aarp.c | 2 +- net/appletalk/atalk_proc.c | 6 +++--- net/atm/br2684.c | 2 +- net/atm/clip.c | 2 +- net/atm/lec.c | 2 +- net/atm/mpoa_proc.c | 2 +- net/atm/proc.c | 8 ++++---- net/ax25/af_ax25.c | 2 +- net/ax25/ax25_route.c | 2 +- net/ax25/ax25_uid.c | 2 +- net/decnet/af_decnet.c | 2 +- net/decnet/dn_dev.c | 2 +- net/decnet/dn_neigh.c | 2 +- net/decnet/dn_route.c | 2 +- net/ipv4/ipvs/ip_vs_app.c | 2 +- net/ipv4/ipvs/ip_vs_conn.c | 2 +- net/ipv4/ipvs/ip_vs_ctl.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_recent.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 6 +++--- net/ipv6/addrconf.c | 2 +- net/ipv6/anycast.c | 2 +- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/mcast.c | 4 ++-- net/ipv6/raw.c | 2 +- net/ipx/ipx_proc.c | 6 +++--- net/irda/discovery.c | 2 +- net/irda/ircomm/ircomm_core.c | 2 +- net/irda/iriap.c | 2 +- net/irda/irlan/irlan_common.c | 2 +- net/irda/irlap.c | 2 +- net/irda/irlmp.c | 2 +- net/irda/irttp.c | 2 +- net/llc/llc_proc.c | 4 ++-- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 4 ++-- net/netfilter/nf_log.c | 2 +- net/netfilter/nf_queue.c | 2 +- net/netfilter/nfnetlink_log.c | 2 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/x_tables.c | 2 +- net/netfilter/xt_hashlimit.c | 2 +- net/netlink/af_netlink.c | 2 +- net/netrom/af_netrom.c | 2 +- net/netrom/nr_route.c | 4 ++-- net/packet/af_packet.c | 2 +- net/rose/af_rose.c | 2 +- net/rose/rose_route.c | 6 +++--- net/rxrpc/ar-proc.c | 4 ++-- net/sctp/proc.c | 4 ++-- net/sunrpc/cache.c | 2 +- net/unix/af_unix.c | 2 +- net/wanrouter/wanproc.c | 4 ++-- net/x25/x25_proc.c | 6 +++--- 56 files changed, 76 insertions(+), 76 deletions(-) (limited to 'net') diff --git a/net/802/tr.c b/net/802/tr.c index 0ba1946211c9..e56e61a7f545 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -567,7 +567,7 @@ static int rif_seq_show(struct seq_file *seq, void *v) } -static struct seq_operations rif_seq_ops = { +static const struct seq_operations rif_seq_ops = { .start = rif_seq_start, .next = rif_seq_next, .stop = rif_seq_stop, diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 8693b21b3caa..c0040c9064a1 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -69,7 +69,7 @@ static const char name_conf[] = "config"; * Generic /proc/net/vlan/ file and inode operations */ -static struct seq_operations vlan_seq_ops = { +static const struct seq_operations vlan_seq_ops = { .start = vlan_seq_start, .next = vlan_seq_next, .stop = vlan_seq_stop, diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 5ef6a238bdbc..3d1655f98388 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -1024,7 +1024,7 @@ static int aarp_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations aarp_seq_ops = { +static const struct seq_operations aarp_seq_ops = { .start = aarp_seq_start, .next = aarp_seq_next, .stop = aarp_seq_stop, diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 57ff8122b5c5..87a582cc8111 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -204,21 +204,21 @@ out: return 0; } -static struct seq_operations atalk_seq_interface_ops = { +static const struct seq_operations atalk_seq_interface_ops = { .start = atalk_seq_interface_start, .next = atalk_seq_interface_next, .stop = atalk_seq_interface_stop, .show = atalk_seq_interface_show, }; -static struct seq_operations atalk_seq_route_ops = { +static const struct seq_operations atalk_seq_route_ops = { .start = atalk_seq_route_start, .next = atalk_seq_route_next, .stop = atalk_seq_route_stop, .show = atalk_seq_route_show, }; -static struct seq_operations atalk_seq_socket_ops = { +static const struct seq_operations atalk_seq_socket_ops = { .start = atalk_seq_socket_start, .next = atalk_seq_socket_next, .stop = atalk_seq_socket_stop, diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 3e2643893178..faa6aaf67563 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -758,7 +758,7 @@ static int br2684_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations br2684_seq_ops = { +static const struct seq_operations br2684_seq_ops = { .start = br2684_seq_start, .next = br2684_seq_next, .stop = br2684_seq_stop, diff --git a/net/atm/clip.c b/net/atm/clip.c index 876b77f14745..ecf0f79b94ae 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -928,7 +928,7 @@ static int clip_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations arp_seq_ops = { +static const struct seq_operations arp_seq_ops = { .start = clip_seq_start, .next = neigh_seq_next, .stop = neigh_seq_stop, diff --git a/net/atm/lec.c b/net/atm/lec.c index 4dc5f2b8c43c..2770fb451ae8 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1174,7 +1174,7 @@ static int lec_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations lec_seq_ops = { +static const struct seq_operations lec_seq_ops = { .start = lec_seq_start, .next = lec_seq_next, .stop = lec_seq_stop, diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 4b05cbec7a58..91f3ffc90dbd 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -177,7 +177,7 @@ static int mpc_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations mpc_op = { +static const struct seq_operations mpc_op = { .start = mpc_start, .next = mpc_next, .stop = mpc_stop, diff --git a/net/atm/proc.c b/net/atm/proc.c index 9e61e512f667..88154da62cd3 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -260,7 +260,7 @@ static int atm_dev_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations atm_dev_seq_ops = { +static const struct seq_operations atm_dev_seq_ops = { .start = atm_dev_seq_start, .next = atm_dev_seq_next, .stop = atm_dev_seq_stop, @@ -295,7 +295,7 @@ static int pvc_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations pvc_seq_ops = { +static const struct seq_operations pvc_seq_ops = { .start = vcc_seq_start, .next = vcc_seq_next, .stop = vcc_seq_stop, @@ -329,7 +329,7 @@ static int vcc_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations vcc_seq_ops = { +static const struct seq_operations vcc_seq_ops = { .start = vcc_seq_start, .next = vcc_seq_next, .stop = vcc_seq_stop, @@ -364,7 +364,7 @@ static int svc_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations svc_seq_ops = { +static const struct seq_operations svc_seq_ops = { .start = vcc_seq_start, .next = vcc_seq_next, .stop = vcc_seq_stop, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 429e13a6c6ad..c83cf8432970 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1924,7 +1924,7 @@ static int ax25_info_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ax25_info_seqops = { +static const struct seq_operations ax25_info_seqops = { .start = ax25_info_start, .next = ax25_info_next, .stop = ax25_info_stop, diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index d65b8e22868d..9ecf6f1df863 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -320,7 +320,7 @@ static int ax25_rt_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ax25_rt_seqops = { +static const struct seq_operations ax25_rt_seqops = { .start = ax25_rt_seq_start, .next = ax25_rt_seq_next, .stop = ax25_rt_seq_stop, diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 75c76647b2cb..ce0b13d44385 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -185,7 +185,7 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ax25_uid_seqops = { +static const struct seq_operations ax25_uid_seqops = { .start = ax25_uid_seq_start, .next = ax25_uid_seq_next, .stop = ax25_uid_seq_stop, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index bfa910b6ad25..ed76d4aab4a9 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2304,7 +2304,7 @@ static int dn_socket_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations dn_socket_seq_ops = { +static const struct seq_operations dn_socket_seq_ops = { .start = dn_socket_seq_start, .next = dn_socket_seq_next, .stop = dn_socket_seq_stop, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index e31549e9d07a..fa6604fcf0e7 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1416,7 +1416,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations dn_dev_seq_ops = { +static const struct seq_operations dn_dev_seq_ops = { .start = dn_dev_seq_start, .next = dn_dev_seq_next, .stop = dn_dev_seq_stop, diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 4bf066c416e2..174d8a7a6dac 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -569,7 +569,7 @@ static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos) NEIGH_SEQ_NEIGH_ONLY); } -static struct seq_operations dn_neigh_seq_ops = { +static const struct seq_operations dn_neigh_seq_ops = { .start = dn_neigh_seq_start, .next = neigh_seq_next, .stop = neigh_seq_stop, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a8bf106b7a61..82622fb6f68f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1726,7 +1726,7 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations dn_rt_cache_seq_ops = { +static const struct seq_operations dn_rt_cache_seq_ops = { .start = dn_rt_cache_seq_start, .next = dn_rt_cache_seq_next, .stop = dn_rt_cache_seq_stop, diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 15ad5dd2d984..8d6901d4e94f 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -549,7 +549,7 @@ static int ip_vs_app_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ip_vs_app_seq_ops = { +static const struct seq_operations ip_vs_app_seq_ops = { .start = ip_vs_app_seq_start, .next = ip_vs_app_seq_next, .stop = ip_vs_app_seq_stop, diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 7018f97c75dc..3b446b1a6b9c 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -745,7 +745,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ip_vs_conn_seq_ops = { +static const struct seq_operations ip_vs_conn_seq_ops = { .start = ip_vs_conn_seq_start, .next = ip_vs_conn_seq_next, .stop = ip_vs_conn_seq_stop, diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 68fe1d4d0210..e1052bcf4ed1 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -1783,7 +1783,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ip_vs_info_seq_ops = { +static const struct seq_operations ip_vs_info_seq_ops = { .start = ip_vs_info_seq_start, .next = ip_vs_info_seq_next, .stop = ip_vs_info_seq_stop, diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 8bacda3f6f6c..dcc12b183474 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -639,7 +639,7 @@ static int clusterip_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations clusterip_seq_ops = { +static const struct seq_operations clusterip_seq_ops = { .start = clusterip_seq_start, .next = clusterip_seq_next, .stop = clusterip_seq_stop, diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index a7b14f2ae2dc..321804315659 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -370,7 +370,7 @@ static int recent_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations recent_seq_ops = { +static const struct seq_operations recent_seq_ops = { .start = recent_seq_start, .next = recent_seq_next, .stop = recent_seq_stop, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 434e08410879..3da9d73d1b52 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -164,7 +164,7 @@ static int ct_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations ct_seq_ops = { +static const struct seq_operations ct_seq_ops = { .start = ct_seq_start, .next = ct_seq_next, .stop = ct_seq_stop, @@ -282,7 +282,7 @@ static int exp_seq_show(struct seq_file *s, void *v) return seq_putc(s, '\n'); } -static struct seq_operations exp_seq_ops = { +static const struct seq_operations exp_seq_ops = { .start = exp_seq_start, .next = exp_seq_next, .stop = exp_seq_stop, @@ -386,7 +386,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ct_cpu_seq_ops = { +static const struct seq_operations ct_cpu_seq_ops = { .start = ct_cpu_seq_start, .next = ct_cpu_seq_next, .stop = ct_cpu_seq_stop, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 95737ab040ec..24424c3b7dc0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2785,7 +2785,7 @@ static int if6_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations if6_seq_ops = { +static const struct seq_operations if6_seq_ops = { .start = if6_seq_start, .next = if6_seq_next, .show = if6_seq_show, diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 9b81264eb78f..b8c533fbdb63 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -539,7 +539,7 @@ static int ac6_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ac6_seq_ops = { +static const struct seq_operations ac6_seq_ops = { .start = ac6_seq_start, .next = ac6_seq_next, .stop = ac6_seq_stop, diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c206a152ed9d..413a4ebb195c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -648,7 +648,7 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ip6fl_seq_ops = { +static const struct seq_operations ip6fl_seq_ops = { .start = ip6fl_seq_start, .next = ip6fl_seq_next, .stop = ip6fl_seq_stop, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3e308fb41b49..ae9881832a7e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2423,7 +2423,7 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations igmp6_mc_seq_ops = { +static const struct seq_operations igmp6_mc_seq_ops = { .start = igmp6_mc_seq_start, .next = igmp6_mc_seq_next, .stop = igmp6_mc_seq_stop, @@ -2597,7 +2597,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations igmp6_mcf_seq_ops = { +static const struct seq_operations igmp6_mcf_seq_ops = { .start = igmp6_mcf_seq_start, .next = igmp6_mcf_seq_next, .stop = igmp6_mcf_seq_stop, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index aac6aeb8de8c..e27383d855de 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1280,7 +1280,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations raw6_seq_ops = { +static const struct seq_operations raw6_seq_ops = { .start = raw6_seq_start, .next = raw6_seq_next, .stop = raw6_seq_stop, diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index db32ac8e79bd..4226e71ae1e3 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -286,21 +286,21 @@ out: return 0; } -static struct seq_operations ipx_seq_interface_ops = { +static const struct seq_operations ipx_seq_interface_ops = { .start = ipx_seq_interface_start, .next = ipx_seq_interface_next, .stop = ipx_seq_interface_stop, .show = ipx_seq_interface_show, }; -static struct seq_operations ipx_seq_route_ops = { +static const struct seq_operations ipx_seq_route_ops = { .start = ipx_seq_route_start, .next = ipx_seq_route_next, .stop = ipx_seq_route_stop, .show = ipx_seq_route_show, }; -static struct seq_operations ipx_seq_socket_ops = { +static const struct seq_operations ipx_seq_socket_ops = { .start = ipx_seq_socket_start, .next = ipx_seq_socket_next, .stop = ipx_seq_interface_stop, diff --git a/net/irda/discovery.c b/net/irda/discovery.c index f09734128674..af0cea721d2a 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -395,7 +395,7 @@ static int discovery_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations discovery_seq_ops = { +static const struct seq_operations discovery_seq_ops = { .start = discovery_seq_start, .next = discovery_seq_next, .stop = discovery_seq_stop, diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 4749f8f55391..2d63fa8e1556 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -562,7 +562,7 @@ static int ircomm_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ircomm_seq_ops = { +static const struct seq_operations ircomm_seq_ops = { .start = ircomm_seq_start, .next = ircomm_seq_next, .stop = ircomm_seq_stop, diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 915d9384f36a..774eb707940c 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -1066,7 +1066,7 @@ static int irias_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irias_seq_ops = { +static const struct seq_operations irias_seq_ops = { .start = irias_seq_start, .next = irias_seq_next, .stop = irias_seq_stop, diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index ed69773b0f8e..f5778ef3ccc7 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1217,7 +1217,7 @@ static int irlan_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irlan_seq_ops = { +static const struct seq_operations irlan_seq_ops = { .start = irlan_seq_start, .next = irlan_seq_next, .stop = irlan_seq_stop, diff --git a/net/irda/irlap.c b/net/irda/irlap.c index d93ebd11431e..2fc9f518f89d 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -1210,7 +1210,7 @@ static int irlap_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irlap_seq_ops = { +static const struct seq_operations irlap_seq_ops = { .start = irlap_seq_start, .next = irlap_seq_next, .stop = irlap_seq_stop, diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 9df0461b6d18..24a5e3f23778 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1994,7 +1994,7 @@ static int irlmp_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irlmp_seq_ops = { +static const struct seq_operations irlmp_seq_ops = { .start = irlmp_seq_start, .next = irlmp_seq_next, .stop = irlmp_seq_stop, diff --git a/net/irda/irttp.c b/net/irda/irttp.c index ce4647542b9e..7f50832a2cd5 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1875,7 +1875,7 @@ static int irttp_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irttp_seq_ops = { +static const struct seq_operations irttp_seq_ops = { .start = irttp_seq_start, .next = irttp_seq_next, .stop = irttp_seq_stop, diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 3ab9d9f8b17f..49be6c902c83 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -184,14 +184,14 @@ out: return 0; } -static struct seq_operations llc_seq_socket_ops = { +static const struct seq_operations llc_seq_socket_ops = { .start = llc_seq_start, .next = llc_seq_next, .stop = llc_seq_stop, .show = llc_seq_socket_show, }; -static struct seq_operations llc_seq_core_ops = { +static const struct seq_operations llc_seq_core_ops = { .start = llc_seq_start, .next = llc_seq_next, .stop = llc_seq_stop, diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 513828fdaa2c..2191fe008f60 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -464,7 +464,7 @@ static int exp_seq_show(struct seq_file *s, void *v) return seq_putc(s, '\n'); } -static struct seq_operations exp_seq_ops = { +static const struct seq_operations exp_seq_ops = { .start = exp_seq_start, .next = exp_seq_next, .stop = exp_seq_stop, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 54498bcfa862..ffb6ff8c3528 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -185,7 +185,7 @@ static int ct_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations ct_seq_ops = { +static const struct seq_operations ct_seq_ops = { .start = ct_seq_start, .next = ct_seq_next, .stop = ct_seq_stop, @@ -289,7 +289,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ct_cpu_seq_ops = { +static const struct seq_operations ct_cpu_seq_ops = { .start = ct_cpu_seq_start, .next = ct_cpu_seq_next, .stop = ct_cpu_seq_stop, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 91b220cf5a1f..94985792b79a 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -140,7 +140,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%2lld %s\n", *pos, logger->name); } -static struct seq_operations nflog_seq_ops = { +static const struct seq_operations nflog_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 823fbf404566..a481a349f7bf 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -330,7 +330,7 @@ static int seq_show(struct seq_file *s, void *v) return ret; } -static struct seq_operations nfqueue_seq_ops = { +static const struct seq_operations nfqueue_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e32e30e7a17c..e185a5b55913 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -962,7 +962,7 @@ static int seq_show(struct seq_file *s, void *v) inst->flushtimeout, atomic_read(&inst->use)); } -static struct seq_operations nful_seq_ops = { +static const struct seq_operations nful_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7d47fc4b19c6..bb65a38c816c 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1048,7 +1048,7 @@ static int seq_show(struct seq_file *s, void *v) atomic_read(&inst->use)); } -static struct seq_operations nfqnl_seq_ops = { +static const struct seq_operations nfqnl_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 520eddf4d61b..cc2baa6d5a7a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -745,7 +745,7 @@ static int xt_name_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations xt_tgt_seq_ops = { +static const struct seq_operations xt_tgt_seq_ops = { .start = xt_tgt_seq_start, .next = xt_tgt_seq_next, .stop = xt_tgt_seq_stop, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 5a6ea9b9108c..d6b3d01975b6 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -701,7 +701,7 @@ static int dl_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations dl_seq_ops = { +static const struct seq_operations dl_seq_ops = { .start = dl_seq_start, .next = dl_seq_next, .stop = dl_seq_stop, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1f15821c8da4..a3c8e692f493 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1713,7 +1713,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations netlink_seq_ops = { +static const struct seq_operations netlink_seq_ops = { .start = netlink_seq_start, .next = netlink_seq_next, .stop = netlink_seq_stop, diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 5d4a26c2aa0c..5d66490dd290 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1328,7 +1328,7 @@ static int nr_info_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations nr_info_seqops = { +static const struct seq_operations nr_info_seqops = { .start = nr_info_start, .next = nr_info_next, .stop = nr_info_stop, diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 2f76e062609d..24fe4a66d297 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -922,7 +922,7 @@ static int nr_node_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations nr_node_seqops = { +static const struct seq_operations nr_node_seqops = { .start = nr_node_start, .next = nr_node_next, .stop = nr_node_stop, @@ -1006,7 +1006,7 @@ static int nr_neigh_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations nr_neigh_seqops = { +static const struct seq_operations nr_neigh_seqops = { .start = nr_neigh_start, .next = nr_neigh_next, .stop = nr_neigh_stop, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f8b83014ccca..7c27bd389b7e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1928,7 +1928,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations packet_seq_ops = { +static const struct seq_operations packet_seq_ops = { .start = packet_seq_start, .next = packet_seq_next, .stop = packet_seq_stop, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index d476c43d5216..f4d3aba00800 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1454,7 +1454,7 @@ static int rose_info_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rose_info_seqops = { +static const struct seq_operations rose_info_seqops = { .start = rose_info_start, .next = rose_info_next, .stop = rose_info_stop, diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 929a784a86d7..bbcbad1da0d0 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -1118,7 +1118,7 @@ static int rose_node_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rose_node_seqops = { +static const struct seq_operations rose_node_seqops = { .start = rose_node_start, .next = rose_node_next, .stop = rose_node_stop, @@ -1200,7 +1200,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v) } -static struct seq_operations rose_neigh_seqops = { +static const struct seq_operations rose_neigh_seqops = { .start = rose_neigh_start, .next = rose_neigh_next, .stop = rose_neigh_stop, @@ -1284,7 +1284,7 @@ static int rose_route_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rose_route_seqops = { +static const struct seq_operations rose_route_seqops = { .start = rose_route_start, .next = rose_route_next, .stop = rose_route_stop, diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c index 77cc5fb17dcd..2e83ce325d15 100644 --- a/net/rxrpc/ar-proc.c +++ b/net/rxrpc/ar-proc.c @@ -86,7 +86,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rxrpc_call_seq_ops = { +static const struct seq_operations rxrpc_call_seq_ops = { .start = rxrpc_call_seq_start, .next = rxrpc_call_seq_next, .stop = rxrpc_call_seq_stop, @@ -170,7 +170,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rxrpc_connection_seq_ops = { +static const struct seq_operations rxrpc_connection_seq_ops = { .start = rxrpc_connection_seq_start, .next = rxrpc_connection_seq_next, .stop = rxrpc_connection_seq_stop, diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 2f12bf2d8d3c..e4cd841a22e4 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -250,7 +250,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations sctp_eps_ops = { +static const struct seq_operations sctp_eps_ops = { .start = sctp_eps_seq_start, .next = sctp_eps_seq_next, .stop = sctp_eps_seq_stop, @@ -361,7 +361,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations sctp_assoc_ops = { +static const struct seq_operations sctp_assoc_ops = { .start = sctp_assocs_seq_start, .next = sctp_assocs_seq_next, .stop = sctp_assocs_seq_stop, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 543b085ae2c1..01c3c4105204 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1210,7 +1210,7 @@ static int c_show(struct seq_file *m, void *p) return cd->cache_show(m, cd, cp); } -static struct seq_operations cache_content_op = { +static const struct seq_operations cache_content_op = { .start = c_start, .next = c_next, .stop = c_stop, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d70fa30d4294..3654b647ac79 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2048,7 +2048,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations unix_seq_ops = { +static const struct seq_operations unix_seq_ops = { .start = unix_seq_start, .next = unix_seq_next, .stop = unix_seq_stop, diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 205106521ecb..236e7eaf1b7f 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -164,14 +164,14 @@ static int status_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations config_op = { +static const struct seq_operations config_op = { .start = r_start, .next = r_next, .stop = r_stop, .show = config_show, }; -static struct seq_operations status_op = { +static const struct seq_operations status_op = { .start = r_start, .next = r_next, .stop = r_stop, diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 96001f0c64fc..7405b9c5b7f2 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -234,21 +234,21 @@ out: return 0; } -static struct seq_operations x25_seq_route_ops = { +static const struct seq_operations x25_seq_route_ops = { .start = x25_seq_route_start, .next = x25_seq_route_next, .stop = x25_seq_route_stop, .show = x25_seq_route_show, }; -static struct seq_operations x25_seq_socket_ops = { +static const struct seq_operations x25_seq_socket_ops = { .start = x25_seq_socket_start, .next = x25_seq_socket_next, .stop = x25_seq_socket_stop, .show = x25_seq_socket_show, }; -static struct seq_operations x25_seq_forward_ops = { +static const struct seq_operations x25_seq_forward_ops = { .start = x25_seq_forward_start, .next = x25_seq_forward_next, .stop = x25_seq_forward_stop, -- cgit v1.2.3 From 99d24edeb6abc6ca3a0d0fbdb83c664c04403c8c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 10 Jul 2007 23:24:52 -0700 Subject: [NETFILTER]: {ip, nf}_conntrack_sctp: fix remotely triggerable NULL ptr dereference (CVE-2007-2876) When creating a new connection by sending an unknown chunk type, we don't transition to a valid state, causing a NULL pointer dereference in sctp_packet when accessing sctp_timeouts[SCTP_CONNTRACK_NONE]. Fix by don't creating new conntrack entry if initial state is invalid. Noticed by Vilmos Nebehaj Signed-off-by: Patrick McHardy Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chris Wright Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_proto_sctp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 265769e5002b..debfe61378a1 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -431,7 +431,8 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, SCTP_CONNTRACK_NONE, sch->type); /* Invalid: delete conntrack */ - if (newconntrack == SCTP_CONNTRACK_MAX) { + if (newconntrack == SCTP_CONNTRACK_NONE || + newconntrack == SCTP_CONNTRACK_MAX) { pr_debug("nf_conntrack_sctp: invalid new deleting.\n"); return 0; } -- cgit v1.2.3 From 1fd05ba5a2f2aa8e7b9b52ef55df850e2e7d54c9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 Jul 2007 14:22:39 -0700 Subject: [AF_UNIX]: Rewrite garbage collector, fixes race. Throw out the old mark & sweep garbage collector and put in a refcounting cycle detecting one. The old one had a race with recvmsg, that resulted in false positives and hence data loss. The old algorithm operated on all unix sockets in the system, so any additional locking would have meant performance problems for all users of these. The new algorithm instead only operates on "in flight" sockets, which are very rare, and the additional locking for these doesn't negatively impact the vast majority of users. In fact it's probable, that there weren't *any* heavy senders of sockets over sockets, otherwise the above race would have been discovered long ago. The patch works OK with the app that exposed the race with the old code. The garbage collection has also been verified to work in a few simple cases. Signed-off-by: Miklos Szeredi Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 +- net/unix/af_unix.c | 6 +- net/unix/garbage.c | 325 +++++++++++++++++++++++++++++--------------------- 3 files changed, 190 insertions(+), 144 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 65f49fd7deff..6de1e9e35c73 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -79,9 +79,10 @@ struct unix_sock { struct mutex readlock; struct sock *peer; struct sock *other; - struct sock *gc_tree; + struct list_head link; atomic_t inflight; spinlock_t lock; + unsigned int gc_candidate : 1; wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3654b647ac79..65ebccc0a698 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -592,7 +592,8 @@ static struct sock * unix_create1(struct socket *sock) u->dentry = NULL; u->mnt = NULL; spin_lock_init(&u->lock); - atomic_set(&u->inflight, sock ? 0 : -1); + atomic_set(&u->inflight, 0); + INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); unix_insert_socket(unix_sockets_unbound, sk); @@ -1134,9 +1135,6 @@ restart: /* take ten and and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, skb); - /* Undo artificially decreased inflight after embrion - * is installed to listening socket. */ - atomic_inc(&newu->inflight); spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); other->sk_data_ready(other, 0); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index f20b7ea7c555..406b6433e467 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -62,6 +62,10 @@ * AV 1 Mar 1999 * Damn. Added missing check for ->dead in listen queues scanning. * + * Miklos Szeredi 25 Jun 2007 + * Reimplement with a cycle collecting algorithm. This should + * solve several problems with the previous code, like being racy + * wrt receive and holding up unrelated socket operations. */ #include @@ -84,10 +88,9 @@ /* Internal data structures and random procedures: */ -#define GC_HEAD ((struct sock *)(-1)) -#define GC_ORPHAN ((struct sock *)(-3)) - -static struct sock *gc_current = GC_HEAD; /* stack of objects to mark */ +static LIST_HEAD(gc_inflight_list); +static LIST_HEAD(gc_candidates); +static DEFINE_SPINLOCK(unix_gc_lock); atomic_t unix_tot_inflight = ATOMIC_INIT(0); @@ -122,8 +125,16 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); if(s) { - atomic_inc(&unix_sk(s)->inflight); + struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); + if (atomic_inc_return(&u->inflight) == 1) { + BUG_ON(!list_empty(&u->link)); + list_add_tail(&u->link, &gc_inflight_list); + } else { + BUG_ON(list_empty(&u->link)); + } atomic_inc(&unix_tot_inflight); + spin_unlock(&unix_gc_lock); } } @@ -131,182 +142,218 @@ void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); if(s) { - atomic_dec(&unix_sk(s)->inflight); + struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); + BUG_ON(list_empty(&u->link)); + if (atomic_dec_and_test(&u->inflight)) + list_del_init(&u->link); atomic_dec(&unix_tot_inflight); + spin_unlock(&unix_gc_lock); } } +static inline struct sk_buff *sock_queue_head(struct sock *sk) +{ + return (struct sk_buff *) &sk->sk_receive_queue; +} -/* - * Garbage Collector Support Functions - */ +#define receive_queue_for_each_skb(sk, next, skb) \ + for (skb = sock_queue_head(sk)->next, next = skb->next; \ + skb != sock_queue_head(sk); skb = next, next = skb->next) -static inline struct sock *pop_stack(void) +static void scan_inflight(struct sock *x, void (*func)(struct sock *), + struct sk_buff_head *hitlist) { - struct sock *p = gc_current; - gc_current = unix_sk(p)->gc_tree; - return p; + struct sk_buff *skb; + struct sk_buff *next; + + spin_lock(&x->sk_receive_queue.lock); + receive_queue_for_each_skb(x, next, skb) { + /* + * Do we have file descriptors ? + */ + if (UNIXCB(skb).fp) { + bool hit = false; + /* + * Process the descriptors of this socket + */ + int nfd = UNIXCB(skb).fp->count; + struct file **fp = UNIXCB(skb).fp->fp; + while (nfd--) { + /* + * Get the socket the fd matches + * if it indeed does so + */ + struct sock *sk = unix_get_socket(*fp++); + if(sk) { + hit = true; + func(sk); + } + } + if (hit && hitlist != NULL) { + __skb_unlink(skb, &x->sk_receive_queue); + __skb_queue_tail(hitlist, skb); + } + } + } + spin_unlock(&x->sk_receive_queue.lock); } -static inline int empty_stack(void) +static void scan_children(struct sock *x, void (*func)(struct sock *), + struct sk_buff_head *hitlist) { - return gc_current == GC_HEAD; + if (x->sk_state != TCP_LISTEN) + scan_inflight(x, func, hitlist); + else { + struct sk_buff *skb; + struct sk_buff *next; + struct unix_sock *u; + LIST_HEAD(embryos); + + /* + * For a listening socket collect the queued embryos + * and perform a scan on them as well. + */ + spin_lock(&x->sk_receive_queue.lock); + receive_queue_for_each_skb(x, next, skb) { + u = unix_sk(skb->sk); + + /* + * An embryo cannot be in-flight, so it's safe + * to use the list link. + */ + BUG_ON(!list_empty(&u->link)); + list_add_tail(&u->link, &embryos); + } + spin_unlock(&x->sk_receive_queue.lock); + + while (!list_empty(&embryos)) { + u = list_entry(embryos.next, struct unix_sock, link); + scan_inflight(&u->sk, func, hitlist); + list_del_init(&u->link); + } + } } -static void maybe_unmark_and_push(struct sock *x) +static void dec_inflight(struct sock *sk) { - struct unix_sock *u = unix_sk(x); + atomic_dec(&unix_sk(sk)->inflight); +} - if (u->gc_tree != GC_ORPHAN) - return; - sock_hold(x); - u->gc_tree = gc_current; - gc_current = x; +static void inc_inflight(struct sock *sk) +{ + atomic_inc(&unix_sk(sk)->inflight); } +static void inc_inflight_move_tail(struct sock *sk) +{ + struct unix_sock *u = unix_sk(sk); + + atomic_inc(&u->inflight); + /* + * If this is still a candidate, move it to the end of the + * list, so that it's checked even if it was already passed + * over + */ + if (u->gc_candidate) + list_move_tail(&u->link, &gc_candidates); +} /* The external entry point: unix_gc() */ void unix_gc(void) { - static DEFINE_MUTEX(unix_gc_sem); - int i; - struct sock *s; - struct sk_buff_head hitlist; - struct sk_buff *skb; + static bool gc_in_progress = false; - /* - * Avoid a recursive GC. - */ + struct unix_sock *u; + struct unix_sock *next; + struct sk_buff_head hitlist; + struct list_head cursor; - if (!mutex_trylock(&unix_gc_sem)) - return; + spin_lock(&unix_gc_lock); - spin_lock(&unix_table_lock); + /* Avoid a recursive GC. */ + if (gc_in_progress) + goto out; - forall_unix_sockets(i, s) - { - unix_sk(s)->gc_tree = GC_ORPHAN; - } + gc_in_progress = true; /* - * Everything is now marked - */ - - /* Invariant to be maintained: - - everything unmarked is either: - -- (a) on the stack, or - -- (b) has all of its children unmarked - - everything on the stack is always unmarked - - nothing is ever pushed onto the stack twice, because: - -- nothing previously unmarked is ever pushed on the stack + * First, select candidates for garbage collection. Only + * in-flight sockets are considered, and from those only ones + * which don't have any external reference. + * + * Holding unix_gc_lock will protect these candidates from + * being detached, and hence from gaining an external + * reference. This also means, that since there are no + * possible receivers, the receive queues of these sockets are + * static during the GC, even though the dequeue is done + * before the detach without atomicity guarantees. */ + list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + int total_refs; + int inflight_refs; + + total_refs = file_count(u->sk.sk_socket->file); + inflight_refs = atomic_read(&u->inflight); + + BUG_ON(inflight_refs < 1); + BUG_ON(total_refs < inflight_refs); + if (total_refs == inflight_refs) { + list_move_tail(&u->link, &gc_candidates); + u->gc_candidate = 1; + } + } /* - * Push root set + * Now remove all internal in-flight reference to children of + * the candidates. */ - - forall_unix_sockets(i, s) - { - int open_count = 0; - - /* - * If all instances of the descriptor are not - * in flight we are in use. - * - * Special case: when socket s is embrion, it may be - * hashed but still not in queue of listening socket. - * In this case (see unix_create1()) we set artificial - * negative inflight counter to close race window. - * It is trick of course and dirty one. - */ - if (s->sk_socket && s->sk_socket->file) - open_count = file_count(s->sk_socket->file); - if (open_count > atomic_read(&unix_sk(s)->inflight)) - maybe_unmark_and_push(s); - } + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, dec_inflight, NULL); /* - * Mark phase + * Restore the references for children of all candidates, + * which have remaining references. Do this recursively, so + * only those remain, which form cyclic references. + * + * Use a "cursor" link, to make the list traversal safe, even + * though elements might be moved about. */ + list_add(&cursor, &gc_candidates); + while (cursor.next != &gc_candidates) { + u = list_entry(cursor.next, struct unix_sock, link); - while (!empty_stack()) - { - struct sock *x = pop_stack(); - struct sock *sk; - - spin_lock(&x->sk_receive_queue.lock); - skb = skb_peek(&x->sk_receive_queue); - - /* - * Loop through all but first born - */ + /* Move cursor to after the current position. */ + list_move(&cursor, &u->link); - while (skb && skb != (struct sk_buff *)&x->sk_receive_queue) { - /* - * Do we have file descriptors ? - */ - if(UNIXCB(skb).fp) - { - /* - * Process the descriptors of this socket - */ - int nfd=UNIXCB(skb).fp->count; - struct file **fp = UNIXCB(skb).fp->fp; - while(nfd--) - { - /* - * Get the socket the fd matches if - * it indeed does so - */ - if((sk=unix_get_socket(*fp++))!=NULL) - { - maybe_unmark_and_push(sk); - } - } - } - /* We have to scan not-yet-accepted ones too */ - if (x->sk_state == TCP_LISTEN) - maybe_unmark_and_push(skb->sk); - skb=skb->next; + if (atomic_read(&u->inflight) > 0) { + list_move_tail(&u->link, &gc_inflight_list); + u->gc_candidate = 0; + scan_children(&u->sk, inc_inflight_move_tail, NULL); } - spin_unlock(&x->sk_receive_queue.lock); - sock_put(x); } + list_del(&cursor); + /* + * Now gc_candidates contains only garbage. Restore original + * inflight counters for these as well, and remove the skbuffs + * which are creating the cycle(s). + */ skb_queue_head_init(&hitlist); + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, inc_inflight, &hitlist); - forall_unix_sockets(i, s) - { - struct unix_sock *u = unix_sk(s); + spin_unlock(&unix_gc_lock); - if (u->gc_tree == GC_ORPHAN) { - struct sk_buff *nextsk; + /* Here we are. Hitlist is filled. Die. */ + __skb_queue_purge(&hitlist); - spin_lock(&s->sk_receive_queue.lock); - skb = skb_peek(&s->sk_receive_queue); - while (skb && - skb != (struct sk_buff *)&s->sk_receive_queue) { - nextsk = skb->next; - /* - * Do we have file descriptors ? - */ - if (UNIXCB(skb).fp) { - __skb_unlink(skb, - &s->sk_receive_queue); - __skb_queue_tail(&hitlist, skb); - } - skb = nextsk; - } - spin_unlock(&s->sk_receive_queue.lock); - } - u->gc_tree = GC_ORPHAN; - } - spin_unlock(&unix_table_lock); + spin_lock(&unix_gc_lock); - /* - * Here we are. Hitlist is filled. Die. - */ + /* All candidates should have been detached by now. */ + BUG_ON(!list_empty(&gc_candidates)); + gc_in_progress = false; - __skb_queue_purge(&hitlist); - mutex_unlock(&unix_gc_sem); + out: + spin_unlock(&unix_gc_lock); } -- cgit v1.2.3 From 7b595756ec1f49e0049a9e01a1298d53a7faaa15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 03:45:17 +0900 Subject: sysfs: kill unnecessary attribute->owner sysfs is now completely out of driver/module lifetime game. After deletion, a sysfs node doesn't access anything outside sysfs proper, so there's no reason to hold onto the attribute owners. Note that often the wrong modules were accounted for as owners leading to accessing removed modules. This patch kills now unnecessary attribute->owner. Note that with this change, userland holding a sysfs node does not prevent the backing module from being unloaded. For more info regarding lifetime rule cleanup, please read the following message. http://article.gmane.org/gmane.linux.kernel/510293 (tweaked by Greg to not delete the field just yet, to make it easier to merge things properly.) Signed-off-by: Tejun Heo Cc: Cornelia Huck Cc: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/ppc/syslib/mv64x60.c | 1 - arch/s390/kernel/ipl.c | 2 -- drivers/base/bus.c | 2 -- drivers/base/class.c | 2 -- drivers/base/core.c | 4 ---- drivers/base/firmware_class.c | 2 +- drivers/block/pktcdvd.c | 3 +-- drivers/char/ipmi/ipmi_msghandler.c | 10 ---------- drivers/cpufreq/cpufreq_stats.c | 3 +-- drivers/cpufreq/cpufreq_userspace.c | 2 +- drivers/cpufreq/freq_table.c | 1 - drivers/firmware/dcdbas.h | 3 +-- drivers/firmware/dell_rbu.c | 6 +++--- drivers/firmware/edd.c | 2 +- drivers/firmware/efivars.c | 6 +++--- drivers/i2c/chips/eeprom.c | 1 - drivers/i2c/chips/max6875.c | 1 - drivers/infiniband/core/sysfs.c | 1 - drivers/input/mouse/psmouse.h | 1 - drivers/macintosh/windfarm_core.c | 2 -- drivers/misc/asus-laptop.c | 3 +-- drivers/net/ibmveth.c | 2 +- drivers/parisc/pdc_stable.c | 4 ++-- drivers/pci/hotplug/acpiphp_ibm.c | 1 - drivers/pci/pci-sysfs.c | 4 ---- drivers/pci/probe.c | 2 -- drivers/pcmcia/socket_sysfs.c | 2 +- drivers/rapidio/rio-sysfs.c | 1 - drivers/rtc/rtc-ds1553.c | 1 - drivers/rtc/rtc-ds1742.c | 1 - drivers/s390/cio/chp.c | 2 -- drivers/s390/net/qeth_sys.c | 2 +- drivers/scsi/arcmsr/arcmsr_attr.c | 3 --- drivers/scsi/libsas/sas_expander.c | 1 - drivers/scsi/lpfc/lpfc_attr.c | 2 -- drivers/scsi/qla2xxx/qla_attr.c | 6 ------ drivers/spi/at25.c | 1 - drivers/video/aty/radeon_base.c | 2 -- drivers/video/backlight/backlight.c | 2 +- drivers/video/backlight/lcd.c | 2 +- drivers/w1/slaves/w1_ds2433.c | 1 - drivers/w1/slaves/w1_therm.c | 1 - drivers/w1/w1.c | 2 -- drivers/zorro/zorro-sysfs.c | 1 - fs/ecryptfs/main.c | 2 -- fs/ocfs2/cluster/masklog.c | 1 - fs/partitions/check.c | 1 - fs/sysfs/bin.c | 19 +++++-------------- fs/sysfs/file.c | 21 +++++---------------- include/linux/sysdev.h | 3 +-- include/linux/sysfs.h | 12 ++++++++---- kernel/module.c | 9 +++------ kernel/params.c | 1 - net/bridge/br_sysfs_br.c | 3 +-- net/bridge/br_sysfs_if.c | 3 +-- 55 files changed, 44 insertions(+), 135 deletions(-) (limited to 'net') diff --git a/arch/ppc/syslib/mv64x60.c b/arch/ppc/syslib/mv64x60.c index 8485a68cd475..032f4b7f4225 100644 --- a/arch/ppc/syslib/mv64x60.c +++ b/arch/ppc/syslib/mv64x60.c @@ -2415,7 +2415,6 @@ static struct bin_attribute mv64xxx_hs_reg_attr = { /* Hotswap register */ .attr = { .name = "hs_reg", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = VAL_LEN_MAX, .read = mv64xxx_hs_reg_read, diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 82b131ddd7ff..9a13b24ee1ab 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -312,7 +312,6 @@ static struct bin_attribute ipl_parameter_attr = { .attr = { .name = "binary_parameter", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = PAGE_SIZE, .read = &ipl_parameter_read, @@ -336,7 +335,6 @@ static struct bin_attribute ipl_scp_data_attr = { .attr = { .name = "scp_data", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = PAGE_SIZE, .read = &ipl_scp_data_read, diff --git a/drivers/base/bus.c b/drivers/base/bus.c index f299e0d6abc4..61c67526a656 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -574,7 +574,6 @@ static int add_probe_files(struct bus_type *bus) bus->drivers_probe_attr.attr.name = "drivers_probe"; bus->drivers_probe_attr.attr.mode = S_IWUSR; - bus->drivers_probe_attr.attr.owner = bus->owner; bus->drivers_probe_attr.store = store_drivers_probe; retval = bus_create_file(bus, &bus->drivers_probe_attr); if (retval) @@ -582,7 +581,6 @@ static int add_probe_files(struct bus_type *bus) bus->drivers_autoprobe_attr.attr.name = "drivers_autoprobe"; bus->drivers_autoprobe_attr.attr.mode = S_IWUSR | S_IRUGO; - bus->drivers_autoprobe_attr.attr.owner = bus->owner; bus->drivers_autoprobe_attr.show = show_drivers_autoprobe; bus->drivers_autoprobe_attr.store = store_drivers_autoprobe; retval = bus_create_file(bus, &bus->drivers_autoprobe_attr); diff --git a/drivers/base/class.c b/drivers/base/class.c index 8c506dbe3913..9cbfde23b9e3 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -605,7 +605,6 @@ int class_device_add(struct class_device *class_dev) goto out3; class_dev->uevent_attr.attr.name = "uevent"; class_dev->uevent_attr.attr.mode = S_IWUSR; - class_dev->uevent_attr.attr.owner = parent_class->owner; class_dev->uevent_attr.store = store_uevent; error = class_device_create_file(class_dev, &class_dev->uevent_attr); if (error) @@ -620,7 +619,6 @@ int class_device_add(struct class_device *class_dev) } attr->attr.name = "dev"; attr->attr.mode = S_IRUGO; - attr->attr.owner = parent_class->owner; attr->show = show_dev; error = class_device_create_file(class_dev, attr); if (error) { diff --git a/drivers/base/core.c b/drivers/base/core.c index cff4fbfbb055..e3fb87bfc6e1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -683,8 +683,6 @@ int device_add(struct device *dev) dev->uevent_attr.attr.name = "uevent"; dev->uevent_attr.attr.mode = S_IRUGO | S_IWUSR; - if (dev->driver) - dev->uevent_attr.attr.owner = dev->driver->owner; dev->uevent_attr.store = store_uevent; dev->uevent_attr.show = show_uevent; error = device_create_file(dev, &dev->uevent_attr); @@ -700,8 +698,6 @@ int device_add(struct device *dev) } attr->attr.name = "dev"; attr->attr.mode = S_IRUGO; - if (dev->driver) - attr->attr.owner = dev->driver->owner; attr->show = show_dev; error = device_create_file(dev, attr); if (error) { diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 89a5f4a54913..0e511485d2e6 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -271,7 +271,7 @@ out: } static struct bin_attribute firmware_attr_data_tmpl = { - .attr = {.name = "data", .mode = 0644, .owner = THIS_MODULE}, + .attr = {.name = "data", .mode = 0644}, .size = 0, .read = firmware_data_read, .write = firmware_data_write, diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index f1b9dd7d47d6..ce64e86d6ffb 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -146,8 +146,7 @@ static void pkt_kobj_release(struct kobject *kobj) **********************************************************/ #define DEF_ATTR(_obj,_name,_mode) \ - static struct attribute _obj = { \ - .name = _name, .owner = THIS_MODULE, .mode = _mode } + static struct attribute _obj = { .name = _name, .mode = _mode } /********************************************************** /sys/class/pktcdvd/pktcdvd[0-7]/ diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 8e222f2b80cc..b5df7e61aeb2 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2171,52 +2171,42 @@ static int create_files(struct bmc_device *bmc) int err; bmc->device_id_attr.attr.name = "device_id"; - bmc->device_id_attr.attr.owner = THIS_MODULE; bmc->device_id_attr.attr.mode = S_IRUGO; bmc->device_id_attr.show = device_id_show; bmc->provides_dev_sdrs_attr.attr.name = "provides_device_sdrs"; - bmc->provides_dev_sdrs_attr.attr.owner = THIS_MODULE; bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO; bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show; bmc->revision_attr.attr.name = "revision"; - bmc->revision_attr.attr.owner = THIS_MODULE; bmc->revision_attr.attr.mode = S_IRUGO; bmc->revision_attr.show = revision_show; bmc->firmware_rev_attr.attr.name = "firmware_revision"; - bmc->firmware_rev_attr.attr.owner = THIS_MODULE; bmc->firmware_rev_attr.attr.mode = S_IRUGO; bmc->firmware_rev_attr.show = firmware_rev_show; bmc->version_attr.attr.name = "ipmi_version"; - bmc->version_attr.attr.owner = THIS_MODULE; bmc->version_attr.attr.mode = S_IRUGO; bmc->version_attr.show = ipmi_version_show; bmc->add_dev_support_attr.attr.name = "additional_device_support"; - bmc->add_dev_support_attr.attr.owner = THIS_MODULE; bmc->add_dev_support_attr.attr.mode = S_IRUGO; bmc->add_dev_support_attr.show = add_dev_support_show; bmc->manufacturer_id_attr.attr.name = "manufacturer_id"; - bmc->manufacturer_id_attr.attr.owner = THIS_MODULE; bmc->manufacturer_id_attr.attr.mode = S_IRUGO; bmc->manufacturer_id_attr.show = manufacturer_id_show; bmc->product_id_attr.attr.name = "product_id"; - bmc->product_id_attr.attr.owner = THIS_MODULE; bmc->product_id_attr.attr.mode = S_IRUGO; bmc->product_id_attr.show = product_id_show; bmc->guid_attr.attr.name = "guid"; - bmc->guid_attr.attr.owner = THIS_MODULE; bmc->guid_attr.attr.mode = S_IRUGO; bmc->guid_attr.show = guid_show; bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision"; - bmc->aux_firmware_rev_attr.attr.owner = THIS_MODULE; bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO; bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show; diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index d2f0cbd8b8f3..917b9bab9ccb 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -25,8 +25,7 @@ static spinlock_t cpufreq_stats_lock; #define CPUFREQ_STATDEVICE_ATTR(_name,_mode,_show) \ static struct freq_attr _attr_##_name = {\ - .attr = {.name = __stringify(_name), .owner = THIS_MODULE, \ - .mode = _mode, }, \ + .attr = {.name = __stringify(_name), .mode = _mode, }, \ .show = _show,\ }; diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 860345c7799a..a648970338b0 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -120,7 +120,7 @@ store_speed (struct cpufreq_policy *policy, const char *buf, size_t count) static struct freq_attr freq_attr_scaling_setspeed = { - .attr = { .name = "scaling_setspeed", .mode = 0644, .owner = THIS_MODULE }, + .attr = { .name = "scaling_setspeed", .mode = 0644 }, .show = show_speed, .store = store_speed, }; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index e7490925fdcf..5409f3afb3f8 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -199,7 +199,6 @@ static ssize_t show_available_freqs (struct cpufreq_policy *policy, char *buf) struct freq_attr cpufreq_freq_attr_scaling_available_freqs = { .attr = { .name = "scaling_available_frequencies", .mode = 0444, - .owner=THIS_MODULE }, .show = show_available_freqs, }; diff --git a/drivers/firmware/dcdbas.h b/drivers/firmware/dcdbas.h index 58a85182b3e8..dcdba0f1b32c 100644 --- a/drivers/firmware/dcdbas.h +++ b/drivers/firmware/dcdbas.h @@ -67,8 +67,7 @@ #define DCDBAS_BIN_ATTR_RW(_name) \ struct bin_attribute bin_attr_##_name = { \ .attr = { .name = __stringify(_name), \ - .mode = 0600, \ - .owner = THIS_MODULE }, \ + .mode = 0600 }, \ .read = _name##_read, \ .write = _name##_write, \ } diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index fc702e40bd43..f8afecb7d0cf 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -687,18 +687,18 @@ static ssize_t write_rbu_packet_size(struct kobject *kobj, char *buffer, } static struct bin_attribute rbu_data_attr = { - .attr = {.name = "data",.owner = THIS_MODULE,.mode = 0444}, + .attr = {.name = "data", .mode = 0444}, .read = read_rbu_data, }; static struct bin_attribute rbu_image_type_attr = { - .attr = {.name = "image_type",.owner = THIS_MODULE,.mode = 0644}, + .attr = {.name = "image_type", .mode = 0644}, .read = read_rbu_image_type, .write = write_rbu_image_type, }; static struct bin_attribute rbu_packet_size_attr = { - .attr = {.name = "packet_size",.owner = THIS_MODULE,.mode = 0644}, + .attr = {.name = "packet_size", .mode = 0644}, .read = read_rbu_packet_size, .write = write_rbu_packet_size, }; diff --git a/drivers/firmware/edd.c b/drivers/firmware/edd.c index d8806e4f1829..15232271d848 100644 --- a/drivers/firmware/edd.c +++ b/drivers/firmware/edd.c @@ -74,7 +74,7 @@ static struct edd_device *edd_devices[EDD_MBR_SIG_MAX]; #define EDD_DEVICE_ATTR(_name,_mode,_show,_test) \ struct edd_attribute edd_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .test = _test, \ }; diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 1324984a4c35..bfd2d67df689 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -131,21 +131,21 @@ struct efivar_attribute { #define EFI_ATTR(_name, _mode, _show, _store) \ struct subsys_attribute efi_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; #define EFIVAR_ATTR(_name, _mode, _show, _store) \ struct efivar_attribute efivar_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; #define VAR_SUBSYS_ATTR(_name, _mode, _show, _store) \ struct subsys_attribute var_subsys_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c index bfce13c8f1ff..5990dd5fc773 100644 --- a/drivers/i2c/chips/eeprom.c +++ b/drivers/i2c/chips/eeprom.c @@ -143,7 +143,6 @@ static struct bin_attribute eeprom_attr = { .attr = { .name = "eeprom", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = EEPROM_SIZE, .read = eeprom_read, diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c index 76645c142977..1405ce5b236c 100644 --- a/drivers/i2c/chips/max6875.c +++ b/drivers/i2c/chips/max6875.c @@ -152,7 +152,6 @@ static struct bin_attribute user_eeprom_attr = { .attr = { .name = "eeprom", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = USER_EEPROM_SIZE, .read = max6875_read, diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 08c299ebf4a8..bf9b99292048 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -479,7 +479,6 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *, element->attr.attr.name = element->name; element->attr.attr.mode = S_IRUGO; - element->attr.attr.owner = THIS_MODULE; element->attr.show = show; element->index = i; diff --git a/drivers/input/mouse/psmouse.h b/drivers/input/mouse/psmouse.h index 27a68835b5ba..1317bdd8cc7c 100644 --- a/drivers/input/mouse/psmouse.h +++ b/drivers/input/mouse/psmouse.h @@ -119,7 +119,6 @@ static struct psmouse_attribute psmouse_attr_##_name = { \ .attr = { \ .name = __stringify(_name), \ .mode = _mode, \ - .owner = THIS_MODULE, \ }, \ .show = psmouse_attr_show_helper, \ .store = psmouse_attr_set_helper, \ diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 11ced17f438a..4fcb245ba184 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -212,7 +212,6 @@ int wf_register_control(struct wf_control *new_ct) list_add(&new_ct->link, &wf_controls); new_ct->attr.attr.name = new_ct->name; - new_ct->attr.attr.owner = THIS_MODULE; new_ct->attr.attr.mode = 0644; new_ct->attr.show = wf_show_control; new_ct->attr.store = wf_store_control; @@ -325,7 +324,6 @@ int wf_register_sensor(struct wf_sensor *new_sr) list_add(&new_sr->link, &wf_sensors); new_sr->attr.attr.name = new_sr->name; - new_sr->attr.attr.owner = THIS_MODULE; new_sr->attr.attr.mode = 0444; new_sr->attr.show = wf_show_sensor; new_sr->attr.store = NULL; diff --git a/drivers/misc/asus-laptop.c b/drivers/misc/asus-laptop.c index 4f9060a2a2f2..7798f590e5aa 100644 --- a/drivers/misc/asus-laptop.c +++ b/drivers/misc/asus-laptop.c @@ -737,8 +737,7 @@ static void asus_hotk_notify(acpi_handle handle, u32 event, void *data) struct device_attribute dev_attr_##_name = { \ .attr = { \ .name = __stringify(_name), \ - .mode = 0, \ - .owner = THIS_MODULE }, \ + .mode = 0 }, \ .show = NULL, \ .store = NULL, \ } diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 6ec3d500f334..d96eb7229548 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1337,7 +1337,7 @@ const char * buf, size_t count) #define ATTR(_name, _mode) \ struct attribute veth_##_name##_attr = { \ - .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE \ + .name = __stringify(_name), .mode = _mode, \ }; static ATTR(active, 0644); diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index 924ef0609460..fc4bde259dc7 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -121,14 +121,14 @@ struct pdcspath_entry pdcspath_entry_##_name = { \ #define PDCS_ATTR(_name, _mode, _show, _store) \ struct subsys_attribute pdcs_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; #define PATHS_ATTR(_name, _mode, _show, _store) \ struct pdcspath_attribute paths_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index e7322c25d377..74556ec31a5b 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -117,7 +117,6 @@ static struct notification ibm_note; static struct bin_attribute ibm_apci_table_attr = { .attr = { .name = "apci_table", - .owner = THIS_MODULE, .mode = S_IRUGO, }, .read = ibm_read_apci_table, diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 284e83a527f9..d448f8df8613 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -499,7 +499,6 @@ static int pci_create_resource_files(struct pci_dev *pdev) sprintf(res_attr_name, "resource%d", i); res_attr->attr.name = res_attr_name; res_attr->attr.mode = S_IRUSR | S_IWUSR; - res_attr->attr.owner = THIS_MODULE; res_attr->size = pci_resource_len(pdev, i); res_attr->mmap = pci_mmap_resource; res_attr->private = &pdev->resource[i]; @@ -582,7 +581,6 @@ static struct bin_attribute pci_config_attr = { .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = 256, .read = pci_read_config, @@ -593,7 +591,6 @@ static struct bin_attribute pcie_config_attr = { .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = 4096, .read = pci_read_config, @@ -628,7 +625,6 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) rom_attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE); rom_attr->attr.name = "rom"; rom_attr->attr.mode = S_IRUSR; - rom_attr->attr.owner = THIS_MODULE; rom_attr->read = pci_read_rom; rom_attr->write = pci_write_rom; retval = sysfs_create_bin_file(&pdev->dev.kobj, rom_attr); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e48fcf089621..08783bd381f5 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -39,7 +39,6 @@ static void pci_create_legacy_files(struct pci_bus *b) b->legacy_io->attr.name = "legacy_io"; b->legacy_io->size = 0xffff; b->legacy_io->attr.mode = S_IRUSR | S_IWUSR; - b->legacy_io->attr.owner = THIS_MODULE; b->legacy_io->read = pci_read_legacy_io; b->legacy_io->write = pci_write_legacy_io; class_device_create_bin_file(&b->class_dev, b->legacy_io); @@ -49,7 +48,6 @@ static void pci_create_legacy_files(struct pci_bus *b) b->legacy_mem->attr.name = "legacy_mem"; b->legacy_mem->size = 1024*1024; b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR; - b->legacy_mem->attr.owner = THIS_MODULE; b->legacy_mem->mmap = pci_mmap_legacy_mem; class_device_create_bin_file(&b->class_dev, b->legacy_mem); } diff --git a/drivers/pcmcia/socket_sysfs.c b/drivers/pcmcia/socket_sysfs.c index a2bb46526b56..dbfbe65779e5 100644 --- a/drivers/pcmcia/socket_sysfs.c +++ b/drivers/pcmcia/socket_sysfs.c @@ -366,7 +366,7 @@ static struct device_attribute *pccard_socket_attributes[] = { }; static struct bin_attribute pccard_cis_attr = { - .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR, .owner = THIS_MODULE}, + .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR }, .size = 0x200, .read = pccard_show_cis, .write = pccard_store_cis, diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index eed91434417d..a3972b9f96e6 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -197,7 +197,6 @@ static struct bin_attribute rio_config_attr = { .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = 0x200000, .read = rio_read_config, diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index afa64c7fa2e2..b024cfb558f4 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -290,7 +290,6 @@ static struct bin_attribute ds1553_nvram_attr = { .attr = { .name = "nvram", .mode = S_IRUGO | S_IWUGO, - .owner = THIS_MODULE, }, .size = RTC_OFFSET, .read = ds1553_nvram_read, diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index d68288b389dc..1638acdbc913 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -159,7 +159,6 @@ static struct bin_attribute ds1742_nvram_attr = { .attr = { .name = "nvram", .mode = S_IRUGO | S_IWUGO, - .owner = THIS_MODULE, }, .read = ds1742_nvram_read, .write = ds1742_nvram_write, diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index ac289e6eadfe..96a8a72a6083 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -165,7 +165,6 @@ static struct bin_attribute chp_measurement_chars_attr = { .attr = { .name = "measurement_chars", .mode = S_IRUSR, - .owner = THIS_MODULE, }, .size = sizeof(struct cmg_chars), .read = chp_measurement_chars_read, @@ -217,7 +216,6 @@ static struct bin_attribute chp_measurement_attr = { .attr = { .name = "measurement", .mode = S_IRUSR, - .owner = THIS_MODULE, }, .size = sizeof(struct cmg_entry), .read = chp_measurement_read, diff --git a/drivers/s390/net/qeth_sys.c b/drivers/s390/net/qeth_sys.c index 65ffc21afc37..bb0287ad1aac 100644 --- a/drivers/s390/net/qeth_sys.c +++ b/drivers/s390/net/qeth_sys.c @@ -991,7 +991,7 @@ static struct attribute_group qeth_osn_device_attr_group = { #define QETH_DEVICE_ATTR(_id,_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_id = { \ - .attr = {.name=__stringify(_name), .mode=_mode, .owner=THIS_MODULE },\ + .attr = {.name=__stringify(_name), .mode=_mode, },\ .show = _show, \ .store = _store, \ }; diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c index 03bfed61bffc..8908228bc134 100644 --- a/drivers/scsi/arcmsr/arcmsr_attr.c +++ b/drivers/scsi/arcmsr/arcmsr_attr.c @@ -188,7 +188,6 @@ static struct bin_attribute arcmsr_sysfs_message_read_attr = { .attr = { .name = "mu_read", .mode = S_IRUSR , - .owner = THIS_MODULE, }, .size = 1032, .read = arcmsr_sysfs_iop_message_read, @@ -198,7 +197,6 @@ static struct bin_attribute arcmsr_sysfs_message_write_attr = { .attr = { .name = "mu_write", .mode = S_IWUSR, - .owner = THIS_MODULE, }, .size = 1032, .write = arcmsr_sysfs_iop_message_write, @@ -208,7 +206,6 @@ static struct bin_attribute arcmsr_sysfs_message_clear_attr = { .attr = { .name = "mu_clear", .mode = S_IWUSR, - .owner = THIS_MODULE, }, .size = 1, .write = arcmsr_sysfs_iop_message_clear, diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index e34442e405e8..578ed79f4148 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1368,7 +1368,6 @@ static void sas_ex_smp_hook(struct domain_device *dev) memset(bin_attr, 0, sizeof(*bin_attr)); bin_attr->attr.name = SMP_BIN_ATTR_NAME; - bin_attr->attr.owner = THIS_MODULE; bin_attr->attr.mode = 0600; bin_attr->size = 0; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 95fe77e816f8..f81fe501a4a1 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1200,7 +1200,6 @@ static struct bin_attribute sysfs_ctlreg_attr = { .attr = { .name = "ctlreg", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = 256, .read = sysfs_ctlreg_read, @@ -1422,7 +1421,6 @@ static struct bin_attribute sysfs_mbox_attr = { .attr = { .name = "mbox", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = MAILBOX_CMD_SIZE, .read = sysfs_mbox_read, diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 8081b637d97e..96587253bfa9 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -73,7 +73,6 @@ static struct bin_attribute sysfs_fw_dump_attr = { .attr = { .name = "fw_dump", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = 0, .read = qla2x00_sysfs_read_fw_dump, @@ -149,7 +148,6 @@ static struct bin_attribute sysfs_nvram_attr = { .attr = { .name = "nvram", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = 512, .read = qla2x00_sysfs_read_nvram, @@ -198,7 +196,6 @@ static struct bin_attribute sysfs_optrom_attr = { .attr = { .name = "optrom", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = OPTROM_SIZE_24XX, .read = qla2x00_sysfs_read_optrom, @@ -279,7 +276,6 @@ static struct bin_attribute sysfs_optrom_ctl_attr = { .attr = { .name = "optrom_ctl", .mode = S_IWUSR, - .owner = THIS_MODULE, }, .size = 0, .write = qla2x00_sysfs_write_optrom_ctl, @@ -327,7 +323,6 @@ static struct bin_attribute sysfs_vpd_attr = { .attr = { .name = "vpd", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = 0, .read = qla2x00_sysfs_read_vpd, @@ -375,7 +370,6 @@ static struct bin_attribute sysfs_sfp_attr = { .attr = { .name = "sfp", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = SFP_DEV_SIZE * 2, .read = qla2x00_sysfs_read_sfp, diff --git a/drivers/spi/at25.c b/drivers/spi/at25.c index 8efa07e8b8c2..fde1dededba3 100644 --- a/drivers/spi/at25.c +++ b/drivers/spi/at25.c @@ -314,7 +314,6 @@ static int at25_probe(struct spi_device *spi) */ at25->bin.attr.name = "eeprom"; at25->bin.attr.mode = S_IRUSR; - at25->bin.attr.owner = THIS_MODULE; at25->bin.read = at25_bin_read; at25->bin.size = at25->chip.byte_len; diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index 2ce050193018..3b3c6571f583 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -2126,7 +2126,6 @@ static ssize_t radeon_show_edid2(struct kobject *kobj, char *buf, loff_t off, si static struct bin_attribute edid1_attr = { .attr = { .name = "edid1", - .owner = THIS_MODULE, .mode = 0444, }, .size = EDID_LENGTH, @@ -2136,7 +2135,6 @@ static struct bin_attribute edid1_attr = { static struct bin_attribute edid2_attr = { .attr = { .name = "edid2", - .owner = THIS_MODULE, .mode = 0444, }, .size = EDID_LENGTH, diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index c65e81ff3578..7e06223bca94 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -172,7 +172,7 @@ static struct class backlight_class = { #define DECLARE_ATTR(_name,_mode,_show,_store) \ { \ - .attr = { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 6ef8f0a7a137..648b53c1fdea 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -157,7 +157,7 @@ static struct class lcd_class = { #define DECLARE_ATTR(_name,_mode,_show,_store) \ { \ - .attr = { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c index 8ea17a53eed8..4e13aa71adea 100644 --- a/drivers/w1/slaves/w1_ds2433.c +++ b/drivers/w1/slaves/w1_ds2433.c @@ -252,7 +252,6 @@ static struct bin_attribute w1_f23_bin_attr = { .attr = { .name = "eeprom", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = W1_EEPROM_SIZE, .read = w1_f23_read_bin, diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 1a6937dc190b..8ba4e572e09c 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -48,7 +48,6 @@ static struct bin_attribute w1_therm_bin_attr = { .attr = { .name = "w1_slave", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = W1_SLAVE_DATA_SIZE, .read = w1_therm_read_bin, diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 7d6876dbcc96..1838cb29b646 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -128,7 +128,6 @@ static struct bin_attribute w1_slave_attr_bin_id = { .attr = { .name = "id", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = 8, .read = w1_slave_read_id, @@ -167,7 +166,6 @@ static struct bin_attribute w1_default_attr = { .attr = { .name = "rw", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = PAGE_SIZE, .read = w1_default_read, diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c index c3ba0ec334c4..7e03cc68b182 100644 --- a/drivers/zorro/zorro-sysfs.c +++ b/drivers/zorro/zorro-sysfs.c @@ -78,7 +78,6 @@ static struct bin_attribute zorro_config_attr = { .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE }, .size = sizeof(struct ConfigDev), .read = zorro_read_config, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 606128f5c927..02ca6f1e55d7 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -840,8 +840,6 @@ static int __init ecryptfs_init(void) goto out; } kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); - sysfs_attr_version.attr.owner = THIS_MODULE; - sysfs_attr_version_str.attr.owner = THIS_MODULE; rc = do_sysfs_registration(); if (rc) { printk(KERN_ERR "sysfs registration failed\n"); diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 2b205f5d5790..e9e042b93dbf 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c @@ -74,7 +74,6 @@ struct mlog_attribute { #define define_mask(_name) { \ .attr = { \ .name = #_name, \ - .owner = THIS_MODULE, \ .mode = S_IRUGO | S_IWUSR, \ }, \ .mask = ML_##_name, \ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 9a3a058f3553..98e0b85a9bb2 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -397,7 +397,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, static struct attribute addpartattr = { .name = "whole_disk", .mode = S_IRUSR | S_IRGRP | S_IROTH, - .owner = THIS_MODULE, }; sysfs_create_file(&p->kobj, &addpartattr); diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 618b8aea6a7b..3c5574a40b09 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -175,25 +175,20 @@ static int open(struct inode * inode, struct file * file) if (!sysfs_get_active(attr_sd)) return -ENODEV; - /* Grab the module reference for this attribute */ - error = -ENODEV; - if (!try_module_get(attr->attr.owner)) - goto err_sput; - error = -EACCES; if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) - goto err_mput; + goto err_out; if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) - goto err_mput; + goto err_out; error = -ENOMEM; bb = kzalloc(sizeof(*bb), GFP_KERNEL); if (!bb) - goto err_mput; + goto err_out; bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!bb->buffer) - goto err_mput; + goto err_out; mutex_init(&bb->mutex); file->private_data = bb; @@ -203,9 +198,7 @@ static int open(struct inode * inode, struct file * file) sysfs_get(attr_sd); return 0; - err_mput: - module_put(attr->attr.owner); - err_sput: + err_out: sysfs_put_active(attr_sd); kfree(bb); return error; @@ -214,13 +207,11 @@ static int open(struct inode * inode, struct file * file) static int release(struct inode * inode, struct file * file) { struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; struct bin_buffer *bb = file->private_data; if (bb->mmapped) sysfs_put_active_two(attr_sd); sysfs_put(attr_sd); - module_put(attr->attr.owner); kfree(bb->buffer); kfree(bb); return 0; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index d673d9b5d33f..a84b734f7b29 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -241,7 +241,6 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t static int sysfs_open_file(struct inode *inode, struct file *file) { struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct attribute *attr = attr_sd->s_elem.attr.attr; struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; struct sysfs_buffer * buffer; struct sysfs_ops * ops = NULL; @@ -251,11 +250,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) if (!sysfs_get_active_two(attr_sd)) return -ENODEV; - /* Grab the module reference for this attribute */ - error = -ENODEV; - if (!try_module_get(attr->owner)) - goto err_sput; - /* if the kobject has no ktype, then we assume that it is a subsystem * itself, and use ops for it. */ @@ -272,7 +266,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) * or the subsystem have no operations. */ if (!ops) - goto err_mput; + goto err_out; /* File needs write support. * The inode's perms must say it's ok, @@ -280,7 +274,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) */ if (file->f_mode & FMODE_WRITE) { if (!(inode->i_mode & S_IWUGO) || !ops->store) - goto err_mput; + goto err_out; } /* File needs read support. @@ -289,7 +283,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) */ if (file->f_mode & FMODE_READ) { if (!(inode->i_mode & S_IRUGO) || !ops->show) - goto err_mput; + goto err_out; } /* No error? Great, allocate a buffer for the file, and store it @@ -298,7 +292,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) error = -ENOMEM; buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); if (!buffer) - goto err_mput; + goto err_out; init_MUTEX(&buffer->sem); buffer->needs_read_fill = 1; @@ -310,9 +304,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) sysfs_get(attr_sd); return 0; - err_mput: - module_put(attr->owner); - err_sput: + err_out: sysfs_put_active_two(attr_sd); return error; } @@ -320,12 +312,9 @@ static int sysfs_open_file(struct inode *inode, struct file *file) static int sysfs_release(struct inode * inode, struct file * filp) { struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; - struct attribute *attr = attr_sd->s_elem.attr.attr; struct sysfs_buffer *buffer = filp->private_data; sysfs_put(attr_sd); - /* After this point, attr should not be accessed. */ - module_put(attr->owner); if (buffer) { if (buffer->page) diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index e699ab279c2c..e285746588d6 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -101,8 +101,7 @@ struct sysdev_attribute { #define _SYSDEV_ATTR(_name,_mode,_show,_store) \ { \ - .attr = { .name = __stringify(_name), .mode = _mode, \ - .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 2f86b080b39d..161e19aa2b4f 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -20,9 +20,13 @@ struct module; struct nameidata; struct dentry; +/* FIXME + * The *owner field is no longer used, but leave around + * until the tree gets cleaned up fully. + */ struct attribute { const char * name; - struct module * owner; + struct module * owner; mode_t mode; }; @@ -39,14 +43,14 @@ struct attribute_group { */ #define __ATTR(_name,_mode,_show,_store) { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } #define __ATTR_RO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \ - .show = _name##_show, \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = _name##_show, \ } #define __ATTR_NULL { .attr = { .name = NULL } } diff --git a/kernel/module.c b/kernel/module.c index 9bd93de01f4a..015d60cfd90e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -488,8 +488,7 @@ static void free_modinfo_##field(struct module *mod) \ mod->field = NULL; \ } \ static struct module_attribute modinfo_##field = { \ - .attr = { .name = __stringify(field), .mode = 0444, \ - .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(field), .mode = 0444 }, \ .show = show_modinfo_##field, \ .setup = setup_modinfo_##field, \ .test = modinfo_##field##_exists, \ @@ -793,7 +792,7 @@ static ssize_t show_refcnt(struct module_attribute *mattr, } static struct module_attribute refcnt = { - .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, + .attr = { .name = "refcnt", .mode = 0444 }, .show = show_refcnt, }; @@ -851,7 +850,7 @@ static ssize_t show_initstate(struct module_attribute *mattr, } static struct module_attribute initstate = { - .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, + .attr = { .name = "initstate", .mode = 0444 }, .show = show_initstate, }; @@ -1032,7 +1031,6 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; - sattr->mattr.attr.owner = mod; sattr->mattr.attr.mode = S_IRUGO; *(gattr++) = &(sattr++)->mattr.attr; } @@ -1090,7 +1088,6 @@ int module_add_modinfo_attrs(struct module *mod) if (!attr->test || (attr->test && attr->test(mod))) { memcpy(temp_attr, attr, sizeof(*temp_attr)); - temp_attr->attr.owner = mod; error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); ++temp_attr; } diff --git a/kernel/params.c b/kernel/params.c index e61c46c97ce7..effbaaedd7f3 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -491,7 +491,6 @@ param_sysfs_setup(struct module_kobject *mk, pattr->mattr.show = param_attr_show; pattr->mattr.store = param_attr_store; pattr->mattr.attr.name = (char *)&kp->name[name_skip]; - pattr->mattr.attr.owner = mk->mod; pattr->mattr.attr.mode = kp->perm; *(gattr++) = &(pattr++)->mattr.attr; } diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 33c6c4a7c689..31ace23a0914 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -383,8 +383,7 @@ static ssize_t brforward_read(struct kobject *kobj, char *buf, static struct bin_attribute bridge_forward = { .attr = { .name = SYSFS_BRIDGE_FDB, - .mode = S_IRUGO, - .owner = THIS_MODULE, }, + .mode = S_IRUGO, }, .read = brforward_read, }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 2da22927d8dd..79db51fcb476 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -29,8 +29,7 @@ struct brport_attribute { #define BRPORT_ATTR(_name,_mode,_show,_store) \ struct brport_attribute brport_attr_##_name = { \ .attr = {.name = __stringify(_name), \ - .mode = _mode, \ - .owner = THIS_MODULE, }, \ + .mode = _mode }, \ .show = _show, \ .store = _store, \ }; -- cgit v1.2.3 From 91a6902958f052358899f58683d44e36228d85c2 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 9 Jun 2007 13:57:22 +0800 Subject: sysfs: add parameter "struct bin_attribute *" in .read/.write methods for sysfs binary attributes Well, first of all, I don't want to change so many files either. What I do: Adding a new parameter "struct bin_attribute *" in the .read/.write methods for the sysfs binary attributes. In fact, only the four lines change in fs/sysfs/bin.c and include/linux/sysfs.h do the real work. But I have to update all the files that use binary attributes to make them compatible with the new .read and .write methods. I'm not sure if I missed any. :( Why I do this: For a sysfs attribute, we can get a pointer pointing to the struct attribute in the .show/.store method, while we can't do this for the binary attributes. I don't know why this is different, but this does make it not so handy to use the binary attributes as the regular ones. So I think this patch is reasonable. :) Who benefits from it: The patch that exposes ACPI tables in sysfs requires such an improvement. All the table binary attributes share the same .read method. Parameter "struct bin_attribute *" is used to get the table signature and instance number which are used to distinguish different ACPI table binary attributes. Without this parameter, we need to offer different .read methods for different ACPI table binary attributes. This is impossible as there are various ACPI tables on different platforms, and we don't know what they are until they are loaded. Signed-off-by: Zhang Rui Signed-off-by: Greg Kroah-Hartman --- .../firmware_sample_firmware_class.c | 2 + drivers/base/firmware_class.c | 4 +- drivers/firmware/dcdbas.c | 10 +++-- drivers/firmware/dell_rbu.c | 25 ++++++----- drivers/i2c/chips/eeprom.c | 3 +- drivers/i2c/chips/max6875.c | 5 ++- drivers/pci/hotplug/acpiphp_ibm.c | 6 ++- drivers/pci/pci-sysfs.c | 18 +++++--- drivers/pcmcia/socket_sysfs.c | 8 +++- drivers/rapidio/rio-sysfs.c | 6 ++- drivers/rtc/rtc-ds1553.c | 10 +++-- drivers/rtc/rtc-ds1742.c | 10 +++-- drivers/s390/cio/chp.c | 10 +++-- drivers/scsi/arcmsr/arcmsr_attr.c | 15 ++++--- drivers/scsi/ipr.c | 18 +++++--- drivers/scsi/libsas/sas_expander.c | 16 ++++--- drivers/scsi/lpfc/lpfc_attr.c | 12 ++++-- drivers/scsi/qla2xxx/qla_attr.c | 50 +++++++++++++--------- drivers/spi/at25.c | 6 ++- drivers/video/aty/radeon_base.c | 8 +++- drivers/w1/slaves/w1_ds2433.c | 10 +++-- drivers/w1/slaves/w1_therm.c | 7 ++- drivers/w1/w1.c | 12 ++++-- drivers/zorro/zorro-sysfs.c | 5 ++- fs/sysfs/bin.c | 4 +- include/linux/sysfs.h | 6 ++- net/bridge/br_sysfs_br.c | 5 ++- 27 files changed, 185 insertions(+), 106 deletions(-) (limited to 'net') diff --git a/Documentation/firmware_class/firmware_sample_firmware_class.c b/Documentation/firmware_class/firmware_sample_firmware_class.c index 4994f1f28f8c..fba943aacf93 100644 --- a/Documentation/firmware_class/firmware_sample_firmware_class.c +++ b/Documentation/firmware_class/firmware_sample_firmware_class.c @@ -78,6 +78,7 @@ static CLASS_DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); static ssize_t firmware_data_read(struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct class_device *class_dev = to_class_dev(kobj); @@ -88,6 +89,7 @@ static ssize_t firmware_data_read(struct kobject *kobj, return count; } static ssize_t firmware_data_write(struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct class_device *class_dev = to_class_dev(kobj); diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 0e511485d2e6..53f0ee6f3016 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -175,7 +175,7 @@ static ssize_t firmware_loading_store(struct device *dev, static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); static ssize_t -firmware_data_read(struct kobject *kobj, +firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = to_dev(kobj); @@ -240,7 +240,7 @@ fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size) * the driver as a firmware image. **/ static ssize_t -firmware_data_write(struct kobject *kobj, +firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = to_dev(kobj); diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 1865b56fb141..18cdcb3ae1ca 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -149,8 +149,9 @@ static ssize_t smi_data_buf_size_store(struct device *dev, return count; } -static ssize_t smi_data_read(struct kobject *kobj, char *buf, loff_t pos, - size_t count) +static ssize_t smi_data_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { size_t max_read; ssize_t ret; @@ -170,8 +171,9 @@ out: return ret; } -static ssize_t smi_data_write(struct kobject *kobj, char *buf, loff_t pos, - size_t count) +static ssize_t smi_data_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { ssize_t ret; diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index f8afecb7d0cf..477a3d0e3caf 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -543,8 +543,9 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count) return ret_count; } -static ssize_t read_rbu_data(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t read_rbu_data(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { ssize_t ret_count = 0; @@ -591,8 +592,9 @@ static void callbackfn_rbu(const struct firmware *fw, void *context) spin_unlock(&rbu_data.lock); } -static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t read_rbu_image_type(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { int size = 0; if (!pos) @@ -600,8 +602,9 @@ static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer, return size; } -static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t write_rbu_image_type(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { int rc = count; int req_firm_rc = 0; @@ -660,8 +663,9 @@ static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer, return rc; } -static ssize_t read_rbu_packet_size(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t read_rbu_packet_size(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { int size = 0; if (!pos) { @@ -672,8 +676,9 @@ static ssize_t read_rbu_packet_size(struct kobject *kobj, char *buffer, return size; } -static ssize_t write_rbu_packet_size(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t write_rbu_packet_size(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { unsigned long temp; spin_lock(&rbu_data.lock); diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c index 5990dd5fc773..332816431105 100644 --- a/drivers/i2c/chips/eeprom.c +++ b/drivers/i2c/chips/eeprom.c @@ -110,7 +110,8 @@ exit: mutex_unlock(&data->update_lock); } -static ssize_t eeprom_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj)); struct eeprom_data *data = i2c_get_clientdata(client); diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c index 1405ce5b236c..4e238c0a7ca3 100644 --- a/drivers/i2c/chips/max6875.c +++ b/drivers/i2c/chips/max6875.c @@ -125,8 +125,9 @@ exit_up: mutex_unlock(&data->update_lock); } -static ssize_t max6875_read(struct kobject *kobj, char *buf, loff_t off, - size_t count) +static ssize_t max6875_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct i2c_client *client = kobj_to_i2c_client(kobj); struct max6875_data *data = i2c_get_clientdata(client); diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 74556ec31a5b..70db38c0ced9 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -106,7 +106,8 @@ static int ibm_get_attention_status(struct hotplug_slot *slot, u8 *status); static void ibm_handle_events(acpi_handle handle, u32 event, void *context); static int ibm_get_table_from_acpi(char **bufp); static ssize_t ibm_read_apci_table(struct kobject *kobj, - char *buffer, loff_t pos, size_t size); + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t size); static acpi_status __init ibm_find_acpi_device(acpi_handle handle, u32 lvl, void *context, void **rv); static int __init ibm_acpiphp_init(void); @@ -357,7 +358,8 @@ read_table_done: * our solution is to only allow reading the table in all at once **/ static ssize_t ibm_read_apci_table(struct kobject *kobj, - char *buffer, loff_t pos, size_t size) + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t size) { int bytes_read = -EINVAL; char *table = NULL; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index d448f8df8613..6543cbe83be5 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -213,7 +213,8 @@ struct device_attribute pci_dev_attrs[] = { }; static ssize_t -pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); unsigned int size = 64; @@ -285,7 +286,8 @@ pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) } static ssize_t -pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); unsigned int size = count; @@ -352,7 +354,8 @@ pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) * callback routine (pci_legacy_read). */ ssize_t -pci_read_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_bus *bus = to_pci_bus(container_of(kobj, struct class_device, @@ -376,7 +379,8 @@ pci_read_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count) * callback routine (pci_legacy_write). */ ssize_t -pci_write_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_bus *bus = to_pci_bus(container_of(kobj, struct class_device, @@ -528,7 +532,8 @@ static inline void pci_remove_resource_files(struct pci_dev *dev) { return; } * writing anything except 0 enables it */ static ssize_t -pci_write_rom(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); @@ -551,7 +556,8 @@ pci_write_rom(struct kobject *kobj, char *buf, loff_t off, size_t count) * device corresponding to @kobj. */ static ssize_t -pci_read_rom(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); void __iomem *rom; diff --git a/drivers/pcmcia/socket_sysfs.c b/drivers/pcmcia/socket_sysfs.c index dbfbe65779e5..b4409002b7f8 100644 --- a/drivers/pcmcia/socket_sysfs.c +++ b/drivers/pcmcia/socket_sysfs.c @@ -283,7 +283,9 @@ static ssize_t pccard_extract_cis(struct pcmcia_socket *s, char *buf, loff_t off return (ret); } -static ssize_t pccard_show_cis(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t pccard_show_cis(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { unsigned int size = 0x200; @@ -311,7 +313,9 @@ static ssize_t pccard_show_cis(struct kobject *kobj, char *buf, loff_t off, size return (count); } -static ssize_t pccard_store_cis(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t pccard_store_cis(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pcmcia_socket *s = to_socket(container_of(kobj, struct device, kobj)); cisdump_t *cis; diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index a3972b9f96e6..659e31164cf0 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -67,7 +67,8 @@ struct device_attribute rio_dev_attrs[] = { }; static ssize_t -rio_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct rio_dev *dev = to_rio_dev(container_of(kobj, struct device, kobj)); @@ -137,7 +138,8 @@ rio_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) } static ssize_t -rio_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +rio_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct rio_dev *dev = to_rio_dev(container_of(kobj, struct device, kobj)); diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index b024cfb558f4..f98a83a11aae 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -258,8 +258,9 @@ static const struct rtc_class_ops ds1553_rtc_ops = { .ioctl = ds1553_rtc_ioctl, }; -static ssize_t ds1553_nvram_read(struct kobject *kobj, char *buf, - loff_t pos, size_t size) +static ssize_t ds1553_nvram_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); @@ -272,8 +273,9 @@ static ssize_t ds1553_nvram_read(struct kobject *kobj, char *buf, return count; } -static ssize_t ds1553_nvram_write(struct kobject *kobj, char *buf, - loff_t pos, size_t size) +static ssize_t ds1553_nvram_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 1638acdbc913..d1778ae8bca5 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -127,8 +127,9 @@ static const struct rtc_class_ops ds1742_rtc_ops = { .set_time = ds1742_rtc_set_time, }; -static ssize_t ds1742_nvram_read(struct kobject *kobj, char *buf, - loff_t pos, size_t size) +static ssize_t ds1742_nvram_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); @@ -141,8 +142,9 @@ static ssize_t ds1742_nvram_read(struct kobject *kobj, char *buf, return count; } -static ssize_t ds1742_nvram_write(struct kobject *kobj, char *buf, - loff_t pos, size_t size) +static ssize_t ds1742_nvram_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 96a8a72a6083..b57d93d986c0 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -141,8 +141,9 @@ static int s390_vary_chpid(struct chp_id chpid, int on) /* * Channel measurement related functions */ -static ssize_t chp_measurement_chars_read(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t chp_measurement_chars_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct channel_path *chp; unsigned int size; @@ -192,8 +193,9 @@ static void chp_measurement_copy_block(struct cmg_entry *buf, } while (reference_buf.values[0] != buf->values[0]); } -static ssize_t chp_measurement_read(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t chp_measurement_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct channel_path *chp; struct channel_subsystem *css; diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c index 8908228bc134..06c0dce3b839 100644 --- a/drivers/scsi/arcmsr/arcmsr_attr.c +++ b/drivers/scsi/arcmsr/arcmsr_attr.c @@ -59,8 +59,9 @@ struct class_device_attribute *arcmsr_host_attrs[]; static ssize_t -arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off, - size_t count) +arcmsr_sysfs_iop_message_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *host = class_to_shost(cdev); @@ -105,8 +106,9 @@ arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off, } static ssize_t -arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off, - size_t count) +arcmsr_sysfs_iop_message_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *host = class_to_shost(cdev); @@ -152,8 +154,9 @@ arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off, } static ssize_t -arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off, - size_t count) +arcmsr_sysfs_iop_message_clear(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *host = class_to_shost(cdev); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index fa6ff295e568..4a3083ea59d5 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -2465,6 +2465,7 @@ restart: /** * ipr_read_trace - Dump the adapter trace * @kobj: kobject struct + * @bin_attr: bin_attribute struct * @buf: buffer * @off: offset * @count: buffer size @@ -2472,8 +2473,9 @@ restart: * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_read_trace(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t ipr_read_trace(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *shost = class_to_shost(cdev); @@ -3166,6 +3168,7 @@ static struct class_device_attribute *ipr_ioa_attrs[] = { /** * ipr_read_dump - Dump the adapter * @kobj: kobject struct + * @bin_attr: bin_attribute struct * @buf: buffer * @off: offset * @count: buffer size @@ -3173,8 +3176,9 @@ static struct class_device_attribute *ipr_ioa_attrs[] = { * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_read_dump(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t ipr_read_dump(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *shost = class_to_shost(cdev); @@ -3327,6 +3331,7 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg) /** * ipr_write_dump - Setup dump state of adapter * @kobj: kobject struct + * @bin_attr: bin_attribute struct * @buf: buffer * @off: offset * @count: buffer size @@ -3334,8 +3339,9 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg) * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_write_dump(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t ipr_write_dump(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *shost = class_to_shost(cdev); diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 578ed79f4148..23e90c5f8f35 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -38,8 +38,10 @@ static int sas_disable_routing(struct domain_device *dev, u8 *sas_addr); #if 0 /* FIXME: smp needs to migrate into the sas class */ -static ssize_t smp_portal_read(struct kobject *, char *, loff_t, size_t); -static ssize_t smp_portal_write(struct kobject *, char *, loff_t, size_t); +static ssize_t smp_portal_read(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); +static ssize_t smp_portal_write(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); #endif /* ---------- SMP task management ---------- */ @@ -1845,8 +1847,9 @@ out: #if 0 /* ---------- SMP portal ---------- */ -static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs, - size_t size) +static ssize_t smp_portal_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t offs, size_t size) { struct domain_device *dev = to_dom_device(kobj); struct expander_device *ex = &dev->ex_dev; @@ -1872,8 +1875,9 @@ static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs, return size; } -static ssize_t smp_portal_read(struct kobject *kobj, char *buf, loff_t offs, - size_t size) +static ssize_t smp_portal_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t offs, size_t size) { struct domain_device *dev = to_dom_device(kobj); struct expander_device *ex = &dev->ex_dev; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index f81fe501a4a1..5dfda9778c80 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1133,7 +1133,8 @@ struct class_device_attribute *lpfc_host_attrs[] = { }; static ssize_t -sysfs_ctlreg_write(struct kobject *kobj, char *buf, loff_t off, size_t count) +sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { size_t buf_off; struct Scsi_Host *host = class_to_shost(container_of(kobj, @@ -1165,7 +1166,8 @@ sysfs_ctlreg_write(struct kobject *kobj, char *buf, loff_t off, size_t count) } static ssize_t -sysfs_ctlreg_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +sysfs_ctlreg_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { size_t buf_off; uint32_t * tmp_ptr; @@ -1221,7 +1223,8 @@ sysfs_mbox_idle (struct lpfc_hba * phba) } static ssize_t -sysfs_mbox_write(struct kobject *kobj, char *buf, loff_t off, size_t count) +sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct Scsi_Host * host = class_to_shost(container_of(kobj, struct class_device, kobj)); @@ -1273,7 +1276,8 @@ sysfs_mbox_write(struct kobject *kobj, char *buf, loff_t off, size_t count) } static ssize_t -sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct Scsi_Host *host = class_to_shost(container_of(kobj, struct class_device, diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 96587253bfa9..942db9de785e 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -11,8 +11,9 @@ /* SYSFS attributes --------------------------------------------------------- */ static ssize_t -qla2x00_sysfs_read_fw_dump(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_fw_dump(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -31,8 +32,9 @@ qla2x00_sysfs_read_fw_dump(struct kobject *kobj, char *buf, loff_t off, } static ssize_t -qla2x00_sysfs_write_fw_dump(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_fw_dump(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -80,8 +82,9 @@ static struct bin_attribute sysfs_fw_dump_attr = { }; static ssize_t -qla2x00_sysfs_read_nvram(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_nvram(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -100,8 +103,9 @@ qla2x00_sysfs_read_nvram(struct kobject *kobj, char *buf, loff_t off, } static ssize_t -qla2x00_sysfs_write_nvram(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_nvram(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -155,8 +159,9 @@ static struct bin_attribute sysfs_nvram_attr = { }; static ssize_t -qla2x00_sysfs_read_optrom(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_optrom(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -174,8 +179,9 @@ qla2x00_sysfs_read_optrom(struct kobject *kobj, char *buf, loff_t off, } static ssize_t -qla2x00_sysfs_write_optrom(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_optrom(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -203,8 +209,9 @@ static struct bin_attribute sysfs_optrom_attr = { }; static ssize_t -qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -282,8 +289,9 @@ static struct bin_attribute sysfs_optrom_ctl_attr = { }; static ssize_t -qla2x00_sysfs_read_vpd(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_vpd(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -301,8 +309,9 @@ qla2x00_sysfs_read_vpd(struct kobject *kobj, char *buf, loff_t off, } static ssize_t -qla2x00_sysfs_write_vpd(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_vpd(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -330,8 +339,9 @@ static struct bin_attribute sysfs_vpd_attr = { }; static ssize_t -qla2x00_sysfs_read_sfp(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_sfp(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); diff --git a/drivers/spi/at25.c b/drivers/spi/at25.c index fde1dededba3..e007833cca59 100644 --- a/drivers/spi/at25.c +++ b/drivers/spi/at25.c @@ -111,7 +111,8 @@ at25_ee_read( } static ssize_t -at25_bin_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev; struct at25_data *at25; @@ -236,7 +237,8 @@ at25_ee_write(struct at25_data *at25, char *buf, loff_t off, size_t count) } static ssize_t -at25_bin_write(struct kobject *kobj, char *buf, loff_t off, size_t count) +at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev; struct at25_data *at25; diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index 3b3c6571f583..2349e71b0083 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -2102,7 +2102,9 @@ static ssize_t radeon_show_one_edid(char *buf, loff_t off, size_t count, const u } -static ssize_t radeon_show_edid1(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t radeon_show_edid1(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, kobj); struct pci_dev *pdev = to_pci_dev(dev); @@ -2113,7 +2115,9 @@ static ssize_t radeon_show_edid1(struct kobject *kobj, char *buf, loff_t off, si } -static ssize_t radeon_show_edid2(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t radeon_show_edid2(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, kobj); struct pci_dev *pdev = to_pci_dev(dev); diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c index 4e13aa71adea..cab56005dd49 100644 --- a/drivers/w1/slaves/w1_ds2433.c +++ b/drivers/w1/slaves/w1_ds2433.c @@ -91,8 +91,9 @@ static int w1_f23_refresh_block(struct w1_slave *sl, struct w1_f23_data *data, } #endif /* CONFIG_W1_SLAVE_DS2433_CRC */ -static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off, - size_t count) +static ssize_t w1_f23_read_bin(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); #ifdef CONFIG_W1_SLAVE_DS2433_CRC @@ -199,8 +200,9 @@ static int w1_f23_write(struct w1_slave *sl, int addr, int len, const u8 *data) return 0; } -static ssize_t w1_f23_write_bin(struct kobject *kobj, char *buf, loff_t off, - size_t count) +static ssize_t w1_f23_write_bin(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); int addr, len, idx; diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 8ba4e572e09c..4318935678c5 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -42,7 +42,8 @@ static u8 bad_roms[][9] = { {} }; -static ssize_t w1_therm_read_bin(struct kobject *, char *, loff_t, size_t); +static ssize_t w1_therm_read_bin(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); static struct bin_attribute w1_therm_bin_attr = { .attr = { @@ -158,7 +159,9 @@ static int w1_therm_check_rom(u8 rom[9]) return 0; } -static ssize_t w1_therm_read_bin(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t w1_therm_read_bin(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); struct w1_master *dev = sl->master; diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 1838cb29b646..f5c5b760ed7b 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -105,7 +105,9 @@ static ssize_t w1_slave_read_name(struct device *dev, struct device_attribute *a return sprintf(buf, "%s\n", sl->name); } -static ssize_t w1_slave_read_id(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t w1_slave_read_id(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); @@ -135,7 +137,9 @@ static struct bin_attribute w1_slave_attr_bin_id = { /* Default family */ -static ssize_t w1_default_write(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t w1_default_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); @@ -152,7 +156,9 @@ out_up: return count; } -static ssize_t w1_default_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t w1_default_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c index 7e03cc68b182..9130f1c12c26 100644 --- a/drivers/zorro/zorro-sysfs.c +++ b/drivers/zorro/zorro-sysfs.c @@ -49,8 +49,9 @@ static ssize_t zorro_show_resource(struct device *dev, struct device_attribute * static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL); -static ssize_t zorro_read_config(struct kobject *kobj, char *buf, loff_t off, - size_t count) +static ssize_t zorro_read_config(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct zorro_dev *z = to_zorro_dev(container_of(kobj, struct device, kobj)); diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 55796bdacd3d..135353f8a296 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -40,7 +40,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) rc = -EIO; if (attr->read) - rc = attr->read(kobj, buffer, off, count); + rc = attr->read(kobj, attr, buffer, off, count); sysfs_put_active_two(attr_sd); @@ -97,7 +97,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) rc = -EIO; if (attr->write) - rc = attr->write(kobj, buffer, offset, count); + rc = attr->write(kobj, attr, buffer, offset, count); sysfs_put_active_two(attr_sd); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 2f58ca1af770..be8228e50a27 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -64,8 +64,10 @@ struct bin_attribute { struct attribute attr; size_t size; void *private; - ssize_t (*read)(struct kobject *, char *, loff_t, size_t); - ssize_t (*write)(struct kobject *, char *, loff_t, size_t); + ssize_t (*read)(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); + ssize_t (*write)(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); int (*mmap)(struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 31ace23a0914..4f42263e0a8a 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -360,8 +360,9 @@ static struct attribute_group bridge_group = { * * Returns the number of bytes read. */ -static ssize_t brforward_read(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t brforward_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev = to_dev(kobj); struct net_bridge *br = to_bridge(dev); -- cgit v1.2.3 From 2b1244a43be97f504494b557a7f7a65fe0d00dba Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Thu, 8 Mar 2007 09:57:36 -0800 Subject: I/OAT: Only offload copies for TCP when there will be a context switch The performance wins come with having the DMA copy engine doing the copies in parallel with the context switch. If there is enough data ready on the socket at recv time just use a regular copy. Signed-off-by: Chris Leech --- net/ipv4/tcp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 450f44bb2c8e..0eb7cf1002c1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1116,6 +1116,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, long timeo; struct task_struct *user_recv = NULL; int copied_early = 0; + int available = 0; + struct sk_buff *skb; lock_sock(sk); @@ -1142,7 +1144,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, #ifdef CONFIG_NET_DMA tp->ucopy.dma_chan = NULL; preempt_disable(); - if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && + skb = skb_peek_tail(&sk->sk_receive_queue); + if (skb) + available = TCP_SKB_CB(skb)->seq + skb->len - (*seq); + if ((available < target) && + (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); @@ -1151,7 +1157,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, #endif do { - struct sk_buff *skb; u32 offset; /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ @@ -1439,7 +1444,6 @@ skip_copy: #ifdef CONFIG_NET_DMA if (tp->ucopy.dma_chan) { - struct sk_buff *skb; dma_cookie_t done, used; dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); -- cgit v1.2.3 From e00c5d8b4d800b95b72b3f072e1d55d7c7034702 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 8 Mar 2007 09:57:36 -0800 Subject: I/OAT: warning fix net/ipv4/tcp.c: In function 'tcp_recvmsg': net/ipv4/tcp.c:1111: warning: unused variable 'available' Signed-off-by: Andrew Morton Signed-off-by: Chris Leech --- net/ipv4/tcp.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0eb7cf1002c1..987b94403be5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1116,7 +1116,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, long timeo; struct task_struct *user_recv = NULL; int copied_early = 0; - int available = 0; struct sk_buff *skb; lock_sock(sk); @@ -1145,15 +1144,22 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.dma_chan = NULL; preempt_disable(); skb = skb_peek_tail(&sk->sk_receive_queue); - if (skb) - available = TCP_SKB_CB(skb)->seq + skb->len - (*seq); - if ((available < target) && - (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { - preempt_enable_no_resched(); - tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); - } else - preempt_enable_no_resched(); + { + int available = 0; + + if (skb) + available = TCP_SKB_CB(skb)->seq + skb->len - (*seq); + if ((available < target) && + (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && + !sysctl_tcp_low_latency && + __get_cpu_var(softnet_data).net_dma) { + preempt_enable_no_resched(); + tp->ucopy.pinned_list = + dma_pin_iovec_pages(msg->msg_iov, len); + } else { + preempt_enable_no_resched(); + } + } #endif do { -- cgit v1.2.3 From 29578624e354f56143d92510fff33a8b2aaa2c03 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Wed, 11 Jul 2007 19:32:02 -0700 Subject: [NET]: Fix races in net_rx_action vs netpoll. Keep netpoll/poll_napi from messing with the poll_list. Only net_rx_action is allowed to manipulate the list. Signed-off-by: Olaf Kirch Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ net/core/netpoll.c | 8 ++++++++ 2 files changed, 18 insertions(+) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8590d685d935..79cc3dab4be7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -261,6 +261,8 @@ enum netdev_state_t __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_QDISC_RUNNING, + /* Set by the netpoll NAPI code */ + __LINK_STATE_POLL_LIST_FROZEN, }; @@ -1014,6 +1016,14 @@ static inline void netif_rx_complete(struct net_device *dev) { unsigned long flags; +#ifdef CONFIG_NETPOLL + /* Prevent race with netpoll - yes, this is a kludge. + * But at least it doesn't penalize the non-netpoll + * code path. */ + if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state)) + return; +#endif + local_irq_save(flags); __netif_rx_complete(dev); local_irq_restore(flags); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index de1b26aa5720..d1264e9a50a8 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -124,6 +124,13 @@ static void poll_napi(struct netpoll *np) if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && npinfo->poll_owner != smp_processor_id() && spin_trylock(&npinfo->poll_lock)) { + /* When calling dev->poll from poll_napi, we may end up in + * netif_rx_complete. However, only the CPU to which the + * device was queued is allowed to remove it from poll_list. + * Setting POLL_LIST_FROZEN tells netif_rx_complete + * to leave the NAPI state alone. + */ + set_bit(__LINK_STATE_POLL_LIST_FROZEN, &np->dev->state); npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); @@ -131,6 +138,7 @@ static void poll_napi(struct netpoll *np) atomic_dec(&trapped); npinfo->rx_flags &= ~NETPOLL_RX_DROP; + clear_bit(__LINK_STATE_POLL_LIST_FROZEN, &np->dev->state); spin_unlock(&npinfo->poll_lock); } } -- cgit v1.2.3 From 71bffe556c59a7865bf0b1ecd94530f1e296cdb0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:41:18 -0700 Subject: [ETH]: Validate address in eth_mac_addr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ethernet/eth.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 1387e5411f77..12c765715acf 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -266,8 +266,11 @@ void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, static int eth_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; + if (netif_running(dev)) return -EBUSY; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } -- cgit v1.2.3 From 8c979c26a0f093c13290320edda799d8335e50ae Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:45:24 -0700 Subject: [VLAN]: Fix MAC address handling The VLAN MAC address handling is broken in multiple ways. When the address differs when setting it, the real device is put in promiscous mode twice, but never taken out again. Additionally it doesn't resync when the real device's address is changed and needlessly puts it in promiscous mode when the vlan device is still down. Fix by moving address handling to vlan_dev_open/vlan_dev_stop and properly deal with address changes in the device notifier. Also switch to dev_unicast_add (which needs the exact same handling). Since the set_mac_address handler is identical to the generic ethernet one with these changes, kill it and use ether_setup(). Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 + net/8021q/vlan.c | 43 ++++++++++++++++++++++++++++++++----- net/8021q/vlan.h | 1 - net/8021q/vlan_dev.c | 57 ++++++++++++++++--------------------------------- 4 files changed, 57 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c7912876a210..61a57dc2ac99 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -135,6 +135,7 @@ struct vlan_dev_info { int old_allmulti; /* similar to above. */ int old_promiscuity; /* similar to above. */ struct net_device *real_dev; /* the underlying device/interface */ + unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ unsigned long cnt_inc_headroom_on_tx; /* How many times did we have to grow the skb on TX. */ unsigned long cnt_encap_on_xmit; /* How many times did we have to encapsulate the skb on TX. */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e7583eea6fda..b463ba47024d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -345,12 +345,8 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - /* TODO: maybe just assign it to be ETHERNET? */ - dev->type = real_dev->type; - memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len); memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); - dev->addr_len = real_dev->addr_len; if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->hard_header = real_dev->hard_header; @@ -364,6 +360,7 @@ static int vlan_dev_init(struct net_device *dev) dev->rebuild_header = vlan_dev_rebuild_header; } dev->hard_header_parse = real_dev->hard_header_parse; + dev->hard_header_cache = NULL; lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key); return 0; @@ -373,6 +370,8 @@ void vlan_setup(struct net_device *new_dev) { SET_MODULE_OWNER(new_dev); + ether_setup(new_dev); + /* new_dev->ifindex = 0; it will be set when added to * the global list. * iflink is set as well. @@ -392,7 +391,6 @@ void vlan_setup(struct net_device *new_dev) new_dev->init = vlan_dev_init; new_dev->open = vlan_dev_open; new_dev->stop = vlan_dev_stop; - new_dev->set_mac_address = vlan_dev_set_mac_address; new_dev->set_multicast_list = vlan_dev_set_multicast_list; new_dev->destructor = free_netdev; new_dev->do_ioctl = vlan_dev_ioctl; @@ -592,6 +590,30 @@ out_free_newdev: return err; } +static void vlan_sync_address(struct net_device *dev, + struct net_device *vlandev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(vlandev); + + /* May be called without an actual change */ + if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr)) + return; + + /* vlan address was different from the old address and is equal to + * the new address */ + if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) && + !compare_ether_addr(vlandev->dev_addr, dev->dev_addr)) + dev_unicast_delete(dev, vlandev->dev_addr, ETH_ALEN); + + /* vlan address was equal to the old address and is different from + * the new address */ + if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) && + compare_ether_addr(vlandev->dev_addr, dev->dev_addr)) + dev_unicast_add(dev, vlandev->dev_addr, ETH_ALEN); + + memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); +} + static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; @@ -618,6 +640,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } break; + case NETDEV_CHANGEADDR: + /* Adjust unicast filters on underlying device */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = vlan_group_get_device(grp, i); + if (!vlandev) + continue; + + vlan_sync_address(dev, vlandev); + } + break; + case NETDEV_DOWN: /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index fe6bb0f7d275..62ce1c519aab 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -58,7 +58,6 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); int vlan_dev_change_mtu(struct net_device *dev, int new_mtu); -int vlan_dev_set_mac_address(struct net_device *dev, void* addr); int vlan_dev_open(struct net_device* dev); int vlan_dev_stop(struct net_device* dev); int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 95afe387b952..d4a62d1b52b4 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -612,44 +612,6 @@ void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) *result = VLAN_DEV_INFO(dev)->vlan_id; } -int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p) -{ - struct sockaddr *addr = (struct sockaddr *)(addr_struct_p); - int i; - - if (netif_running(dev)) - return -EBUSY; - - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - - printk("%s: Setting MAC address to ", dev->name); - for (i = 0; i < 6; i++) - printk(" %2.2x", dev->dev_addr[i]); - printk(".\n"); - - if (memcmp(VLAN_DEV_INFO(dev)->real_dev->dev_addr, - dev->dev_addr, - dev->addr_len) != 0) { - if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_PROMISC)) { - int flgs = VLAN_DEV_INFO(dev)->real_dev->flags; - - /* Increment our in-use promiscuity counter */ - dev_set_promiscuity(VLAN_DEV_INFO(dev)->real_dev, 1); - - /* Make PROMISC visible to the user. */ - flgs |= IFF_PROMISC; - printk("VLAN (%s): Setting underlying device (%s) to promiscious mode.\n", - dev->name, VLAN_DEV_INFO(dev)->real_dev->name); - dev_change_flags(VLAN_DEV_INFO(dev)->real_dev, flgs); - } - } else { - printk("VLAN (%s): Underlying device (%s) has same MAC, not checking promiscious mode.\n", - dev->name, VLAN_DEV_INFO(dev)->real_dev->name); - } - - return 0; -} - static inline int vlan_dmi_equals(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) { @@ -736,15 +698,32 @@ static void vlan_flush_mc_list(struct net_device *dev) int vlan_dev_open(struct net_device *dev) { - if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_UP)) + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct net_device *real_dev = vlan->real_dev; + int err; + + if (!(real_dev->flags & IFF_UP)) return -ENETDOWN; + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { + err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN); + if (err < 0) + return err; + } + memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN); + return 0; } int vlan_dev_stop(struct net_device *dev) { + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; + vlan_flush_mc_list(dev); + + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) + dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); + return 0; } -- cgit v1.2.3 From 2d85cba2b272a5201a60966a65a4f8c0bcc0bb71 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:42:13 -0700 Subject: [RTNETLINK]: rtnl_link API simplification All drivers need to unregister their devices in the module unload function. While doing so they must hold the rtnl and atomically unregister the rtnl_link ops as well. This makes the rtnl_link_unregister function that takes the rtnl itself completely useless. Provide default newlink/dellink functions, make __rtnl_link_unregister and rtnl_link_unregister unregister all devices with matching rtnl_link_ops and change the existing users to take advantage of that. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/dummy.c | 57 +++------------------------------------------------ drivers/net/ifb.c | 58 ++++++---------------------------------------------- net/8021q/vlan.c | 21 ------------------- net/core/rtnetlink.c | 18 ++++++++++++---- 4 files changed, 23 insertions(+), 131 deletions(-) (limited to 'net') diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 91126b9ce453..373ff700404f 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -37,11 +37,6 @@ #include #include -struct dummy_priv { - struct net_device *dev; - struct list_head list; -}; - static int numdummies = 1; static int dummy_xmit(struct sk_buff *skb, struct net_device *dev); @@ -89,37 +84,9 @@ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static LIST_HEAD(dummies); - -static int dummy_newlink(struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct dummy_priv *priv = netdev_priv(dev); - int err; - - err = register_netdevice(dev); - if (err < 0) - return err; - - priv->dev = dev; - list_add_tail(&priv->list, &dummies); - return 0; -} - -static void dummy_dellink(struct net_device *dev) -{ - struct dummy_priv *priv = netdev_priv(dev); - - list_del(&priv->list); - unregister_netdevice(dev); -} - static struct rtnl_link_ops dummy_link_ops __read_mostly = { .kind = "dummy", - .priv_size = sizeof(struct dummy_priv), .setup = dummy_setup, - .newlink = dummy_newlink, - .dellink = dummy_dellink, }; /* Number of dummy devices to be set up by this module. */ @@ -129,12 +96,9 @@ MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices"); static int __init dummy_init_one(void) { struct net_device *dev_dummy; - struct dummy_priv *priv; int err; - dev_dummy = alloc_netdev(sizeof(struct dummy_priv), "dummy%d", - dummy_setup); - + dev_dummy = alloc_netdev(0, "dummy%d", dummy_setup); if (!dev_dummy) return -ENOMEM; @@ -146,10 +110,6 @@ static int __init dummy_init_one(void) err = register_netdevice(dev_dummy); if (err < 0) goto err; - - priv = netdev_priv(dev_dummy); - priv->dev = dev_dummy; - list_add_tail(&priv->list, &dummies); return 0; err: @@ -159,7 +119,6 @@ err: static int __init dummy_init_module(void) { - struct dummy_priv *priv, *next; int i, err = 0; rtnl_lock(); @@ -167,11 +126,8 @@ static int __init dummy_init_module(void) for (i = 0; i < numdummies && !err; i++) err = dummy_init_one(); - if (err < 0) { - list_for_each_entry_safe(priv, next, &dummies, list) - dummy_dellink(priv->dev); + if (err < 0) __rtnl_link_unregister(&dummy_link_ops); - } rtnl_unlock(); return err; @@ -179,14 +135,7 @@ static int __init dummy_init_module(void) static void __exit dummy_cleanup_module(void) { - struct dummy_priv *priv, *next; - - rtnl_lock(); - list_for_each_entry_safe(priv, next, &dummies, list) - dummy_dellink(priv->dev); - - __rtnl_link_unregister(&dummy_link_ops); - rtnl_unlock(); + rtnl_link_unregister(&dummy_link_ops); } module_init(dummy_init_module); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 669ee1a1b284..c8e7c8f6ba3e 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -33,15 +33,12 @@ #include #include #include -#include #include #define TX_TIMEOUT (2*HZ) #define TX_Q_LIMIT 32 struct ifb_private { - struct list_head list; - struct net_device *dev; struct net_device_stats stats; struct tasklet_struct ifb_tasklet; int tasklet_pending; @@ -201,12 +198,6 @@ static struct net_device_stats *ifb_get_stats(struct net_device *dev) return stats; } -static LIST_HEAD(ifbs); - -/* Number of ifb devices to be set up by this module. */ -module_param(numifbs, int, 0); -MODULE_PARM_DESC(numifbs, "Number of ifb devices"); - static int ifb_close(struct net_device *dev) { struct ifb_private *dp = netdev_priv(dev); @@ -230,41 +221,19 @@ static int ifb_open(struct net_device *dev) return 0; } -static int ifb_newlink(struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct ifb_private *priv = netdev_priv(dev); - int err; - - err = register_netdevice(dev); - if (err < 0) - return err; - - priv->dev = dev; - list_add_tail(&priv->list, &ifbs); - return 0; -} - -static void ifb_dellink(struct net_device *dev) -{ - struct ifb_private *priv = netdev_priv(dev); - - list_del(&priv->list); - unregister_netdevice(dev); -} - static struct rtnl_link_ops ifb_link_ops __read_mostly = { .kind = "ifb", .priv_size = sizeof(struct ifb_private), .setup = ifb_setup, - .newlink = ifb_newlink, - .dellink = ifb_dellink, }; +/* Number of ifb devices to be set up by this module. */ +module_param(numifbs, int, 0); +MODULE_PARM_DESC(numifbs, "Number of ifb devices"); + static int __init ifb_init_one(int index) { struct net_device *dev_ifb; - struct ifb_private *priv; int err; dev_ifb = alloc_netdev(sizeof(struct ifb_private), @@ -281,10 +250,6 @@ static int __init ifb_init_one(int index) err = register_netdevice(dev_ifb); if (err < 0) goto err; - - priv = netdev_priv(dev_ifb); - priv->dev = dev_ifb; - list_add_tail(&priv->list, &ifbs); return 0; err: @@ -294,7 +259,6 @@ err: static int __init ifb_init_module(void) { - struct ifb_private *priv, *next; int i, err; rtnl_lock(); @@ -302,11 +266,8 @@ static int __init ifb_init_module(void) for (i = 0; i < numifbs && !err; i++) err = ifb_init_one(i); - if (err) { - list_for_each_entry_safe(priv, next, &ifbs, list) - ifb_dellink(priv->dev); + if (err) __rtnl_link_unregister(&ifb_link_ops); - } rtnl_unlock(); return err; @@ -314,14 +275,7 @@ static int __init ifb_init_module(void) static void __exit ifb_cleanup_module(void) { - struct ifb_private *priv, *next; - - rtnl_lock(); - list_for_each_entry_safe(priv, next, &ifbs, list) - ifb_dellink(priv->dev); - - __rtnl_link_unregister(&ifb_link_ops); - rtnl_unlock(); + rtnl_link_unregister(&ifb_link_ops); } module_init(ifb_init_module); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index b463ba47024d..34c1d0b241ca 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -115,26 +115,6 @@ err1: return err; } -/* Cleanup all vlan devices - * Note: devices that have been registered that but not - * brought up will exist but have no module ref count. - */ -static void __exit vlan_cleanup_devices(void) -{ - struct net_device *dev, *nxt; - - rtnl_lock(); - for_each_netdev_safe(dev, nxt) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, - VLAN_DEV_INFO(dev)->vlan_id); - - unregister_netdevice(dev); - } - } - rtnl_unlock(); -} - /* * Module 'remove' entry point. * o delete /proc/net/router directory and static entries. @@ -150,7 +130,6 @@ static void __exit vlan_cleanup_module(void) unregister_netdevice_notifier(&vlan_notifier_block); dev_remove_pack(&vlan_packet_type); - vlan_cleanup_devices(); /* This table must be empty if there are no module * references left. diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 54c17e4cd28f..7b6b16396745 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -270,6 +270,9 @@ static LIST_HEAD(link_ops); */ int __rtnl_link_register(struct rtnl_link_ops *ops) { + if (!ops->dellink) + ops->dellink = unregister_netdevice; + list_add_tail(&ops->list, &link_ops); return 0; } @@ -298,12 +301,16 @@ EXPORT_SYMBOL_GPL(rtnl_link_register); * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. * @ops: struct rtnl_link_ops * to unregister * - * The caller must hold the rtnl_mutex. This function should be used - * by drivers that unregister devices during module unloading. It must - * be called after unregistering the devices. + * The caller must hold the rtnl_mutex. */ void __rtnl_link_unregister(struct rtnl_link_ops *ops) { + struct net_device *dev, *n; + + for_each_netdev_safe(dev, n) { + if (dev->rtnl_link_ops == ops) + ops->dellink(dev); + } list_del(&ops->list); } @@ -1067,7 +1074,10 @@ replay: if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); - err = ops->newlink(dev, tb, data); + if (ops->newlink) + err = ops->newlink(dev, tb, data); + else + err = register_netdevice(dev); err_free: if (err < 0) free_netdev(dev); -- cgit v1.2.3 From 0e06877c6fdbc67b1132be895f995acd1ff30135 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:42:31 -0700 Subject: [RTNETLINK]: rtnl_link: allow specifying initial device address Drivers need to validate the initial addresses in their netlink attribute validation function or manually reject them if they can't support this. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/dummy.c | 12 ++++++++++++ drivers/net/ifb.c | 12 ++++++++++++ net/8021q/vlan.c | 8 ++++++-- net/8021q/vlan_netlink.c | 7 +++++++ net/core/rtnetlink.c | 9 +++++++-- 5 files changed, 44 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 373ff700404f..756a6bcb038d 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -84,9 +84,21 @@ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static int dummy_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + static struct rtnl_link_ops dummy_link_ops __read_mostly = { .kind = "dummy", .setup = dummy_setup, + .validate = dummy_validate, }; /* Number of dummy devices to be set up by this module. */ diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index c8e7c8f6ba3e..f5c3598e59af 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -221,10 +221,22 @@ static int ifb_open(struct net_device *dev) return 0; } +static int ifb_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + static struct rtnl_link_ops ifb_link_ops __read_mostly = { .kind = "ifb", .priv_size = sizeof(struct ifb_private), .setup = ifb_setup, + .validate = ifb_validate, }; /* Number of ifb devices to be set up by this module. */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 34c1d0b241ca..abb9900edb3f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -324,8 +324,10 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len); - memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); + if (is_zero_ether_addr(dev->dev_addr)) + memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len); + if (is_zero_ether_addr(dev->broadcast)) + memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->hard_header = real_dev->hard_header; @@ -373,6 +375,8 @@ void vlan_setup(struct net_device *new_dev) new_dev->set_multicast_list = vlan_dev_set_multicast_list; new_dev->destructor = free_netdev; new_dev->do_ioctl = vlan_dev_ioctl; + + memset(new_dev->broadcast, 0, sizeof(ETH_ALEN)); } static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 844c7e43d0fa..6cdd1e015e2d 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -41,6 +41,13 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) u16 id; int err; + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + if (!data) return -EINVAL; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7b6b16396745..864cbdf31ed7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1032,8 +1032,7 @@ replay: if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) return -EOPNOTSUPP; - if (tb[IFLA_ADDRESS] || tb[IFLA_BROADCAST] || tb[IFLA_MAP] || - tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) + if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) return -EOPNOTSUPP; if (!ops) { @@ -1065,6 +1064,12 @@ replay: if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_ADDRESS]) + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); + if (tb[IFLA_BROADCAST]) + memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), + nla_len(tb[IFLA_BROADCAST])); if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); if (tb[IFLA_WEIGHT]) -- cgit v1.2.3 From 662ad4f8efd3ba2ed710d36003f968b500e6f123 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 11 Jul 2007 19:43:52 -0700 Subject: [TCP]: tcp probe wraparound handling and other changes Switch from formatting messages in probe routine and copying with kfifo, to using a small circular queue of information and formatting on read. This avoids wraparound issues with kfifo, and saves one copy. Also make sure to state correct license, rather than copying off some other driver I started with. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 194 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 124 insertions(+), 70 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d9323dfff826..86624fabc4bf 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -6,8 +6,7 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * the Free Software Foundation; either version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -25,23 +24,22 @@ #include #include #include -#include #include #include -#include #include MODULE_AUTHOR("Stephen Hemminger "); MODULE_DESCRIPTION("TCP cwnd snooper"); MODULE_LICENSE("GPL"); +MODULE_VERSION("1.1"); static int port __read_mostly = 0; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); -static int bufsize __read_mostly = 64*1024; -MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +static int bufsize __read_mostly = 4096; +MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); module_param(bufsize, int, 0); static int full __read_mostly; @@ -50,39 +48,38 @@ module_param(full, int, 0); static const char procname[] = "tcpprobe"; -struct { - struct kfifo *fifo; +struct tcp_log { + ktime_t tstamp; + __be32 saddr, daddr; + __be16 sport, dport; + u16 length; + u32 snd_nxt; + u32 snd_una; + u32 snd_wnd; + u32 snd_cwnd; + u32 ssthresh; + u32 srtt; +}; + +static struct { spinlock_t lock; wait_queue_head_t wait; ktime_t start; u32 lastcwnd; -} tcpw; -/* - * Print to log with timestamps. - * FIXME: causes an extra copy - */ -static void printl(const char *fmt, ...) - __attribute__ ((format (printf, 1, 2))); + unsigned long head, tail; + struct tcp_log *log; +} tcp_probe; + -static void printl(const char *fmt, ...) +static inline int tcp_probe_used(void) { - va_list args; - int len; - struct timespec tv; - char tbuf[256]; - - va_start(args, fmt); - /* want monotonic time since start of tcp_probe */ - tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start)); - - len = sprintf(tbuf, "%lu.%09lu ", - (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec); - len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); - va_end(args); - - kfifo_put(tcpw.fifo, tbuf, len); - wake_up(&tcpw.wait); + return (tcp_probe.head - tcp_probe.tail) % bufsize; +} + +static inline int tcp_probe_avail(void) +{ + return bufsize - tcp_probe_used(); } /* @@ -97,63 +94,117 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Only update if port matches */ if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) - && (full || tp->snd_cwnd != tcpw.lastcwnd)) { - printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n", - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport), - skb->len, tp->snd_nxt, tp->snd_una, - tp->snd_cwnd, tcp_current_ssthresh(sk), - tp->snd_wnd, tp->srtt >> 3); - tcpw.lastcwnd = tp->snd_cwnd; + && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { + + spin_lock(&tcp_probe.lock); + /* If log fills, just silently drop */ + if (tcp_probe_avail() > 1) { + struct tcp_log *p = tcp_probe.log + tcp_probe.head; + + p->tstamp = ktime_get(); + p->saddr = inet->saddr; + p->sport = inet->sport; + p->daddr = inet->daddr; + p->dport = inet->dport; + p->length = skb->len; + p->snd_nxt = tp->snd_nxt; + p->snd_una = tp->snd_una; + p->snd_cwnd = tp->snd_cwnd; + p->snd_wnd = tp->snd_wnd; + p->srtt = tp->srtt >> 3; + + tcp_probe.head = (tcp_probe.head + 1) % bufsize; + } + tcp_probe.lastcwnd = tp->snd_cwnd; + spin_unlock(&tcp_probe.lock); + + wake_up(&tcp_probe.wait); } jprobe_return(); return 0; } -static struct jprobe tcp_probe = { +static struct jprobe tcp_jprobe = { .kp = { .symbol_name = "tcp_rcv_established", }, .entry = JPROBE_ENTRY(jtcp_rcv_established), }; - static int tcpprobe_open(struct inode * inode, struct file * file) { - kfifo_reset(tcpw.fifo); - tcpw.start = ktime_get(); + /* Reset (empty) log */ + spin_lock_bh(&tcp_probe.lock); + tcp_probe.head = tcp_probe.tail = 0; + tcp_probe.start = ktime_get(); + spin_unlock_bh(&tcp_probe.lock); + return 0; } +static int tcpprobe_sprint(char *tbuf, int n) +{ + const struct tcp_log *p + = tcp_probe.log + tcp_probe.tail % bufsize; + struct timespec tv + = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); + + return snprintf(tbuf, n, + "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u" + " %d %#x %#x %u %u %u %u\n", + (unsigned long) tv.tv_sec, + (unsigned long) tv.tv_nsec, + NIPQUAD(p->saddr), ntohs(p->sport), + NIPQUAD(p->daddr), ntohs(p->dport), + p->length, p->snd_nxt, p->snd_una, + p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); +} + static ssize_t tcpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { int error = 0, cnt = 0; - unsigned char *tbuf; if (!buf || len < 0) return -EINVAL; - if (len == 0) - return 0; + while (cnt < len) { + char tbuf[128]; + int width; + + /* Wait for data in buffer */ + error = wait_event_interruptible(tcp_probe.wait, + tcp_probe_used() > 0); + if (error) + break; - tbuf = vmalloc(len); - if (!tbuf) - return -ENOMEM; + spin_lock_bh(&tcp_probe.lock); + if (tcp_probe.head == tcp_probe.tail) { + /* multiple readers race? */ + spin_unlock_bh(&tcp_probe.lock); + continue; + } - error = wait_event_interruptible(tcpw.wait, - __kfifo_len(tcpw.fifo) != 0); - if (error) - goto out_free; + width = tcpprobe_sprint(tbuf, sizeof(tbuf)); - cnt = kfifo_get(tcpw.fifo, tbuf, len); - error = copy_to_user(buf, tbuf, cnt); + if (width < len) + tcp_probe.tail = (tcp_probe.tail + 1) % bufsize; -out_free: - vfree(tbuf); + spin_unlock_bh(&tcp_probe.lock); + + /* if record greater than space available + return partial buffer (so far) */ + if (width >= len) + break; + + error = copy_to_user(buf + cnt, tbuf, width); + if (error) + break; + cnt += width; + } - return error ? error : cnt; + return cnt == 0 ? error : cnt; } static const struct file_operations tcpprobe_fops = { @@ -166,34 +217,37 @@ static __init int tcpprobe_init(void) { int ret = -ENOMEM; - init_waitqueue_head(&tcpw.wait); - spin_lock_init(&tcpw.lock); - tcpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &tcpw.lock); - if (IS_ERR(tcpw.fifo)) - return PTR_ERR(tcpw.fifo); + init_waitqueue_head(&tcp_probe.wait); + spin_lock_init(&tcp_probe.lock); + + if (bufsize < 0) + return -EINVAL; + + tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); + if (!tcp_probe.log) + goto err0; if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) goto err0; - ret = register_jprobe(&tcp_probe); + ret = register_jprobe(&tcp_jprobe); if (ret) goto err1; - pr_info("TCP watch registered (port=%d)\n", port); + pr_info("TCP probe registered (port=%d)\n", port); return 0; err1: proc_net_remove(procname); err0: - kfifo_free(tcpw.fifo); + kfree(tcp_probe.log); return ret; } module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { - kfifo_free(tcpw.fifo); proc_net_remove(procname); - unregister_jprobe(&tcp_probe); - + unregister_jprobe(&tcp_jprobe); + kfree(tcp_probe.log); } module_exit(tcpprobe_exit); -- cgit v1.2.3 From db3d99c090e0cdb34b1274767e062bfddbb384bc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:46:26 -0700 Subject: [NET_SCHED]: ematch: module autoloading Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 17 +++++++---------- include/net/pkt_cls.h | 2 ++ net/sched/em_cmp.c | 1 + net/sched/em_meta.c | 2 ++ net/sched/em_nbyte.c | 2 ++ net/sched/em_text.c | 2 ++ net/sched/em_u32.c | 2 ++ net/sched/ematch.c | 13 +++++++++++++ 8 files changed, 31 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index c3f01b3085a4..30b8571e6b34 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -403,16 +403,13 @@ enum * 1..32767 Reserved for ematches inside kernel tree * 32768..65535 Free to use, not reliable */ -enum -{ - TCF_EM_CONTAINER, - TCF_EM_CMP, - TCF_EM_NBYTE, - TCF_EM_U32, - TCF_EM_META, - TCF_EM_TEXT, - __TCF_EM_MAX -}; +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_MAX 5 enum { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4129df708079..6c29920cbe29 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -306,6 +306,8 @@ static inline int tcf_em_tree_match(struct sk_buff *skb, return 1; } +#define MODULE_ALIAS_TCF_EMATCH(kind) MODULE_ALIAS("ematch-kind-" __stringify(kind)) + #else /* CONFIG_NET_EMATCH */ struct tcf_ematch_tree diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index 8d6dacd81900..cc49c932641d 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -98,3 +98,4 @@ MODULE_LICENSE("GPL"); module_init(init_em_cmp); module_exit(exit_em_cmp); +MODULE_ALIAS_TCF_EMATCH(TCF_EM_CMP); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 60acf8cdb27b..650f09c8bd6a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -848,3 +848,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_meta); module_exit(exit_em_meta); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_META); diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index b4b36efce292..370a1b2ea317 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -76,3 +76,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_nbyte); module_exit(exit_em_nbyte); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_NBYTE); diff --git a/net/sched/em_text.c b/net/sched/em_text.c index e8f46169449d..d5cd86efb7d0 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -150,3 +150,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_text); module_exit(exit_em_text); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_TEXT); diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 0a2a7fe08de3..112796e4a7c4 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -60,3 +60,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_u32); module_exit(exit_em_u32); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_U32); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 24837391640d..f3a104e323bd 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -222,6 +222,19 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em->ops == NULL) { err = -ENOENT; +#ifdef CONFIG_KMOD + __rtnl_unlock(); + request_module("ematch-kind-%u", em_hdr->kind); + rtnl_lock(); + em->ops = tcf_em_lookup(em_hdr->kind); + if (em->ops) { + /* We dropped the RTNL mutex in order to + * perform the module load. Tell the caller + * to replay the request. */ + module_put(em->ops->owner); + err = -EAGAIN; + } +#endif goto errout; } -- cgit v1.2.3 From 179f831bc33104d14deb54a52b7a8b43433f8ccc Mon Sep 17 00:00:00 2001 From: Andy Green Date: Tue, 10 Jul 2007 19:29:38 +0200 Subject: [PATCH] cfg80211: Radiotap parser Generic code to walk through the fields in a radiotap header, accounting for nasties like extended "field present" bitfields and alignment rules Signed-off-by: Andy Green Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- Documentation/networking/radiotap-headers.txt | 65 +++++++ include/net/cfg80211.h | 38 ++++ net/wireless/Makefile | 2 +- net/wireless/radiotap.c | 257 ++++++++++++++++++++++++++ 4 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 net/wireless/radiotap.c (limited to 'net') diff --git a/Documentation/networking/radiotap-headers.txt b/Documentation/networking/radiotap-headers.txt index e29e027d9be3..953331c7984f 100644 --- a/Documentation/networking/radiotap-headers.txt +++ b/Documentation/networking/radiotap-headers.txt @@ -84,4 +84,69 @@ Example valid radiotap header 0x01 //<-- antenna +Using the Radiotap Parser +------------------------- + +If you are having to parse a radiotap struct, you can radically simplify the +job by using the radiotap parser that lives in net/wireless/radiotap.c and has +its prototypes available in include/net/cfg80211.h. You use it like this: + +#include + +/* buf points to the start of the radiotap header part */ + +int MyFunction(u8 * buf, int buflen) +{ + int pkt_rate_100kHz = 0, antenna = 0, pwr = 0; + struct ieee80211_radiotap_iterator iterator; + int ret = ieee80211_radiotap_iterator_init(&iterator, buf, buflen); + + while (!ret) { + + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_RATE: + /* radiotap "rate" u8 is in + * 500kbps units, eg, 0x02=1Mbps + */ + pkt_rate_100kHz = (*iterator.this_arg) * 5; + break; + + case IEEE80211_RADIOTAP_ANTENNA: + /* radiotap uses 0 for 1st ant */ + antenna = *iterator.this_arg); + break; + + case IEEE80211_RADIOTAP_DBM_TX_POWER: + pwr = *iterator.this_arg; + break; + + default: + break; + } + } /* while more rt headers */ + + if (ret != -ENOENT) + return TXRX_DROP; + + /* discard the radiotap header part */ + buf += iterator.max_length; + buflen -= iterator.max_length; + + ... + +} + Andy Green diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 88171f8ce58a..7edaef6b29d6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -11,6 +11,44 @@ * Copyright 2006 Johannes Berg */ + +/* Radiotap header iteration + * implemented in net/wireless/radiotap.c + * docs in Documentation/networking/radiotap-headers.txt + */ +/** + * struct ieee80211_radiotap_iterator - tracks walk thru present radiotap args + * @rtheader: pointer to the radiotap header we are walking through + * @max_length: length of radiotap header in cpu byte ordering + * @this_arg_index: IEEE80211_RADIOTAP_... index of current arg + * @this_arg: pointer to current radiotap arg + * @arg_index: internal next argument index + * @arg: internal next argument pointer + * @next_bitmap: internal pointer to next present u32 + * @bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present + */ + +struct ieee80211_radiotap_iterator { + struct ieee80211_radiotap_header *rtheader; + int max_length; + int this_arg_index; + u8 *this_arg; + + int arg_index; + u8 *arg; + __le32 *next_bitmap; + u32 bitmap_shifter; +}; + +extern int ieee80211_radiotap_iterator_init( + struct ieee80211_radiotap_iterator *iterator, + struct ieee80211_radiotap_header *radiotap_header, + int max_length); + +extern int ieee80211_radiotap_iterator_next( + struct ieee80211_radiotap_iterator *iterator); + + /* from net/wireless.h */ struct wiphy; diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 3a96ae60271c..092116e390b6 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o -cfg80211-y += core.o sysfs.o +cfg80211-y += core.o sysfs.o radiotap.o diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c new file mode 100644 index 000000000000..68c11d099917 --- /dev/null +++ b/net/wireless/radiotap.c @@ -0,0 +1,257 @@ +/* + * Radiotap parser + * + * Copyright 2007 Andy Green + */ + +#include +#include +#include + +/* function prototypes and related defs are in include/net/cfg80211.h */ + +/** + * ieee80211_radiotap_iterator_init - radiotap parser iterator initialization + * @iterator: radiotap_iterator to initialize + * @radiotap_header: radiotap header to parse + * @max_length: total length we can parse into (eg, whole packet length) + * + * Returns: 0 or a negative error code if there is a problem. + * + * This function initializes an opaque iterator struct which can then + * be passed to ieee80211_radiotap_iterator_next() to visit every radiotap + * argument which is present in the header. It knows about extended + * present headers and handles them. + * + * How to use: + * call __ieee80211_radiotap_iterator_init() to init a semi-opaque iterator + * struct ieee80211_radiotap_iterator (no need to init the struct beforehand) + * checking for a good 0 return code. Then loop calling + * __ieee80211_radiotap_iterator_next()... it returns either 0, + * -ENOENT if there are no more args to parse, or -EINVAL if there is a problem. + * The iterator's @this_arg member points to the start of the argument + * associated with the current argument index that is present, which can be + * found in the iterator's @this_arg_index member. This arg index corresponds + * to the IEEE80211_RADIOTAP_... defines. + * + * Radiotap header length: + * You can find the CPU-endian total radiotap header length in + * iterator->max_length after executing ieee80211_radiotap_iterator_init() + * successfully. + * + * Alignment Gotcha: + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + * + * Example code: + * See Documentation/networking/radiotap-headers.txt + */ + +int ieee80211_radiotap_iterator_init( + struct ieee80211_radiotap_iterator *iterator, + struct ieee80211_radiotap_header *radiotap_header, + int max_length) +{ + /* Linux only supports version 0 radiotap format */ + if (radiotap_header->it_version) + return -EINVAL; + + /* sanity check for allowed length and radiotap length field */ + if (max_length < le16_to_cpu(get_unaligned(&radiotap_header->it_len))) + return -EINVAL; + + iterator->rtheader = radiotap_header; + iterator->max_length = le16_to_cpu(get_unaligned( + &radiotap_header->it_len)); + iterator->arg_index = 0; + iterator->bitmap_shifter = le32_to_cpu(get_unaligned( + &radiotap_header->it_present)); + iterator->arg = (u8 *)radiotap_header + sizeof(*radiotap_header); + iterator->this_arg = NULL; + + /* find payload start allowing for extended bitmap(s) */ + + if (unlikely(iterator->bitmap_shifter & (1<arg)) & + (1<arg += sizeof(u32); + + /* + * check for insanity where the present bitmaps + * keep claiming to extend up to or even beyond the + * stated radiotap header length + */ + + if (((ulong)iterator->arg - + (ulong)iterator->rtheader) > iterator->max_length) + return -EINVAL; + } + + iterator->arg += sizeof(u32); + + /* + * no need to check again for blowing past stated radiotap + * header length, because ieee80211_radiotap_iterator_next + * checks it before it is dereferenced + */ + } + + /* we are all initialized happily */ + + return 0; +} +EXPORT_SYMBOL(ieee80211_radiotap_iterator_init); + + +/** + * ieee80211_radiotap_iterator_next - return next radiotap parser iterator arg + * @iterator: radiotap_iterator to move to next arg (if any) + * + * Returns: 0 if there is an argument to handle, + * -ENOENT if there are no more args or -EINVAL + * if there is something else wrong. + * + * This function provides the next radiotap arg index (IEEE80211_RADIOTAP_*) + * in @this_arg_index and sets @this_arg to point to the + * payload for the field. It takes care of alignment handling and extended + * present fields. @this_arg can be changed by the caller (eg, + * incremented to move inside a compound argument like + * IEEE80211_RADIOTAP_CHANNEL). The args pointed to are in + * little-endian format whatever the endianess of your CPU. + * + * Alignment Gotcha: + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + +int ieee80211_radiotap_iterator_next( + struct ieee80211_radiotap_iterator *iterator) +{ + + /* + * small length lookup table for all radiotap types we heard of + * starting from b0 in the bitmap, so we can walk the payload + * area of the radiotap header + * + * There is a requirement to pad args, so that args + * of a given length must begin at a boundary of that length + * -- but note that compound args are allowed (eg, 2 x u16 + * for IEEE80211_RADIOTAP_CHANNEL) so total arg length is not + * a reliable indicator of alignment requirement. + * + * upper nybble: content alignment for arg + * lower nybble: content length for arg + */ + + static const u8 rt_sizes[] = { + [IEEE80211_RADIOTAP_TSFT] = 0x88, + [IEEE80211_RADIOTAP_FLAGS] = 0x11, + [IEEE80211_RADIOTAP_RATE] = 0x11, + [IEEE80211_RADIOTAP_CHANNEL] = 0x24, + [IEEE80211_RADIOTAP_FHSS] = 0x22, + [IEEE80211_RADIOTAP_DBM_ANTSIGNAL] = 0x11, + [IEEE80211_RADIOTAP_DBM_ANTNOISE] = 0x11, + [IEEE80211_RADIOTAP_LOCK_QUALITY] = 0x22, + [IEEE80211_RADIOTAP_TX_ATTENUATION] = 0x22, + [IEEE80211_RADIOTAP_DB_TX_ATTENUATION] = 0x22, + [IEEE80211_RADIOTAP_DBM_TX_POWER] = 0x11, + [IEEE80211_RADIOTAP_ANTENNA] = 0x11, + [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = 0x11, + [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11 + /* + * add more here as they are defined in + * include/net/ieee80211_radiotap.h + */ + }; + + /* + * for every radiotap entry we can at + * least skip (by knowing the length)... + */ + + while (iterator->arg_index < sizeof(rt_sizes)) { + int hit = 0; + int pad; + + if (!(iterator->bitmap_shifter & 1)) + goto next_entry; /* arg not present */ + + /* + * arg is present, account for alignment padding + * 8-bit args can be at any alignment + * 16-bit args must start on 16-bit boundary + * 32-bit args must start on 32-bit boundary + * 64-bit args must start on 64-bit boundary + * + * note that total arg size can differ from alignment of + * elements inside arg, so we use upper nybble of length + * table to base alignment on + * + * also note: these alignments are ** relative to the + * start of the radiotap header **. There is no guarantee + * that the radiotap header itself is aligned on any + * kind of boundary. + * + * the above is why get_unaligned() is used to dereference + * multibyte elements from the radiotap area + */ + + pad = (((ulong)iterator->arg) - + ((ulong)iterator->rtheader)) & + ((rt_sizes[iterator->arg_index] >> 4) - 1); + + if (pad) + iterator->arg += + (rt_sizes[iterator->arg_index] >> 4) - pad; + + /* + * this is what we will return to user, but we need to + * move on first so next call has something fresh to test + */ + iterator->this_arg_index = iterator->arg_index; + iterator->this_arg = iterator->arg; + hit = 1; + + /* internally move on the size of this arg */ + iterator->arg += rt_sizes[iterator->arg_index] & 0x0f; + + /* + * check for insanity where we are given a bitmap that + * claims to have more arg content than the length of the + * radiotap section. We will normally end up equalling this + * max_length on the last arg, never exceeding it. + */ + + if (((ulong)iterator->arg - (ulong)iterator->rtheader) > + iterator->max_length) + return -EINVAL; + + next_entry: + iterator->arg_index++; + if (unlikely((iterator->arg_index & 31) == 0)) { + /* completed current u32 bitmap */ + if (iterator->bitmap_shifter & 1) { + /* b31 was set, there is more */ + /* move to next u32 bitmap */ + iterator->bitmap_shifter = le32_to_cpu( + get_unaligned(iterator->next_bitmap)); + iterator->next_bitmap++; + } else + /* no more bitmaps: end */ + iterator->arg_index = sizeof(rt_sizes); + } else /* just try the next bit */ + iterator->bitmap_shifter >>= 1; + + /* if we found a valid arg earlier, return it now */ + if (hit) + return 0; + } + + /* we don't know how to handle any more args, we're done */ + return -ENOENT; +} +EXPORT_SYMBOL(ieee80211_radiotap_iterator_next); -- cgit v1.2.3 From e4c967c6d88ca94365dd8e2a7bbd22eedb8d7ae7 Mon Sep 17 00:00:00 2001 From: Andy Green Date: Tue, 10 Jul 2007 19:32:07 +0200 Subject: [PATCH] mac80211: Monitor mode radiotap-based packet injection Signed-off-by: Andy Green Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/ieee80211.c | 234 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 224 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 4e84f24fd439..8b57eaa2a271 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "ieee80211_common.h" #include "ieee80211_i.h" @@ -1118,7 +1119,138 @@ ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) } -static void inline +/* + * deal with packet injection down monitor interface + * with Radiotap Header -- only called for monitor mode interface + */ + +static ieee80211_txrx_result +__ieee80211_parse_tx_radiotap( + struct ieee80211_txrx_data *tx, + struct sk_buff *skb, struct ieee80211_tx_control *control) +{ + /* + * this is the moment to interpret and discard the radiotap header that + * must be at the start of the packet injected in Monitor mode + * + * Need to take some care with endian-ness since radiotap + * args are little-endian + */ + + struct ieee80211_radiotap_iterator iterator; + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); + + /* + * default control situation for all injected packets + * FIXME: this does not suit all usage cases, expand to allow control + */ + + control->retry_limit = 1; /* no retry */ + control->key_idx = -1; /* no encryption key */ + control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | + IEEE80211_TXCTL_USE_CTS_PROTECT); + control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT | + IEEE80211_TXCTL_NO_ACK; + control->antenna_sel_tx = 0; /* default to default antenna */ + + /* + * for every radiotap entry that is present + * (ieee80211_radiotap_iterator_next returns -ENOENT when no more + * entries present, or -EINVAL on error) + */ + + while (!ret) { + int i, target_rate; + + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_RATE: + /* + * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps + * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps + */ + target_rate = (*iterator.this_arg) * 5; + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + + if (r->rate > target_rate) + continue; + + control->rate = r; + + if (r->flags & IEEE80211_RATE_PREAMBLE2) + control->tx_rate = r->val2; + else + control->tx_rate = r->val; + + /* end on exact match */ + if (r->rate == target_rate) + i = mode->num_rates; + } + break; + + case IEEE80211_RADIOTAP_ANTENNA: + /* + * radiotap uses 0 for 1st ant, mac80211 is 1 for + * 1st ant + */ + control->antenna_sel_tx = (*iterator.this_arg) + 1; + break; + + case IEEE80211_RADIOTAP_DBM_TX_POWER: + control->power_level = *iterator.this_arg; + break; + + case IEEE80211_RADIOTAP_FLAGS: + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { + /* + * this indicates that the skb we have been + * handed has the 32-bit FCS CRC at the end... + * we should react to that by snipping it off + * because it will be recomputed and added + * on transmission + */ + if (skb->len < (iterator.max_length + FCS_LEN)) + return TXRX_DROP; + + skb_trim(skb, skb->len - FCS_LEN); + } + break; + + default: + break; + } + } + + if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ + return TXRX_DROP; + + /* + * remove the radiotap header + * iterator->max_length was sanity-checked against + * skb->len by iterator init + */ + skb_pull(skb, iterator.max_length); + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result inline __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, struct sk_buff *skb, struct net_device *dev, @@ -1126,6 +1258,9 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_sub_if_data *sdata; + ieee80211_txrx_result res = TXRX_CONTINUE; + int hdrlen; memset(tx, 0, sizeof(*tx)); @@ -1135,7 +1270,32 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); tx->sta = sta_info_get(local, hdr->addr1); tx->fc = le16_to_cpu(hdr->frame_control); + + /* + * set defaults for things that can be set by + * injected radiotap headers + */ control->power_level = local->hw.conf.power_level; + control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) + control->antenna_sel_tx = tx->sta->antenna_sel_tx; + + /* process and remove the injection radiotap header */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { + if (__ieee80211_parse_tx_radiotap(tx, skb, control) == + TXRX_DROP) { + return TXRX_DROP; + } + /* + * we removed the radiotap header after this point, + * we filled control with what we could use + * set to the actual ieee header now + */ + hdr = (struct ieee80211_hdr *) skb->data; + res = TXRX_QUEUED; /* indication it was monitor packet */ + } + tx->u.tx.control = control; tx->u.tx.unicast = !is_multicast_ether_addr(hdr->addr1); if (is_multicast_ether_addr(hdr->addr1)) @@ -1152,9 +1312,6 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; tx->sta->clear_dst_mask = 0; } - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) - control->antenna_sel_tx = tx->sta->antenna_sel_tx; hdrlen = ieee80211_get_hdrlen(tx->fc); if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; @@ -1162,6 +1319,7 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, } control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; + return res; } static int inline is_ieee80211_device(struct net_device *dev, @@ -1274,7 +1432,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, struct sta_info *sta; ieee80211_tx_handler *handler; struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP; + ieee80211_txrx_result res = TXRX_DROP, res_prepare; int ret, i; WARN_ON(__ieee80211_queue_pending(local, control->queue)); @@ -1284,15 +1442,26 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, return 0; } - __ieee80211_tx_prepare(&tx, skb, dev, control); + res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); + + if (res_prepare == TXRX_DROP) { + dev_kfree_skb(skb); + return 0; + } + sta = tx.sta; tx.u.tx.mgmt_interface = mgmt; tx.u.tx.mode = local->hw.conf.mode; - for (handler = local->tx_handlers; *handler != NULL; handler++) { - res = (*handler)(&tx); - if (res != TXRX_CONTINUE) - break; + if (res_prepare == TXRX_QUEUED) { /* if it was an injected packet */ + res = TXRX_CONTINUE; + } else { + for (handler = local->tx_handlers; *handler != NULL; + handler++) { + res = (*handler)(&tx); + if (res != TXRX_CONTINUE) + break; + } } skb = tx.skb; /* handlers are allowed to change skb */ @@ -1531,6 +1700,51 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } + if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { + struct ieee80211_radiotap_header *prthdr = + (struct ieee80211_radiotap_header *)skb->data; + u16 len; + + /* + * there must be a radiotap header at the + * start in this case + */ + if (unlikely(prthdr->it_version)) { + /* only version 0 is supported */ + ret = 0; + goto fail; + } + + skb->dev = local->mdev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(*pkt_data)); + pkt_data->ifindex = sdata->dev->ifindex; + pkt_data->mgmt_iface = 0; + pkt_data->do_not_encrypt = 1; + + /* above needed because we set skb device to master */ + + /* + * fix up the pointers accounting for the radiotap + * header still being in there. We are being given + * a precooked IEEE80211 header so no need for + * normal processing + */ + len = le16_to_cpu(get_unaligned(&prthdr->it_len)); + skb_set_mac_header(skb, len); + skb_set_network_header(skb, len + sizeof(hdr)); + skb_set_transport_header(skb, len + sizeof(hdr)); + + /* + * pass the radiotap header up to + * the next stage intact + */ + dev_queue_xmit(skb); + + return 0; + } + nh_pos = skb_network_header(skb) - skb->data; h_pos = skb_transport_header(skb) - skb->data; -- cgit v1.2.3 From b306f45300866adc01b84f7aa083bfcd9cbb89c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: show transmitted frames on monitor interfaces This patch makes mac80211 show transmitted frames on monitor interfaces, including radiotap headers that indicate some transmission parameters. The shown parameters will need to be expanded, but this should work as a basis to work from. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/ieee80211.c | 140 +++++++++++++++++++++++++++++++++++++-------- net/mac80211/ieee80211_i.h | 1 + 2 files changed, 118 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 8b57eaa2a271..85f23fd866ad 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -57,6 +57,17 @@ static const unsigned char eapol_header[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; +/* + * For seeing transmitted packets on monitor interfaces + * we have a radiotap header too. + */ +struct ieee80211_tx_status_rtap_hdr { + struct ieee80211_radiotap_header hdr; + __le16 tx_flags; + u8 data_retries; +} __attribute__ ((packed)); + + static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr) { @@ -529,7 +540,7 @@ ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) /* reserve enough extra head and tail room for possible * encryption */ frag = frags[i] = - dev_alloc_skb(tx->local->hw.extra_tx_headroom + + dev_alloc_skb(tx->local->tx_headroom + frag_threshold + IEEE80211_ENCRYPT_HEADROOM + IEEE80211_ENCRYPT_TAILROOM); @@ -538,8 +549,8 @@ ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) /* Make sure that all fragments use the same priority so * that they end up using the same TX queue */ frag->priority = first->priority; - skb_reserve(frag, tx->local->hw.extra_tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); + skb_reserve(frag, tx->local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM); fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); memcpy(fhdr, first->data, hdrlen); if (i == num_fragm - 2) @@ -1636,8 +1647,7 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, } osdata = IEEE80211_DEV_TO_SUB_IF(odev); - headroom = osdata->local->hw.extra_tx_headroom + - IEEE80211_ENCRYPT_HEADROOM; + headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; if (skb_headroom(skb) < headroom) { if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { dev_kfree_skb(skb); @@ -1833,7 +1843,7 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and * alloc_skb() (net/core/skbuff.c) */ - head_need = hdrlen + encaps_len + local->hw.extra_tx_headroom; + head_need = hdrlen + encaps_len + local->tx_headroom; head_need -= skb_headroom(skb); /* We are going to modify skb data, so make a copy of it if happens to @@ -1920,9 +1930,9 @@ ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } - if (skb_headroom(skb) < sdata->local->hw.extra_tx_headroom) { - if (pskb_expand_head(skb, - sdata->local->hw.extra_tx_headroom, 0, GFP_ATOMIC)) { + if (skb_headroom(skb) < sdata->local->tx_headroom) { + if (pskb_expand_head(skb, sdata->local->tx_headroom, + 0, GFP_ATOMIC)) { dev_kfree_skb(skb); return 0; } @@ -2061,12 +2071,12 @@ struct sk_buff * ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, bh_len = ap->beacon_head_len; bt_len = ap->beacon_tail_len; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + + skb = dev_alloc_skb(local->tx_headroom + bh_len + bt_len + 256 /* maximum TIM len */); if (!skb) return NULL; - skb_reserve(skb, local->hw.extra_tx_headroom); + skb_reserve(skb, local->tx_headroom); memcpy(skb_put(skb, bh_len), b_head, bh_len); ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); @@ -4498,6 +4508,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_local *local = hw_to_local(hw); u16 frag, type; u32 msg_type; + struct ieee80211_tx_status_rtap_hdr *rthdr; + struct ieee80211_sub_if_data *sdata; + int monitors; if (!status) { printk(KERN_ERR @@ -4609,27 +4622,100 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, local->dot11FailedCount++; } - if (!(status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS) - || unlikely(!local->apdev)) { + msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ? + ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail; + + /* this was a transmitted frame, but now we want to reuse it */ + skb_orphan(skb); + + if ((status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS) && + local->apdev) { + if (local->monitors) { + skb2 = skb_clone(skb, GFP_ATOMIC); + } else { + skb2 = skb; + skb = NULL; + } + + if (skb2) + /* Send frame to hostapd */ + ieee80211_rx_mgmt(local, skb2, NULL, msg_type); + + if (!skb) + return; + } + + if (!local->monitors) { dev_kfree_skb(skb); return; } - msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ? - ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail; + /* send frame to monitor interfaces now */ - /* skb was the original skb used for TX. Clone it and give the clone - * to netif_rx(). Free original skb. */ - skb2 = skb_copy(skb, GFP_ATOMIC); - if (!skb2) { + if (skb_headroom(skb) < sizeof(*rthdr)) { + printk(KERN_ERR "ieee80211_tx_status: headroom too small\n"); dev_kfree_skb(skb); return; } - dev_kfree_skb(skb); - skb = skb2; - /* Send frame to hostapd */ - ieee80211_rx_mgmt(local, skb, NULL, msg_type); + rthdr = (struct ieee80211_tx_status_rtap_hdr*) + skb_push(skb, sizeof(*rthdr)); + + memset(rthdr, 0, sizeof(*rthdr)); + rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); + rthdr->hdr.it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | + (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); + + if (!(status->flags & IEEE80211_TX_STATUS_ACK) && + !is_multicast_ether_addr(hdr->addr1)) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); + + if ((status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) && + (status->control.flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); + else if (status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); + + rthdr->data_retries = status->retry_count; + + read_lock(&local->sub_if_lock); + monitors = local->monitors; + list_for_each_entry(sdata, &local->sub_if_list, list) { + /* + * Using the monitors counter is possibly racy, but + * if the value is wrong we simply either clone the skb + * once too much or forget sending it to one monitor iface + * The latter case isn't nice but fixing the race is much + * more complicated. + */ + if (!monitors || !skb) + goto out; + + if (sdata->type == IEEE80211_IF_TYPE_MNTR) { + if (!netif_running(sdata->dev)) + continue; + monitors--; + if (monitors) + skb2 = skb_clone(skb, GFP_KERNEL); + else + skb2 = NULL; + skb->dev = sdata->dev; + /* XXX: is this sufficient for BPF? */ + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); + skb = skb2; + break; + } + } + out: + read_unlock(&local->sub_if_lock); + if (skb) + dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_tx_status); @@ -4926,6 +5012,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_workqueue; } + /* + * The hardware needs headroom for sending the frame, + * and we need some headroom for passing the frame to monitor + * interfaces, but never both at the same time. + */ + local->tx_headroom = max(local->hw.extra_tx_headroom, + sizeof(struct ieee80211_tx_status_rtap_hdr)); + debugfs_hw_add(local); local->hw.conf.beacon_int = 1000; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index af4d14d0b969..5a91e179efa0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -392,6 +392,7 @@ struct ieee80211_local { int monitors; struct iw_statistics wstats; u8 wstats_flags; + int tx_headroom; /* required headroom for hardware/radiotap */ enum { IEEE80211_DEV_UNINITIALIZED = 0, -- cgit v1.2.3 From 7f8c05982865a32ee001b79ee0bd469f55ac8aba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: remove ieee80211_msg_passive_scan This constant is unused. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/ieee80211_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h index b9a73e7f5f75..7af6710bf718 100644 --- a/net/mac80211/ieee80211_common.h +++ b/net/mac80211/ieee80211_common.h @@ -47,7 +47,7 @@ enum ieee80211_msg_type { ieee80211_msg_normal = 0, ieee80211_msg_tx_callback_ack = 1, ieee80211_msg_tx_callback_fail = 2, - ieee80211_msg_passive_scan = 3, + /* hole at 3, was ieee80211_msg_passive_scan but unused */ ieee80211_msg_wep_frame_unknown_key = 4, ieee80211_msg_michael_mic_failure = 5, /* hole at 6, was monitor but never sent to userspace */ -- cgit v1.2.3 From c59304b5e07128816347fe3996d7952561f60529 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: remove ieee80211_set_aid_for_sta Remove ieee80211_set_aid_for_sta and associated code. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ------ net/mac80211/ieee80211.c | 28 ---------------------------- net/mac80211/ieee80211_common.h | 7 +------ 3 files changed, 1 insertion(+), 40 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a7f122b79948..627885765a36 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -921,12 +921,6 @@ struct sk_buff * ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, struct ieee80211_tx_control *control); -/* Low level drivers that have their own MLME and MAC indicate - * the aid for an associating station with this call */ -int ieee80211_set_aid_for_sta(struct ieee80211_hw *hw, - u8 *peer_address, u16 aid); - - /* Given an sk_buff with a raw 802.11 header at the data pointer this function * returns the 802.11 header length in bytes (not including encryption * headers). If the data in the sk_buff is too short to contain a valid 802.11 diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 85f23fd866ad..4bcf18097e53 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -3165,34 +3165,6 @@ int ieee80211_radar_status(struct ieee80211_hw *hw, int channel, } EXPORT_SYMBOL(ieee80211_radar_status); -int ieee80211_set_aid_for_sta(struct ieee80211_hw *hw, u8 *peer_address, - u16 aid) -{ - struct sk_buff *skb; - struct ieee80211_msg_set_aid_for_sta *msg; - struct ieee80211_local *local = hw_to_local(hw); - - /* unlikely because if this event only happens for APs, - * which require an open ap device. */ - if (unlikely(!local->apdev)) - return 0; - - skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) + - sizeof(struct ieee80211_msg_set_aid_for_sta)); - - if (!skb) - return -ENOMEM; - skb_reserve(skb, sizeof(struct ieee80211_frame_info)); - - msg = (struct ieee80211_msg_set_aid_for_sta *) - skb_put(skb, sizeof(struct ieee80211_msg_set_aid_for_sta)); - memcpy(msg->sta_address, peer_address, ETH_ALEN); - msg->aid = aid; - - ieee80211_rx_mgmt(local, skb, NULL, ieee80211_msg_set_aid_for_sta); - return 0; -} -EXPORT_SYMBOL(ieee80211_set_aid_for_sta); static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) { diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h index 7af6710bf718..77c6afb7f6a8 100644 --- a/net/mac80211/ieee80211_common.h +++ b/net/mac80211/ieee80211_common.h @@ -52,16 +52,11 @@ enum ieee80211_msg_type { ieee80211_msg_michael_mic_failure = 5, /* hole at 6, was monitor but never sent to userspace */ ieee80211_msg_sta_not_assoc = 7, - ieee80211_msg_set_aid_for_sta = 8 /* used by Intersil MVC driver */, + /* 8 was ieee80211_msg_set_aid_for_sta */ ieee80211_msg_key_threshold_notification = 9, ieee80211_msg_radar = 11, }; -struct ieee80211_msg_set_aid_for_sta { - char sta_address[ETH_ALEN]; - u16 aid; -}; - struct ieee80211_msg_key_notification { int tx_rx_count; char ifname[IFNAMSIZ]; -- cgit v1.2.3 From 333af2f0715c8d4d38cb657d8f4fb7c4e3ceba9f Mon Sep 17 00:00:00 2001 From: Hong Liu Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: add support for iwlist channel Add supported channels info in SIOCGIWRANGE implementation. Signed-off-by: Hong Liu Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/ieee80211_ioctl.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net') diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 66e8a976b311..ef74a91e02a5 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -345,6 +345,8 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct iw_range *range = (struct iw_range *) extra; + struct ieee80211_hw_mode *mode = NULL; + int c = 0; data->length = sizeof(struct iw_range); memset(range, 0, sizeof(struct iw_range)); @@ -378,6 +380,29 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; + list_for_each_entry(mode, &local->modes_list, list) { + int i = 0; + + if (!(local->enabled_modes & (1 << mode->mode)) || + (local->hw_modes & local->enabled_modes & + (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B)) + continue; + + while (i < mode->num_channels && c < IW_MAX_FREQUENCIES) { + struct ieee80211_channel *chan = &mode->channels[i]; + + if (chan->flag & IEEE80211_CHAN_W_SCAN) { + range->freq[c].i = chan->chan; + range->freq[c].m = chan->freq * 100000; + range->freq[c].e = 1; + c++; + } + i++; + } + } + range->num_channels = c; + range->num_frequency = c; + IW_EVENT_CAPA_SET_KERNEL(range->event_capa); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); -- cgit v1.2.3 From 40f7cac9f8dd662c1dd02334afdceef0be03e34f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: separate monitor/subif_start_xmit This patch separates the monitor interface start_xmit from the subif start xmit (those other devices have 802.3 framing, monitor interfaces have radiotap framing) Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/ieee80211.c | 101 +++++++++++++++++++++-------------------- net/mac80211/ieee80211_i.h | 2 + net/mac80211/ieee80211_iface.c | 3 ++ 3 files changed, 58 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 4bcf18097e53..e91698308f31 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -1673,6 +1673,56 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, } +int ieee80211_monitor_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_radiotap_header *prthdr = + (struct ieee80211_radiotap_header *)skb->data; + u16 len; + + /* + * there must be a radiotap header at the + * start in this case + */ + if (unlikely(prthdr->it_version)) { + /* only version 0 is supported */ + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + skb->dev = local->mdev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(*pkt_data)); + pkt_data->ifindex = dev->ifindex; + pkt_data->mgmt_iface = 0; + pkt_data->do_not_encrypt = 1; + + /* above needed because we set skb device to master */ + + /* + * fix up the pointers accounting for the radiotap + * header still being in there. We are being given + * a precooked IEEE80211 header so no need for + * normal processing + */ + len = le16_to_cpu(get_unaligned(&prthdr->it_len)); + skb_set_mac_header(skb, len); + skb_set_network_header(skb, len + sizeof(struct ieee80211_hdr)); + skb_set_transport_header(skb, len + sizeof(struct ieee80211_hdr)); + + /* + * pass the radiotap header up to + * the next stage intact + */ + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} + + /** * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type * subinterfaces (wlan#, WDS, and VLAN interfaces) @@ -1688,8 +1738,8 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, * encapsulated packet will then be passed to master interface, wlan#.11, for * transmission (through low-level driver). */ -static int ieee80211_subif_start_xmit(struct sk_buff *skb, - struct net_device *dev) +int ieee80211_subif_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_tx_packet_data *pkt_data; @@ -1710,51 +1760,6 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } - if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { - struct ieee80211_radiotap_header *prthdr = - (struct ieee80211_radiotap_header *)skb->data; - u16 len; - - /* - * there must be a radiotap header at the - * start in this case - */ - if (unlikely(prthdr->it_version)) { - /* only version 0 is supported */ - ret = 0; - goto fail; - } - - skb->dev = local->mdev; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(*pkt_data)); - pkt_data->ifindex = sdata->dev->ifindex; - pkt_data->mgmt_iface = 0; - pkt_data->do_not_encrypt = 1; - - /* above needed because we set skb device to master */ - - /* - * fix up the pointers accounting for the radiotap - * header still being in there. We are being given - * a precooked IEEE80211 header so no need for - * normal processing - */ - len = le16_to_cpu(get_unaligned(&prthdr->it_len)); - skb_set_mac_header(skb, len); - skb_set_network_header(skb, len + sizeof(hdr)); - skb_set_transport_header(skb, len + sizeof(hdr)); - - /* - * pass the radiotap header up to - * the next stage intact - */ - dev_queue_xmit(skb); - - return 0; - } - nh_pos = skb_network_header(skb) - skb->data; h_pos = skb_transport_header(skb) - skb->data; @@ -1882,7 +1887,7 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = sdata->dev->ifindex; + pkt_data->ifindex = dev->ifindex; pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); pkt_data->do_not_encrypt = no_encrypt; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5a91e179efa0..fadcbccc0da2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -720,6 +720,8 @@ void ieee80211_prepare_rates(struct ieee80211_local *local, struct ieee80211_hw_mode *mode); void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx); int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr); +int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); void ieee80211_if_setup(struct net_device *dev); void ieee80211_if_mgmt_setup(struct net_device *dev); int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c index cf0f32e8c2a2..8532a5ccdd1e 100644 --- a/net/mac80211/ieee80211_iface.c +++ b/net/mac80211/ieee80211_iface.c @@ -157,6 +157,8 @@ void ieee80211_if_set_type(struct net_device *dev, int type) struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int oldtype = sdata->type; + dev->hard_start_xmit = ieee80211_subif_start_xmit; + sdata->type = type; switch (type) { case IEEE80211_IF_TYPE_WDS: @@ -196,6 +198,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type) } case IEEE80211_IF_TYPE_MNTR: dev->type = ARPHRD_IEEE80211_RADIOTAP; + dev->hard_start_xmit = ieee80211_monitor_start_xmit; break; default: printk(KERN_WARNING "%s: %s: Unknown interface type 0x%x", -- cgit v1.2.3 From 3ef8bed4692a0de6a47d162196c850c5dea85b70 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: kill rate control ioctls These aren't used anywhere (hostapd, wpa_supplicant) and until we have a proper interface to the rate control algorithms they don't make much sense either since e.g. rc80211_lowest won't honour them. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/hostapd_ioctl.h | 4 --- net/mac80211/ieee80211.c | 2 -- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/ieee80211_ioctl.c | 80 ------------------------------------------ net/mac80211/rc80211_simple.c | 8 +++-- 5 files changed, 6 insertions(+), 90 deletions(-) (limited to 'net') diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 34fa128e9872..252204fd0abb 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -40,10 +40,6 @@ enum { PRISM2_PARAM_ANTENNA_MODE = 1013, PRISM2_PARAM_STAT_TIME = 1016, PRISM2_PARAM_STA_ANTENNA_SEL = 1017, - PRISM2_PARAM_FORCE_UNICAST_RATE = 1018, - PRISM2_PARAM_RATE_CTRL_NUM_UP = 1019, - PRISM2_PARAM_RATE_CTRL_NUM_DOWN = 1020, - PRISM2_PARAM_MAX_RATECTRL_RATE = 1021, PRISM2_PARAM_TX_POWER_REDUCTION = 1022, PRISM2_PARAM_KEY_TX_RX_THRESHOLD = 1024, PRISM2_PARAM_DEFAULT_WEP_ONLY = 1026, diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index e91698308f31..773a103ee3a1 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -4924,8 +4924,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->short_retry_limit = 7; local->long_retry_limit = 4; local->hw.conf.radio_enabled = 1; - local->rate_ctrl_num_up = RATE_CONTROL_NUM_UP; - local->rate_ctrl_num_down = RATE_CONTROL_NUM_DOWN; local->enabled_modes = (unsigned int) -1; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fadcbccc0da2..b222a9afd4e9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -514,8 +514,6 @@ struct ieee80211_local { STA_ANTENNA_SEL_SW_CTRL_DEBUG = 2 } sta_antenna_sel; - int rate_ctrl_num_up, rate_ctrl_num_down; - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index ef74a91e02a5..f404f1f4251d 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1074,62 +1074,6 @@ static int ieee80211_ioctl_clear_keys(struct net_device *dev) } -static int -ieee80211_ioctl_force_unicast_rate(struct net_device *dev, - struct ieee80211_sub_if_data *sdata, - int rate) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw_mode *mode; - int i; - - if (sdata->type != IEEE80211_IF_TYPE_AP) - return -ENOENT; - - if (rate == 0) { - sdata->u.ap.force_unicast_rateidx = -1; - return 0; - } - - mode = local->oper_hw_mode; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].rate == rate) { - sdata->u.ap.force_unicast_rateidx = i; - return 0; - } - } - return -EINVAL; -} - - -static int -ieee80211_ioctl_max_ratectrl_rate(struct net_device *dev, - struct ieee80211_sub_if_data *sdata, - int rate) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw_mode *mode; - int i; - - if (sdata->type != IEEE80211_IF_TYPE_AP) - return -ENOENT; - - if (rate == 0) { - sdata->u.ap.max_ratectrl_rateidx = -1; - return 0; - } - - mode = local->oper_hw_mode; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].rate == rate) { - sdata->u.ap.max_ratectrl_rateidx = i; - return 0; - } - } - return -EINVAL; -} - - static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local, struct ieee80211_key *key) { @@ -1317,22 +1261,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, local->sta_antenna_sel = value; break; - case PRISM2_PARAM_FORCE_UNICAST_RATE: - ret = ieee80211_ioctl_force_unicast_rate(dev, sdata, value); - break; - - case PRISM2_PARAM_MAX_RATECTRL_RATE: - ret = ieee80211_ioctl_max_ratectrl_rate(dev, sdata, value); - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_UP: - local->rate_ctrl_num_up = value; - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_DOWN: - local->rate_ctrl_num_down = value; - break; - case PRISM2_PARAM_TX_POWER_REDUCTION: if (value < 0) ret = -EINVAL; @@ -1451,14 +1379,6 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, *param = local->sta_antenna_sel; break; - case PRISM2_PARAM_RATE_CTRL_NUM_UP: - *param = local->rate_ctrl_num_up; - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_DOWN: - *param = local->rate_ctrl_num_down; - break; - case PRISM2_PARAM_TX_POWER_REDUCTION: *param = local->hw.conf.tx_power_reduction; break; diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c index 5ae7fc454665..f6780d63b342 100644 --- a/net/mac80211/rc80211_simple.c +++ b/net/mac80211/rc80211_simple.c @@ -187,9 +187,13 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev, } #endif - if (per_failed > local->rate_ctrl_num_down) { + /* + * XXX: Make these configurable once we have an + * interface to the rate control algorithms + */ + if (per_failed > RATE_CONTROL_NUM_DOWN) { rate_control_rate_dec(local, sta); - } else if (per_failed < local->rate_ctrl_num_up) { + } else if (per_failed < RATE_CONTROL_NUM_UP) { rate_control_rate_inc(local, sta); } srctrl->tx_avg_rate_sum += status->control.rate->rate; -- cgit v1.2.3 From 9771f740c6319e67bab44d18b9717c894a6f266d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: kill antenna select ioctls Not used anywhere. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/hostapd_ioctl.h | 2 -- net/mac80211/ieee80211_ioctl.c | 20 -------------------- 2 files changed, 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 252204fd0abb..230eb4400ec6 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -26,8 +26,6 @@ * mess shall be deleted completely. */ enum { PRISM2_PARAM_IEEE_802_1X = 23, - PRISM2_PARAM_ANTSEL_TX = 24, - PRISM2_PARAM_ANTSEL_RX = 25, /* Instant802 additions */ PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES = 1001, diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index f404f1f4251d..ab09e9a90f43 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1197,18 +1197,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, sdata->ieee802_1x = value; break; - case PRISM2_PARAM_ANTSEL_TX: - local->hw.conf.antenna_sel_tx = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - - case PRISM2_PARAM_ANTSEL_RX: - local->hw.conf.antenna_sel_rx = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: local->cts_protect_erp_frames = value; break; @@ -1340,14 +1328,6 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, *param = sdata->ieee802_1x; break; - case PRISM2_PARAM_ANTSEL_TX: - *param = local->hw.conf.antenna_sel_tx; - break; - - case PRISM2_PARAM_ANTSEL_RX: - *param = local->hw.conf.antenna_sel_rx; - break; - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: *param = local->cts_protect_erp_frames; break; -- cgit v1.2.3 From fda6cc7ac45f97d4d40cc42781041dec488fa78c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: remove PRISM2_PARAM_DROP_UNENCRYPTED ioctl Interestingly, wpa_supplicant doesn't use it, but uses the currently unsupported IW_AUTH_DROP_UNENCRYPTED. So I guess it doesn't matter anyway. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/hostapd_ioctl.h | 1 - net/mac80211/ieee80211_ioctl.c | 8 -------- 2 files changed, 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 230eb4400ec6..95ca1f983a72 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -29,7 +29,6 @@ enum { /* Instant802 additions */ PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES = 1001, - PRISM2_PARAM_DROP_UNENCRYPTED = 1002, PRISM2_PARAM_PREAMBLE = 1003, PRISM2_PARAM_SHORT_SLOT_TIME = 1006, PRISM2_PARAM_NEXT_MODE = 1008, diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index ab09e9a90f43..f465d0a1c153 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1201,10 +1201,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, local->cts_protect_erp_frames = value; break; - case PRISM2_PARAM_DROP_UNENCRYPTED: - sdata->drop_unencrypted = value; - break; - case PRISM2_PARAM_PREAMBLE: local->short_preamble = value; break; @@ -1332,10 +1328,6 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, *param = local->cts_protect_erp_frames; break; - case PRISM2_PARAM_DROP_UNENCRYPTED: - *param = sdata->drop_unencrypted; - break; - case PRISM2_PARAM_PREAMBLE: *param = local->short_preamble; break; -- cgit v1.2.3 From 191b92666e3a8aa52af84e2d03350c25145be695 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: kill PRISM2_PARAM_CLEAR_KEYS Not used anywhere, hence dead code. wpa_supplicant has its own clear_keys routine. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/hostapd_ioctl.h | 1 - net/mac80211/ieee80211_ioctl.c | 60 ------------------------------------------ 2 files changed, 61 deletions(-) (limited to 'net') diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 95ca1f983a72..52da513f060a 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -32,7 +32,6 @@ enum { PRISM2_PARAM_PREAMBLE = 1003, PRISM2_PARAM_SHORT_SLOT_TIME = 1006, PRISM2_PARAM_NEXT_MODE = 1008, - PRISM2_PARAM_CLEAR_KEYS = 1009, PRISM2_PARAM_RADIO_ENABLED = 1010, PRISM2_PARAM_ANTENNA_MODE = 1013, PRISM2_PARAM_STAT_TIME = 1016, diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index f465d0a1c153..9c1d076b1944 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1018,62 +1018,6 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev, return 0; } -static int ieee80211_ioctl_clear_keys(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_key_conf key; - int i; - u8 addr[ETH_ALEN]; - struct ieee80211_key_conf *keyconf; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - - memset(addr, 0xff, ETH_ALEN); - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - keyconf = NULL; - if (sdata->keys[i] && - !sdata->keys[i]->force_sw_encrypt && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, - sdata->keys[i]))) - local->ops->set_key(local_to_hw(local), - DISABLE_KEY, addr, - keyconf, 0); - kfree(keyconf); - ieee80211_key_free(sdata->keys[i]); - sdata->keys[i] = NULL; - } - sdata->default_key = NULL; - } - read_unlock(&local->sub_if_lock); - - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - keyconf = NULL; - if (sta->key && !sta->key->force_sw_encrypt && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, sta->key))) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta->addr, keyconf, sta->aid); - kfree(keyconf); - ieee80211_key_free(sta->key); - sta->key = NULL; - } - spin_unlock_bh(&local->sta_lock); - - memset(&key, 0, sizeof(key)); - if (local->ops->set_key && - local->ops->set_key(local_to_hw(local), REMOVE_ALL_KEYS, - NULL, &key, 0)) - printk(KERN_DEBUG "%s: failed to remove hwaccel keys\n", - dev->name); - - return 0; -} - - static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local, struct ieee80211_key *key) { @@ -1227,10 +1171,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, local->next_mode = value; break; - case PRISM2_PARAM_CLEAR_KEYS: - ret = ieee80211_ioctl_clear_keys(dev); - break; - case PRISM2_PARAM_RADIO_ENABLED: ret = ieee80211_ioctl_set_radio_enabled(dev, value); break; -- cgit v1.2.3 From 5558235c6bade6662e6f257a35f2dfdc8a742147 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: conserve stack space due to padding This patch reorders some fields in struct ieee802_11_elems to save 17*7 or 17*3 bytes (on 64/32-bit machines respectively) stack space in a few functions. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/ieee80211_sta.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index 91b545c144c1..f3ca83743b48 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -76,33 +76,36 @@ static int ieee80211_sta_config_auth(struct net_device *dev, /* Parsed Information Elements */ struct ieee802_11_elems { + /* pointers to IEs */ u8 *ssid; - u8 ssid_len; u8 *supp_rates; - u8 supp_rates_len; u8 *fh_params; - u8 fh_params_len; u8 *ds_params; - u8 ds_params_len; u8 *cf_params; - u8 cf_params_len; u8 *tim; - u8 tim_len; u8 *ibss_params; - u8 ibss_params_len; u8 *challenge; - u8 challenge_len; u8 *wpa; - u8 wpa_len; u8 *rsn; - u8 rsn_len; u8 *erp_info; - u8 erp_info_len; u8 *ext_supp_rates; - u8 ext_supp_rates_len; u8 *wmm_info; - u8 wmm_info_len; u8 *wmm_param; + + /* length of them, respectively */ + u8 ssid_len; + u8 supp_rates_len; + u8 fh_params_len; + u8 ds_params_len; + u8 cf_params_len; + u8 tim_len; + u8 ibss_params_len; + u8 challenge_len; + u8 wpa_len; + u8 rsn_len; + u8 erp_info_len; + u8 ext_supp_rates_len; + u8 wmm_info_len; u8 wmm_param_len; }; -- cgit v1.2.3 From 4480f15ca62a595248d6d8e2b3e75052113cde59 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:10 +0200 Subject: [PATCH] mac80211: clarify some mac80211 things The semantics of not having an add_interface callback are not well defined, this callback is required because otherwise you cannot obtain the requested MAC address of the device. Change the documentation to reflect this, add a note about having no MAC address at all, add a warning that mac_addr in struct ieee80211_if_init_conf can be NULL and finally verify that a few callbacks are assigned by way of BUG_ON() Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- include/net/mac80211.h | 16 ++++++++++++---- net/mac80211/ieee80211.c | 29 ++++++++++++----------------- 2 files changed, 24 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 627885765a36..c34fd9a6160a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -347,9 +347,16 @@ enum ieee80211_if_types { * @mac_addr: pointer to MAC address of the interface. This pointer is valid * until the interface is removed (i.e. it cannot be used after * remove_interface() callback was called for this interface). + * This pointer will be %NULL for monitor interfaces, be careful. * * This structure is used in add_interface() and remove_interface() * callbacks of &struct ieee80211_hw. + * + * When you allow multiple interfaces to be added to your PHY, take care + * that the hardware can actually handle multiple MAC addresses. However, + * also take care that when there's no interface left with mac_addr != %NULL + * you remove the MAC address from the device to avoid acknowledging packets + * in pure monitor mode. */ struct ieee80211_if_init_conf { int if_id; @@ -574,10 +581,11 @@ struct ieee80211_ops { * to returning zero. By returning non-zero addition of the interface * is inhibited. Unless monitor_during_oper is set, it is guaranteed * that monitor interfaces and normal interfaces are mutually - * exclusive. The open() handler is called after add_interface() - * if this is the first device added. At least one of the open() - * open() and add_interface() callbacks has to be assigned. If - * add_interface() is NULL, one STA interface is permitted only. */ + * exclusive. If assigned, the open() handler is called after + * add_interface() if this is the first device added. The + * add_interface() callback has to be assigned because it is the only + * way to obtain the requested MAC address for any interface. + */ int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 773a103ee3a1..fe32a2d16053 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -2605,8 +2605,7 @@ static void ieee80211_start_hard_monitor(struct ieee80211_local *local) struct ieee80211_if_init_conf conf; if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) && - local->ops->add_interface) { + !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { conf.if_id = -1; conf.type = IEEE80211_IF_TYPE_MNTR; conf.mac_addr = NULL; @@ -2649,21 +2648,14 @@ static int ieee80211_open(struct net_device *dev) } ieee80211_start_soft_monitor(local); - if (local->ops->add_interface) { - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - res = local->ops->add_interface(local_to_hw(local), &conf); - if (res) { - if (sdata->type == IEEE80211_IF_TYPE_MNTR) - ieee80211_start_hard_monitor(local); - return res; - } - } else { - if (sdata->type != IEEE80211_IF_TYPE_STA) - return -EOPNOTSUPP; - if (local->open_count > 0) - return -ENOBUFS; + conf.if_id = dev->ifindex; + conf.type = sdata->type; + conf.mac_addr = dev->dev_addr; + res = local->ops->add_interface(local_to_hw(local), &conf); + if (res) { + if (sdata->type == IEEE80211_IF_TYPE_MNTR) + ieee80211_start_hard_monitor(local); + return res; } if (local->open_count == 0) { @@ -4896,6 +4888,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, ((sizeof(struct ieee80211_local) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); + BUG_ON(!ops->tx); + BUG_ON(!ops->config); + BUG_ON(!ops->add_interface); local->ops = ops; /* for now, mdev needs sub_if_data :/ */ -- cgit v1.2.3 From 1fd5e589d8c7d3cd42ddd39358338766cfcedec8 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 10 Jul 2007 19:32:10 +0200 Subject: [PATCH] mac80211: Implementation of SIOCSIWRATE The WEXT ioctl SIOCSIWRATE is not implemented in mac80211. This patch adds the missing routine. It supports the 'auto' keyword, fixed rates, and the combination of 'auto' and a fixed rate to select an upper bound. Based on the patch from Mohamed Abbas . Signed-off-by: Larry Finger Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/ieee80211_ioctl.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 9c1d076b1944..9bc209b72d41 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -863,6 +863,44 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev, } +static int ieee80211_ioctl_siwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_hw_mode *mode; + int i; + u32 target_rate = rate->value / 100000; + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (!sdata->bss) + return -ENODEV; + mode = local->oper_hw_mode; + /* target_rate = -1, rate->fixed = 0 means auto only, so use all rates + * target_rate = X, rate->fixed = 1 means only rate X + * target_rate = X, rate->fixed = 0 means all rates <= X */ + sdata->bss->max_ratectrl_rateidx = -1; + sdata->bss->force_unicast_rateidx = -1; + if (rate->value < 0) + return 0; + for (i=0; i< mode->num_rates; i++) { + struct ieee80211_rate *rates = &mode->rates[i]; + int this_rate = rates->rate; + + if (mode->mode == MODE_ATHEROS_TURBO || + mode->mode == MODE_ATHEROS_TURBOG) + this_rate *= 2; + if (target_rate == this_rate) { + sdata->bss->max_ratectrl_rateidx = i; + if (rate->fixed) + sdata->bss->force_unicast_rateidx = i; + break; + } + } + return 0; +} + static int ieee80211_ioctl_giwrate(struct net_device *dev, struct iw_request_info *info, struct iw_param *rate, char *extra) @@ -1658,7 +1696,7 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWNICKN */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ - (iw_handler) NULL, /* SIOCSIWRATE */ + (iw_handler) ieee80211_ioctl_siwrate, /* SIOCSIWRATE */ (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */ (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */ (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ -- cgit v1.2.3 From 5628221caf88e2a052782b042e12da7cd34111b0 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 10 Jul 2007 19:32:10 +0200 Subject: [PATCH] mac80211: ERP IE handling improvements The "protection needed" flag is currently parsed out of the ERP IE in beacons. This patch allows the ERP IE to be available at assocation time and causes the appropriate actions to be performed earlier. It is slightly complicated by the fact that most APs don't include the ERP IE in association responses. To work around this, we store ERP values in the ieee80211_sta_bss structure. Also added some WLAN_ERP defines for use by upcoming patches. Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 11 +++++++ net/mac80211/ieee80211_i.h | 6 ++++ net/mac80211/ieee80211_sta.c | 69 ++++++++++++++++++++++++++++++++------------ 3 files changed, 67 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ecd61e8438a5..272f8c8c90da 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -227,6 +227,17 @@ struct ieee80211_cts { #define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) #define WLAN_CAPABILITY_DSSS_OFDM (1<<13) +/* 802.11g ERP information element */ +#define WLAN_ERP_NON_ERP_PRESENT (1<<0) +#define WLAN_ERP_USE_PROTECTION (1<<1) +#define WLAN_ERP_BARKER_PREAMBLE (1<<2) + +/* WLAN_ERP_BARKER_PREAMBLE values */ +enum { + WLAN_ERP_PREAMBLE_SHORT = 0, + WLAN_ERP_PREAMBLE_LONG = 1, +}; + /* Status codes */ enum ieee80211_statuscode { WLAN_STATUS_SUCCESS = 0, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b222a9afd4e9..99ff7c5e9204 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -99,6 +99,12 @@ struct ieee80211_sta_bss { int probe_resp; unsigned long last_update; + /* during assocation, we save an ERP value from a probe response so + * that we can feed ERP info to the driver when handling the + * association completes. these fields probably won't be up-to-date + * otherwise, you probably don't want to use them. */ + int has_erp_value; + u8 erp_value; }; diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index f3ca83743b48..df6c410de161 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -314,6 +314,27 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, } +static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + + if (use_protection != !!ifsta->use_protection) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" + MAC_FMT ")\n", + dev->name, + use_protection ? "enabled" : "disabled", + MAC_ARG(ifsta->bssid)); + } + ifsta->use_protection = use_protection ? 1 : 0; + local->cts_protect_erp_frames = use_protection; + } +} + + static void ieee80211_sta_send_associnfo(struct net_device *dev, struct ieee80211_if_sta *ifsta) { @@ -377,9 +398,18 @@ static void ieee80211_set_associated(struct net_device *dev, if (assoc) { struct ieee80211_sub_if_data *sdata; + struct ieee80211_sta_bss *bss; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type != IEEE80211_IF_TYPE_STA) return; + + bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + if (bss) { + if (bss->has_erp_value) + ieee80211_handle_erp_ie(dev, bss->erp_value); + ieee80211_rx_bss_put(dev, bss); + } + netif_carrier_on(dev); ifsta->prev_bssid_set = 1; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); @@ -1177,6 +1207,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, return; } + /* it probably doesn't, but if the frame includes an ERP value then + * update our stored copy */ + if (elems.erp_info && elems.erp_info_len >= 1) { + struct ieee80211_sta_bss *bss + = ieee80211_rx_bss_get(dev, ifsta->bssid); + if (bss) { + bss->erp_value = elems.erp_info[0]; + bss->has_erp_value = 1; + ieee80211_rx_bss_put(dev, bss); + } + } + printk(KERN_DEBUG "%s: associated\n", dev->name); ifsta->aid = aid; ifsta->ap_capab = capab_info; @@ -1499,6 +1541,12 @@ static void ieee80211_rx_bss_info(struct net_device *dev, return; } + /* save the ERP value so that it is available at association time */ + if (elems.erp_info && elems.erp_info_len >= 1) { + bss->erp_value = elems.erp_info[0]; + bss->has_erp_value = 1; + } + bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); if (elems.ssid && elems.ssid_len <= IEEE80211_MAX_SSID_LEN) { @@ -1614,10 +1662,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata; struct ieee80211_if_sta *ifsta; - int use_protection; size_t baselen; struct ieee802_11_elems elems; @@ -1641,23 +1687,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, &elems) == ParseFailed) return; - use_protection = 0; - if (elems.erp_info && elems.erp_info_len >= 1) { - use_protection = - (elems.erp_info[0] & ERP_INFO_USE_PROTECTION) != 0; - } - - if (use_protection != !!ifsta->use_protection) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" - MAC_FMT ")\n", - dev->name, - use_protection ? "enabled" : "disabled", - MAC_ARG(ifsta->bssid)); - } - ifsta->use_protection = use_protection ? 1 : 0; - local->cts_protect_erp_frames = use_protection; - } + if (elems.erp_info && elems.erp_info_len >= 1) + ieee80211_handle_erp_ie(dev, elems.erp_info[0]); if (elems.wmm_param && ifsta->wmm_enabled) { ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, -- cgit v1.2.3 From 63fc33ceb0ccc08b3f62d7bfe56a33eb33ca9427 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 10 Jul 2007 19:32:11 +0200 Subject: [PATCH] mac80211: improved 802.11g CTS protection Currently, CTS protection is partially implemented twice: 1. via prism2 ioctls, only used by hostapd 2. via STA beacon parsing, recorded in sta.use_protection but never used (other than printed in debugfs) Protection control should be implemented on a per-subif basis. For example, a single physical device may be running a soft AP on one channel, and a STA on another. The AP interface should use protection based on what hostapd told it, and the STA interface should use protection based on beacon parsing. These should operate independantly: one subif using protection should not influence the other. To implement this, I moved the use_protection flag into ieee80211_sub_if_data and removed the device-global cts_protect_erp_frames flag. I also made the PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES write operation only available for AP interfaces, to avoid any possibility of the user messing with the behaviour of a STA. Signed-off-by: Daniel Drake Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- net/mac80211/debugfs_netdev.c | 2 +- net/mac80211/ieee80211.c | 5 ++--- net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/ieee80211_ioctl.c | 7 +++++-- net/mac80211/ieee80211_sta.c | 8 ++++---- 5 files changed, 13 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9e3964638bad..a3e01d76d503 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -118,7 +118,7 @@ static ssize_t ieee80211_if_fmt_flags( sdata->u.sta.authenticated ? "AUTH\n" : "", sdata->u.sta.associated ? "ASSOC\n" : "", sdata->u.sta.probereq_poll ? "PROBEREQ POLL\n" : "", - sdata->u.sta.use_protection ? "CTS prot\n" : ""); + sdata->use_protection ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index fe32a2d16053..2ddf4ef4065e 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -442,7 +442,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) if (!tx->u.tx.rate) return TXRX_DROP; if (tx->u.tx.mode->mode == MODE_IEEE80211G && - tx->local->cts_protect_erp_frames && tx->fragmented && + tx->sdata->use_protection && tx->fragmented && extra.nonerp) { tx->u.tx.last_frag_rate = tx->u.tx.rate; tx->u.tx.probe_last_frag = extra.probe ? 1 : 0; @@ -868,8 +868,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) * for the frame. */ if (mode->mode == MODE_IEEE80211G && (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && - tx->u.tx.unicast && - tx->local->cts_protect_erp_frames && + tx->u.tx.unicast && tx->sdata->use_protection && !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 99ff7c5e9204..055a2a912185 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -241,7 +241,6 @@ struct ieee80211_if_sta { unsigned int authenticated:1; unsigned int associated:1; unsigned int probereq_poll:1; - unsigned int use_protection:1; unsigned int create_ibss:1; unsigned int mixed_cell:1; unsigned int wmm_enabled:1; @@ -284,6 +283,7 @@ struct ieee80211_sub_if_data { int mc_count; unsigned int allmulti:1; unsigned int promisc:1; + unsigned int use_protection:1; /* CTS protect ERP frames */ struct net_device_stats stats; int drop_unencrypted; @@ -444,7 +444,6 @@ struct ieee80211_local { int *basic_rates[NUM_IEEE80211_MODES]; int rts_threshold; - int cts_protect_erp_frames; int fragmentation_threshold; int short_retry_limit; /* dot11ShortRetryLimit */ int long_retry_limit; /* dot11LongRetryLimit */ diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 9bc209b72d41..5918dd079e12 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1180,7 +1180,10 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, break; case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - local->cts_protect_erp_frames = value; + if (sdata->type != IEEE80211_IF_TYPE_AP) + ret = -ENOENT; + else + sdata->use_protection = value; break; case PRISM2_PARAM_PREAMBLE: @@ -1303,7 +1306,7 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, break; case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - *param = local->cts_protect_erp_frames; + *param = sdata->use_protection; break; case PRISM2_PARAM_PREAMBLE: diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index df6c410de161..ba2bf8f0a347 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -316,12 +316,11 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; - if (use_protection != !!ifsta->use_protection) { + if (use_protection != sdata->use_protection) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" MAC_FMT ")\n", @@ -329,8 +328,7 @@ static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) use_protection ? "enabled" : "disabled", MAC_ARG(ifsta->bssid)); } - ifsta->use_protection = use_protection ? 1 : 0; - local->cts_protect_erp_frames = use_protection; + sdata->use_protection = use_protection; } } @@ -390,6 +388,7 @@ static void ieee80211_set_associated(struct net_device *dev, struct ieee80211_if_sta *ifsta, int assoc) { union iwreq_data wrqu; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (ifsta->associated == assoc) return; @@ -417,6 +416,7 @@ static void ieee80211_set_associated(struct net_device *dev, ieee80211_sta_send_associnfo(dev, ifsta); } else { netif_carrier_off(dev); + sdata->use_protection = 0; memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); } wrqu.ap_addr.sa_family = ARPHRD_ETHER; -- cgit v1.2.3 From d379b01e9087a582d58f4b678208a4f8d8376fe7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Jul 2007 11:56:42 -0700 Subject: dmaengine: make clients responsible for managing channels The current implementation assumes that a channel will only be used by one client at a time. In order to enable channel sharing the dmaengine core is changed to a model where clients subscribe to channel-available-events. Instead of tracking how many channels a client wants and how many it has received the core just broadcasts the available channels and lets the clients optionally take a reference. The core learns about the clients' needs at dma_event_callback time. In support of multiple operation types, clients can specify a capability mask to only be notified of channels that satisfy a certain set of capabilities. Changelog: * removed DMA_TX_ARRAY_INIT, no longer needed * dma_client_chan_free -> dma_chan_release: switch to global reference counting only at device unregistration time, before it was also happening at client unregistration time * clients now return dma_state_client to dmaengine (ack, dup, nak) * checkpatch.pl fixes * fixup merge with git-ioat Cc: Chris Leech Signed-off-by: Shannon Nelson Signed-off-by: Dan Williams Acked-by: David S. Miller --- drivers/dma/dmaengine.c | 217 ++++++++++++++++++++++++---------------------- drivers/dma/ioatdma.c | 1 - drivers/dma/ioatdma.h | 3 - include/linux/dmaengine.h | 58 ++++++++----- net/core/dev.c | 112 ++++++++++++++++-------- 5 files changed, 224 insertions(+), 167 deletions(-) (limited to 'net') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 404cc7b6e705..82489923af09 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -37,11 +37,11 @@ * Each device has a channels list, which runs unlocked but is never modified * once the device is registered, it's just setup by the driver. * - * Each client has a channels list, it's only modified under the client->lock - * and in an RCU callback, so it's safe to read under rcu_read_lock(). + * Each client is responsible for keeping track of the channels it uses. See + * the definition of dma_event_callback in dmaengine.h. * * Each device has a kref, which is initialized to 1 when the device is - * registered. A kref_put is done for each class_device registered. When the + * registered. A kref_get is done for each class_device registered. When the * class_device is released, the coresponding kref_put is done in the release * method. Every time one of the device's channels is allocated to a client, * a kref_get occurs. When the channel is freed, the coresponding kref_put @@ -51,10 +51,12 @@ * references to finish. * * Each channel has an open-coded implementation of Rusty Russell's "bigref," - * with a kref and a per_cpu local_t. A single reference is set when on an - * ADDED event, and removed with a REMOVE event. Net DMA client takes an - * extra reference per outstanding transaction. The relase function does a - * kref_put on the device. -ChrisL + * with a kref and a per_cpu local_t. A dma_chan_get is called when a client + * signals that it wants to use a channel, and dma_chan_put is called when + * a channel is removed or a client using it is unregesitered. A client can + * take extra references per outstanding transaction, as is the case with + * the NET DMA client. The release function does a kref_put on the device. + * -ChrisL, DanW */ #include @@ -102,8 +104,19 @@ static ssize_t show_bytes_transferred(struct class_device *cd, char *buf) static ssize_t show_in_use(struct class_device *cd, char *buf) { struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev); + int in_use = 0; + + if (unlikely(chan->slow_ref) && + atomic_read(&chan->refcount.refcount) > 1) + in_use = 1; + else { + if (local_read(&(per_cpu_ptr(chan->local, + get_cpu())->refcount)) > 0) + in_use = 1; + put_cpu(); + } - return sprintf(buf, "%d\n", (chan->client ? 1 : 0)); + return sprintf(buf, "%d\n", in_use); } static struct class_device_attribute dma_class_attrs[] = { @@ -129,42 +142,53 @@ static struct class dma_devclass = { /* --- client and device registration --- */ +#define dma_chan_satisfies_mask(chan, mask) \ + __dma_chan_satisfies_mask((chan), &(mask)) +static int +__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want) +{ + dma_cap_mask_t has; + + bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits, + DMA_TX_TYPE_END); + return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END); +} + /** - * dma_client_chan_alloc - try to allocate a channel to a client + * dma_client_chan_alloc - try to allocate channels to a client * @client: &dma_client * * Called with dma_list_mutex held. */ -static struct dma_chan *dma_client_chan_alloc(struct dma_client *client) +static void dma_client_chan_alloc(struct dma_client *client) { struct dma_device *device; struct dma_chan *chan; - unsigned long flags; int desc; /* allocated descriptor count */ + enum dma_state_client ack; - /* Find a channel, any DMA engine will do */ - list_for_each_entry(device, &dma_device_list, global_node) { + /* Find a channel */ + list_for_each_entry(device, &dma_device_list, global_node) list_for_each_entry(chan, &device->channels, device_node) { - if (chan->client) + if (!dma_chan_satisfies_mask(chan, client->cap_mask)) continue; desc = chan->device->device_alloc_chan_resources(chan); if (desc >= 0) { - kref_get(&device->refcount); - kref_init(&chan->refcount); - chan->slow_ref = 0; - INIT_RCU_HEAD(&chan->rcu); - chan->client = client; - spin_lock_irqsave(&client->lock, flags); - list_add_tail_rcu(&chan->client_node, - &client->channels); - spin_unlock_irqrestore(&client->lock, flags); - return chan; + ack = client->event_callback(client, + chan, + DMA_RESOURCE_AVAILABLE); + + /* we are done once this client rejects + * an available resource + */ + if (ack == DMA_ACK) { + dma_chan_get(chan); + kref_get(&device->refcount); + } else if (ack == DMA_NAK) + return; } } - } - - return NULL; } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) @@ -193,7 +217,6 @@ void dma_chan_cleanup(struct kref *kref) { struct dma_chan *chan = container_of(kref, struct dma_chan, refcount); chan->device->device_free_chan_resources(chan); - chan->client = NULL; kref_put(&chan->device->refcount, dma_async_device_cleanup); } EXPORT_SYMBOL(dma_chan_cleanup); @@ -209,7 +232,7 @@ static void dma_chan_free_rcu(struct rcu_head *rcu) kref_put(&chan->refcount, dma_chan_cleanup); } -static void dma_client_chan_free(struct dma_chan *chan) +static void dma_chan_release(struct dma_chan *chan) { atomic_add(0x7FFFFFFF, &chan->refcount.refcount); chan->slow_ref = 1; @@ -217,70 +240,57 @@ static void dma_client_chan_free(struct dma_chan *chan) } /** - * dma_chans_rebalance - reallocate channels to clients - * - * When the number of DMA channel in the system changes, - * channels need to be rebalanced among clients. + * dma_chans_notify_available - broadcast available channels to the clients */ -static void dma_chans_rebalance(void) +static void dma_clients_notify_available(void) { struct dma_client *client; - struct dma_chan *chan; - unsigned long flags; mutex_lock(&dma_list_mutex); - list_for_each_entry(client, &dma_client_list, global_node) { - while (client->chans_desired > client->chan_count) { - chan = dma_client_chan_alloc(client); - if (!chan) - break; - client->chan_count++; - client->event_callback(client, - chan, - DMA_RESOURCE_ADDED); - } - while (client->chans_desired < client->chan_count) { - spin_lock_irqsave(&client->lock, flags); - chan = list_entry(client->channels.next, - struct dma_chan, - client_node); - list_del_rcu(&chan->client_node); - spin_unlock_irqrestore(&client->lock, flags); - client->chan_count--; - client->event_callback(client, - chan, - DMA_RESOURCE_REMOVED); - dma_client_chan_free(chan); - } - } + list_for_each_entry(client, &dma_client_list, global_node) + dma_client_chan_alloc(client); mutex_unlock(&dma_list_mutex); } /** - * dma_async_client_register - allocate and register a &dma_client - * @event_callback: callback for notification of channel addition/removal + * dma_chans_notify_available - tell the clients that a channel is going away + * @chan: channel on its way out */ -struct dma_client *dma_async_client_register(dma_event_callback event_callback) +static void dma_clients_notify_removed(struct dma_chan *chan) { struct dma_client *client; + enum dma_state_client ack; - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return NULL; + mutex_lock(&dma_list_mutex); + + list_for_each_entry(client, &dma_client_list, global_node) { + ack = client->event_callback(client, chan, + DMA_RESOURCE_REMOVED); + + /* client was holding resources for this channel so + * free it + */ + if (ack == DMA_ACK) { + dma_chan_put(chan); + kref_put(&chan->device->refcount, + dma_async_device_cleanup); + } + } - INIT_LIST_HEAD(&client->channels); - spin_lock_init(&client->lock); - client->chans_desired = 0; - client->chan_count = 0; - client->event_callback = event_callback; + mutex_unlock(&dma_list_mutex); +} +/** + * dma_async_client_register - register a &dma_client + * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask' + */ +void dma_async_client_register(struct dma_client *client) +{ mutex_lock(&dma_list_mutex); list_add_tail(&client->global_node, &dma_client_list); mutex_unlock(&dma_list_mutex); - - return client; } EXPORT_SYMBOL(dma_async_client_register); @@ -292,40 +302,42 @@ EXPORT_SYMBOL(dma_async_client_register); */ void dma_async_client_unregister(struct dma_client *client) { + struct dma_device *device; struct dma_chan *chan; + enum dma_state_client ack; if (!client) return; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &client->channels, client_node) - dma_client_chan_free(chan); - rcu_read_unlock(); - mutex_lock(&dma_list_mutex); + /* free all channels the client is holding */ + list_for_each_entry(device, &dma_device_list, global_node) + list_for_each_entry(chan, &device->channels, device_node) { + ack = client->event_callback(client, chan, + DMA_RESOURCE_REMOVED); + + if (ack == DMA_ACK) { + dma_chan_put(chan); + kref_put(&chan->device->refcount, + dma_async_device_cleanup); + } + } + list_del(&client->global_node); mutex_unlock(&dma_list_mutex); - - kfree(client); - dma_chans_rebalance(); } EXPORT_SYMBOL(dma_async_client_unregister); /** - * dma_async_client_chan_request - request DMA channels - * @client: &dma_client - * @number: count of DMA channels requested - * - * Clients call dma_async_client_chan_request() to specify how many - * DMA channels they need, 0 to free all currently allocated. - * The resulting allocations/frees are indicated to the client via the - * event callback. + * dma_async_client_chan_request - send all available channels to the + * client that satisfy the capability mask + * @client - requester */ -void dma_async_client_chan_request(struct dma_client *client, - unsigned int number) +void dma_async_client_chan_request(struct dma_client *client) { - client->chans_desired = number; - dma_chans_rebalance(); + mutex_lock(&dma_list_mutex); + dma_client_chan_alloc(client); + mutex_unlock(&dma_list_mutex); } EXPORT_SYMBOL(dma_async_client_chan_request); @@ -386,13 +398,16 @@ int dma_async_device_register(struct dma_device *device) } kref_get(&device->refcount); + kref_init(&chan->refcount); + chan->slow_ref = 0; + INIT_RCU_HEAD(&chan->rcu); } mutex_lock(&dma_list_mutex); list_add_tail(&device->global_node, &dma_device_list); mutex_unlock(&dma_list_mutex); - dma_chans_rebalance(); + dma_clients_notify_available(); return 0; @@ -428,26 +443,16 @@ static void dma_async_device_cleanup(struct kref *kref) void dma_async_device_unregister(struct dma_device *device) { struct dma_chan *chan; - unsigned long flags; mutex_lock(&dma_list_mutex); list_del(&device->global_node); mutex_unlock(&dma_list_mutex); list_for_each_entry(chan, &device->channels, device_node) { - if (chan->client) { - spin_lock_irqsave(&chan->client->lock, flags); - list_del(&chan->client_node); - chan->client->chan_count--; - spin_unlock_irqrestore(&chan->client->lock, flags); - chan->client->event_callback(chan->client, - chan, - DMA_RESOURCE_REMOVED); - dma_client_chan_free(chan); - } + dma_clients_notify_removed(chan); class_device_unregister(&chan->class_dev); + dma_chan_release(chan); } - dma_chans_rebalance(); kref_put(&device->refcount, dma_async_device_cleanup); wait_for_completion(&device->done); diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c index 171044930282..81810b3042f1 100644 --- a/drivers/dma/ioatdma.c +++ b/drivers/dma/ioatdma.c @@ -72,7 +72,6 @@ static int enumerate_dma_channels(struct ioat_device *device) INIT_LIST_HEAD(&ioat_chan->used_desc); /* This should be made common somewhere in dmaengine.c */ ioat_chan->common.device = &device->common; - ioat_chan->common.client = NULL; list_add_tail(&ioat_chan->common.device_node, &device->common.channels); } diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index d3f69bb15fa0..d3726478031a 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -30,9 +30,6 @@ #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 -extern struct list_head dma_device_list; -extern struct list_head dma_client_list; - /** * struct ioat_device - internal representation of a IOAT device * @pdev: PCI-Express device diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3de1cf71031a..a3b6035b6c86 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -29,19 +29,31 @@ #include /** - * enum dma_event - resource PNP/power managment events + * enum dma_state - resource PNP/power managment state * @DMA_RESOURCE_SUSPEND: DMA device going into low power state * @DMA_RESOURCE_RESUME: DMA device returning to full power - * @DMA_RESOURCE_ADDED: DMA device added to the system + * @DMA_RESOURCE_AVAILABLE: DMA device available to the system * @DMA_RESOURCE_REMOVED: DMA device removed from the system */ -enum dma_event { +enum dma_state { DMA_RESOURCE_SUSPEND, DMA_RESOURCE_RESUME, - DMA_RESOURCE_ADDED, + DMA_RESOURCE_AVAILABLE, DMA_RESOURCE_REMOVED, }; +/** + * enum dma_state_client - state of the channel in the client + * @DMA_ACK: client would like to use, or was using this channel + * @DMA_DUP: client has already seen this channel, or is not using this channel + * @DMA_NAK: client does not want to see any more channels + */ +enum dma_state_client { + DMA_ACK, + DMA_DUP, + DMA_NAK, +}; + /** * typedef dma_cookie_t - an opaque DMA cookie * @@ -104,7 +116,6 @@ struct dma_chan_percpu { /** * struct dma_chan - devices supply DMA channels, clients use them - * @client: ptr to the client user of this chan, will be %NULL when unused * @device: ptr to the dma device who supplies this channel, always !%NULL * @cookie: last cookie value returned to client * @chan_id: channel ID for sysfs @@ -112,12 +123,10 @@ struct dma_chan_percpu { * @refcount: kref, used in "bigref" slow-mode * @slow_ref: indicates that the DMA channel is free * @rcu: the DMA channel's RCU head - * @client_node: used to add this to the client chan list * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu */ struct dma_chan { - struct dma_client *client; struct dma_device *device; dma_cookie_t cookie; @@ -129,11 +138,11 @@ struct dma_chan { int slow_ref; struct rcu_head rcu; - struct list_head client_node; struct list_head device_node; struct dma_chan_percpu *local; }; + void dma_chan_cleanup(struct kref *kref); static inline void dma_chan_get(struct dma_chan *chan) @@ -158,26 +167,31 @@ static inline void dma_chan_put(struct dma_chan *chan) /* * typedef dma_event_callback - function pointer to a DMA event callback + * For each channel added to the system this routine is called for each client. + * If the client would like to use the channel it returns '1' to signal (ack) + * the dmaengine core to take out a reference on the channel and its + * corresponding device. A client must not 'ack' an available channel more + * than once. When a channel is removed all clients are notified. If a client + * is using the channel it must 'ack' the removal. A client must not 'ack' a + * removed channel more than once. + * @client - 'this' pointer for the client context + * @chan - channel to be acted upon + * @state - available or removed */ -typedef void (*dma_event_callback) (struct dma_client *client, - struct dma_chan *chan, enum dma_event event); +struct dma_client; +typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, + struct dma_chan *chan, enum dma_state state); /** * struct dma_client - info on the entity making use of DMA services * @event_callback: func ptr to call when something happens - * @chan_count: number of chans allocated - * @chans_desired: number of chans requested. Can be +/- chan_count - * @lock: protects access to the channels list - * @channels: the list of DMA channels allocated + * @cap_mask: only return channels that satisfy the requested capabilities + * a value of zero corresponds to any capability * @global_node: list_head for global dma_client_list */ struct dma_client { dma_event_callback event_callback; - unsigned int chan_count; - unsigned int chans_desired; - - spinlock_t lock; - struct list_head channels; + dma_cap_mask_t cap_mask; struct list_head global_node; }; @@ -285,10 +299,9 @@ struct dma_device { /* --- public DMA engine API --- */ -struct dma_client *dma_async_client_register(dma_event_callback event_callback); +void dma_async_client_register(struct dma_client *client); void dma_async_client_unregister(struct dma_client *client); -void dma_async_client_chan_request(struct dma_client *client, - unsigned int number); +void dma_async_client_chan_request(struct dma_client *client); dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, @@ -299,7 +312,6 @@ dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, struct dma_chan *chan); - static inline void async_tx_ack(struct dma_async_tx_descriptor *tx) { diff --git a/net/core/dev.c b/net/core/dev.c index ee051bb398a0..835202fb34c4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -151,9 +151,22 @@ static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */ static struct list_head ptype_all __read_mostly; /* Taps */ #ifdef CONFIG_NET_DMA -static struct dma_client *net_dma_client; -static unsigned int net_dma_count; -static spinlock_t net_dma_event_lock; +struct net_dma { + struct dma_client client; + spinlock_t lock; + cpumask_t channel_mask; + struct dma_chan *channels[NR_CPUS]; +}; + +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state); + +static struct net_dma net_dma = { + .client = { + .event_callback = netdev_dma_event, + }, +}; #endif /* @@ -2015,12 +2028,13 @@ out: * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (net_dma_client) { - struct dma_chan *chan; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) - dma_async_memcpy_issue_pending(chan); - rcu_read_unlock(); + if (!cpus_empty(net_dma.channel_mask)) { + int chan_idx; + for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + struct dma_chan *chan = net_dma.channels[chan_idx]; + if (chan) + dma_async_memcpy_issue_pending(chan); + } } #endif return; @@ -3563,12 +3577,13 @@ static int dev_cpu_callback(struct notifier_block *nfb, * This is called when the number of channels allocated to the net_dma_client * changes. The net_dma_client tries to have one DMA channel per CPU. */ -static void net_dma_rebalance(void) + +static void net_dma_rebalance(struct net_dma *net_dma) { - unsigned int cpu, i, n; + unsigned int cpu, i, n, chan_idx; struct dma_chan *chan; - if (net_dma_count == 0) { + if (cpus_empty(net_dma->channel_mask)) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; @@ -3577,10 +3592,12 @@ static void net_dma_rebalance(void) i = 0; cpu = first_cpu(cpu_online_map); - rcu_read_lock(); - list_for_each_entry(chan, &net_dma_client->channels, client_node) { - n = ((num_online_cpus() / net_dma_count) - + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + chan = net_dma->channels[chan_idx]; + + n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) + + (i < (num_online_cpus() % + cpus_weight(net_dma->channel_mask)) ? 1 : 0)); while(n) { per_cpu(softnet_data, cpu).net_dma = chan; @@ -3589,7 +3606,6 @@ static void net_dma_rebalance(void) } i++; } - rcu_read_unlock(); } /** @@ -3598,23 +3614,53 @@ static void net_dma_rebalance(void) * @chan: DMA channel for the event * @event: event type */ -static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_event event) -{ - spin_lock(&net_dma_event_lock); - switch (event) { - case DMA_RESOURCE_ADDED: - net_dma_count++; - net_dma_rebalance(); +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) +{ + int i, found = 0, pos = -1; + struct net_dma *net_dma = + container_of(client, struct net_dma, client); + enum dma_state_client ack = DMA_DUP; /* default: take no action */ + + spin_lock(&net_dma->lock); + switch (state) { + case DMA_RESOURCE_AVAILABLE: + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + break; + } else if (net_dma->channels[i] == NULL && pos < 0) + pos = i; + + if (!found && pos >= 0) { + ack = DMA_ACK; + net_dma->channels[pos] = chan; + cpu_set(pos, net_dma->channel_mask); + net_dma_rebalance(net_dma); + } break; case DMA_RESOURCE_REMOVED: - net_dma_count--; - net_dma_rebalance(); + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + pos = i; + break; + } + + if (found) { + ack = DMA_ACK; + cpu_clear(pos, net_dma->channel_mask); + net_dma->channels[i] = NULL; + net_dma_rebalance(net_dma); + } break; default: break; } - spin_unlock(&net_dma_event_lock); + spin_unlock(&net_dma->lock); + + return ack; } /** @@ -3622,12 +3668,10 @@ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, */ static int __init netdev_dma_register(void) { - spin_lock_init(&net_dma_event_lock); - net_dma_client = dma_async_client_register(netdev_dma_event); - if (net_dma_client == NULL) - return -ENOMEM; - - dma_async_client_chan_request(net_dma_client, num_online_cpus()); + spin_lock_init(&net_dma.lock); + dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); + dma_async_client_register(&net_dma.client); + dma_async_client_chan_request(&net_dma.client); return 0; } -- cgit v1.2.3 From bd238fb431f31989898423c8b6496bc8c4204a86 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Tue, 10 Jul 2007 17:57:28 -0500 Subject: 9p: Reorganization of 9p file system code This patchset moves non-filesystem interfaces of v9fs from fs/9p to net/9p. It moves the transport, packet marshalling and connection layers to net/9p leaving only the VFS related files in fs/9p. This work is being done in preparation for in-kernel 9p servers as well as alternate 9p clients (other than VFS). Signed-off-by: Latchesar Ionkov Signed-off-by: Eric Van Hensbergen --- fs/9p/9p.h | 375 ---------------- fs/9p/Makefile | 6 - fs/9p/conv.c | 845 ----------------------------------- fs/9p/conv.h | 50 --- fs/9p/debug.h | 77 ---- fs/9p/error.c | 93 ---- fs/9p/error.h | 177 -------- fs/9p/fcall.c | 427 ------------------ fs/9p/fcprint.c | 345 --------------- fs/9p/fid.c | 168 +++---- fs/9p/fid.h | 43 +- fs/9p/mux.c | 1033 ------------------------------------------- fs/9p/mux.h | 55 --- fs/9p/trans_fd.c | 308 ------------- fs/9p/transport.h | 45 -- fs/9p/v9fs.c | 288 +++--------- fs/9p/v9fs.h | 32 +- fs/9p/v9fs_vfs.h | 6 +- fs/9p/vfs_addr.c | 57 +-- fs/9p/vfs_dentry.c | 37 +- fs/9p/vfs_dir.c | 155 ++----- fs/9p/vfs_file.c | 160 ++----- fs/9p/vfs_inode.c | 753 ++++++++++++------------------- fs/9p/vfs_super.c | 91 ++-- fs/Kconfig | 2 +- include/net/9p/9p.h | 417 ++++++++++++++++++ include/net/9p/client.h | 80 ++++ include/net/9p/conn.h | 57 +++ include/net/9p/transport.h | 49 +++ net/9p/Kconfig | 21 + net/9p/Makefile | 13 + net/9p/client.c | 965 ++++++++++++++++++++++++++++++++++++++++ net/9p/conv.c | 903 +++++++++++++++++++++++++++++++++++++ net/9p/error.c | 240 ++++++++++ net/9p/fcprint.c | 358 +++++++++++++++ net/9p/mod.c | 85 ++++ net/9p/mux.c | 1050 ++++++++++++++++++++++++++++++++++++++++++++ net/9p/sysctl.c | 86 ++++ net/9p/trans_fd.c | 363 +++++++++++++++ net/9p/util.c | 125 ++++++ net/Kconfig | 1 + net/Makefile | 1 + 42 files changed, 5367 insertions(+), 5075 deletions(-) delete mode 100644 fs/9p/9p.h delete mode 100644 fs/9p/conv.c delete mode 100644 fs/9p/conv.h delete mode 100644 fs/9p/debug.h delete mode 100644 fs/9p/error.c delete mode 100644 fs/9p/error.h delete mode 100644 fs/9p/fcall.c delete mode 100644 fs/9p/fcprint.c delete mode 100644 fs/9p/mux.c delete mode 100644 fs/9p/mux.h delete mode 100644 fs/9p/trans_fd.c delete mode 100644 fs/9p/transport.h create mode 100644 include/net/9p/9p.h create mode 100644 include/net/9p/client.h create mode 100644 include/net/9p/conn.h create mode 100644 include/net/9p/transport.h create mode 100644 net/9p/Kconfig create mode 100644 net/9p/Makefile create mode 100644 net/9p/client.c create mode 100644 net/9p/conv.c create mode 100644 net/9p/error.c create mode 100644 net/9p/fcprint.c create mode 100644 net/9p/mod.c create mode 100644 net/9p/mux.c create mode 100644 net/9p/sysctl.c create mode 100644 net/9p/trans_fd.c create mode 100644 net/9p/util.c (limited to 'net') diff --git a/fs/9p/9p.h b/fs/9p/9p.h deleted file mode 100644 index 94e2f92ab2e8..000000000000 --- a/fs/9p/9p.h +++ /dev/null @@ -1,375 +0,0 @@ -/* - * linux/fs/9p/9p.h - * - * 9P protocol definitions. - * - * Copyright (C) 2005 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -/* Message Types */ -enum { - TVERSION = 100, - RVERSION, - TAUTH = 102, - RAUTH, - TATTACH = 104, - RATTACH, - TERROR = 106, - RERROR, - TFLUSH = 108, - RFLUSH, - TWALK = 110, - RWALK, - TOPEN = 112, - ROPEN, - TCREATE = 114, - RCREATE, - TREAD = 116, - RREAD, - TWRITE = 118, - RWRITE, - TCLUNK = 120, - RCLUNK, - TREMOVE = 122, - RREMOVE, - TSTAT = 124, - RSTAT, - TWSTAT = 126, - RWSTAT, -}; - -/* modes */ -enum { - V9FS_OREAD = 0x00, - V9FS_OWRITE = 0x01, - V9FS_ORDWR = 0x02, - V9FS_OEXEC = 0x03, - V9FS_OEXCL = 0x04, - V9FS_OTRUNC = 0x10, - V9FS_OREXEC = 0x20, - V9FS_ORCLOSE = 0x40, - V9FS_OAPPEND = 0x80, -}; - -/* permissions */ -enum { - V9FS_DMDIR = 0x80000000, - V9FS_DMAPPEND = 0x40000000, - V9FS_DMEXCL = 0x20000000, - V9FS_DMMOUNT = 0x10000000, - V9FS_DMAUTH = 0x08000000, - V9FS_DMTMP = 0x04000000, - V9FS_DMSYMLINK = 0x02000000, - V9FS_DMLINK = 0x01000000, - /* 9P2000.u extensions */ - V9FS_DMDEVICE = 0x00800000, - V9FS_DMNAMEDPIPE = 0x00200000, - V9FS_DMSOCKET = 0x00100000, - V9FS_DMSETUID = 0x00080000, - V9FS_DMSETGID = 0x00040000, -}; - -/* qid.types */ -enum { - V9FS_QTDIR = 0x80, - V9FS_QTAPPEND = 0x40, - V9FS_QTEXCL = 0x20, - V9FS_QTMOUNT = 0x10, - V9FS_QTAUTH = 0x08, - V9FS_QTTMP = 0x04, - V9FS_QTSYMLINK = 0x02, - V9FS_QTLINK = 0x01, - V9FS_QTFILE = 0x00, -}; - -#define V9FS_NOTAG (u16)(~0) -#define V9FS_NOFID (u32)(~0) -#define V9FS_MAXWELEM 16 - -/* ample room for Twrite/Rread header (iounit) */ -#define V9FS_IOHDRSZ 24 - -struct v9fs_str { - u16 len; - char *str; -}; - -/* qids are the unique ID for a file (like an inode */ -struct v9fs_qid { - u8 type; - u32 version; - u64 path; -}; - -/* Plan 9 file metadata (stat) structure */ -struct v9fs_stat { - u16 size; - u16 type; - u32 dev; - struct v9fs_qid qid; - u32 mode; - u32 atime; - u32 mtime; - u64 length; - struct v9fs_str name; - struct v9fs_str uid; - struct v9fs_str gid; - struct v9fs_str muid; - struct v9fs_str extension; /* 9p2000.u extensions */ - u32 n_uid; /* 9p2000.u extensions */ - u32 n_gid; /* 9p2000.u extensions */ - u32 n_muid; /* 9p2000.u extensions */ -}; - -/* file metadata (stat) structure used to create Twstat message - The is similar to v9fs_stat, but the strings don't point to - the same memory block and should be freed separately -*/ -struct v9fs_wstat { - u16 size; - u16 type; - u32 dev; - struct v9fs_qid qid; - u32 mode; - u32 atime; - u32 mtime; - u64 length; - char *name; - char *uid; - char *gid; - char *muid; - char *extension; /* 9p2000.u extensions */ - u32 n_uid; /* 9p2000.u extensions */ - u32 n_gid; /* 9p2000.u extensions */ - u32 n_muid; /* 9p2000.u extensions */ -}; - -/* Structures for Protocol Operations */ - -struct Tversion { - u32 msize; - struct v9fs_str version; -}; - -struct Rversion { - u32 msize; - struct v9fs_str version; -}; - -struct Tauth { - u32 afid; - struct v9fs_str uname; - struct v9fs_str aname; -}; - -struct Rauth { - struct v9fs_qid qid; -}; - -struct Rerror { - struct v9fs_str error; - u32 errno; /* 9p2000.u extension */ -}; - -struct Tflush { - u16 oldtag; -}; - -struct Rflush { -}; - -struct Tattach { - u32 fid; - u32 afid; - struct v9fs_str uname; - struct v9fs_str aname; -}; - -struct Rattach { - struct v9fs_qid qid; -}; - -struct Twalk { - u32 fid; - u32 newfid; - u16 nwname; - struct v9fs_str wnames[16]; -}; - -struct Rwalk { - u16 nwqid; - struct v9fs_qid wqids[16]; -}; - -struct Topen { - u32 fid; - u8 mode; -}; - -struct Ropen { - struct v9fs_qid qid; - u32 iounit; -}; - -struct Tcreate { - u32 fid; - struct v9fs_str name; - u32 perm; - u8 mode; - struct v9fs_str extension; -}; - -struct Rcreate { - struct v9fs_qid qid; - u32 iounit; -}; - -struct Tread { - u32 fid; - u64 offset; - u32 count; -}; - -struct Rread { - u32 count; - u8 *data; -}; - -struct Twrite { - u32 fid; - u64 offset; - u32 count; - u8 *data; -}; - -struct Rwrite { - u32 count; -}; - -struct Tclunk { - u32 fid; -}; - -struct Rclunk { -}; - -struct Tremove { - u32 fid; -}; - -struct Rremove { -}; - -struct Tstat { - u32 fid; -}; - -struct Rstat { - struct v9fs_stat stat; -}; - -struct Twstat { - u32 fid; - struct v9fs_stat stat; -}; - -struct Rwstat { -}; - -/* - * fcall is the primary packet structure - * - */ - -struct v9fs_fcall { - u32 size; - u8 id; - u16 tag; - void *sdata; - - union { - struct Tversion tversion; - struct Rversion rversion; - struct Tauth tauth; - struct Rauth rauth; - struct Rerror rerror; - struct Tflush tflush; - struct Rflush rflush; - struct Tattach tattach; - struct Rattach rattach; - struct Twalk twalk; - struct Rwalk rwalk; - struct Topen topen; - struct Ropen ropen; - struct Tcreate tcreate; - struct Rcreate rcreate; - struct Tread tread; - struct Rread rread; - struct Twrite twrite; - struct Rwrite rwrite; - struct Tclunk tclunk; - struct Rclunk rclunk; - struct Tremove tremove; - struct Rremove rremove; - struct Tstat tstat; - struct Rstat rstat; - struct Twstat twstat; - struct Rwstat rwstat; - } params; -}; - -#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \ - fcall?fcall->params.rerror.error.len:0, \ - fcall?fcall->params.rerror.error.str:""); - -int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, - char *version, struct v9fs_fcall **rcall); - -int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, - u32 fid, u32 afid, struct v9fs_fcall **rcall); - -int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid); - -int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcall); - -int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_wstat *wstat, struct v9fs_fcall **rcall); - -int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, - char *name, struct v9fs_fcall **rcall); - -int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, - struct v9fs_fcall **rcall); - -int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcall); - -int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, - u32 perm, u8 mode, char *extension, struct v9fs_fcall **rcall); - -int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, - u64 offset, u32 count, struct v9fs_fcall **rcall); - -int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, const char __user * data, - struct v9fs_fcall **rcall); -int v9fs_printfcall(char *, int, struct v9fs_fcall *, int); diff --git a/fs/9p/Makefile b/fs/9p/Makefile index 87897f84dfb6..bc7f0d1551e6 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -1,18 +1,12 @@ obj-$(CONFIG_9P_FS) := 9p.o 9p-objs := \ - trans_fd.o \ - mux.o \ - fcall.o \ - conv.o \ vfs_super.o \ vfs_inode.o \ vfs_addr.o \ vfs_file.o \ vfs_dir.o \ vfs_dentry.o \ - error.o \ v9fs.o \ fid.o \ - fcprint.o diff --git a/fs/9p/conv.c b/fs/9p/conv.c deleted file mode 100644 index a3ed571eee31..000000000000 --- a/fs/9p/conv.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * linux/fs/9p/conv.c - * - * 9P protocol conversion functions - * - * Copyright (C) 2004, 2005 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "conv.h" - -/* - * Buffer to help with string parsing - */ -struct cbuf { - unsigned char *sp; - unsigned char *p; - unsigned char *ep; -}; - -static inline void buf_init(struct cbuf *buf, void *data, int datalen) -{ - buf->sp = buf->p = data; - buf->ep = data + datalen; -} - -static inline int buf_check_overflow(struct cbuf *buf) -{ - return buf->p > buf->ep; -} - -static int buf_check_size(struct cbuf *buf, int len) -{ - if (buf->p + len > buf->ep) { - if (buf->p < buf->ep) { - eprintk(KERN_ERR, "buffer overflow: want %d has %d\n", - len, (int)(buf->ep - buf->p)); - dump_stack(); - buf->p = buf->ep + 1; - } - - return 0; - } - - return 1; -} - -static void *buf_alloc(struct cbuf *buf, int len) -{ - void *ret = NULL; - - if (buf_check_size(buf, len)) { - ret = buf->p; - buf->p += len; - } - - return ret; -} - -static void buf_put_int8(struct cbuf *buf, u8 val) -{ - if (buf_check_size(buf, 1)) { - buf->p[0] = val; - buf->p++; - } -} - -static void buf_put_int16(struct cbuf *buf, u16 val) -{ - if (buf_check_size(buf, 2)) { - *(__le16 *) buf->p = cpu_to_le16(val); - buf->p += 2; - } -} - -static void buf_put_int32(struct cbuf *buf, u32 val) -{ - if (buf_check_size(buf, 4)) { - *(__le32 *)buf->p = cpu_to_le32(val); - buf->p += 4; - } -} - -static void buf_put_int64(struct cbuf *buf, u64 val) -{ - if (buf_check_size(buf, 8)) { - *(__le64 *)buf->p = cpu_to_le64(val); - buf->p += 8; - } -} - -static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) -{ - char *ret; - - ret = NULL; - if (buf_check_size(buf, slen + 2)) { - buf_put_int16(buf, slen); - ret = buf->p; - memcpy(buf->p, s, slen); - buf->p += slen; - } - - return ret; -} - -static inline void buf_put_string(struct cbuf *buf, const char *s) -{ - buf_put_stringn(buf, s, strlen(s)); -} - -static u8 buf_get_int8(struct cbuf *buf) -{ - u8 ret = 0; - - if (buf_check_size(buf, 1)) { - ret = buf->p[0]; - buf->p++; - } - - return ret; -} - -static u16 buf_get_int16(struct cbuf *buf) -{ - u16 ret = 0; - - if (buf_check_size(buf, 2)) { - ret = le16_to_cpu(*(__le16 *)buf->p); - buf->p += 2; - } - - return ret; -} - -static u32 buf_get_int32(struct cbuf *buf) -{ - u32 ret = 0; - - if (buf_check_size(buf, 4)) { - ret = le32_to_cpu(*(__le32 *)buf->p); - buf->p += 4; - } - - return ret; -} - -static u64 buf_get_int64(struct cbuf *buf) -{ - u64 ret = 0; - - if (buf_check_size(buf, 8)) { - ret = le64_to_cpu(*(__le64 *)buf->p); - buf->p += 8; - } - - return ret; -} - -static void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr) -{ - vstr->len = buf_get_int16(buf); - if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) { - vstr->str = buf->p; - buf->p += vstr->len; - } else { - vstr->len = 0; - vstr->str = NULL; - } -} - -static void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid) -{ - qid->type = buf_get_int8(bufp); - qid->version = buf_get_int32(bufp); - qid->path = buf_get_int64(bufp); -} - -/** - * v9fs_size_wstat - calculate the size of a variable length stat struct - * @stat: metadata (stat) structure - * @extended: non-zero if 9P2000.u - * - */ - -static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended) -{ - int size = 0; - - if (wstat == NULL) { - eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n"); - return 0; - } - - size = /* 2 + *//* size[2] */ - 2 + /* type[2] */ - 4 + /* dev[4] */ - 1 + /* qid.type[1] */ - 4 + /* qid.vers[4] */ - 8 + /* qid.path[8] */ - 4 + /* mode[4] */ - 4 + /* atime[4] */ - 4 + /* mtime[4] */ - 8 + /* length[8] */ - 8; /* minimum sum of string lengths */ - - if (wstat->name) - size += strlen(wstat->name); - if (wstat->uid) - size += strlen(wstat->uid); - if (wstat->gid) - size += strlen(wstat->gid); - if (wstat->muid) - size += strlen(wstat->muid); - - if (extended) { - size += 4 + /* n_uid[4] */ - 4 + /* n_gid[4] */ - 4 + /* n_muid[4] */ - 2; /* string length of extension[4] */ - if (wstat->extension) - size += strlen(wstat->extension); - } - - return size; -} - -/** - * buf_get_stat - safely decode a recieved metadata (stat) structure - * @bufp: buffer to deserialize - * @stat: metadata (stat) structure - * @extended: non-zero if 9P2000.u - * - */ - -static void -buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended) -{ - stat->size = buf_get_int16(bufp); - stat->type = buf_get_int16(bufp); - stat->dev = buf_get_int32(bufp); - stat->qid.type = buf_get_int8(bufp); - stat->qid.version = buf_get_int32(bufp); - stat->qid.path = buf_get_int64(bufp); - stat->mode = buf_get_int32(bufp); - stat->atime = buf_get_int32(bufp); - stat->mtime = buf_get_int32(bufp); - stat->length = buf_get_int64(bufp); - buf_get_str(bufp, &stat->name); - buf_get_str(bufp, &stat->uid); - buf_get_str(bufp, &stat->gid); - buf_get_str(bufp, &stat->muid); - - if (extended) { - buf_get_str(bufp, &stat->extension); - stat->n_uid = buf_get_int32(bufp); - stat->n_gid = buf_get_int32(bufp); - stat->n_muid = buf_get_int32(bufp); - } -} - -/** - * v9fs_deserialize_stat - decode a received metadata structure - * @buf: buffer to deserialize - * @buflen: length of received buffer - * @stat: metadata structure to decode into - * @extended: non-zero if 9P2000.u - * - * Note: stat will point to the buf region. - */ - -int -v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat, - int extended) -{ - struct cbuf buffer; - struct cbuf *bufp = &buffer; - unsigned char *p; - - buf_init(bufp, buf, buflen); - p = bufp->p; - buf_get_stat(bufp, stat, extended); - - if (buf_check_overflow(bufp)) - return 0; - else - return bufp->p - p; -} - -/** - * deserialize_fcall - unmarshal a response - * @buf: recieved buffer - * @buflen: length of received buffer - * @rcall: fcall structure to populate - * @rcalllen: length of fcall structure to populate - * @extended: non-zero if 9P2000.u - * - */ - -int -v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, - int extended) -{ - - struct cbuf buffer; - struct cbuf *bufp = &buffer; - int i = 0; - - buf_init(bufp, buf, buflen); - - rcall->size = buf_get_int32(bufp); - rcall->id = buf_get_int8(bufp); - rcall->tag = buf_get_int16(bufp); - - dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id, - rcall->tag); - - switch (rcall->id) { - default: - eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id); - return -EPROTO; - case RVERSION: - rcall->params.rversion.msize = buf_get_int32(bufp); - buf_get_str(bufp, &rcall->params.rversion.version); - break; - case RFLUSH: - break; - case RATTACH: - rcall->params.rattach.qid.type = buf_get_int8(bufp); - rcall->params.rattach.qid.version = buf_get_int32(bufp); - rcall->params.rattach.qid.path = buf_get_int64(bufp); - break; - case RWALK: - rcall->params.rwalk.nwqid = buf_get_int16(bufp); - if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) { - eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n", - V9FS_MAXWELEM, rcall->params.rwalk.nwqid); - return -EPROTO; - } - - for (i = 0; i < rcall->params.rwalk.nwqid; i++) - buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]); - break; - case ROPEN: - buf_get_qid(bufp, &rcall->params.ropen.qid); - rcall->params.ropen.iounit = buf_get_int32(bufp); - break; - case RCREATE: - buf_get_qid(bufp, &rcall->params.rcreate.qid); - rcall->params.rcreate.iounit = buf_get_int32(bufp); - break; - case RREAD: - rcall->params.rread.count = buf_get_int32(bufp); - rcall->params.rread.data = bufp->p; - buf_check_size(bufp, rcall->params.rread.count); - break; - case RWRITE: - rcall->params.rwrite.count = buf_get_int32(bufp); - break; - case RCLUNK: - break; - case RREMOVE: - break; - case RSTAT: - buf_get_int16(bufp); - buf_get_stat(bufp, &rcall->params.rstat.stat, extended); - break; - case RWSTAT: - break; - case RERROR: - buf_get_str(bufp, &rcall->params.rerror.error); - if (extended) - rcall->params.rerror.errno = buf_get_int16(bufp); - break; - } - - if (buf_check_overflow(bufp)) { - dprintk(DEBUG_ERROR, "buffer overflow\n"); - return -EIO; - } - - return bufp->p - bufp->sp; -} - -static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p) -{ - *p = val; - buf_put_int8(bufp, val); -} - -static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p) -{ - *p = val; - buf_put_int16(bufp, val); -} - -static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p) -{ - *p = val; - buf_put_int32(bufp, val); -} - -static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p) -{ - *p = val; - buf_put_int64(bufp, val); -} - -static void -v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str) -{ - int len; - char *s; - - if (data) - len = strlen(data); - else - len = 0; - - s = buf_put_stringn(bufp, data, len); - if (str) { - str->len = len; - str->str = s; - } -} - -static int -v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count, - unsigned char **pdata) -{ - *pdata = buf_alloc(bufp, count); - return copy_from_user(*pdata, data, count); -} - -static void -v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat, - struct v9fs_stat *stat, int statsz, int extended) -{ - v9fs_put_int16(bufp, statsz, &stat->size); - v9fs_put_int16(bufp, wstat->type, &stat->type); - v9fs_put_int32(bufp, wstat->dev, &stat->dev); - v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type); - v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version); - v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path); - v9fs_put_int32(bufp, wstat->mode, &stat->mode); - v9fs_put_int32(bufp, wstat->atime, &stat->atime); - v9fs_put_int32(bufp, wstat->mtime, &stat->mtime); - v9fs_put_int64(bufp, wstat->length, &stat->length); - - v9fs_put_str(bufp, wstat->name, &stat->name); - v9fs_put_str(bufp, wstat->uid, &stat->uid); - v9fs_put_str(bufp, wstat->gid, &stat->gid); - v9fs_put_str(bufp, wstat->muid, &stat->muid); - - if (extended) { - v9fs_put_str(bufp, wstat->extension, &stat->extension); - v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid); - v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid); - v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid); - } -} - -static struct v9fs_fcall * -v9fs_create_common(struct cbuf *bufp, u32 size, u8 id) -{ - struct v9fs_fcall *fc; - - size += 4 + 1 + 2; /* size[4] id[1] tag[2] */ - fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL); - if (!fc) - return ERR_PTR(-ENOMEM); - - fc->sdata = (char *)fc + sizeof(*fc); - - buf_init(bufp, (char *)fc->sdata, size); - v9fs_put_int32(bufp, size, &fc->size); - v9fs_put_int8(bufp, id, &fc->id); - v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag); - - return fc; -} - -void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag) -{ - fc->tag = tag; - *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag); -} - -struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(version); /* msize[4] version[s] */ - fc = v9fs_create_common(bufp, size, TVERSION); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, msize, &fc->params.tversion.msize); - v9fs_put_str(bufp, version, &fc->params.tversion.version); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -#if 0 -struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(uname) + 2 + strlen(aname); /* afid[4] uname[s] aname[s] */ - fc = v9fs_create_common(bufp, size, TAUTH); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, afid, &fc->params.tauth.afid); - v9fs_put_str(bufp, uname, &fc->params.tauth.uname); - v9fs_put_str(bufp, aname, &fc->params.tauth.aname); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} -#endif /* 0 */ - -struct v9fs_fcall * -v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); /* fid[4] afid[4] uname[s] aname[s] */ - fc = v9fs_create_common(bufp, size, TATTACH); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tattach.fid); - v9fs_put_int32(bufp, afid, &fc->params.tattach.afid); - v9fs_put_str(bufp, uname, &fc->params.tattach.uname); - v9fs_put_str(bufp, aname, &fc->params.tattach.aname); - - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tflush(u16 oldtag) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 2; /* oldtag[2] */ - fc = v9fs_create_common(bufp, size, TFLUSH); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname, - char **wnames) -{ - int i, size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - if (nwname > V9FS_MAXWELEM) { - dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM); - return NULL; - } - - size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */ - for (i = 0; i < nwname; i++) { - size += 2 + strlen(wnames[i]); /* wname[s] */ - } - - fc = v9fs_create_common(bufp, size, TWALK); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.twalk.fid); - v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid); - v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname); - for (i = 0; i < nwname; i++) { - v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]); - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 1; /* fid[4] mode[1] */ - fc = v9fs_create_common(bufp, size, TOPEN); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.topen.fid); - v9fs_put_int8(bufp, mode, &fc->params.topen.mode); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, - char *extension, int extended) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */ - if (extended) { - size += 2 + /* extension[s] */ - (extension == NULL ? 0 : strlen(extension)); - } - - fc = v9fs_create_common(bufp, size, TCREATE); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid); - v9fs_put_str(bufp, name, &fc->params.tcreate.name); - v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm); - v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode); - if (extended) - v9fs_put_str(bufp, extension, &fc->params.tcreate.extension); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */ - fc = v9fs_create_common(bufp, size, TREAD); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tread.fid); - v9fs_put_int64(bufp, offset, &fc->params.tread.offset); - v9fs_put_int32(bufp, count, &fc->params.tread.count); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, - const char __user * data) -{ - int size, err; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 8 + 4 + count; /* fid[4] offset[8] count[4] data[count] */ - fc = v9fs_create_common(bufp, size, TWRITE); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.twrite.fid); - v9fs_put_int64(bufp, offset, &fc->params.twrite.offset); - v9fs_put_int32(bufp, count, &fc->params.twrite.count); - err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data); - if (err) { - kfree(fc); - fc = ERR_PTR(err); - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tclunk(u32 fid) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = v9fs_create_common(bufp, size, TCLUNK); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tremove(u32 fid) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = v9fs_create_common(bufp, size, TREMOVE); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tremove.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_tstat(u32 fid) -{ - int size; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = v9fs_create_common(bufp, size, TSTAT); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.tstat.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} - -struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat, - int extended) -{ - int size, statsz; - struct v9fs_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - statsz = v9fs_size_wstat(wstat, extended); - size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */ - fc = v9fs_create_common(bufp, size, TWSTAT); - if (IS_ERR(fc)) - goto error; - - v9fs_put_int32(bufp, fid, &fc->params.twstat.fid); - buf_put_int16(bufp, statsz + 2); - v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } - error: - return fc; -} diff --git a/fs/9p/conv.h b/fs/9p/conv.h deleted file mode 100644 index dd5b6b1b610f..000000000000 --- a/fs/9p/conv.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * linux/fs/9p/conv.h - * - * 9P protocol conversion definitions. - * - * Copyright (C) 2005 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat, - int extended); -int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, - int extended); - -void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag); - -struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version); -struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname, - char *aname); -struct v9fs_fcall *v9fs_create_tflush(u16 oldtag); -struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname, - char **wnames); -struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode); -struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, - char *extension, int extended); -struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count); -struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, - const char __user *data); -struct v9fs_fcall *v9fs_create_tclunk(u32 fid); -struct v9fs_fcall *v9fs_create_tremove(u32 fid); -struct v9fs_fcall *v9fs_create_tstat(u32 fid); -struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat, - int extended); diff --git a/fs/9p/debug.h b/fs/9p/debug.h deleted file mode 100644 index 4228c0bb3c32..000000000000 --- a/fs/9p/debug.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * linux/fs/9p/debug.h - V9FS Debug Definitions - * - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#define DEBUG_ERROR (1<<0) -#define DEBUG_CURRENT (1<<1) -#define DEBUG_9P (1<<2) -#define DEBUG_VFS (1<<3) -#define DEBUG_CONV (1<<4) -#define DEBUG_MUX (1<<5) -#define DEBUG_TRANS (1<<6) -#define DEBUG_SLABS (1<<7) -#define DEBUG_FCALL (1<<8) - -#define DEBUG_DUMP_PKT 0 - -extern int v9fs_debug_level; - -#define dprintk(level, format, arg...) \ -do { \ - if((v9fs_debug_level & level)==level) \ - printk(KERN_NOTICE "-- %s (%d): " \ - format , __FUNCTION__, current->pid , ## arg); \ -} while(0) - -#define eprintk(level, format, arg...) \ -do { \ - printk(level "v9fs: %s (%d): " \ - format , __FUNCTION__, current->pid , ## arg); \ -} while(0) - -#if DEBUG_DUMP_PKT -static inline void dump_data(const unsigned char *data, unsigned int datalen) -{ - int i, n; - char buf[5*8]; - - n = 0; - i = 0; - while (i < datalen) { - n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]); - if (i%4 == 0) - n += snprintf(buf+n, sizeof(buf)-n, " "); - - if (i%16 == 0) { - dprintk(DEBUG_ERROR, "%s\n", buf); - n = 0; - } - } - - dprintk(DEBUG_ERROR, "%s\n", buf); -} -#else /* DEBUG_DUMP_PKT */ -static inline void dump_data(const unsigned char *data, unsigned int datalen) -{ - -} -#endif /* DEBUG_DUMP_PKT */ diff --git a/fs/9p/error.c b/fs/9p/error.c deleted file mode 100644 index 0d7fa4e08812..000000000000 --- a/fs/9p/error.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * linux/fs/9p/error.c - * - * Error string handling - * - * Plan 9 uses error strings, Unix uses error numbers. These functions - * try to help manage that and provide for dynamically adding error - * mappings. - * - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include - -#include -#include - -#include "debug.h" -#include "error.h" - -/** - * v9fs_error_init - preload - * @errstr: error string - * - */ - -int v9fs_error_init(void) -{ - struct errormap *c; - int bucket; - - /* initialize hash table */ - for (bucket = 0; bucket < ERRHASHSZ; bucket++) - INIT_HLIST_HEAD(&hash_errmap[bucket]); - - /* load initial error map into hash table */ - for (c = errmap; c->name != NULL; c++) { - c->namelen = strlen(c->name); - bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; - INIT_HLIST_NODE(&c->list); - hlist_add_head(&c->list, &hash_errmap[bucket]); - } - - return 1; -} - -/** - * errstr2errno - convert error string to error number - * @errstr: error string - * - */ - -int v9fs_errstr2errno(char *errstr, int len) -{ - int errno = 0; - struct hlist_node *p = NULL; - struct errormap *c = NULL; - int bucket = jhash(errstr, len, 0) % ERRHASHSZ; - - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { - if (c->namelen==len && !memcmp(c->name, errstr, len)) { - errno = c->val; - break; - } - } - - if (errno == 0) { - /* TODO: if error isn't found, add it dynamically */ - errstr[len] = 0; - printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__, - errstr); - errno = 1; - } - - return -errno; -} diff --git a/fs/9p/error.h b/fs/9p/error.h deleted file mode 100644 index 5f3ca522b316..000000000000 --- a/fs/9p/error.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * linux/fs/9p/error.h - * - * Huge Nasty Error Table - * - * Plan 9 uses error strings, Unix uses error numbers. This table tries to - * match UNIX strings and Plan 9 strings to unix error numbers. It is used - * to preload the dynamic error table which can also track user-specific error - * strings. - * - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include -#include - -struct errormap { - char *name; - int val; - - int namelen; - struct hlist_node list; -}; - -#define ERRHASHSZ 32 -static struct hlist_head hash_errmap[ERRHASHSZ]; - -/* FixMe - reduce to a reasonable size */ -static struct errormap errmap[] = { - {"Operation not permitted", EPERM}, - {"wstat prohibited", EPERM}, - {"No such file or directory", ENOENT}, - {"directory entry not found", ENOENT}, - {"file not found", ENOENT}, - {"Interrupted system call", EINTR}, - {"Input/output error", EIO}, - {"No such device or address", ENXIO}, - {"Argument list too long", E2BIG}, - {"Bad file descriptor", EBADF}, - {"Resource temporarily unavailable", EAGAIN}, - {"Cannot allocate memory", ENOMEM}, - {"Permission denied", EACCES}, - {"Bad address", EFAULT}, - {"Block device required", ENOTBLK}, - {"Device or resource busy", EBUSY}, - {"File exists", EEXIST}, - {"Invalid cross-device link", EXDEV}, - {"No such device", ENODEV}, - {"Not a directory", ENOTDIR}, - {"Is a directory", EISDIR}, - {"Invalid argument", EINVAL}, - {"Too many open files in system", ENFILE}, - {"Too many open files", EMFILE}, - {"Text file busy", ETXTBSY}, - {"File too large", EFBIG}, - {"No space left on device", ENOSPC}, - {"Illegal seek", ESPIPE}, - {"Read-only file system", EROFS}, - {"Too many links", EMLINK}, - {"Broken pipe", EPIPE}, - {"Numerical argument out of domain", EDOM}, - {"Numerical result out of range", ERANGE}, - {"Resource deadlock avoided", EDEADLK}, - {"File name too long", ENAMETOOLONG}, - {"No locks available", ENOLCK}, - {"Function not implemented", ENOSYS}, - {"Directory not empty", ENOTEMPTY}, - {"Too many levels of symbolic links", ELOOP}, - {"No message of desired type", ENOMSG}, - {"Identifier removed", EIDRM}, - {"No data available", ENODATA}, - {"Machine is not on the network", ENONET}, - {"Package not installed", ENOPKG}, - {"Object is remote", EREMOTE}, - {"Link has been severed", ENOLINK}, - {"Communication error on send", ECOMM}, - {"Protocol error", EPROTO}, - {"Bad message", EBADMSG}, - {"File descriptor in bad state", EBADFD}, - {"Streams pipe error", ESTRPIPE}, - {"Too many users", EUSERS}, - {"Socket operation on non-socket", ENOTSOCK}, - {"Message too long", EMSGSIZE}, - {"Protocol not available", ENOPROTOOPT}, - {"Protocol not supported", EPROTONOSUPPORT}, - {"Socket type not supported", ESOCKTNOSUPPORT}, - {"Operation not supported", EOPNOTSUPP}, - {"Protocol family not supported", EPFNOSUPPORT}, - {"Network is down", ENETDOWN}, - {"Network is unreachable", ENETUNREACH}, - {"Network dropped connection on reset", ENETRESET}, - {"Software caused connection abort", ECONNABORTED}, - {"Connection reset by peer", ECONNRESET}, - {"No buffer space available", ENOBUFS}, - {"Transport endpoint is already connected", EISCONN}, - {"Transport endpoint is not connected", ENOTCONN}, - {"Cannot send after transport endpoint shutdown", ESHUTDOWN}, - {"Connection timed out", ETIMEDOUT}, - {"Connection refused", ECONNREFUSED}, - {"Host is down", EHOSTDOWN}, - {"No route to host", EHOSTUNREACH}, - {"Operation already in progress", EALREADY}, - {"Operation now in progress", EINPROGRESS}, - {"Is a named type file", EISNAM}, - {"Remote I/O error", EREMOTEIO}, - {"Disk quota exceeded", EDQUOT}, -/* errors from fossil, vacfs, and u9fs */ - {"fid unknown or out of range", EBADF}, - {"permission denied", EACCES}, - {"file does not exist", ENOENT}, - {"authentication failed", ECONNREFUSED}, - {"bad offset in directory read", ESPIPE}, - {"bad use of fid", EBADF}, - {"wstat can't convert between files and directories", EPERM}, - {"directory is not empty", ENOTEMPTY}, - {"file exists", EEXIST}, - {"file already exists", EEXIST}, - {"file or directory already exists", EEXIST}, - {"fid already in use", EBADF}, - {"file in use", ETXTBSY}, - {"i/o error", EIO}, - {"file already open for I/O", ETXTBSY}, - {"illegal mode", EINVAL}, - {"illegal name", ENAMETOOLONG}, - {"not a directory", ENOTDIR}, - {"not a member of proposed group", EPERM}, - {"not owner", EACCES}, - {"only owner can change group in wstat", EACCES}, - {"read only file system", EROFS}, - {"no access to special file", EPERM}, - {"i/o count too large", EIO}, - {"unknown group", EINVAL}, - {"unknown user", EINVAL}, - {"bogus wstat buffer", EPROTO}, - {"exclusive use file already open", EAGAIN}, - {"corrupted directory entry", EIO}, - {"corrupted file entry", EIO}, - {"corrupted block label", EIO}, - {"corrupted meta data", EIO}, - {"illegal offset", EINVAL}, - {"illegal path element", ENOENT}, - {"root of file system is corrupted", EIO}, - {"corrupted super block", EIO}, - {"protocol botch", EPROTO}, - {"file system is full", ENOSPC}, - {"file is in use", EAGAIN}, - {"directory entry is not allocated", ENOENT}, - {"file is read only", EROFS}, - {"file has been removed", EIDRM}, - {"only support truncation to zero length", EPERM}, - {"cannot remove root", EPERM}, - {"file too big", EFBIG}, - {"venti i/o error", EIO}, - /* these are not errors */ - {"u9fs rhostsauth: no authentication required", 0}, - {"u9fs authnone: no authentication required", 0}, - {NULL, -1} -}; - -extern int v9fs_error_init(void); diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c deleted file mode 100644 index dc336a67592f..000000000000 --- a/fs/9p/fcall.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * linux/fs/9p/fcall.c - * - * This file contains functions to perform synchronous 9P calls - * - * Copyright (C) 2004 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include -#include -#include -#include -#include - -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "conv.h" -#include "mux.h" - -/** - * v9fs_t_version - negotiate protocol parameters with sever - * @v9ses: 9P2000 session information - * @msize: requested max size packet - * @version: requested version.extension string - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, - char *version, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); - tc = v9fs_create_tversion(msize, version); - - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_attach - mount the server - * @v9ses: 9P2000 session information - * @uname: user name doing the attach - * @aname: remote name being attached to - * @fid: mount fid to attatch to root node - * @afid: authentication fid (in this case result key) - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, - u32 fid, u32 afid, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall* tc; - - dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, - aname, fid, afid); - - tc = v9fs_create_tattach(fid, afid, uname, aname); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, - struct v9fs_fcall *rc, int err) -{ - int fid, id; - struct v9fs_session_info *v9ses; - - id = 0; - fid = tc->params.tclunk.fid; - if (rc) - id = rc->id; - - kfree(tc); - kfree(rc); - if (id == RCLUNK) { - v9ses = a; - v9fs_put_idpool(fid, &v9ses->fidpool); - } -} - -/** - * v9fs_t_clunk - release a fid (finish a transaction) - * @v9ses: 9P2000 session information - * @fid: fid to release - * @fcall: pointer to response fcall pointer - * - */ - -int -v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - rc = NULL; - tc = v9fs_create_tclunk(fid); - if (!IS_ERR(tc)) - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - else - ret = PTR_ERR(tc); - - if (ret) - dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret); - - v9fs_t_clunk_cb(v9ses, tc, rc, ret); - return ret; -} - -#if 0 -/** - * v9fs_v9fs_t_flush - flush a pending transaction - * @v9ses: 9P2000 session information - * @tag: tag to release - * - */ -int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "oldtag %d\n", oldtag); - - tc = v9fs_create_tflush(oldtag); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, NULL); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} -#endif - -/** - * v9fs_t_stat - read a file's meta-data - * @v9ses: 9P2000 session information - * @fid: fid pointing to file or directory to get info about - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - ret = -ENOMEM; - tc = v9fs_create_tstat(fid); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_wstat - write a file's meta-data - * @v9ses: 9P2000 session information - * @fid: fid pointing to file or directory to write info about - * @stat: metadata - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_wstat *wstat, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - tc = v9fs_create_twstat(fid, wstat, v9ses->extended); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_walk - walk a fid to a new file or directory - * @v9ses: 9P2000 session information - * @fid: fid to walk - * @newfid: new fid (for clone operations) - * @name: path to walk fid to - * @fcall: pointer to response fcall - * - */ - -/* TODO: support multiple walk */ - -int -v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, - char *name, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - int nwname; - - dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); - - if (name) - nwname = 1; - else - nwname = 0; - - tc = v9fs_create_twalk(fid, newfid, nwname, &name); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_open - open a file - * - * @v9ses - 9P2000 session information - * @fid - fid to open - * @mode - mode to open file (R, RW, etc) - * @fcall - pointer to response fcall - * - */ - -int -v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, - struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); - - tc = v9fs_create_topen(fid, mode); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_remove - remove a file or directory - * @v9ses: 9P2000 session information - * @fid: fid to remove - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d\n", fid); - - tc = v9fs_create_tremove(fid); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_create - create a file or directory - * @v9ses: 9P2000 session information - * @fid: fid to create - * @name: name of the file or directory to create - * @perm: permissions to create with - * @mode: mode to open file (R, RW, etc) - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, u32 perm, - u8 mode, char *extension, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc; - - dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", - fid, name, perm, mode); - - tc = v9fs_create_tcreate(fid, name, perm, mode, extension, - v9ses->extended); - - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_read - read data - * @v9ses: 9P2000 session information - * @fid: fid to read from - * @offset: offset to start read at - * @count: how many bytes to read - * @fcall: pointer to response fcall (with data) - * - */ - -int -v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, - (long long unsigned) offset, count); - - tc = v9fs_create_tread(fid, offset, count); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - if (!ret) - ret = rc->params.rread.count; - if (rcp) - *rcp = rc; - else - kfree(rc); - - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - -/** - * v9fs_t_write - write data - * @v9ses: 9P2000 session information - * @fid: fid to write to - * @offset: offset to start write at - * @count: how many bytes to write - * @fcall: pointer to response fcall - * - */ - -int -v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, - const char __user *data, struct v9fs_fcall **rcp) -{ - int ret; - struct v9fs_fcall *tc, *rc; - - dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, - (long long unsigned) offset, count); - - tc = v9fs_create_twrite(fid, offset, count, data); - if (!IS_ERR(tc)) { - ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - - if (!ret) - ret = rc->params.rwrite.count; - if (rcp) - *rcp = rc; - else - kfree(rc); - - kfree(tc); - } else - ret = PTR_ERR(tc); - - return ret; -} - diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c deleted file mode 100644 index 34b96114a28d..000000000000 --- a/fs/9p/fcprint.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * linux/fs/9p/fcprint.c - * - * Print 9P call. - * - * Copyright (C) 2005 by Latchesar Ionkov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ -#include -#include -#include -#include - -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "mux.h" - -static int -v9fs_printqid(char *buf, int buflen, struct v9fs_qid *q) -{ - int n; - char b[10]; - - n = 0; - if (q->type & V9FS_QTDIR) - b[n++] = 'd'; - if (q->type & V9FS_QTAPPEND) - b[n++] = 'a'; - if (q->type & V9FS_QTAUTH) - b[n++] = 'A'; - if (q->type & V9FS_QTEXCL) - b[n++] = 'l'; - if (q->type & V9FS_QTTMP) - b[n++] = 't'; - if (q->type & V9FS_QTSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "(%.16llx %x %s)", (long long int) q->path, - q->version, b); -} - -static int -v9fs_printperm(char *buf, int buflen, int perm) -{ - int n; - char b[15]; - - n = 0; - if (perm & V9FS_DMDIR) - b[n++] = 'd'; - if (perm & V9FS_DMAPPEND) - b[n++] = 'a'; - if (perm & V9FS_DMAUTH) - b[n++] = 'A'; - if (perm & V9FS_DMEXCL) - b[n++] = 'l'; - if (perm & V9FS_DMTMP) - b[n++] = 't'; - if (perm & V9FS_DMDEVICE) - b[n++] = 'D'; - if (perm & V9FS_DMSOCKET) - b[n++] = 'S'; - if (perm & V9FS_DMNAMEDPIPE) - b[n++] = 'P'; - if (perm & V9FS_DMSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "%s%03o", b, perm&077); -} - -static int -v9fs_printstat(char *buf, int buflen, struct v9fs_stat *st, int extended) -{ - int n; - - n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len, - st->name.str, st->uid.len, st->uid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid); - - n += scnprintf(buf+n, buflen-n, " q "); - n += v9fs_printqid(buf+n, buflen-n, &st->qid); - n += scnprintf(buf+n, buflen-n, " m "); - n += v9fs_printperm(buf+n, buflen-n, st->mode); - n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld", - st->atime, st->mtime, (long long int) st->length); - - if (extended) - n += scnprintf(buf+n, buflen-n, " ext '%.*s'", - st->extension.len, st->extension.str); - - return n; -} - -static int -v9fs_dumpdata(char *buf, int buflen, u8 *data, int datalen) -{ - int i, n; - - i = n = 0; - while (i < datalen) { - n += scnprintf(buf + n, buflen - n, "%02x", data[i]); - if (i%4 == 3) - n += scnprintf(buf + n, buflen - n, " "); - if (i%32 == 31) - n += scnprintf(buf + n, buflen - n, "\n"); - - i++; - } - n += scnprintf(buf + n, buflen - n, "\n"); - - return n; -} - -static int -v9fs_printdata(char *buf, int buflen, u8 *data, int datalen) -{ - return v9fs_dumpdata(buf, buflen, data, datalen<16?datalen:16); -} - -int -v9fs_printfcall(char *buf, int buflen, struct v9fs_fcall *fc, int extended) -{ - int i, ret, type, tag; - - if (!fc) - return scnprintf(buf, buflen, ""); - - type = fc->id; - tag = fc->tag; - - ret = 0; - switch (type) { - case TVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Tversion tag %u msize %u version '%.*s'", tag, - fc->params.tversion.msize, fc->params.tversion.version.len, - fc->params.tversion.version.str); - break; - - case RVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Rversion tag %u msize %u version '%.*s'", tag, - fc->params.rversion.msize, fc->params.rversion.version.len, - fc->params.rversion.version.str); - break; - - case TAUTH: - ret += scnprintf(buf+ret, buflen-ret, - "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag, - fc->params.tauth.afid, fc->params.tauth.uname.len, - fc->params.tauth.uname.str, fc->params.tauth.aname.len, - fc->params.tauth.aname.str); - break; - - case RAUTH: - ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag); - v9fs_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid); - break; - - case TATTACH: - ret += scnprintf(buf+ret, buflen-ret, - "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", - tag, fc->params.tattach.fid, fc->params.tattach.afid, - fc->params.tattach.uname.len, fc->params.tattach.uname.str, - fc->params.tattach.aname.len, fc->params.tattach.aname.str); - break; - - case RATTACH: - ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", tag); - v9fs_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid); - break; - - case RERROR: - ret += scnprintf(buf+ret, buflen-ret, "Rerror tag %u ename '%.*s'", - tag, fc->params.rerror.error.len, - fc->params.rerror.error.str); - if (extended) - ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n", - fc->params.rerror.errno); - break; - - case TFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u", - tag, fc->params.tflush.oldtag); - break; - - case RFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag); - break; - - case TWALK: - ret += scnprintf(buf+ret, buflen-ret, - "Twalk tag %u fid %d newfid %d nwname %d", tag, - fc->params.twalk.fid, fc->params.twalk.newfid, - fc->params.twalk.nwname); - for(i = 0; i < fc->params.twalk.nwname; i++) - ret += scnprintf(buf+ret, buflen-ret," '%.*s'", - fc->params.twalk.wnames[i].len, - fc->params.twalk.wnames[i].str); - break; - - case RWALK: - ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d", - tag, fc->params.rwalk.nwqid); - for(i = 0; i < fc->params.rwalk.nwqid; i++) - ret += v9fs_printqid(buf+ret, buflen-ret, - &fc->params.rwalk.wqids[i]); - break; - - case TOPEN: - ret += scnprintf(buf+ret, buflen-ret, - "Topen tag %u fid %d mode %d", tag, - fc->params.topen.fid, fc->params.topen.mode); - break; - - case ROPEN: - ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag); - ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid); - ret += scnprintf(buf+ret, buflen-ret," iounit %d", - fc->params.ropen.iounit); - break; - - case TCREATE: - ret += scnprintf(buf+ret, buflen-ret, - "Tcreate tag %u fid %d name '%.*s' perm ", tag, - fc->params.tcreate.fid, fc->params.tcreate.name.len, - fc->params.tcreate.name.str); - - ret += v9fs_printperm(buf+ret, buflen-ret, fc->params.tcreate.perm); - ret += scnprintf(buf+ret, buflen-ret, " mode %d", - fc->params.tcreate.mode); - break; - - case RCREATE: - ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag); - ret += v9fs_printqid(buf+ret, buflen-ret, &fc->params.rcreate.qid); - ret += scnprintf(buf+ret, buflen-ret, " iounit %d", - fc->params.rcreate.iounit); - break; - - case TREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Tread tag %u fid %d offset %lld count %u", tag, - fc->params.tread.fid, - (long long int) fc->params.tread.offset, - fc->params.tread.count); - break; - - case RREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Rread tag %u count %u data ", tag, - fc->params.rread.count); - ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.rread.data, - fc->params.rread.count); - break; - - case TWRITE: - ret += scnprintf(buf+ret, buflen-ret, - "Twrite tag %u fid %d offset %lld count %u data ", - tag, fc->params.twrite.fid, - (long long int) fc->params.twrite.offset, - fc->params.twrite.count); - ret += v9fs_printdata(buf+ret, buflen-ret, fc->params.twrite.data, - fc->params.twrite.count); - break; - - case RWRITE: - ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u", - tag, fc->params.rwrite.count); - break; - - case TCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d", - tag, fc->params.tclunk.fid); - break; - - case RCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag); - break; - - case TREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d", - tag, fc->params.tremove.fid); - break; - - case RREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag); - break; - - case TSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d", - tag, fc->params.tstat.fid); - break; - - case RSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag); - ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat, - extended); - break; - - case TWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ", - tag, fc->params.twstat.fid); - ret += v9fs_printstat(buf+ret, buflen-ret, &fc->params.twstat.stat, - extended); - break; - - case RWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag); - break; - - default: - ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type); - break; - } - - return ret; -} diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 90419715c7e9..08fa320b7e6d 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -26,10 +26,10 @@ #include #include #include +#include +#include -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -40,67 +40,29 @@ * */ -int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry) +int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) { - struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; - dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid, - dentry->d_iname, dentry); - if (dentry->d_fsdata == NULL) { - dentry->d_fsdata = - kmalloc(sizeof(struct list_head), GFP_KERNEL); - if (dentry->d_fsdata == NULL) { - dprintk(DEBUG_ERROR, "Out of memory\n"); - return -ENOMEM; - } - fid_list = (struct list_head *)dentry->d_fsdata; - INIT_LIST_HEAD(fid_list); /* Initialize list head */ - } + struct v9fs_dentry *dent; - fid->uid = current->uid; - list_add(&fid->list, fid_list); - return 0; -} + P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n", + fid->fid, dentry->d_iname); -/** - * v9fs_fid_create - allocate a FID structure - * @dentry - dentry to link newly created fid to - * - */ - -struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *v9ses, int fid) -{ - struct v9fs_fid *new; + dent = dentry->d_fsdata; + if (!dent) { + dent = kmalloc(sizeof(struct v9fs_dentry), GFP_KERNEL); + if (!dent) + return -ENOMEM; - dprintk(DEBUG_9P, "fid create fid %d\n", fid); - new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); - if (new == NULL) { - dprintk(DEBUG_ERROR, "Out of Memory\n"); - return ERR_PTR(-ENOMEM); + spin_lock_init(&dent->lock); + INIT_LIST_HEAD(&dent->fidlist); + dentry->d_fsdata = dent; } - new->fid = fid; - new->v9ses = v9ses; - new->fidopen = 0; - new->fidclunked = 0; - new->iounit = 0; - new->rdir_pos = 0; - new->rdir_fcall = NULL; - init_MUTEX(&new->lock); - INIT_LIST_HEAD(&new->list); - - return new; -} - -/** - * v9fs_fid_destroy - deallocate a FID structure - * @fid: fid to destroy - * - */ + spin_lock(&dent->lock); + list_add(&fid->dlist, &dent->fidlist); + spin_unlock(&dent->lock); -void v9fs_fid_destroy(struct v9fs_fid *fid) -{ - list_del(&fid->list); - kfree(fid); + return 0; } /** @@ -114,30 +76,42 @@ void v9fs_fid_destroy(struct v9fs_fid *fid) * */ -struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry) +struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) { - struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; - struct v9fs_fid *return_fid = NULL; - - dprintk(DEBUG_9P, " dentry: %s (%p)\n", dentry->d_iname, dentry); - - if (fid_list) - return_fid = list_entry(fid_list->next, struct v9fs_fid, list); + struct v9fs_dentry *dent; + struct p9_fid *fid; + + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + dent = dentry->d_fsdata; + if (dent) + fid = list_entry(dent->fidlist.next, struct p9_fid, dlist); + else + fid = ERR_PTR(-EBADF); + + P9_DPRINTK(P9_DEBUG_VFS, " fid: %p\n", fid); + return fid; +} - if (!return_fid) { - dprintk(DEBUG_ERROR, "Couldn't find a fid in dentry\n"); - return_fid = ERR_PTR(-EBADF); +struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry) +{ + struct p9_fid *fid; + struct v9fs_dentry *dent; + + dent = dentry->d_fsdata; + fid = v9fs_fid_lookup(dentry); + if (!IS_ERR(fid)) { + spin_lock(&dent->lock); + list_del(&fid->dlist); + spin_unlock(&dent->lock); } - if(down_interruptible(&return_fid->lock)) - return ERR_PTR(-EINTR); - - return return_fid; + return fid; } + /** * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and - * release it + * release it * @dentry: dentry to look for fid in * * find a fid in the dentry and then clone to a new private fid @@ -146,49 +120,15 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry) * */ -struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry) +struct p9_fid *v9fs_fid_clone(struct dentry *dentry) { - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *base_fid, *new_fid = ERR_PTR(-EBADF); - struct v9fs_fcall *fcall = NULL; - int fid, err; - - base_fid = v9fs_fid_lookup(dentry); - - if(IS_ERR(base_fid)) - return base_fid; - - if(base_fid) { /* clone fid */ - fid = v9fs_get_idpool(&v9ses->fidpool); - if (fid < 0) { - eprintk(KERN_WARNING, "newfid fails!\n"); - new_fid = ERR_PTR(-ENOSPC); - goto Release_Fid; - } - - err = v9fs_t_walk(v9ses, base_fid->fid, fid, NULL, &fcall); - if (err < 0) { - dprintk(DEBUG_ERROR, "clone walk didn't work\n"); - v9fs_put_idpool(fid, &v9ses->fidpool); - new_fid = ERR_PTR(err); - goto Free_Fcall; - } - new_fid = v9fs_fid_create(v9ses, fid); - if (new_fid == NULL) { - dprintk(DEBUG_ERROR, "out of memory\n"); - new_fid = ERR_PTR(-ENOMEM); - } -Free_Fcall: - kfree(fcall); - } + struct p9_fid *ofid, *fid; -Release_Fid: - up(&base_fid->lock); - return new_fid; -} + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + ofid = v9fs_fid_lookup(dentry); + if (IS_ERR(ofid)) + return ofid; -void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid) -{ - v9fs_t_clunk(v9ses, fid->fid); - v9fs_fid_destroy(fid); + fid = p9_client_walk(ofid, 0, NULL, 1); + return fid; } diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 48fc170c26c8..47a0ba742872 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -22,41 +22,12 @@ #include -#define FID_OP 0 -#define FID_WALK 1 -#define FID_CREATE 2 - -struct v9fs_fid { - struct list_head list; /* list of fids associated with a dentry */ - struct list_head active; /* XXX - debug */ - - struct semaphore lock; - - u32 fid; - unsigned char fidopen; /* set when fid is opened */ - unsigned char fidclunked; /* set when fid has already been clunked */ - - struct v9fs_qid qid; - u32 iounit; - - /* readdir stuff */ - int rdir_fpos; - loff_t rdir_pos; - struct v9fs_fcall *rdir_fcall; - - /* management stuff */ - uid_t uid; /* user associated with this fid */ - - /* private data */ - struct file *filp; /* backpointer to File struct for open files */ - struct v9fs_session_info *v9ses; /* session info for this FID */ +struct v9fs_dentry { + spinlock_t lock; /* protect fidlist */ + struct list_head fidlist; }; -struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry); -struct v9fs_fid *v9fs_fid_get_created(struct dentry *); -void v9fs_fid_destroy(struct v9fs_fid *fid); -struct v9fs_fid *v9fs_fid_create(struct v9fs_session_info *, int fid); -int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry); -struct v9fs_fid *v9fs_fid_clone(struct dentry *dentry); -void v9fs_fid_clunk(struct v9fs_session_info *v9ses, struct v9fs_fid *fid); - +struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); +struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry); +struct p9_fid *v9fs_fid_clone(struct dentry *dentry); +int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); diff --git a/fs/9p/mux.c b/fs/9p/mux.c deleted file mode 100644 index c783874a9caf..000000000000 --- a/fs/9p/mux.c +++ /dev/null @@ -1,1033 +0,0 @@ -/* - * linux/fs/9p/mux.c - * - * Protocol Multiplexer - * - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2004-2005 by Latchesar Ionkov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "debug.h" -#include "v9fs.h" -#include "9p.h" -#include "conv.h" -#include "transport.h" -#include "mux.h" - -#define ERREQFLUSH 1 -#define SCHED_TIMEOUT 10 -#define MAXPOLLWADDR 2 - -enum { - Rworksched = 1, /* read work scheduled or running */ - Rpending = 2, /* can read */ - Wworksched = 4, /* write work scheduled or running */ - Wpending = 8, /* can write */ -}; - -enum { - None, - Flushing, - Flushed, -}; - -struct v9fs_mux_poll_task; - -struct v9fs_req { - spinlock_t lock; - int tag; - struct v9fs_fcall *tcall; - struct v9fs_fcall *rcall; - int err; - v9fs_mux_req_callback cb; - void *cba; - int flush; - struct list_head req_list; -}; - -struct v9fs_mux_data { - spinlock_t lock; - struct list_head mux_list; - struct v9fs_mux_poll_task *poll_task; - int msize; - unsigned char *extended; - struct v9fs_transport *trans; - struct v9fs_idpool tagpool; - int err; - wait_queue_head_t equeue; - struct list_head req_list; - struct list_head unsent_req_list; - struct v9fs_fcall *rcall; - int rpos; - char *rbuf; - int wpos; - int wsize; - char *wbuf; - wait_queue_t poll_wait[MAXPOLLWADDR]; - wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; - poll_table pt; - struct work_struct rq; - struct work_struct wq; - unsigned long wsched; -}; - -struct v9fs_mux_poll_task { - struct task_struct *task; - struct list_head mux_list; - int muxnum; -}; - -struct v9fs_mux_rpc { - struct v9fs_mux_data *m; - int err; - struct v9fs_fcall *tcall; - struct v9fs_fcall *rcall; - wait_queue_head_t wqueue; -}; - -static int v9fs_poll_proc(void *); -static void v9fs_read_work(struct work_struct *work); -static void v9fs_write_work(struct work_struct *work); -static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, - poll_table * p); -static u16 v9fs_mux_get_tag(struct v9fs_mux_data *); -static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16); - -static DEFINE_MUTEX(v9fs_mux_task_lock); -static struct workqueue_struct *v9fs_mux_wq; - -static int v9fs_mux_num; -static int v9fs_mux_poll_task_num; -static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100]; - -int v9fs_mux_global_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) - v9fs_mux_poll_tasks[i].task = NULL; - - v9fs_mux_wq = create_workqueue("v9fs"); - if (!v9fs_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - return 0; -} - -void v9fs_mux_global_exit(void) -{ - destroy_workqueue(v9fs_mux_wq); -} - -/** - * v9fs_mux_calc_poll_procs - calculates the number of polling procs - * based on the number of mounted v9fs filesystems. - * - * The current implementation returns sqrt of the number of mounts. - */ -static int v9fs_mux_calc_poll_procs(int muxnum) -{ - int n; - - if (v9fs_mux_poll_task_num) - n = muxnum / v9fs_mux_poll_task_num + - (muxnum % v9fs_mux_poll_task_num ? 1 : 0); - else - n = 1; - - if (n > ARRAY_SIZE(v9fs_mux_poll_tasks)) - n = ARRAY_SIZE(v9fs_mux_poll_tasks); - - return n; -} - -static int v9fs_mux_poll_start(struct v9fs_mux_data *m) -{ - int i, n; - struct v9fs_mux_poll_task *vpt, *vptlast; - struct task_struct *pproc; - - dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num, - v9fs_mux_poll_task_num); - mutex_lock(&v9fs_mux_task_lock); - - n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1); - if (n > v9fs_mux_poll_task_num) { - for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) { - if (v9fs_mux_poll_tasks[i].task == NULL) { - vpt = &v9fs_mux_poll_tasks[i]; - dprintk(DEBUG_MUX, "create proc %p\n", vpt); - pproc = kthread_create(v9fs_poll_proc, vpt, - "v9fs-poll"); - - if (!IS_ERR(pproc)) { - vpt->task = pproc; - INIT_LIST_HEAD(&vpt->mux_list); - vpt->muxnum = 0; - v9fs_mux_poll_task_num++; - wake_up_process(vpt->task); - } - break; - } - } - - if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) - dprintk(DEBUG_ERROR, "warning: no free poll slots\n"); - } - - n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num + - ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0); - - vptlast = NULL; - for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) { - vpt = &v9fs_mux_poll_tasks[i]; - if (vpt->task != NULL) { - vptlast = vpt; - if (vpt->muxnum < n) { - dprintk(DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vpt->mux_list); - vpt->muxnum++; - m->poll_task = vpt; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, v9fs_pollwait); - break; - } - } - } - - if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) { - if (vptlast == NULL) - return -ENOMEM; - - dprintk(DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vptlast->mux_list); - vptlast->muxnum++; - m->poll_task = vptlast; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, v9fs_pollwait); - } - - v9fs_mux_num++; - mutex_unlock(&v9fs_mux_task_lock); - - return 0; -} - -static void v9fs_mux_poll_stop(struct v9fs_mux_data *m) -{ - int i; - struct v9fs_mux_poll_task *vpt; - - mutex_lock(&v9fs_mux_task_lock); - vpt = m->poll_task; - list_del(&m->mux_list); - for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (m->poll_waddr[i] != NULL) { - remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); - m->poll_waddr[i] = NULL; - } - } - vpt->muxnum--; - if (!vpt->muxnum) { - dprintk(DEBUG_MUX, "destroy proc %p\n", vpt); - kthread_stop(vpt->task); - vpt->task = NULL; - v9fs_mux_poll_task_num--; - } - v9fs_mux_num--; - mutex_unlock(&v9fs_mux_task_lock); -} - -/** - * v9fs_mux_init - allocate and initialize the per-session mux data - * Creates the polling task if this is the first session. - * - * @trans - transport structure - * @msize - maximum message size - * @extended - pointer to the extended flag - */ -struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, - unsigned char *extended) -{ - int i, n; - struct v9fs_mux_data *m, *mtmp; - - dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize); - m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&m->lock); - INIT_LIST_HEAD(&m->mux_list); - m->msize = msize; - m->extended = extended; - m->trans = trans; - idr_init(&m->tagpool.pool); - init_MUTEX(&m->tagpool.lock); - m->err = 0; - init_waitqueue_head(&m->equeue); - INIT_LIST_HEAD(&m->req_list); - INIT_LIST_HEAD(&m->unsent_req_list); - m->rcall = NULL; - m->rpos = 0; - m->rbuf = NULL; - m->wpos = m->wsize = 0; - m->wbuf = NULL; - INIT_WORK(&m->rq, v9fs_read_work); - INIT_WORK(&m->wq, v9fs_write_work); - m->wsched = 0; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - m->poll_task = NULL; - n = v9fs_mux_poll_start(m); - if (n) - return ERR_PTR(n); - - n = trans->poll(trans, &m->pt); - if (n & POLLIN) { - dprintk(DEBUG_MUX, "mux %p can read\n", m); - set_bit(Rpending, &m->wsched); - } - - if (n & POLLOUT) { - dprintk(DEBUG_MUX, "mux %p can write\n", m); - set_bit(Wpending, &m->wsched); - } - - for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (IS_ERR(m->poll_waddr[i])) { - v9fs_mux_poll_stop(m); - mtmp = (void *)m->poll_waddr; /* the error code */ - kfree(m); - m = mtmp; - break; - } - } - - return m; -} - -/** - * v9fs_mux_destroy - cancels all pending requests and frees mux resources - */ -void v9fs_mux_destroy(struct v9fs_mux_data *m) -{ - dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m, - m->mux_list.prev, m->mux_list.next); - v9fs_mux_cancel(m, -ECONNRESET); - - if (!list_empty(&m->req_list)) { - /* wait until all processes waiting on this session exit */ - dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n", - m); - wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); - dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m, - list_empty(&m->req_list)); - } - - v9fs_mux_poll_stop(m); - m->trans = NULL; - - kfree(m); -} - -/** - * v9fs_pollwait - called by files poll operation to add v9fs-poll task - * to files wait queue - */ -static void -v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, - poll_table * p) -{ - int i; - struct v9fs_mux_data *m; - - m = container_of(p, struct v9fs_mux_data, pt); - for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) - if (m->poll_waddr[i] == NULL) - break; - - if (i >= ARRAY_SIZE(m->poll_waddr)) { - dprintk(DEBUG_ERROR, "not enough wait_address slots\n"); - return; - } - - m->poll_waddr[i] = wait_address; - - if (!wait_address) { - dprintk(DEBUG_ERROR, "no wait_address\n"); - m->poll_waddr[i] = ERR_PTR(-EIO); - return; - } - - init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); - add_wait_queue(wait_address, &m->poll_wait[i]); -} - -/** - * v9fs_poll_mux - polls a mux and schedules read or write works if necessary - */ -static void v9fs_poll_mux(struct v9fs_mux_data *m) -{ - int n; - - if (m->err < 0) - return; - - n = m->trans->poll(m->trans, NULL); - if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { - dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n); - if (n >= 0) - n = -ECONNRESET; - v9fs_mux_cancel(m, n); - } - - if (n & POLLIN) { - set_bit(Rpending, &m->wsched); - dprintk(DEBUG_MUX, "mux %p can read\n", m); - if (!test_and_set_bit(Rworksched, &m->wsched)) { - dprintk(DEBUG_MUX, "schedule read work mux %p\n", m); - queue_work(v9fs_mux_wq, &m->rq); - } - } - - if (n & POLLOUT) { - set_bit(Wpending, &m->wsched); - dprintk(DEBUG_MUX, "mux %p can write\n", m); - if ((m->wsize || !list_empty(&m->unsent_req_list)) - && !test_and_set_bit(Wworksched, &m->wsched)) { - dprintk(DEBUG_MUX, "schedule write work mux %p\n", m); - queue_work(v9fs_mux_wq, &m->wq); - } - } -} - -/** - * v9fs_poll_proc - polls all v9fs transports for new events and queues - * the appropriate work to the work queue - */ -static int v9fs_poll_proc(void *a) -{ - struct v9fs_mux_data *m, *mtmp; - struct v9fs_mux_poll_task *vpt; - - vpt = a; - dprintk(DEBUG_MUX, "start %p %p\n", current, vpt); - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - - list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { - v9fs_poll_mux(m); - } - - dprintk(DEBUG_MUX, "sleeping...\n"); - schedule_timeout(SCHED_TIMEOUT * HZ); - } - - __set_current_state(TASK_RUNNING); - dprintk(DEBUG_MUX, "finish\n"); - return 0; -} - -/** - * v9fs_write_work - called when a transport can send some data - */ -static void v9fs_write_work(struct work_struct *work) -{ - int n, err; - struct v9fs_mux_data *m; - struct v9fs_req *req; - - m = container_of(work, struct v9fs_mux_data, wq); - - if (m->err < 0) { - clear_bit(Wworksched, &m->wsched); - return; - } - - if (!m->wsize) { - if (list_empty(&m->unsent_req_list)) { - clear_bit(Wworksched, &m->wsched); - return; - } - - spin_lock(&m->lock); -again: - req = list_entry(m->unsent_req_list.next, struct v9fs_req, - req_list); - list_move_tail(&req->req_list, &m->req_list); - if (req->err == ERREQFLUSH) - goto again; - - m->wbuf = req->tcall->sdata; - m->wsize = req->tcall->size; - m->wpos = 0; - dump_data(m->wbuf, m->wsize); - spin_unlock(&m->lock); - } - - dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); - clear_bit(Wpending, &m->wsched); - err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); - dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Wworksched, &m->wsched); - return; - } - - if (err <= 0) - goto error; - - m->wpos += err; - if (m->wpos == m->wsize) - m->wpos = m->wsize = 0; - - if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { - if (test_and_clear_bit(Wpending, &m->wsched)) - n = POLLOUT; - else - n = m->trans->poll(m->trans, NULL); - - if (n & POLLOUT) { - dprintk(DEBUG_MUX, "schedule write work mux %p\n", m); - queue_work(v9fs_mux_wq, &m->wq); - } else - clear_bit(Wworksched, &m->wsched); - } else - clear_bit(Wworksched, &m->wsched); - - return; - - error: - v9fs_mux_cancel(m, err); - clear_bit(Wworksched, &m->wsched); -} - -static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) -{ - int ecode; - struct v9fs_str *ename; - - if (!req->err && req->rcall->id == RERROR) { - ecode = req->rcall->params.rerror.errno; - ename = &req->rcall->params.rerror.error; - - dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str); - - if (*m->extended) - req->err = -ecode; - - if (!req->err) { - req->err = v9fs_errstr2errno(ename->str, ename->len); - - if (!req->err) { /* string match failed */ - PRINT_FCALL_ERROR("unknown error", req->rcall); - } - - if (!req->err) - req->err = -ESERVERFAULT; - } - } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { - dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n", - req->tcall->id + 1, req->rcall->id); - if (!req->err) - req->err = -EIO; - } -} - -/** - * v9fs_read_work - called when there is some data to be read from a transport - */ -static void v9fs_read_work(struct work_struct *work) -{ - int n, err; - struct v9fs_mux_data *m; - struct v9fs_req *req, *rptr, *rreq; - struct v9fs_fcall *rcall; - char *rbuf; - - m = container_of(work, struct v9fs_mux_data, rq); - - if (m->err < 0) - return; - - rcall = NULL; - dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); - - if (!m->rcall) { - m->rcall = - kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; - } - - m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall); - m->rpos = 0; - } - - clear_bit(Rpending, &m->wsched); - err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); - dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Rworksched, &m->wsched); - return; - } - - if (err <= 0) - goto error; - - m->rpos += err; - while (m->rpos > 4) { - n = le32_to_cpu(*(__le32 *) m->rbuf); - if (n >= m->msize) { - dprintk(DEBUG_ERROR, - "requested packet size too big: %d\n", n); - err = -EIO; - goto error; - } - - if (m->rpos < n) - break; - - dump_data(m->rbuf, n); - err = - v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended); - if (err < 0) { - goto error; - } - - if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { - char buf[150]; - - v9fs_printfcall(buf, sizeof(buf), m->rcall, - *m->extended); - printk(KERN_NOTICE ">>> %p %s\n", m, buf); - } - - rcall = m->rcall; - rbuf = m->rbuf; - if (m->rpos > n) { - m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize, - GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; - } - - m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall); - memmove(m->rbuf, rbuf + n, m->rpos - n); - m->rpos -= n; - } else { - m->rcall = NULL; - m->rbuf = NULL; - m->rpos = 0; - } - - dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id, - rcall->tag); - - req = NULL; - spin_lock(&m->lock); - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == rcall->tag) { - req = rreq; - if (req->flush != Flushing) - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); - - if (req) { - req->rcall = rcall; - process_request(m, req); - - if (req->flush != Flushing) { - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - - wake_up(&m->equeue); - } - } else { - if (err >= 0 && rcall->id != RFLUSH) - dprintk(DEBUG_ERROR, - "unexpected response mux %p id %d tag %d\n", - m, rcall->id, rcall->tag); - kfree(rcall); - } - } - - if (!list_empty(&m->req_list)) { - if (test_and_clear_bit(Rpending, &m->wsched)) - n = POLLIN; - else - n = m->trans->poll(m->trans, NULL); - - if (n & POLLIN) { - dprintk(DEBUG_MUX, "schedule read work mux %p\n", m); - queue_work(v9fs_mux_wq, &m->rq); - } else - clear_bit(Rworksched, &m->wsched); - } else - clear_bit(Rworksched, &m->wsched); - - return; - - error: - v9fs_mux_cancel(m, err); - clear_bit(Rworksched, &m->wsched); -} - -/** - * v9fs_send_request - send 9P request - * The function can sleep until the request is scheduled for sending. - * The function can be interrupted. Return from the function is not - * a guarantee that the request is sent successfully. Can return errors - * that can be retrieved by PTR_ERR macros. - * - * @m: mux data - * @tc: request to be sent - * @cb: callback function to call when response is received - * @cba: parameter to pass to the callback function - */ -static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, - struct v9fs_fcall *tc, - v9fs_mux_req_callback cb, void *cba) -{ - int n; - struct v9fs_req *req; - - dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, - tc, tc->id); - if (m->err < 0) - return ERR_PTR(m->err); - - req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); - - if (tc->id == TVERSION) - n = V9FS_NOTAG; - else - n = v9fs_mux_get_tag(m); - - if (n < 0) - return ERR_PTR(-ENOMEM); - - v9fs_set_tag(tc, n); - if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { - char buf[150]; - - v9fs_printfcall(buf, sizeof(buf), tc, *m->extended); - printk(KERN_NOTICE "<<< %p %s\n", m, buf); - } - - spin_lock_init(&req->lock); - req->tag = n; - req->tcall = tc; - req->rcall = NULL; - req->err = 0; - req->cb = cb; - req->cba = cba; - req->flush = None; - - spin_lock(&m->lock); - list_add_tail(&req->req_list, &m->unsent_req_list); - spin_unlock(&m->lock); - - if (test_and_clear_bit(Wpending, &m->wsched)) - n = POLLOUT; - else - n = m->trans->poll(m->trans, NULL); - - if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(v9fs_mux_wq, &m->wq); - - return req; -} - -static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req) -{ - v9fs_mux_put_tag(m, req->tag); - kfree(req); -} - -static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a) -{ - v9fs_mux_req_callback cb; - int tag; - struct v9fs_mux_data *m; - struct v9fs_req *req, *rreq, *rptr; - - m = a; - dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, - freq->tcall, freq->rcall, freq->err, - freq->tcall->params.tflush.oldtag); - - spin_lock(&m->lock); - cb = NULL; - tag = freq->tcall->params.tflush.oldtag; - req = NULL; - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == tag) { - req = rreq; - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); - - if (req) { - spin_lock(&req->lock); - req->flush = Flushed; - spin_unlock(&req->lock); - - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - - wake_up(&m->equeue); - } - - kfree(freq->tcall); - kfree(freq->rcall); - v9fs_mux_free_request(m, freq); -} - -static int -v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req) -{ - struct v9fs_fcall *fc; - struct v9fs_req *rreq, *rptr; - - dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); - - /* if a response was received for a request, do nothing */ - spin_lock(&req->lock); - if (req->rcall || req->err) { - spin_unlock(&req->lock); - dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req); - return 0; - } - - req->flush = Flushing; - spin_unlock(&req->lock); - - spin_lock(&m->lock); - /* if the request is not sent yet, just remove it from the list */ - list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { - if (rreq->tag == req->tag) { - dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req); - list_del(&rreq->req_list); - req->flush = Flushed; - spin_unlock(&m->lock); - if (req->cb) - (*req->cb) (req, req->cba); - return 0; - } - } - spin_unlock(&m->lock); - - clear_thread_flag(TIF_SIGPENDING); - fc = v9fs_create_tflush(req->tag); - v9fs_send_request(m, fc, v9fs_mux_flush_cb, m); - return 1; -} - -static void -v9fs_mux_rpc_cb(struct v9fs_req *req, void *a) -{ - struct v9fs_mux_rpc *r; - - dprintk(DEBUG_MUX, "req %p r %p\n", req, a); - r = a; - r->rcall = req->rcall; - r->err = req->err; - - if (req->flush!=None && !req->err) - r->err = -ERESTARTSYS; - - wake_up(&r->wqueue); -} - -/** - * v9fs_mux_rpc - sends 9P request and waits until a response is available. - * The function can be interrupted. - * @m: mux data - * @tc: request to be sent - * @rc: pointer where a pointer to the response is stored - */ -int -v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, - struct v9fs_fcall **rc) -{ - int err, sigpending; - unsigned long flags; - struct v9fs_req *req; - struct v9fs_mux_rpc r; - - r.err = 0; - r.tcall = tc; - r.rcall = NULL; - r.m = m; - init_waitqueue_head(&r.wqueue); - - if (rc) - *rc = NULL; - - sigpending = 0; - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } - - req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r); - if (IS_ERR(req)) { - err = PTR_ERR(req); - dprintk(DEBUG_MUX, "error %d\n", err); - return err; - } - - err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); - if (r.err < 0) - err = r.err; - - if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) { - if (v9fs_mux_flush_request(m, req)) { - /* wait until we get response of the flush message */ - do { - clear_thread_flag(TIF_SIGPENDING); - err = wait_event_interruptible(r.wqueue, - r.rcall || r.err); - } while (!r.rcall && !r.err && err==-ERESTARTSYS && - m->trans->status==Connected && !m->err); - - err = -ERESTARTSYS; - } - sigpending = 1; - } - - if (sigpending) { - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - } - - if (rc) - *rc = r.rcall; - else - kfree(r.rcall); - - v9fs_mux_free_request(m, req); - if (err > 0) - err = -EIO; - - return err; -} - -#if 0 -/** - * v9fs_mux_rpcnb - sends 9P request without waiting for response. - * @m: mux data - * @tc: request to be sent - * @cb: callback function to be called when response arrives - * @cba: value to pass to the callback function - */ -int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc, - v9fs_mux_req_callback cb, void *a) -{ - int err; - struct v9fs_req *req; - - req = v9fs_send_request(m, tc, cb, a); - if (IS_ERR(req)) { - err = PTR_ERR(req); - dprintk(DEBUG_MUX, "error %d\n", err); - return PTR_ERR(req); - } - - dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); - return 0; -} -#endif /* 0 */ - -/** - * v9fs_mux_cancel - cancel all pending requests with error - * @m: mux data - * @err: error code - */ -void v9fs_mux_cancel(struct v9fs_mux_data *m, int err) -{ - struct v9fs_req *req, *rtmp; - LIST_HEAD(cancel_list); - - dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err); - m->err = err; - spin_lock(&m->lock); - list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { - list_move(&req->req_list, &cancel_list); - } - list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { - list_move(&req->req_list, &cancel_list); - } - spin_unlock(&m->lock); - - list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - list_del(&req->req_list); - if (!req->err) - req->err = err; - - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - } - - wake_up(&m->equeue); -} - -static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m) -{ - int tag; - - tag = v9fs_get_idpool(&m->tagpool); - if (tag < 0) - return V9FS_NOTAG; - else - return (u16) tag; -} - -static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag) -{ - if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tagpool)) - v9fs_put_idpool(tag, &m->tagpool); -} diff --git a/fs/9p/mux.h b/fs/9p/mux.h deleted file mode 100644 index fb10c50186a1..000000000000 --- a/fs/9p/mux.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * linux/fs/9p/mux.h - * - * Multiplexer Definitions - * - * Copyright (C) 2005 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -struct v9fs_mux_data; -struct v9fs_req; - -/** - * v9fs_mux_req_callback - callback function that is called when the - * response of a request is received. The callback is called from - * a workqueue and shouldn't block. - * - * @a - the pointer that was specified when the request was send to be - * passed to the callback - * @tc - request call - * @rc - response call - * @err - error code (non-zero if error occured) - */ -typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a); - -int v9fs_mux_global_init(void); -void v9fs_mux_global_exit(void); - -struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, - unsigned char *extended); -void v9fs_mux_destroy(struct v9fs_mux_data *); - -int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc); -struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m); -int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc); - -void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush); -void v9fs_mux_cancel(struct v9fs_mux_data *m, int err); -int v9fs_errstr2errno(char *errstr, int len); diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c deleted file mode 100644 index 34d43355beb7..000000000000 --- a/fs/9p/trans_fd.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * linux/fs/9p/trans_fd.c - * - * Fd transport layer. Includes deprecated socket layer. - * - * Copyright (C) 2006 by Russ Cox - * Copyright (C) 2004-2005 by Latchesar Ionkov - * Copyright (C) 2004-2005 by Eric Van Hensbergen - * Copyright (C) 1997-2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "debug.h" -#include "v9fs.h" -#include "transport.h" - -#define V9FS_PORT 564 - -struct v9fs_trans_fd { - struct file *rd; - struct file *wr; -}; - -/** - * v9fs_fd_read- read from a fd - * @v9ses: session information - * @v: buffer to receive data into - * @len: size of receive buffer - * - */ -static int v9fs_fd_read(struct v9fs_transport *trans, void *v, int len) -{ - int ret; - struct v9fs_trans_fd *ts; - - if (!trans || trans->status == Disconnected || !(ts = trans->priv)) - return -EREMOTEIO; - - if (!(ts->rd->f_flags & O_NONBLOCK)) - dprintk(DEBUG_ERROR, "blocking read ...\n"); - - ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -/** - * v9fs_fd_write - write to a socket - * @v9ses: session information - * @v: buffer to send data from - * @len: size of send buffer - * - */ -static int v9fs_fd_write(struct v9fs_transport *trans, void *v, int len) -{ - int ret; - mm_segment_t oldfs; - struct v9fs_trans_fd *ts; - - if (!trans || trans->status == Disconnected || !(ts = trans->priv)) - return -EREMOTEIO; - - if (!(ts->wr->f_flags & O_NONBLOCK)) - dprintk(DEBUG_ERROR, "blocking write ...\n"); - - oldfs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); - set_fs(oldfs); - - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -static unsigned int -v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt) -{ - int ret, n; - struct v9fs_trans_fd *ts; - mm_segment_t oldfs; - - if (!trans || trans->status != Connected || !(ts = trans->priv)) - return -EREMOTEIO; - - if (!ts->rd->f_op || !ts->rd->f_op->poll) - return -EIO; - - if (!ts->wr->f_op || !ts->wr->f_op->poll) - return -EIO; - - oldfs = get_fs(); - set_fs(get_ds()); - - ret = ts->rd->f_op->poll(ts->rd, pt); - if (ret < 0) - goto end; - - if (ts->rd != ts->wr) { - n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) { - ret = n; - goto end; - } - ret = (ret & ~POLLOUT) | (n & ~POLLIN); - } - - end: - set_fs(oldfs); - return ret; -} - -static int v9fs_fd_open(struct v9fs_session_info *v9ses, int rfd, int wfd) -{ - struct v9fs_transport *trans = v9ses->transport; - struct v9fs_trans_fd *ts = kmalloc(sizeof(struct v9fs_trans_fd), - GFP_KERNEL); - if (!ts) - return -ENOMEM; - - ts->rd = fget(rfd); - ts->wr = fget(wfd); - if (!ts->rd || !ts->wr) { - if (ts->rd) - fput(ts->rd); - if (ts->wr) - fput(ts->wr); - kfree(ts); - return -EIO; - } - - trans->priv = ts; - trans->status = Connected; - - return 0; -} - -static int v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, - char *data) -{ - if (v9ses->rfdno == ~0 || v9ses->wfdno == ~0) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); - return -ENOPROTOOPT; - } - - return v9fs_fd_open(v9ses, v9ses->rfdno, v9ses->wfdno); -} - -static int v9fs_socket_open(struct v9fs_session_info *v9ses, - struct socket *csocket) -{ - int fd, ret; - - csocket->sk->sk_allocation = GFP_NOIO; - if ((fd = sock_map_fd(csocket)) < 0) { - eprintk(KERN_ERR, "v9fs_socket_open: failed to map fd\n"); - ret = fd; - release_csocket: - sock_release(csocket); - return ret; - } - - if ((ret = v9fs_fd_open(v9ses, fd, fd)) < 0) { - sockfd_put(csocket); - eprintk(KERN_ERR, "v9fs_socket_open: failed to open fd\n"); - goto release_csocket; - } - - ((struct v9fs_trans_fd *)v9ses->transport->priv)->rd->f_flags |= - O_NONBLOCK; - return 0; -} - -static int v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, - char *data) -{ - int ret; - struct socket *csocket = NULL; - struct sockaddr_in sin_server; - - sin_server.sin_family = AF_INET; - sin_server.sin_addr.s_addr = in_aton(addr); - sin_server.sin_port = htons(v9ses->port); - sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - - if (!csocket) { - eprintk(KERN_ERR, "v9fs_trans_tcp: problem creating socket\n"); - return -1; - } - - ret = csocket->ops->connect(csocket, - (struct sockaddr *)&sin_server, - sizeof(struct sockaddr_in), 0); - if (ret < 0) { - eprintk(KERN_ERR, - "v9fs_trans_tcp: problem connecting socket to %s\n", - addr); - return ret; - } - - return v9fs_socket_open(v9ses, csocket); -} - -static int -v9fs_unix_init(struct v9fs_session_info *v9ses, const char *addr, char *data) -{ - int ret; - struct socket *csocket; - struct sockaddr_un sun_server; - - if (strlen(addr) > UNIX_PATH_MAX) { - eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", - addr); - return -ENAMETOOLONG; - } - - sun_server.sun_family = PF_UNIX; - strcpy(sun_server.sun_path, addr); - sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); - ret = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, - sizeof(struct sockaddr_un) - 1, 0); - if (ret < 0) { - eprintk(KERN_ERR, - "v9fs_trans_unix: problem connecting socket: %s: %d\n", - addr, ret); - return ret; - } - - return v9fs_socket_open(v9ses, csocket); -} - -/** - * v9fs_sock_close - shutdown socket - * @trans: private socket structure - * - */ -static void v9fs_fd_close(struct v9fs_transport *trans) -{ - struct v9fs_trans_fd *ts; - - if (!trans) - return; - - ts = xchg(&trans->priv, NULL); - - if (!ts) - return; - - trans->status = Disconnected; - if (ts->rd) - fput(ts->rd); - if (ts->wr) - fput(ts->wr); - kfree(ts); -} - -struct v9fs_transport v9fs_trans_fd = { - .init = v9fs_fd_init, - .write = v9fs_fd_write, - .read = v9fs_fd_read, - .close = v9fs_fd_close, - .poll = v9fs_fd_poll, -}; - -struct v9fs_transport v9fs_trans_tcp = { - .init = v9fs_tcp_init, - .write = v9fs_fd_write, - .read = v9fs_fd_read, - .close = v9fs_fd_close, - .poll = v9fs_fd_poll, -}; - -struct v9fs_transport v9fs_trans_unix = { - .init = v9fs_unix_init, - .write = v9fs_fd_write, - .read = v9fs_fd_read, - .close = v9fs_fd_close, - .poll = v9fs_fd_poll, -}; diff --git a/fs/9p/transport.h b/fs/9p/transport.h deleted file mode 100644 index b38a4b8a41ce..000000000000 --- a/fs/9p/transport.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * linux/fs/9p/transport.h - * - * Transport Definition - * - * Copyright (C) 2005 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -enum v9fs_transport_status { - Connected, - Disconnected, - Hung, -}; - -struct v9fs_transport { - enum v9fs_transport_status status; - void *priv; - - int (*init) (struct v9fs_session_info *, const char *, char *); - int (*write) (struct v9fs_transport *, void *, int); - int (*read) (struct v9fs_transport *, void *, int); - void (*close) (struct v9fs_transport *); - unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *); -}; - -extern struct v9fs_transport v9fs_trans_tcp; -extern struct v9fs_transport v9fs_trans_unix; -extern struct v9fs_transport v9fs_trans_fd; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 6ad6f192b6e4..4feb5ae63ecf 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -29,16 +29,12 @@ #include #include #include - -#include "debug.h" +#include +#include +#include +#include #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" -#include "transport.h" -#include "mux.h" - -/* TODO: sysfs or debugfs interface */ -int v9fs_debug_level = 0; /* feature-rific global debug level */ /* * Option Parsing (code inspired by NFS code) @@ -47,12 +43,12 @@ int v9fs_debug_level = 0; /* feature-rific global debug level */ enum { /* Options that take integer arguments */ - Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug, + Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_rfdno, Opt_wfdno, /* String options */ Opt_uname, Opt_remotename, /* Options that take no arguments */ - Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, + Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, Opt_pci, /* Cache options */ Opt_cache_loose, /* Error token */ @@ -67,12 +63,14 @@ static match_table_t tokens = { {Opt_afid, "afid=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, - {Opt_debug, "debug=%x"}, {Opt_uname, "uname=%s"}, {Opt_remotename, "aname=%s"}, {Opt_unix, "proto=unix"}, {Opt_tcp, "proto=tcp"}, {Opt_fd, "proto=fd"}, +#ifdef CONFIG_PCI_9P + {Opt_pci, "proto=pci"}, +#endif {Opt_tcp, "tcp"}, {Opt_unix, "unix"}, {Opt_fd, "fd"}, @@ -83,6 +81,8 @@ static match_table_t tokens = { {Opt_err, NULL} }; +extern struct p9_transport *p9pci_trans_create(void); + /* * Parse option string. */ @@ -122,7 +122,7 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) token = match_token(p, tokens, args); if (token < Opt_uname) { if ((ret = match_int(&args[0], &option)) < 0) { - dprintk(DEBUG_ERROR, + P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; } @@ -149,15 +149,15 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) case Opt_wfdno: v9ses->wfdno = option; break; - case Opt_debug: - v9ses->debug = option; - break; case Opt_tcp: v9ses->proto = PROTO_TCP; break; case Opt_unix: v9ses->proto = PROTO_UNIX; break; + case Opt_pci: + v9ses->proto = PROTO_PCI; + break; case Opt_fd: v9ses->proto = PROTO_FD; break; @@ -182,82 +182,6 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) } } -/** - * v9fs_inode2v9ses - safely extract v9fs session info from super block - * @inode: inode to extract information from - * - * Paranoid function to extract v9ses information from superblock, - * if anything is missing it will report an error. - * - */ - -struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) -{ - return (inode->i_sb->s_fs_info); -} - -/** - * v9fs_get_idpool - allocate numeric id from pool - * @p - pool to allocate from - * - * XXX - This seems to be an awful generic function, should it be in idr.c with - * the lock included in struct idr? - */ - -int v9fs_get_idpool(struct v9fs_idpool *p) -{ - int i = 0; - int error; - -retry: - if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) - return 0; - - if (down_interruptible(&p->lock) == -EINTR) { - eprintk(KERN_WARNING, "Interrupted while locking\n"); - return -1; - } - - /* no need to store exactly p, we just need something non-null */ - error = idr_get_new(&p->pool, p, &i); - up(&p->lock); - - if (error == -EAGAIN) - goto retry; - else if (error) - return -1; - - return i; -} - -/** - * v9fs_put_idpool - release numeric id from pool - * @p - pool to allocate from - * - * XXX - This seems to be an awful generic function, should it be in idr.c with - * the lock included in struct idr? - */ - -void v9fs_put_idpool(int id, struct v9fs_idpool *p) -{ - if (down_interruptible(&p->lock) == -EINTR) { - eprintk(KERN_WARNING, "Interrupted while locking\n"); - return; - } - idr_remove(&p->pool, id); - up(&p->lock); -} - -/** - * v9fs_check_idpool - check if the specified id is available - * @id - id to check - * @p - pool - */ -int v9fs_check_idpool(int id, struct v9fs_idpool *p) -{ - return idr_find(&p->pool, id) != NULL; -} - /** * v9fs_session_init - initialize session * @v9ses: session information structure @@ -266,25 +190,21 @@ int v9fs_check_idpool(int id, struct v9fs_idpool *p) * */ -int -v9fs_session_init(struct v9fs_session_info *v9ses, +struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data) { - struct v9fs_fcall *fcall = NULL; - struct v9fs_transport *trans_proto; - int n = 0; - int newfid = -1; int retval = -EINVAL; - struct v9fs_str *version; + struct p9_transport *trans; + struct p9_fid *fid; v9ses->name = __getname(); if (!v9ses->name) - return -ENOMEM; + return ERR_PTR(-ENOMEM); v9ses->remotename = __getname(); if (!v9ses->remotename) { __putname(v9ses->name); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } strcpy(v9ses->name, V9FS_DEFUSER); @@ -292,130 +212,60 @@ v9fs_session_init(struct v9fs_session_info *v9ses, v9fs_parse_options(data, v9ses); - /* set global debug level */ - v9fs_debug_level = v9ses->debug; - - /* id pools that are session-dependent: fids and tags */ - idr_init(&v9ses->fidpool.pool); - init_MUTEX(&v9ses->fidpool.lock); - switch (v9ses->proto) { case PROTO_TCP: - trans_proto = &v9fs_trans_tcp; + trans = p9_trans_create_tcp(dev_name, v9ses->port); break; case PROTO_UNIX: - trans_proto = &v9fs_trans_unix; + trans = p9_trans_create_unix(dev_name); *v9ses->remotename = 0; break; case PROTO_FD: - trans_proto = &v9fs_trans_fd; + trans = p9_trans_create_fd(v9ses->rfdno, v9ses->wfdno); + *v9ses->remotename = 0; + break; +#ifdef CONFIG_PCI_9P + case PROTO_PCI: + trans = p9pci_trans_create(); *v9ses->remotename = 0; break; +#endif default: printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto); retval = -ENOPROTOOPT; - goto SessCleanUp; + goto error; }; - v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL); - if (!v9ses->transport) { - retval = -ENOMEM; - goto SessCleanUp; + if (IS_ERR(trans)) { + retval = PTR_ERR(trans); + trans = NULL; + goto error; } - memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport)); + v9ses->clnt = p9_client_create(trans, v9ses->maxdata + P9_IOHDRSZ, + v9ses->extended); - if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) { - eprintk(KERN_ERR, "problem initializing transport\n"); - goto SessCleanUp; + if (IS_ERR(v9ses->clnt)) { + retval = PTR_ERR(v9ses->clnt); + v9ses->clnt = NULL; + P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n"); + goto error; } - v9ses->inprogress = 0; - v9ses->shutdown = 0; - v9ses->session_hung = 0; - - v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ, - &v9ses->extended); - - if (IS_ERR(v9ses->mux)) { - retval = PTR_ERR(v9ses->mux); - v9ses->mux = NULL; - dprintk(DEBUG_ERROR, "problem initializing mux\n"); - goto SessCleanUp; + fid = p9_client_attach(v9ses->clnt, NULL, v9ses->name, + v9ses->remotename); + if (IS_ERR(fid)) { + retval = PTR_ERR(fid); + fid = NULL; + P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n"); + goto error; } - if (v9ses->afid == ~0) { - if (v9ses->extended) - retval = - v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u", - &fcall); - else - retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000", - &fcall); - - if (retval < 0) { - dprintk(DEBUG_ERROR, "v9fs_t_version failed\n"); - goto FreeFcall; - } - - version = &fcall->params.rversion.version; - if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) { - dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n"); - v9ses->extended = 1; - } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) { - dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n"); - v9ses->extended = 0; - } else { - retval = -EREMOTEIO; - goto FreeFcall; - } + return fid; - n = fcall->params.rversion.msize; - kfree(fcall); - - if (n < v9ses->maxdata) - v9ses->maxdata = n; - } - - newfid = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { - eprintk(KERN_WARNING, "couldn't allocate FID\n"); - retval = -ENOMEM; - goto SessCleanUp; - } - /* it is a little bit ugly, but we have to prevent newfid */ - /* being the same as afid, so if it is, get a new fid */ - if (v9ses->afid != ~0 && newfid == v9ses->afid) { - newfid = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { - eprintk(KERN_WARNING, "couldn't allocate FID\n"); - retval = -ENOMEM; - goto SessCleanUp; - } - } - - if ((retval = - v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid, - v9ses->afid, NULL)) - < 0) { - dprintk(DEBUG_ERROR, "cannot attach\n"); - goto SessCleanUp; - } - - if (v9ses->afid != ~0) { - dprintk(DEBUG_ERROR, "afid not equal to ~0\n"); - if (v9fs_t_clunk(v9ses, v9ses->afid)) - dprintk(DEBUG_ERROR, "clunk failed\n"); - } - - return newfid; - - FreeFcall: - kfree(fcall); - - SessCleanUp: +error: v9fs_session_close(v9ses); - return retval; + return ERR_PTR(retval); } /** @@ -426,15 +276,9 @@ v9fs_session_init(struct v9fs_session_info *v9ses, void v9fs_session_close(struct v9fs_session_info *v9ses) { - if (v9ses->mux) { - v9fs_mux_destroy(v9ses->mux); - v9ses->mux = NULL; - } - - if (v9ses->transport) { - v9ses->transport->close(v9ses->transport); - kfree(v9ses->transport); - v9ses->transport = NULL; + if (v9ses->clnt) { + p9_client_destroy(v9ses->clnt); + v9ses->clnt = NULL; } __putname(v9ses->name); @@ -446,9 +290,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) * and cancel all pending requests. */ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { - dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses); - v9ses->transport->status = Disconnected; - v9fs_mux_cancel(v9ses->mux, -EIO); + P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); + p9_client_disconnect(v9ses->clnt); } extern int v9fs_error_init(void); @@ -460,24 +303,9 @@ extern int v9fs_error_init(void); static int __init init_v9fs(void) { - int ret; - - v9fs_error_init(); - printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); - ret = v9fs_mux_global_init(); - if (ret) { - printk(KERN_WARNING "v9fs: starting mux failed\n"); - return ret; - } - ret = register_filesystem(&v9fs_fs_type); - if (ret) { - printk(KERN_WARNING "v9fs: registering file system failed\n"); - v9fs_mux_global_exit(); - } - - return ret; + return register_filesystem(&v9fs_fs_type); } /** @@ -487,13 +315,13 @@ static int __init init_v9fs(void) static void __exit exit_v9fs(void) { - v9fs_mux_global_exit(); unregister_filesystem(&v9fs_fs_type); } module_init(init_v9fs) module_exit(exit_v9fs) +MODULE_AUTHOR("Latchesar Ionkov "); MODULE_AUTHOR("Eric Van Hensbergen "); MODULE_AUTHOR("Ron Minnich "); MODULE_LICENSE("GPL"); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 820bf5ca35d8..abc4b1668ace 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -21,16 +21,6 @@ * */ -/* - * Idpool structure provides lock and id management - * - */ - -struct v9fs_idpool { - struct semaphore lock; - struct idr pool; -}; - /* * Session structure provides information for an opened session * @@ -54,15 +44,7 @@ struct v9fs_session_info { unsigned int uid; /* default uid/muid for legacy support */ unsigned int gid; /* default gid for legacy support */ - /* book keeping */ - struct v9fs_idpool fidpool; /* The FID pool for file descriptors */ - - struct v9fs_transport *transport; - struct v9fs_mux_data *mux; - - int inprogress; /* session in progress => true */ - int shutdown; /* session shutting down. no more attaches. */ - unsigned char session_hung; + struct p9_client *clnt; /* 9p client */ struct dentry *debugfs_dir; }; @@ -71,6 +53,7 @@ enum { PROTO_TCP, PROTO_UNIX, PROTO_FD, + PROTO_PCI, }; /* possible values of ->cache */ @@ -82,12 +65,9 @@ enum { extern struct dentry *v9fs_debugfs_root; -int v9fs_session_init(struct v9fs_session_info *, const char *, char *); -struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); +struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, + char *); void v9fs_session_close(struct v9fs_session_info *v9ses); -int v9fs_get_idpool(struct v9fs_idpool *p); -void v9fs_put_idpool(int id, struct v9fs_idpool *p); -int v9fs_check_idpool(int id, struct v9fs_idpool *p); void v9fs_session_cancel(struct v9fs_session_info *v9ses); #define V9FS_MAGIC 0x01021997 @@ -97,3 +77,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses); #define V9FS_DEFUSER "nobody" #define V9FS_DEFANAME "" +static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) +{ + return (inode->i_sb->s_fs_info); +} diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 6a82d39dc498..fd01d90cada5 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -45,10 +45,10 @@ extern struct dentry_operations v9fs_dentry_operations; extern struct dentry_operations v9fs_cached_dentry_operations; struct inode *v9fs_get_inode(struct super_block *sb, int mode); -ino_t v9fs_qid2ino(struct v9fs_qid *qid); -void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *); +ino_t v9fs_qid2ino(struct p9_qid *qid); +void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *); int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); -void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat); +void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); void v9fs_dentry_release(struct dentry *); int v9fs_uflags2omode(int uflags); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 9ac4ffe9ac7d..6248f0e727a3 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -33,10 +33,10 @@ #include #include #include +#include +#include -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -50,55 +50,26 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page) { - char *buffer = NULL; - int retval = -EIO; - loff_t offset = page_offset(page); - int count = PAGE_CACHE_SIZE; - struct inode *inode = filp->f_path.dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - int rsize = v9ses->maxdata - V9FS_IOHDRSZ; - struct v9fs_fid *v9f = filp->private_data; - struct v9fs_fcall *fcall = NULL; - int fid = v9f->fid; - int total = 0; - int result = 0; - - dprintk(DEBUG_VFS, "\n"); + int retval; + loff_t offset; + char *buffer; + struct p9_fid *fid; + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + fid = filp->private_data; buffer = kmap(page); - do { - if (count < rsize) - rsize = count; - - result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall); - - if (result < 0) { - printk(KERN_ERR "v9fs_t_read returned %d\n", - result); - - kfree(fcall); - goto UnmapAndUnlock; - } else - offset += result; - - memcpy(buffer, fcall->params.rread.data, result); - - count -= result; - buffer += result; - total += result; - - kfree(fcall); + offset = page_offset(page); - if (result < rsize) - break; - } while (count); + retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE); + if (retval < 0) + goto done; - memset(buffer, 0, count); + memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval); flush_dcache_page(page); SetPageUptodate(page); retval = 0; -UnmapAndUnlock: +done: kunmap(page); unlock_page(page); return retval; diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index d93960429c09..f9534f18df0a 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -34,10 +34,10 @@ #include #include #include +#include +#include -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -52,7 +52,7 @@ static int v9fs_dentry_delete(struct dentry *dentry) { - dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); return 1; } @@ -69,7 +69,7 @@ static int v9fs_dentry_delete(struct dentry *dentry) static int v9fs_cached_dentry_delete(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); if(!inode) return 1; @@ -85,26 +85,19 @@ static int v9fs_cached_dentry_delete(struct dentry *dentry) void v9fs_dentry_release(struct dentry *dentry) { - int err; - - dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); - - if (dentry->d_fsdata != NULL) { - struct list_head *fid_list = dentry->d_fsdata; - struct v9fs_fid *temp = NULL; - struct v9fs_fid *current_fid = NULL; - - list_for_each_entry_safe(current_fid, temp, fid_list, list) { - err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid); - - if (err < 0) - dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n", - err, dentry->d_iname); - - v9fs_fid_destroy(current_fid); + struct v9fs_dentry *dent; + struct p9_fid *temp, *current_fid; + + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + dent = dentry->d_fsdata; + if (dent) { + list_for_each_entry_safe(current_fid, temp, &dent->fidlist, + dlist) { + p9_client_clunk(current_fid); } - kfree(dentry->d_fsdata); /* free the list_head */ + kfree(dent); + dentry->d_fsdata = NULL; } } diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 1dd86ee90bc5..0924d4477da3 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -32,11 +32,10 @@ #include #include #include +#include +#include -#include "debug.h" #include "v9fs.h" -#include "9p.h" -#include "conv.h" #include "v9fs_vfs.h" #include "fid.h" @@ -46,14 +45,14 @@ * */ -static inline int dt_type(struct v9fs_stat *mistat) +static inline int dt_type(struct p9_stat *mistat) { unsigned long perm = mistat->mode; int rettype = DT_REG; - if (perm & V9FS_DMDIR) + if (perm & P9_DMDIR) rettype = DT_DIR; - if (perm & V9FS_DMSYMLINK) + if (perm & P9_DMSYMLINK) rettype = DT_LNK; return rettype; @@ -69,106 +68,36 @@ static inline int dt_type(struct v9fs_stat *mistat) static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct v9fs_fcall *fcall = NULL; - struct inode *inode = filp->f_path.dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *file = filp->private_data; - unsigned int i, n, s; - int fid = -1; - int ret = 0; - struct v9fs_stat stat; - int over = 0; - - dprintk(DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); - - fid = file->fid; - - if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { - kfree(file->rdir_fcall); - file->rdir_fcall = NULL; - } - - if (file->rdir_fcall) { - n = file->rdir_fcall->params.rread.count; - i = file->rdir_fpos; - while (i < n) { - s = v9fs_deserialize_stat( - file->rdir_fcall->params.rread.data + i, - n - i, &stat, v9ses->extended); - - if (s == 0) { - dprintk(DEBUG_ERROR, - "error while deserializing stat\n"); - ret = -EIO; - goto FreeStructs; - } - - over = filldir(dirent, stat.name.str, stat.name.len, - filp->f_pos, v9fs_qid2ino(&stat.qid), - dt_type(&stat)); - - if (over) { - file->rdir_fpos = i; - file->rdir_pos = filp->f_pos; - break; - } - - i += s; - filp->f_pos += s; - } - - if (!over) { - kfree(file->rdir_fcall); - file->rdir_fcall = NULL; - } - } - - while (!over) { - ret = v9fs_t_read(v9ses, fid, filp->f_pos, - v9ses->maxdata-V9FS_IOHDRSZ, &fcall); - if (ret < 0) { - dprintk(DEBUG_ERROR, "error while reading: %d: %p\n", - ret, fcall); - goto FreeStructs; - } else if (ret == 0) + int over; + struct p9_fid *fid; + struct v9fs_session_info *v9ses; + struct inode *inode; + struct p9_stat *st; + + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); + inode = filp->f_path.dentry->d_inode; + v9ses = v9fs_inode2v9ses(inode); + fid = filp->private_data; + while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) { + if (IS_ERR(st)) + return PTR_ERR(st); + + over = filldir(dirent, st->name.str, st->name.len, filp->f_pos, + v9fs_qid2ino(&st->qid), dt_type(st)); + + if (over) break; - n = ret; - i = 0; - while (i < n) { - s = v9fs_deserialize_stat(fcall->params.rread.data + i, - n - i, &stat, v9ses->extended); - - if (s == 0) { - dprintk(DEBUG_ERROR, - "error while deserializing stat\n"); - return -EIO; - } - - over = filldir(dirent, stat.name.str, stat.name.len, - filp->f_pos, v9fs_qid2ino(&stat.qid), - dt_type(&stat)); - - if (over) { - file->rdir_fcall = fcall; - file->rdir_fpos = i; - file->rdir_pos = filp->f_pos; - fcall = NULL; - break; - } - - i += s; - filp->f_pos += s; - } - - kfree(fcall); + filp->f_pos += st->size; + kfree(st); + st = NULL; } - FreeStructs: - kfree(fcall); - return ret; + kfree(st); + return 0; } + /** * v9fs_dir_release - close a directory * @inode: inode of the directory @@ -178,29 +107,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) int v9fs_dir_release(struct inode *inode, struct file *filp) { - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *fid = filp->private_data; - int fidnum = -1; - - dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, - fid->fid); - fidnum = fid->fid; + struct p9_fid *fid; + fid = filp->private_data; + P9_DPRINTK(P9_DEBUG_VFS, + "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid); filemap_write_and_wait(inode->i_mapping); - - if (fidnum >= 0) { - dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen, - fid->fid); - - if (v9fs_t_clunk(v9ses, fidnum)) - dprintk(DEBUG_ERROR, "clunk failed\n"); - - kfree(fid->rdir_fcall); - kfree(fid); - - filp->private_data = NULL; - } - + p9_client_clunk(fid); return 0; } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 6e7678e4852f..c1f7c027cfeb 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -34,10 +34,10 @@ #include #include #include +#include +#include -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -52,48 +52,36 @@ static const struct file_operations v9fs_cached_file_operations; int v9fs_file_open(struct inode *inode, struct file *file) { - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *vfid; - struct v9fs_fcall *fcall = NULL; - int omode; int err; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + int omode; - dprintk(DEBUG_VFS, "inode: %p file: %p \n", inode, file); - - vfid = v9fs_fid_clone(file->f_path.dentry); - if (IS_ERR(vfid)) - return PTR_ERR(vfid); - + P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); + v9ses = v9fs_inode2v9ses(inode); omode = v9fs_uflags2omode(file->f_flags); - err = v9fs_t_open(v9ses, vfid->fid, omode, &fcall); + fid = file->private_data; + if (!fid) { + fid = v9fs_fid_clone(file->f_path.dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + err = p9_client_open(fid, omode); if (err < 0) { - PRINT_FCALL_ERROR("open failed", fcall); - goto Clunk_Fid; + p9_client_clunk(fid); + return err; + } } - file->private_data = vfid; - vfid->fidopen = 1; - vfid->fidclunked = 0; - vfid->iounit = fcall->params.ropen.iounit; - vfid->rdir_pos = 0; - vfid->rdir_fcall = NULL; - vfid->filp = file; - kfree(fcall); - - if((vfid->qid.version) && (v9ses->cache)) { - dprintk(DEBUG_VFS, "cached"); + file->private_data = fid; + if ((fid->qid.version) && (v9ses->cache)) { + P9_DPRINTK(P9_DEBUG_VFS, "cached"); /* enable cached file options */ if(file->f_op == &v9fs_file_operations) file->f_op = &v9fs_cached_file_operations; } return 0; - -Clunk_Fid: - v9fs_fid_clunk(v9ses, vfid); - kfree(fcall); - - return err; } /** @@ -110,7 +98,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) int res = 0; struct inode *inode = filp->f_path.dentry->d_inode; - dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); + P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); /* No mandatory locks */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) @@ -136,55 +124,16 @@ static ssize_t v9fs_file_read(struct file *filp, char __user * data, size_t count, loff_t * offset) { - struct inode *inode = filp->f_path.dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *v9f = filp->private_data; - struct v9fs_fcall *fcall = NULL; - int fid = v9f->fid; - int rsize = 0; - int result = 0; - int total = 0; - int n; - - dprintk(DEBUG_VFS, "\n"); - - rsize = v9ses->maxdata - V9FS_IOHDRSZ; - if (v9f->iounit != 0 && rsize > v9f->iounit) - rsize = v9f->iounit; - - do { - if (count < rsize) - rsize = count; + int ret; + struct p9_fid *fid; - result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall); + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + fid = filp->private_data; + ret = p9_client_uread(fid, data, *offset, count); + if (ret > 0) + *offset += ret; - if (result < 0) { - printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n", - result); - - kfree(fcall); - return total; - } else - *offset += result; - - n = copy_to_user(data, fcall->params.rread.data, result); - if (n) { - dprintk(DEBUG_ERROR, "Problem copying to user %d\n", n); - kfree(fcall); - return -EFAULT; - } - - count -= result; - data += result; - total += result; - - kfree(fcall); - - if (result < rsize) - break; - } while (count); - - return total; + return ret; } /** @@ -200,50 +149,19 @@ static ssize_t v9fs_file_write(struct file *filp, const char __user * data, size_t count, loff_t * offset) { - struct inode *inode = filp->f_path.dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - struct v9fs_fid *v9fid = filp->private_data; - struct v9fs_fcall *fcall; - int fid = v9fid->fid; - int result = -EIO; - int rsize = 0; - int total = 0; - - dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, - (int)*offset); - rsize = v9ses->maxdata - V9FS_IOHDRSZ; - if (v9fid->iounit != 0 && rsize > v9fid->iounit) - rsize = v9fid->iounit; - - do { - if (count < rsize) - rsize = count; + int ret; + struct p9_fid *fid; - result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall); - if (result < 0) { - PRINT_FCALL_ERROR("error while writing", fcall); - kfree(fcall); - return result; - } else - *offset += result; - - kfree(fcall); - fcall = NULL; - - if (result != rsize) { - eprintk(KERN_ERR, - "short write: v9fs_t_write returned %d\n", - result); - break; - } + P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, + (int)count, (int)*offset); - count -= result; - data += result; - total += result; - } while (count); + fid = filp->private_data; + ret = p9_client_uwrite(fid, data, *offset, count); + if (ret > 0) + *offset += ret; - invalidate_inode_pages2(inode->i_mapping); - return total; + invalidate_inode_pages2(filp->f_path.dentry->d_inode->i_mapping); + return ret; } static const struct file_operations v9fs_cached_file_operations = { diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c76cd8fa3f6c..c602c0e054a2 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -34,10 +34,10 @@ #include #include #include +#include +#include -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -58,27 +58,27 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) int res; res = mode & 0777; if (S_ISDIR(mode)) - res |= V9FS_DMDIR; + res |= P9_DMDIR; if (v9ses->extended) { if (S_ISLNK(mode)) - res |= V9FS_DMSYMLINK; + res |= P9_DMSYMLINK; if (v9ses->nodev == 0) { if (S_ISSOCK(mode)) - res |= V9FS_DMSOCKET; + res |= P9_DMSOCKET; if (S_ISFIFO(mode)) - res |= V9FS_DMNAMEDPIPE; + res |= P9_DMNAMEDPIPE; if (S_ISBLK(mode)) - res |= V9FS_DMDEVICE; + res |= P9_DMDEVICE; if (S_ISCHR(mode)) - res |= V9FS_DMDEVICE; + res |= P9_DMDEVICE; } if ((mode & S_ISUID) == S_ISUID) - res |= V9FS_DMSETUID; + res |= P9_DMSETUID; if ((mode & S_ISGID) == S_ISGID) - res |= V9FS_DMSETGID; - if ((mode & V9FS_DMLINK)) - res |= V9FS_DMLINK; + res |= P9_DMSETGID; + if ((mode & P9_DMLINK)) + res |= P9_DMLINK; } return res; @@ -97,27 +97,27 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) res = mode & 0777; - if ((mode & V9FS_DMDIR) == V9FS_DMDIR) + if ((mode & P9_DMDIR) == P9_DMDIR) res |= S_IFDIR; - else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended)) + else if ((mode & P9_DMSYMLINK) && (v9ses->extended)) res |= S_IFLNK; - else if ((mode & V9FS_DMSOCKET) && (v9ses->extended) + else if ((mode & P9_DMSOCKET) && (v9ses->extended) && (v9ses->nodev == 0)) res |= S_IFSOCK; - else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended) + else if ((mode & P9_DMNAMEDPIPE) && (v9ses->extended) && (v9ses->nodev == 0)) res |= S_IFIFO; - else if ((mode & V9FS_DMDEVICE) && (v9ses->extended) + else if ((mode & P9_DMDEVICE) && (v9ses->extended) && (v9ses->nodev == 0)) res |= S_IFBLK; else res |= S_IFREG; if (v9ses->extended) { - if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID) + if ((mode & P9_DMSETUID) == P9_DMSETUID) res |= S_ISUID; - if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID) + if ((mode & P9_DMSETGID) == P9_DMSETGID) res |= S_ISGID; } @@ -132,26 +132,26 @@ int v9fs_uflags2omode(int uflags) switch (uflags&3) { default: case O_RDONLY: - ret = V9FS_OREAD; + ret = P9_OREAD; break; case O_WRONLY: - ret = V9FS_OWRITE; + ret = P9_OWRITE; break; case O_RDWR: - ret = V9FS_ORDWR; + ret = P9_ORDWR; break; } if (uflags & O_EXCL) - ret |= V9FS_OEXCL; + ret |= P9_OEXCL; if (uflags & O_TRUNC) - ret |= V9FS_OTRUNC; + ret |= P9_OTRUNC; if (uflags & O_APPEND) - ret |= V9FS_OAPPEND; + ret |= P9_OAPPEND; return ret; } @@ -164,7 +164,7 @@ int v9fs_uflags2omode(int uflags) */ static void -v9fs_blank_wstat(struct v9fs_wstat *wstat) +v9fs_blank_wstat(struct p9_wstat *wstat) { wstat->type = ~0; wstat->dev = ~0; @@ -197,7 +197,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; - dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); + P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); inode = new_inode(sb); if (inode) { @@ -215,7 +215,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) case S_IFCHR: case S_IFSOCK: if(!v9ses->extended) { - dprintk(DEBUG_ERROR, "special files without extended mode\n"); + P9_DPRINTK(P9_DEBUG_ERROR, + "special files without extended mode\n"); return ERR_PTR(-EINVAL); } init_special_inode(inode, inode->i_mode, @@ -227,7 +228,8 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) break; case S_IFLNK: if(!v9ses->extended) { - dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n"); + P9_DPRINTK(P9_DEBUG_ERROR, + "extended modes used w/o 9P2000.u\n"); return ERR_PTR(-EINVAL); } inode->i_op = &v9fs_symlink_inode_operations; @@ -241,71 +243,19 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) inode->i_fop = &v9fs_dir_operations; break; default: - dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", + P9_DPRINTK(P9_DEBUG_ERROR, + "BAD mode 0x%x S_IFMT 0x%x\n", mode, mode & S_IFMT); return ERR_PTR(-EINVAL); } } else { - eprintk(KERN_WARNING, "Problem allocating inode\n"); + P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); return ERR_PTR(-ENOMEM); } return inode; } -static int -v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm, - u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit) -{ - int fid; - int err; - struct v9fs_fcall *fcall; - - fid = v9fs_get_idpool(&v9ses->fidpool); - if (fid < 0) { - eprintk(KERN_WARNING, "no free fids available\n"); - return -ENOSPC; - } - - err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall); - if (err < 0) { - PRINT_FCALL_ERROR("clone error", fcall); - if (fcall && fcall->id == RWALK) - goto clunk_fid; - else - goto put_fid; - } - kfree(fcall); - - err = v9fs_t_create(v9ses, fid, name, perm, mode, extension, &fcall); - if (err < 0) { - PRINT_FCALL_ERROR("create fails", fcall); - goto clunk_fid; - } - - if (iounit) - *iounit = fcall->params.rcreate.iounit; - - if (qid) - *qid = fcall->params.rcreate.qid; - - if (fidp) - *fidp = fid; - - kfree(fcall); - return 0; - -clunk_fid: - v9fs_t_clunk(v9ses, fid); - fid = V9FS_NOFID; - -put_fid: - if (fid != V9FS_NOFID) - v9fs_put_idpool(fid, &v9ses->fidpool); - - kfree(fcall); - return err; -} - +/* static struct v9fs_fid* v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry) { @@ -355,23 +305,25 @@ error: kfree(fcall); return ERR_PTR(err); } +*/ static struct inode * -v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid, +v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { int err, umode; struct inode *ret; - struct v9fs_fcall *fcall; + struct p9_stat *st; ret = NULL; - err = v9fs_t_stat(v9ses, fid, &fcall); - if (err) { - PRINT_FCALL_ERROR("stat error", fcall); + st = p9_client_stat(fid); + if (IS_ERR(st)) { + err = PTR_ERR(st); + st = NULL; goto error; } - umode = p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode); + umode = p9mode2unixmode(v9ses, st->mode); ret = v9fs_get_inode(sb, umode); if (IS_ERR(ret)) { err = PTR_ERR(ret); @@ -379,12 +331,13 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, u32 fid, goto error; } - v9fs_stat2inode(&fcall->params.rstat.stat, ret, sb); - kfree(fcall); + v9fs_stat2inode(st, ret, sb); + ret->i_ino = v9fs_qid2ino(&st->qid); + kfree(st); return ret; error: - kfree(fcall); + kfree(st); if (ret) iput(ret); @@ -401,43 +354,20 @@ error: static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) { - struct v9fs_fcall *fcall = NULL; - struct super_block *sb = NULL; - struct v9fs_session_info *v9ses = NULL; - struct v9fs_fid *v9fid = NULL; - struct inode *file_inode = NULL; - int fid = -1; - int result = 0; + struct inode *file_inode; + struct v9fs_session_info *v9ses; + struct p9_fid *v9fid; - dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, + P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, rmdir); file_inode = file->d_inode; - sb = file_inode->i_sb; v9ses = v9fs_inode2v9ses(file_inode); v9fid = v9fs_fid_clone(file); if(IS_ERR(v9fid)) return PTR_ERR(v9fid); - fid = v9fid->fid; - if (fid < 0) { - dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n", - file_inode->i_ino); - return -EBADF; - } - - result = v9fs_t_remove(v9ses, fid, &fcall); - if (result < 0) { - PRINT_FCALL_ERROR("remove fails", fcall); - goto Error; - } - - v9fs_put_idpool(fid, &v9ses->fidpool); - v9fs_fid_destroy(v9fid); - -Error: - kfree(fcall); - return result; + return p9_client_remove(v9fid); } static int @@ -446,61 +376,59 @@ v9fs_open_created(struct inode *inode, struct file *file) return 0; } + /** - * v9fs_vfs_create - VFS hook to create files - * @inode: directory inode that is being deleted - * @dentry: dentry that is being deleted - * @mode: create permissions - * @nd: path information + * v9fs_create - Create a file + * @dentry: dentry that is being created + * @perm: create permissions + * @mode: open mode * */ - -static int -v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) +static struct p9_fid * +v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, + struct dentry *dentry, char *extension, u32 perm, u8 mode) { int err; - u32 fid, perm, iounit; - int flags; - struct v9fs_session_info *v9ses; - struct v9fs_fid *dfid, *vfid, *ffid; + char *name; + struct p9_fid *dfid, *ofid, *fid; struct inode *inode; - struct v9fs_qid qid; - struct file *filp; - inode = NULL; - vfid = NULL; - v9ses = v9fs_inode2v9ses(dir); + err = 0; + ofid = NULL; + fid = NULL; + name = (char *) dentry->d_name.name; dfid = v9fs_fid_clone(dentry->d_parent); if(IS_ERR(dfid)) { err = PTR_ERR(dfid); + dfid = NULL; goto error; } - perm = unixmode2p9mode(v9ses, mode); - if (nd && nd->flags & LOOKUP_OPEN) - flags = nd->intent.open.flags - 1; - else - flags = O_RDWR; - - err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, - perm, v9fs_uflags2omode(flags), NULL, &fid, &qid, &iounit); + /* clone a fid to use for creation */ + ofid = p9_client_walk(dfid, 0, NULL, 1); + if (IS_ERR(ofid)) { + err = PTR_ERR(ofid); + ofid = NULL; + goto error; + } - if (err) - goto clunk_dfid; + err = p9_client_fcreate(ofid, name, perm, mode, extension); + if (err < 0) + goto error; - vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); - v9fs_fid_clunk(v9ses, dfid); - if (IS_ERR(vfid)) { - err = PTR_ERR(vfid); - vfid = NULL; + /* now walk from the parent so we can get unopened fid */ + fid = p9_client_walk(dfid, 1, &name, 0); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; goto error; - } + } else + dfid = NULL; - inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); + /* instantiate inode and assign the unopened fid to the dentry */ + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); - inode = NULL; goto error; } @@ -508,35 +436,78 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, dentry->d_op = &v9fs_cached_dentry_operations; else dentry->d_op = &v9fs_dentry_operations; + d_instantiate(dentry, inode); + v9fs_fid_add(dentry, fid); + return ofid; - if (nd && nd->flags & LOOKUP_OPEN) { - ffid = v9fs_fid_create(v9ses, fid); - if (!ffid) - return -ENOMEM; +error: + if (dfid) + p9_client_clunk(dfid); + + if (ofid) + p9_client_clunk(ofid); + + if (fid) + p9_client_clunk(fid); + + return ERR_PTR(err); +} + +/** + * v9fs_vfs_create - VFS hook to create files + * @inode: directory inode that is being created + * @dentry: dentry that is being deleted + * @mode: create permissions + * @nd: path information + * + */ +static int +v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + int err; + u32 perm; + int flags; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct file *filp; + + err = 0; + fid = NULL; + v9ses = v9fs_inode2v9ses(dir); + perm = unixmode2p9mode(v9ses, mode); + if (nd && nd->flags & LOOKUP_OPEN) + flags = nd->intent.open.flags - 1; + else + flags = O_RDWR; + + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, + v9fs_uflags2omode(flags)); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + /* if we are opening a file, assign the open fid to the file */ + if (nd && nd->flags & LOOKUP_OPEN) { filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); if (IS_ERR(filp)) { - v9fs_fid_destroy(ffid); - return PTR_ERR(filp); + err = PTR_ERR(filp); + goto error; } - ffid->rdir_pos = 0; - ffid->rdir_fcall = NULL; - ffid->fidopen = 1; - ffid->iounit = iounit; - ffid->filp = filp; - filp->private_data = ffid; - } + filp->private_data = fid; + } else + p9_client_clunk(fid); return 0; -clunk_dfid: - v9fs_fid_clunk(v9ses, dfid); - error: - if (vfid) - v9fs_fid_destroy(vfid); + if (fid) + p9_client_clunk(fid); return err; } @@ -552,57 +523,23 @@ error: static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { int err; - u32 fid, perm; + u32 perm; struct v9fs_session_info *v9ses; - struct v9fs_fid *dfid, *vfid; - struct inode *inode; + struct p9_fid *fid; - inode = NULL; - vfid = NULL; + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + err = 0; v9ses = v9fs_inode2v9ses(dir); - dfid = v9fs_fid_clone(dentry->d_parent); - if(IS_ERR(dfid)) { - err = PTR_ERR(dfid); - goto error; - } - perm = unixmode2p9mode(v9ses, mode | S_IFDIR); - - err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, - perm, V9FS_OREAD, NULL, &fid, NULL, NULL); - - if (err) { - dprintk(DEBUG_ERROR, "create error %d\n", err); - goto clean_up_dfid; + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; } - vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); - if (IS_ERR(vfid)) { - err = PTR_ERR(vfid); - vfid = NULL; - goto clean_up_dfid; - } + if (fid) + p9_client_clunk(fid); - v9fs_fid_clunk(v9ses, dfid); - inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - inode = NULL; - v9fs_fid_destroy(vfid); - goto error; - } - - if(v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; - else - dentry->d_op = &v9fs_dentry_operations; - d_instantiate(dentry, inode); - return 0; - -clean_up_dfid: - v9fs_fid_clunk(v9ses, dfid); - -error: return err; } @@ -619,104 +556,54 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb; struct v9fs_session_info *v9ses; - struct v9fs_fid *dirfid; - struct v9fs_fid *fid; + struct p9_fid *dfid, *fid; struct inode *inode; - struct v9fs_fcall *fcall = NULL; - int dirfidnum = -1; - int newfid = -1; + char *name; int result = 0; - dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", + P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", dir, dentry->d_name.name, dentry, nameidata); sb = dir->i_sb; v9ses = v9fs_inode2v9ses(dir); - dirfid = v9fs_fid_lookup(dentry->d_parent); - - if(IS_ERR(dirfid)) - return ERR_PTR(PTR_ERR(dirfid)); - - dirfidnum = dirfid->fid; - - newfid = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { - eprintk(KERN_WARNING, "newfid fails!\n"); - result = -ENOSPC; - goto Release_Dirfid; - } - - result = v9fs_t_walk(v9ses, dirfidnum, newfid, - (char *)dentry->d_name.name, &fcall); - - up(&dirfid->lock); - - if (result < 0) { - if (fcall && fcall->id == RWALK) - v9fs_t_clunk(v9ses, newfid); - else - v9fs_put_idpool(newfid, &v9ses->fidpool); - + dfid = v9fs_fid_lookup(dentry->d_parent); + if (IS_ERR(dfid)) + return ERR_PTR(PTR_ERR(dfid)); + + name = (char *) dentry->d_name.name; + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + result = PTR_ERR(fid); if (result == -ENOENT) { d_add(dentry, NULL); - dprintk(DEBUG_VFS, - "Return negative dentry %p count %d\n", - dentry, atomic_read(&dentry->d_count)); - kfree(fcall); return NULL; } - dprintk(DEBUG_ERROR, "walk error:%d\n", result); - goto FreeFcall; - } - kfree(fcall); - - result = v9fs_t_stat(v9ses, newfid, &fcall); - if (result < 0) { - dprintk(DEBUG_ERROR, "stat error\n"); - goto FreeFcall; - } - - inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses, - fcall->params.rstat.stat.mode)); - if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { - eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n", - PTR_ERR(inode)); - - result = -ENOSPC; - goto FreeFcall; + return ERR_PTR(result); } - inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid); - - fid = v9fs_fid_create(v9ses, newfid); - if (fid == NULL) { - dprintk(DEBUG_ERROR, "couldn't insert\n"); - result = -ENOMEM; - goto FreeFcall; + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + result = PTR_ERR(inode); + inode = NULL; + goto error; } - result = v9fs_fid_insert(fid, dentry); + result = v9fs_fid_add(dentry, fid); if (result < 0) - goto FreeFcall; + goto error; - fid->qid = fcall->params.rstat.stat.qid; - v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); if((fid->qid.version)&&(v9ses->cache)) dentry->d_op = &v9fs_cached_dentry_operations; else dentry->d_op = &v9fs_dentry_operations; d_add(dentry, inode); - kfree(fcall); - return NULL; -Release_Dirfid: - up(&dirfid->lock); - -FreeFcall: - kfree(fcall); +error: + if (fid) + p9_client_clunk(fid); return ERR_PTR(result); } @@ -758,73 +645,54 @@ static int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct inode *old_inode = old_dentry->d_inode; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode); - struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); - struct v9fs_fid *olddirfid; - struct v9fs_fid *newdirfid; - struct v9fs_wstat wstat; - struct v9fs_fcall *fcall = NULL; - int fid = -1; - int olddirfidnum = -1; - int newdirfidnum = -1; - int retval = 0; - - dprintk(DEBUG_VFS, "\n"); + struct inode *old_inode; + struct v9fs_session_info *v9ses; + struct p9_fid *oldfid; + struct p9_fid *olddirfid; + struct p9_fid *newdirfid; + struct p9_wstat wstat; + int retval; + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + retval = 0; + old_inode = old_dentry->d_inode; + v9ses = v9fs_inode2v9ses(old_inode); + oldfid = v9fs_fid_lookup(old_dentry); if(IS_ERR(oldfid)) return PTR_ERR(oldfid); olddirfid = v9fs_fid_clone(old_dentry->d_parent); if(IS_ERR(olddirfid)) { retval = PTR_ERR(olddirfid); - goto Release_lock; + goto done; } newdirfid = v9fs_fid_clone(new_dentry->d_parent); if(IS_ERR(newdirfid)) { retval = PTR_ERR(newdirfid); - goto Clunk_olddir; + goto clunk_olddir; } /* 9P can only handle file rename in the same directory */ if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { - dprintk(DEBUG_ERROR, "old dir and new dir are different\n"); + P9_DPRINTK(P9_DEBUG_ERROR, + "old dir and new dir are different\n"); retval = -EXDEV; - goto Clunk_newdir; - } - - fid = oldfid->fid; - olddirfidnum = olddirfid->fid; - newdirfidnum = newdirfid->fid; - - if (fid < 0) { - dprintk(DEBUG_ERROR, "no fid for old file #%lu\n", - old_inode->i_ino); - retval = -EBADF; - goto Clunk_newdir; + goto clunk_newdir; } v9fs_blank_wstat(&wstat); wstat.muid = v9ses->name; wstat.name = (char *) new_dentry->d_name.name; + retval = p9_client_wstat(oldfid, &wstat); - retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall); +clunk_newdir: + p9_client_clunk(olddirfid); - if (retval < 0) - PRINT_FCALL_ERROR("wstat error", fcall); - - kfree(fcall); - -Clunk_newdir: - v9fs_fid_clunk(v9ses, newdirfid); - -Clunk_olddir: - v9fs_fid_clunk(v9ses, olddirfid); - -Release_lock: - up(&oldfid->lock); +clunk_olddir: + p9_client_clunk(newdirfid); +done: return retval; } @@ -840,28 +708,27 @@ static int v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - struct v9fs_fcall *fcall = NULL; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_clone(dentry); - int err = -EPERM; - - dprintk(DEBUG_VFS, "dentry: %p\n", dentry); - if(IS_ERR(fid)) + int err; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_stat *st; + + P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); + err = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) return PTR_ERR(fid); - err = v9fs_t_stat(v9ses, fid->fid, &fcall); + st = p9_client_stat(fid); + if (IS_ERR(st)) + return PTR_ERR(st); - if (err < 0) - dprintk(DEBUG_ERROR, "stat error\n"); - else { - v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode, - dentry->d_inode->i_sb); + v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); generic_fillattr(dentry->d_inode, stat); - } - kfree(fcall); - v9fs_fid_clunk(v9ses, fid); - return err; + kfree(st); + return 0; } /** @@ -873,13 +740,15 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) { - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_clone(dentry); - struct v9fs_fcall *fcall = NULL; - struct v9fs_wstat wstat; - int res = -EPERM; + int retval; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_wstat wstat; - dprintk(DEBUG_VFS, "\n"); + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + retval = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + fid = v9fs_fid_lookup(dentry); if(IS_ERR(fid)) return PTR_ERR(fid); @@ -904,17 +773,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) wstat.n_gid = iattr->ia_gid; } - res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall); + retval = p9_client_wstat(fid, &wstat); + if (retval >= 0) + retval = inode_setattr(dentry->d_inode, iattr); - if (res < 0) - PRINT_FCALL_ERROR("wstat error", fcall); - - kfree(fcall); - if (res >= 0) - res = inode_setattr(dentry->d_inode, iattr); - - v9fs_fid_clunk(v9ses, fid); - return res; + return retval; } /** @@ -926,7 +789,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) */ void -v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, +v9fs_stat2inode(struct p9_stat *stat, struct inode *inode, struct super_block *sb) { int n; @@ -967,8 +830,9 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, case 'b': break; default: - dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n", - type, stat->extension.len, stat->extension.str); + P9_DPRINTK(P9_DEBUG_ERROR, + "Unknown special type %c (%.*s)\n", type, + stat->extension.len, stat->extension.str); }; inode->i_rdev = MKDEV(major, minor); } else @@ -976,8 +840,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, inode->i_size = stat->length; - inode->i_blocks = - (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; + /* not real number of blocks, but 512 byte ones ... */ + inode->i_blocks = (inode->i_size + 512 - 1) >> 9; } /** @@ -987,7 +851,7 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, * BUG: potential for inode number collisions? */ -ino_t v9fs_qid2ino(struct v9fs_qid *qid) +ino_t v9fs_qid2ino(struct p9_qid *qid) { u64 path = qid->path + 2; ino_t i = 0; @@ -1010,56 +874,46 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid) static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) { - int retval = -EPERM; + int retval; - struct v9fs_fcall *fcall = NULL; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); - struct v9fs_fid *fid = v9fs_fid_clone(dentry); + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_stat *st; + P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); + retval = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + fid = v9fs_fid_lookup(dentry); if(IS_ERR(fid)) return PTR_ERR(fid); - if (!v9ses->extended) { - retval = -EBADF; - dprintk(DEBUG_ERROR, "not extended\n"); - goto ClunkFid; - } - - dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name); - retval = v9fs_t_stat(v9ses, fid->fid, &fcall); - - if (retval < 0) { - dprintk(DEBUG_ERROR, "stat error\n"); - goto FreeFcall; - } + if (!v9ses->extended) + return -EBADF; - if (!fcall) { - retval = -EIO; - goto ClunkFid; - } + st = p9_client_stat(fid); + if (IS_ERR(st)) + return PTR_ERR(st); - if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) { + if (!(st->mode & P9_DMSYMLINK)) { retval = -EINVAL; - goto FreeFcall; + goto done; } /* copy extension buffer into buffer */ - if (fcall->params.rstat.stat.extension.len < buflen) - buflen = fcall->params.rstat.stat.extension.len + 1; + if (st->extension.len < buflen) + buflen = st->extension.len + 1; - memmove(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); + memmove(buffer, st->extension.str, buflen - 1); buffer[buflen-1] = 0; - dprintk(DEBUG_ERROR, "%s -> %.*s (%s)\n", dentry->d_name.name, fcall->params.rstat.stat.extension.len, - fcall->params.rstat.stat.extension.str, buffer); - retval = buflen; + P9_DPRINTK(P9_DEBUG_VFS, + "%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len, + st->extension.str, buffer); -FreeFcall: - kfree(fcall); - -ClunkFid: - v9fs_fid_clunk(v9ses, fid); + retval = buflen; +done: + kfree(st); return retval; } @@ -1084,14 +938,14 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer, if (buflen > PATH_MAX) buflen = PATH_MAX; - dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); retval = v9fs_readlink(dentry, link, buflen); if (retval > 0) { if ((ret = copy_to_user(buffer, link, retval)) != 0) { - dprintk(DEBUG_ERROR, "problem copying to user: %d\n", - ret); + P9_DPRINTK(P9_DEBUG_ERROR, + "problem copying to user: %d\n", ret); retval = ret; } } @@ -1112,7 +966,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) int len = 0; char *link = __getname(); - dprintk(DEBUG_VFS, "%s n", dentry->d_name.name); + P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name); if (!link) link = ERR_PTR(-ENOMEM); @@ -1141,7 +995,7 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void { char *s = nd_get_link(nd); - dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); + P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); if (!IS_ERR(s)) __putname(s); } @@ -1149,66 +1003,24 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, int mode, const char *extension) { - int err; - u32 fid, perm; + u32 perm; struct v9fs_session_info *v9ses; - struct v9fs_fid *dfid, *vfid = NULL; - struct inode *inode = NULL; + struct p9_fid *fid; v9ses = v9fs_inode2v9ses(dir); if (!v9ses->extended) { - dprintk(DEBUG_ERROR, "not extended\n"); + P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n"); return -EPERM; } - dfid = v9fs_fid_clone(dentry->d_parent); - if(IS_ERR(dfid)) { - err = PTR_ERR(dfid); - goto error; - } - perm = unixmode2p9mode(v9ses, mode); + fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm, + P9_OREAD); + if (IS_ERR(fid)) + return PTR_ERR(fid); - err = v9fs_create(v9ses, dfid->fid, (char *) dentry->d_name.name, - perm, V9FS_OREAD, (char *) extension, &fid, NULL, NULL); - - if (err) - goto clunk_dfid; - - err = v9fs_t_clunk(v9ses, fid); - if (err) - goto clunk_dfid; - - vfid = v9fs_clone_walk(v9ses, dfid->fid, dentry); - if (IS_ERR(vfid)) { - err = PTR_ERR(vfid); - vfid = NULL; - goto clunk_dfid; - } - - inode = v9fs_inode_from_fid(v9ses, vfid->fid, dir->i_sb); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - inode = NULL; - goto free_vfid; - } - - if(v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; - else - dentry->d_op = &v9fs_dentry_operations; - d_instantiate(dentry, inode); + p9_client_clunk(fid); return 0; - -free_vfid: - v9fs_fid_destroy(vfid); - -clunk_dfid: - v9fs_fid_clunk(v9ses, dfid); - -error: - return err; - } /** @@ -1224,8 +1036,8 @@ error: static int v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, - symname); + P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, + dentry->d_name.name, symname); return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); } @@ -1247,11 +1059,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int retval; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); - struct v9fs_fid *oldfid; + struct p9_fid *oldfid; char *name; - dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, + P9_DPRINTK(P9_DEBUG_VFS, + " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); oldfid = v9fs_fid_clone(old_dentry); @@ -1265,11 +1077,11 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, } sprintf(name, "%d\n", oldfid->fid); - retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); + retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); __putname(name); clunk_fid: - v9fs_fid_clunk(v9ses, oldfid); + p9_client_clunk(oldfid); return retval; } @@ -1288,7 +1100,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) int retval; char *name; - dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, + P9_DPRINTK(P9_DEBUG_VFS, + " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); if (!new_valid_dev(rdev)) diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 7bdf8b326841..f6a0519ade8c 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -37,10 +37,10 @@ #include #include #include +#include +#include -#include "debug.h" #include "v9fs.h" -#include "9p.h" #include "v9fs_vfs.h" #include "fid.h" @@ -107,41 +107,48 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, struct vfsmount *mnt) { struct super_block *sb = NULL; - struct v9fs_fcall *fcall = NULL; struct inode *inode = NULL; struct dentry *root = NULL; struct v9fs_session_info *v9ses = NULL; - struct v9fs_fid *root_fid = NULL; + struct p9_stat *st = NULL; int mode = S_IRWXUGO | S_ISVTX; uid_t uid = current->fsuid; gid_t gid = current->fsgid; - int stat_result = 0; - int newfid = 0; + struct p9_fid *fid; int retval = 0; - dprintk(DEBUG_VFS, " \n"); + P9_DPRINTK(P9_DEBUG_VFS, " \n"); v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); if (!v9ses) return -ENOMEM; - if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { - dprintk(DEBUG_ERROR, "problem initiating session\n"); - retval = newfid; - goto out_free_session; + fid = v9fs_session_init(v9ses, dev_name, data); + if (IS_ERR(fid)) { + retval = PTR_ERR(fid); + fid = NULL; + kfree(v9ses); + v9ses = NULL; + goto error; + } + + st = p9_client_stat(fid); + if (IS_ERR(st)) { + retval = PTR_ERR(st); + goto error; } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); - goto out_close_session; + goto error; } v9fs_fill_super(sb, v9ses, flags); inode = v9fs_get_inode(sb, S_IFDIR | mode); if (IS_ERR(inode)) { retval = PTR_ERR(inode); - goto put_back_sb; + goto error; } inode->i_uid = uid; @@ -150,54 +157,30 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, root = d_alloc_root(inode); if (!root) { retval = -ENOMEM; - goto put_back_sb; + goto error; } sb->s_root = root; + root->d_inode->i_ino = v9fs_qid2ino(&st->qid); + v9fs_stat2inode(st, root->d_inode, sb); + v9fs_fid_add(root, fid); - stat_result = v9fs_t_stat(v9ses, newfid, &fcall); - if (stat_result < 0) { - dprintk(DEBUG_ERROR, "stat error\n"); - v9fs_t_clunk(v9ses, newfid); - } else { - /* Setup the Root Inode */ - root_fid = v9fs_fid_create(v9ses, newfid); - if (root_fid == NULL) { - retval = -ENOMEM; - goto put_back_sb; - } - - retval = v9fs_fid_insert(root_fid, root); - if (retval < 0) { - kfree(fcall); - goto put_back_sb; - } - - root_fid->qid = fcall->params.rstat.stat.qid; - root->d_inode->i_ino = - v9fs_qid2ino(&fcall->params.rstat.stat.qid); - v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb); - } + return simple_set_mnt(mnt, sb); - kfree(fcall); +error: + if (fid) + p9_client_clunk(fid); - if (stat_result < 0) { - retval = stat_result; - goto put_back_sb; + if (v9ses) { + v9fs_session_close(v9ses); + kfree(v9ses); } - return simple_set_mnt(mnt, sb); - -out_close_session: - v9fs_session_close(v9ses); -out_free_session: - kfree(v9ses); - return retval; + if (sb) { + up_write(&sb->s_umount); + deactivate_super(sb); + } -put_back_sb: - /* deactivate_super calls v9fs_kill_super which will frees the rest */ - up_write(&sb->s_umount); - deactivate_super(sb); return retval; } @@ -211,7 +194,7 @@ static void v9fs_kill_super(struct super_block *s) { struct v9fs_session_info *v9ses = s->s_fs_info; - dprintk(DEBUG_VFS, " %p\n", s); + P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); v9fs_dentry_release(s->s_root); /* clunk root */ @@ -219,7 +202,7 @@ static void v9fs_kill_super(struct super_block *s) v9fs_session_close(v9ses); kfree(v9ses); - dprintk(DEBUG_VFS, "exiting kill_super\n"); + P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); } /** diff --git a/fs/Kconfig b/fs/Kconfig index 0fa0c1193e81..94b9d861bf9b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -2048,7 +2048,7 @@ config AFS_DEBUG config 9P_FS tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" - depends on INET && EXPERIMENTAL + depends on INET && NET_9P && EXPERIMENTAL help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h new file mode 100644 index 000000000000..88884d39f28f --- /dev/null +++ b/include/net/9p/9p.h @@ -0,0 +1,417 @@ +/* + * include/net/9p/9p.h + * + * 9P protocol definitions. + * + * Copyright (C) 2005 by Latchesar Ionkov + * Copyright (C) 2004 by Eric Van Hensbergen + * Copyright (C) 2002 by Ron Minnich + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#ifndef NET_9P_H +#define NET_9P_H + +#ifdef CONFIG_NET_9P_DEBUG + +#define P9_DEBUG_ERROR (1<<0) +#define P9_DEBUG_9P (1<<2) +#define P9_DEBUG_VFS (1<<3) +#define P9_DEBUG_CONV (1<<4) +#define P9_DEBUG_MUX (1<<5) +#define P9_DEBUG_TRANS (1<<6) +#define P9_DEBUG_SLABS (1<<7) +#define P9_DEBUG_FCALL (1<<8) + +extern unsigned int p9_debug_level; + +#define P9_DPRINTK(level, format, arg...) \ +do { \ + if ((p9_debug_level & level) == level) \ + printk(KERN_NOTICE "-- %s (%d): " \ + format , __FUNCTION__, current->pid , ## arg); \ +} while (0) + +#define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR, \ + "%s: %.*s\n", s, fcall?fcall->params.rerror.error.len:0, \ + fcall?fcall->params.rerror.error.str:""); + +#else +#define P9_DPRINTK(level, format, arg...) do { } while (0) +#define PRINT_FCALL_ERROR(s, fcall) do { } while (0) +#endif + +#define P9_EPRINTK(level, format, arg...) \ +do { \ + printk(level "9p: %s (%d): " \ + format , __FUNCTION__, current->pid , ## arg); \ +} while (0) + + +/* Message Types */ +enum { + P9_TVERSION = 100, + P9_RVERSION, + P9_TAUTH = 102, + P9_RAUTH, + P9_TATTACH = 104, + P9_RATTACH, + P9_TERROR = 106, + P9_RERROR, + P9_TFLUSH = 108, + P9_RFLUSH, + P9_TWALK = 110, + P9_RWALK, + P9_TOPEN = 112, + P9_ROPEN, + P9_TCREATE = 114, + P9_RCREATE, + P9_TREAD = 116, + P9_RREAD, + P9_TWRITE = 118, + P9_RWRITE, + P9_TCLUNK = 120, + P9_RCLUNK, + P9_TREMOVE = 122, + P9_RREMOVE, + P9_TSTAT = 124, + P9_RSTAT, + P9_TWSTAT = 126, + P9_RWSTAT, +}; + +/* open modes */ +enum { + P9_OREAD = 0x00, + P9_OWRITE = 0x01, + P9_ORDWR = 0x02, + P9_OEXEC = 0x03, + P9_OEXCL = 0x04, + P9_OTRUNC = 0x10, + P9_OREXEC = 0x20, + P9_ORCLOSE = 0x40, + P9_OAPPEND = 0x80, +}; + +/* permissions */ +enum { + P9_DMDIR = 0x80000000, + P9_DMAPPEND = 0x40000000, + P9_DMEXCL = 0x20000000, + P9_DMMOUNT = 0x10000000, + P9_DMAUTH = 0x08000000, + P9_DMTMP = 0x04000000, + P9_DMSYMLINK = 0x02000000, + P9_DMLINK = 0x01000000, + /* 9P2000.u extensions */ + P9_DMDEVICE = 0x00800000, + P9_DMNAMEDPIPE = 0x00200000, + P9_DMSOCKET = 0x00100000, + P9_DMSETUID = 0x00080000, + P9_DMSETGID = 0x00040000, +}; + +/* qid.types */ +enum { + P9_QTDIR = 0x80, + P9_QTAPPEND = 0x40, + P9_QTEXCL = 0x20, + P9_QTMOUNT = 0x10, + P9_QTAUTH = 0x08, + P9_QTTMP = 0x04, + P9_QTSYMLINK = 0x02, + P9_QTLINK = 0x01, + P9_QTFILE = 0x00, +}; + +#define P9_NOTAG (u16)(~0) +#define P9_NOFID (u32)(~0) +#define P9_MAXWELEM 16 + +/* ample room for Twrite/Rread header */ +#define P9_IOHDRSZ 24 + +struct p9_str { + u16 len; + char *str; +}; + +/* qids are the unique ID for a file (like an inode */ +struct p9_qid { + u8 type; + u32 version; + u64 path; +}; + +/* Plan 9 file metadata (stat) structure */ +struct p9_stat { + u16 size; + u16 type; + u32 dev; + struct p9_qid qid; + u32 mode; + u32 atime; + u32 mtime; + u64 length; + struct p9_str name; + struct p9_str uid; + struct p9_str gid; + struct p9_str muid; + struct p9_str extension; /* 9p2000.u extensions */ + u32 n_uid; /* 9p2000.u extensions */ + u32 n_gid; /* 9p2000.u extensions */ + u32 n_muid; /* 9p2000.u extensions */ +}; + +/* file metadata (stat) structure used to create Twstat message + The is similar to p9_stat, but the strings don't point to + the same memory block and should be freed separately +*/ +struct p9_wstat { + u16 size; + u16 type; + u32 dev; + struct p9_qid qid; + u32 mode; + u32 atime; + u32 mtime; + u64 length; + char *name; + char *uid; + char *gid; + char *muid; + char *extension; /* 9p2000.u extensions */ + u32 n_uid; /* 9p2000.u extensions */ + u32 n_gid; /* 9p2000.u extensions */ + u32 n_muid; /* 9p2000.u extensions */ +}; + +/* Structures for Protocol Operations */ +struct p9_tversion { + u32 msize; + struct p9_str version; +}; + +struct p9_rversion { + u32 msize; + struct p9_str version; +}; + +struct p9_tauth { + u32 afid; + struct p9_str uname; + struct p9_str aname; +}; + +struct p9_rauth { + struct p9_qid qid; +}; + +struct p9_rerror { + struct p9_str error; + u32 errno; /* 9p2000.u extension */ +}; + +struct p9_tflush { + u16 oldtag; +}; + +struct p9_rflush { +}; + +struct p9_tattach { + u32 fid; + u32 afid; + struct p9_str uname; + struct p9_str aname; +}; + +struct p9_rattach { + struct p9_qid qid; +}; + +struct p9_twalk { + u32 fid; + u32 newfid; + u16 nwname; + struct p9_str wnames[16]; +}; + +struct p9_rwalk { + u16 nwqid; + struct p9_qid wqids[16]; +}; + +struct p9_topen { + u32 fid; + u8 mode; +}; + +struct p9_ropen { + struct p9_qid qid; + u32 iounit; +}; + +struct p9_tcreate { + u32 fid; + struct p9_str name; + u32 perm; + u8 mode; + struct p9_str extension; +}; + +struct p9_rcreate { + struct p9_qid qid; + u32 iounit; +}; + +struct p9_tread { + u32 fid; + u64 offset; + u32 count; +}; + +struct p9_rread { + u32 count; + u8 *data; +}; + +struct p9_twrite { + u32 fid; + u64 offset; + u32 count; + u8 *data; +}; + +struct p9_rwrite { + u32 count; +}; + +struct p9_tclunk { + u32 fid; +}; + +struct p9_rclunk { +}; + +struct p9_tremove { + u32 fid; +}; + +struct p9_rremove { +}; + +struct p9_tstat { + u32 fid; +}; + +struct p9_rstat { + struct p9_stat stat; +}; + +struct p9_twstat { + u32 fid; + struct p9_stat stat; +}; + +struct p9_rwstat { +}; + +/* + * fcall is the primary packet structure + * + */ + +struct p9_fcall { + u32 size; + u8 id; + u16 tag; + void *sdata; + + union { + struct p9_tversion tversion; + struct p9_rversion rversion; + struct p9_tauth tauth; + struct p9_rauth rauth; + struct p9_rerror rerror; + struct p9_tflush tflush; + struct p9_rflush rflush; + struct p9_tattach tattach; + struct p9_rattach rattach; + struct p9_twalk twalk; + struct p9_rwalk rwalk; + struct p9_topen topen; + struct p9_ropen ropen; + struct p9_tcreate tcreate; + struct p9_rcreate rcreate; + struct p9_tread tread; + struct p9_rread rread; + struct p9_twrite twrite; + struct p9_rwrite rwrite; + struct p9_tclunk tclunk; + struct p9_rclunk rclunk; + struct p9_tremove tremove; + struct p9_rremove rremove; + struct p9_tstat tstat; + struct p9_rstat rstat; + struct p9_twstat twstat; + struct p9_rwstat rwstat; + } params; +}; + +struct p9_idpool; + +int p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat, + int dotu); +int p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *fc, int dotu); +void p9_set_tag(struct p9_fcall *fc, u16 tag); +struct p9_fcall *p9_create_tversion(u32 msize, char *version); +struct p9_fcall *p9_create_tattach(u32 fid, u32 afid, char *uname, + char *aname); +struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname); +struct p9_fcall *p9_create_tflush(u16 oldtag); +struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname, + char **wnames); +struct p9_fcall *p9_create_topen(u32 fid, u8 mode); +struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, + char *extension, int dotu); +struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count); +struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count, + const char *data); +struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count, + const char __user *data); +struct p9_fcall *p9_create_tclunk(u32 fid); +struct p9_fcall *p9_create_tremove(u32 fid); +struct p9_fcall *p9_create_tstat(u32 fid); +struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat, + int dotu); + +int p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int dotu); +int p9_errstr2errno(char *errstr, int len); + +struct p9_idpool *p9_idpool_create(void); +void p9_idpool_destroy(struct p9_idpool *); +int p9_idpool_get(struct p9_idpool *p); +void p9_idpool_put(int id, struct p9_idpool *p); +int p9_idpool_check(int id, struct p9_idpool *p); + +int p9_error_init(void); +int p9_errstr2errno(char *, int); +int __init p9_sysctl_register(void); +void __exit p9_sysctl_unregister(void); +#endif /* NET_9P_H */ diff --git a/include/net/9p/client.h b/include/net/9p/client.h new file mode 100644 index 000000000000..d65ed7c69063 --- /dev/null +++ b/include/net/9p/client.h @@ -0,0 +1,80 @@ +/* + * include/net/9p/client.h + * + * 9P Client Definitions + * + * Copyright (C) 2007 by Latchesar Ionkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#ifndef NET_9P_CLIENT_H +#define NET_9P_CLIENT_H + +struct p9_client { + spinlock_t lock; /* protect client structure */ + int msize; + unsigned char dotu; + struct p9_transport *trans; + struct p9_conn *conn; + + struct p9_idpool *fidpool; + struct list_head fidlist; +}; + +struct p9_fid { + struct p9_client *clnt; + u32 fid; + int mode; + struct p9_qid qid; + u32 iounit; + uid_t uid; + void *aux; + + int rdir_fpos; + int rdir_pos; + struct p9_fcall *rdir_fcall; + struct list_head flist; + struct list_head dlist; /* list of all fids attached to a dentry */ +}; + +struct p9_client *p9_client_create(struct p9_transport *trans, int msize, + int dotu); +void p9_client_destroy(struct p9_client *clnt); +void p9_client_disconnect(struct p9_client *clnt); +struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, + char *uname, char *aname); +struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, char *aname); +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, + int clone); +int p9_client_open(struct p9_fid *fid, int mode); +int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, + char *extension); +int p9_client_clunk(struct p9_fid *fid); +int p9_client_remove(struct p9_fid *fid); +int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count); +int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count); +int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count); +int p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, + u32 count); +int p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset, + u32 count); +struct p9_stat *p9_client_stat(struct p9_fid *fid); +int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst); +struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset); + +#endif /* NET_9P_CLIENT_H */ diff --git a/include/net/9p/conn.h b/include/net/9p/conn.h new file mode 100644 index 000000000000..583b6a2cb3df --- /dev/null +++ b/include/net/9p/conn.h @@ -0,0 +1,57 @@ +/* + * include/net/9p/conn.h + * + * Connection Definitions + * + * Copyright (C) 2005 by Latchesar Ionkov + * Copyright (C) 2004 by Eric Van Hensbergen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#ifndef NET_9P_CONN_H +#define NET_9P_CONN_H + +#undef P9_NONBLOCK + +struct p9_conn; +struct p9_req; + +/** + * p9_mux_req_callback - callback function that is called when the + * response of a request is received. The callback is called from + * a workqueue and shouldn't block. + * + * @req - request + * @a - the pointer that was specified when the request was send to be + * passed to the callback + */ +typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a); + +struct p9_conn *p9_conn_create(struct p9_transport *trans, int msize, + unsigned char *dotu); +void p9_conn_destroy(struct p9_conn *); +int p9_conn_rpc(struct p9_conn *m, struct p9_fcall *tc, struct p9_fcall **rc); + +#ifdef P9_NONBLOCK +int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, + p9_conn_req_callback cb, void *a); +#endif /* P9_NONBLOCK */ + +void p9_conn_cancel(struct p9_conn *m, int err); + +#endif /* NET_9P_CONN_H */ diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h new file mode 100644 index 000000000000..462d42279fb0 --- /dev/null +++ b/include/net/9p/transport.h @@ -0,0 +1,49 @@ +/* + * include/net/9p/transport.h + * + * Transport Definition + * + * Copyright (C) 2005 by Latchesar Ionkov + * Copyright (C) 2004 by Eric Van Hensbergen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#ifndef NET_9P_TRANSPORT_H +#define NET_9P_TRANSPORT_H + +enum p9_transport_status { + Connected, + Disconnected, + Hung, +}; + +struct p9_transport { + enum p9_transport_status status; + void *priv; + + int (*write) (struct p9_transport *, void *, int); + int (*read) (struct p9_transport *, void *, int); + void (*close) (struct p9_transport *); + unsigned int (*poll)(struct p9_transport *, struct poll_table_struct *); +}; + +struct p9_transport *p9_trans_create_tcp(const char *addr, int port); +struct p9_transport *p9_trans_create_unix(const char *addr); +struct p9_transport *p9_trans_create_fd(int rfd, int wfd); + +#endif /* NET_9P_TRANSPORT_H */ diff --git a/net/9p/Kconfig b/net/9p/Kconfig new file mode 100644 index 000000000000..66821cd64a76 --- /dev/null +++ b/net/9p/Kconfig @@ -0,0 +1,21 @@ +# +# 9P protocol configuration +# + +menuconfig NET_9P + depends on NET && EXPERIMENTAL + tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" + help + If you say Y here, you will get experimental support for + Plan 9 resource sharing via the 9P2000 protocol. + + See for more information. + + If unsure, say N. + +config NET_9P_DEBUG + bool "Debug information" + depends on NET_9P + help + Say Y if you want the 9P subsistem to log debug information. + diff --git a/net/9p/Makefile b/net/9p/Makefile new file mode 100644 index 000000000000..ac46cb91900d --- /dev/null +++ b/net/9p/Makefile @@ -0,0 +1,13 @@ +obj-$(CONFIG_NET_9P) := 9p.o + +9p-objs := \ + mod.o \ + trans_fd.o \ + mux.o \ + client.o \ + conv.o \ + error.o \ + fcprint.o \ + util.o \ + +9p-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/9p/client.c b/net/9p/client.c new file mode 100644 index 000000000000..bb2b8a3af196 --- /dev/null +++ b/net/9p/client.c @@ -0,0 +1,965 @@ +/* + * net/9p/clnt.c + * + * 9P Client + * + * Copyright (C) 2007 by Latchesar Ionkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct p9_fid *p9_fid_create(struct p9_client *clnt); +static void p9_fid_destroy(struct p9_fid *fid); +static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu); + +struct p9_client *p9_client_create(struct p9_transport *trans, int msize, + int dotu) +{ + int err, n; + struct p9_client *clnt; + struct p9_fcall *tc, *rc; + struct p9_str *version; + + err = 0; + tc = NULL; + rc = NULL; + clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); + if (!clnt) + return ERR_PTR(-ENOMEM); + + P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n", + clnt, trans, msize, dotu); + spin_lock_init(&clnt->lock); + clnt->trans = trans; + clnt->msize = msize; + clnt->dotu = dotu; + INIT_LIST_HEAD(&clnt->fidlist); + clnt->fidpool = p9_idpool_create(); + if (!clnt->fidpool) { + err = PTR_ERR(clnt->fidpool); + clnt->fidpool = NULL; + goto error; + } + + clnt->conn = p9_conn_create(clnt->trans, clnt->msize, &clnt->dotu); + if (IS_ERR(clnt->conn)) { + err = PTR_ERR(clnt->conn); + clnt->conn = NULL; + goto error; + } + + tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000"); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + version = &rc->params.rversion.version; + if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8)) + clnt->dotu = 1; + else if (version->len == 6 && !memcmp(version->str, "9P2000", 6)) + clnt->dotu = 0; + else { + err = -EREMOTEIO; + goto error; + } + + n = rc->params.rversion.msize; + if (n < clnt->msize) + clnt->msize = n; + + kfree(tc); + kfree(rc); + return clnt; + +error: + kfree(tc); + kfree(rc); + p9_client_destroy(clnt); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_create); + +void p9_client_destroy(struct p9_client *clnt) +{ + struct p9_fid *fid, *fidptr; + + P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + if (clnt->conn) { + p9_conn_destroy(clnt->conn); + clnt->conn = NULL; + } + + if (clnt->trans) { + clnt->trans->close(clnt->trans); + kfree(clnt->trans); + clnt->trans = NULL; + } + + if (clnt->fidpool) + p9_idpool_destroy(clnt->fidpool); + + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) + p9_fid_destroy(fid); + + kfree(clnt); +} +EXPORT_SYMBOL(p9_client_destroy); + +void p9_client_disconnect(struct p9_client *clnt) +{ + P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + clnt->trans->status = Disconnected; + p9_conn_cancel(clnt->conn, -EIO); +} +EXPORT_SYMBOL(p9_client_disconnect); + +struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, + char *uname, char *aname) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_fid *fid; + + P9_DPRINTK(P9_DEBUG_9P, "clnt %p afid %d uname %s aname %s\n", + clnt, afid?afid->fid:-1, uname, aname); + err = 0; + tc = NULL; + rc = NULL; + + fid = p9_fid_create(clnt); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + memmove(&fid->qid, &rc->params.rattach.qid, sizeof(struct p9_qid)); + kfree(tc); + kfree(rc); + return fid; + +error: + kfree(tc); + kfree(rc); + if (fid) + p9_fid_destroy(fid); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_attach); + +struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, char *aname) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_fid *fid; + + P9_DPRINTK(P9_DEBUG_9P, "clnt %p uname %s aname %s\n", clnt, uname, + aname); + err = 0; + tc = NULL; + rc = NULL; + + fid = p9_fid_create(clnt); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + tc = p9_create_tauth(fid->fid, uname, aname); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + memmove(&fid->qid, &rc->params.rauth.qid, sizeof(struct p9_qid)); + kfree(tc); + kfree(rc); + return fid; + +error: + kfree(tc); + kfree(rc); + if (fid) + p9_fid_destroy(fid); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_auth); + +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, + int clone) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + struct p9_fid *fid; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d nwname %d wname[0] %s\n", + oldfid->fid, nwname, wnames?wnames[0]:NULL); + err = 0; + tc = NULL; + rc = NULL; + clnt = oldfid->clnt; + if (clone) { + fid = p9_fid_create(clnt); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + fid->uid = oldfid->uid; + } else + fid = oldfid; + + tc = p9_create_twalk(oldfid->fid, fid->fid, nwname, wnames); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) { + if (rc && rc->id == P9_RWALK) + goto clunk_fid; + else + goto error; + } + + if (rc->params.rwalk.nwqid != nwname) { + err = -ENOENT; + goto clunk_fid; + } + + if (nwname) + memmove(&fid->qid, + &rc->params.rwalk.wqids[rc->params.rwalk.nwqid - 1], + sizeof(struct p9_qid)); + else + fid->qid = oldfid->qid; + + kfree(tc); + kfree(rc); + return fid; + +clunk_fid: + kfree(tc); + kfree(rc); + rc = NULL; + tc = p9_create_tclunk(fid->fid); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + p9_conn_rpc(clnt->conn, tc, &rc); + +error: + kfree(tc); + kfree(rc); + if (fid && (fid != oldfid)) + p9_fid_destroy(fid); + + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_walk); + +int p9_client_open(struct p9_fid *fid, int mode) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d mode %d\n", fid->fid, mode); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + + if (fid->mode != -1) + return -EINVAL; + + tc = p9_create_topen(fid->fid, mode); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto done; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto done; + + fid->mode = mode; + fid->iounit = rc->params.ropen.iounit; + +done: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_open); + +int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, + char *extension) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d name %s perm %d mode %d\n", fid->fid, + name, perm, mode); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + + if (fid->mode != -1) + return -EINVAL; + + tc = p9_create_tcreate(fid->fid, name, perm, mode, extension, + clnt->dotu); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto done; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto done; + + fid->mode = mode; + fid->iounit = rc->params.ropen.iounit; + +done: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_fcreate); + +int p9_client_clunk(struct p9_fid *fid) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + + tc = p9_create_tclunk(fid->fid); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto done; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto done; + + p9_fid_destroy(fid); + +done: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_clunk); + +int p9_client_remove(struct p9_fid *fid) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + + tc = p9_create_tremove(fid->fid); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto done; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto done; + + p9_fid_destroy(fid); + +done: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_remove); + +int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count) +{ + int err, n, rsize, total; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu %d\n", fid->fid, + (long long unsigned) offset, count); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) + rsize = clnt->msize - P9_IOHDRSZ; + + do { + if (count < rsize) + rsize = count; + + tc = p9_create_tread(fid->fid, offset, rsize); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + n = rc->params.rread.count; + if (n > count) + n = count; + + memmove(data, rc->params.rread.data, n); + count -= n; + data += n; + offset += n; + total += n; + kfree(tc); + tc = NULL; + kfree(rc); + rc = NULL; + } while (count > 0 && n == rsize); + + return total; + +error: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_read); + +int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count) +{ + int err, n, rsize, total; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, + (long long unsigned) offset, count); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) + rsize = clnt->msize - P9_IOHDRSZ; + + do { + if (count < rsize) + rsize = count; + + tc = p9_create_twrite(fid->fid, offset, rsize, data); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + n = rc->params.rread.count; + count -= n; + data += n; + offset += n; + total += n; + kfree(tc); + tc = NULL; + kfree(rc); + rc = NULL; + } while (count > 0); + + return total; + +error: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_write); + +int +p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count) +{ + int err, n, rsize, total; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, + (long long unsigned) offset, count); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) + rsize = clnt->msize - P9_IOHDRSZ; + + do { + if (count < rsize) + rsize = count; + + tc = p9_create_tread(fid->fid, offset, rsize); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + n = rc->params.rread.count; + if (n > count) + n = count; + + err = copy_to_user(data, rc->params.rread.data, n); + if (err) { + err = -EFAULT; + goto error; + } + + count -= n; + data += n; + offset += n; + total += n; + kfree(tc); + tc = NULL; + kfree(rc); + rc = NULL; + } while (count > 0 && n == rsize); + + return total; + +error: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_uread); + +int +p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset, + u32 count) +{ + int err, n, rsize, total; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, + (long long unsigned) offset, count); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) + rsize = clnt->msize - P9_IOHDRSZ; + + do { + if (count < rsize) + rsize = count; + + tc = p9_create_twrite_u(fid->fid, offset, rsize, data); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + n = rc->params.rread.count; + count -= n; + data += n; + offset += n; + total += n; + kfree(tc); + tc = NULL; + kfree(rc); + rc = NULL; + } while (count > 0); + + return total; + +error: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_uwrite); + +int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count) +{ + int n, total; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, + (long long unsigned) offset, count); + n = 0; + total = 0; + while (count) { + n = p9_client_read(fid, data, offset, count); + if (n <= 0) + break; + + data += n; + offset += n; + count -= n; + total += n; + } + + if (n < 0) + total = n; + + return total; +} +EXPORT_SYMBOL(p9_client_readn); + +struct p9_stat *p9_client_stat(struct p9_fid *fid) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + struct p9_stat *ret; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + err = 0; + tc = NULL; + rc = NULL; + ret = NULL; + clnt = fid->clnt; + + tc = p9_create_tstat(fid->fid); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + ret = p9_clone_stat(&rc->params.rstat.stat, clnt->dotu); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + ret = NULL; + goto error; + } + + kfree(tc); + kfree(rc); + return ret; + +error: + kfree(tc); + kfree(rc); + kfree(ret); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_stat); + +int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) +{ + int err; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + err = 0; + tc = NULL; + rc = NULL; + clnt = fid->clnt; + + tc = p9_create_twstat(fid->fid, wst, clnt->dotu); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto done; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + +done: + kfree(tc); + kfree(rc); + return err; +} +EXPORT_SYMBOL(p9_client_wstat); + +struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset) +{ + int err, n, m; + struct p9_fcall *tc, *rc; + struct p9_client *clnt; + struct p9_stat st, *ret; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu\n", fid->fid, + (long long unsigned) offset); + err = 0; + tc = NULL; + rc = NULL; + ret = NULL; + clnt = fid->clnt; + + /* if the offset is below or above the current response, free it */ + if (offset < fid->rdir_fpos || (fid->rdir_fcall && + offset >= fid->rdir_fpos+fid->rdir_fcall->params.rread.count)) { + fid->rdir_pos = 0; + if (fid->rdir_fcall) + fid->rdir_fpos += fid->rdir_fcall->params.rread.count; + + kfree(fid->rdir_fcall); + fid->rdir_fcall = NULL; + if (offset < fid->rdir_fpos) + fid->rdir_fpos = 0; + } + + if (!fid->rdir_fcall) { + n = fid->iounit; + if (!n || n > clnt->msize-P9_IOHDRSZ) + n = clnt->msize - P9_IOHDRSZ; + + while (1) { + if (fid->rdir_fcall) { + fid->rdir_fpos += + fid->rdir_fcall->params.rread.count; + kfree(fid->rdir_fcall); + fid->rdir_fcall = NULL; + } + + tc = p9_create_tread(fid->fid, fid->rdir_fpos, n); + if (IS_ERR(tc)) { + err = PTR_ERR(tc); + tc = NULL; + goto error; + } + + err = p9_conn_rpc(clnt->conn, tc, &rc); + if (err) + goto error; + + n = rc->params.rread.count; + if (n == 0) + goto done; + + fid->rdir_fcall = rc; + rc = NULL; + if (offset >= fid->rdir_fpos && + offset < fid->rdir_fpos+n) + break; + } + + fid->rdir_pos = 0; + } + + m = offset - fid->rdir_fpos; + if (m < 0) + goto done; + + n = p9_deserialize_stat(fid->rdir_fcall->params.rread.data + m, + fid->rdir_fcall->params.rread.count - m, &st, clnt->dotu); + + if (!n) { + err = -EIO; + goto error; + } + + fid->rdir_pos += n; + st.size = n; + ret = p9_clone_stat(&st, clnt->dotu); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + ret = NULL; + goto error; + } + +done: + kfree(tc); + kfree(rc); + return ret; + +error: + kfree(tc); + kfree(rc); + kfree(ret); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_dirread); + +static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu) +{ + int n; + char *p; + struct p9_stat *ret; + + n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len + + st->muid.len; + + if (dotu) + n += st->extension.len; + + ret = kmalloc(n, GFP_KERNEL); + if (!ret) + return ERR_PTR(-ENOMEM); + + memmove(ret, st, sizeof(struct p9_stat)); + p = ((char *) ret) + sizeof(struct p9_stat); + memmove(p, st->name.str, st->name.len); + p += st->name.len; + memmove(p, st->uid.str, st->uid.len); + p += st->uid.len; + memmove(p, st->gid.str, st->gid.len); + p += st->gid.len; + memmove(p, st->muid.str, st->muid.len); + p += st->muid.len; + + if (dotu) { + memmove(p, st->extension.str, st->extension.len); + p += st->extension.len; + } + + return ret; +} + +static struct p9_fid *p9_fid_create(struct p9_client *clnt) +{ + int err; + struct p9_fid *fid; + + P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); + if (!fid) + return ERR_PTR(-ENOMEM); + + fid->fid = p9_idpool_get(clnt->fidpool); + if (fid->fid < 0) { + err = -ENOSPC; + goto error; + } + + memset(&fid->qid, 0, sizeof(struct p9_qid)); + fid->mode = -1; + fid->rdir_fpos = 0; + fid->rdir_pos = 0; + fid->rdir_fcall = NULL; + fid->uid = current->fsuid; + fid->clnt = clnt; + fid->aux = NULL; + + spin_lock(&clnt->lock); + list_add(&fid->flist, &clnt->fidlist); + spin_unlock(&clnt->lock); + + return fid; + +error: + kfree(fid); + return ERR_PTR(err); +} + +static void p9_fid_destroy(struct p9_fid *fid) +{ + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + clnt = fid->clnt; + p9_idpool_put(fid->fid, clnt->fidpool); + spin_lock(&clnt->lock); + list_del(&fid->flist); + spin_unlock(&clnt->lock); + kfree(fid->rdir_fcall); + kfree(fid); +} diff --git a/net/9p/conv.c b/net/9p/conv.c new file mode 100644 index 000000000000..37451178e761 --- /dev/null +++ b/net/9p/conv.c @@ -0,0 +1,903 @@ +/* + * net/9p/conv.c + * + * 9P protocol conversion functions + * + * Copyright (C) 2004, 2005 by Latchesar Ionkov + * Copyright (C) 2004 by Eric Van Hensbergen + * Copyright (C) 2002 by Ron Minnich + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Buffer to help with string parsing + */ +struct cbuf { + unsigned char *sp; + unsigned char *p; + unsigned char *ep; +}; + +static inline void buf_init(struct cbuf *buf, void *data, int datalen) +{ + buf->sp = buf->p = data; + buf->ep = data + datalen; +} + +static inline int buf_check_overflow(struct cbuf *buf) +{ + return buf->p > buf->ep; +} + +static int buf_check_size(struct cbuf *buf, int len) +{ + if (buf->p + len > buf->ep) { + if (buf->p < buf->ep) { + P9_EPRINTK(KERN_ERR, + "buffer overflow: want %d has %d\n", len, + (int)(buf->ep - buf->p)); + dump_stack(); + buf->p = buf->ep + 1; + } + + return 0; + } + + return 1; +} + +static void *buf_alloc(struct cbuf *buf, int len) +{ + void *ret = NULL; + + if (buf_check_size(buf, len)) { + ret = buf->p; + buf->p += len; + } + + return ret; +} + +static void buf_put_int8(struct cbuf *buf, u8 val) +{ + if (buf_check_size(buf, 1)) { + buf->p[0] = val; + buf->p++; + } +} + +static void buf_put_int16(struct cbuf *buf, u16 val) +{ + if (buf_check_size(buf, 2)) { + *(__le16 *) buf->p = cpu_to_le16(val); + buf->p += 2; + } +} + +static void buf_put_int32(struct cbuf *buf, u32 val) +{ + if (buf_check_size(buf, 4)) { + *(__le32 *)buf->p = cpu_to_le32(val); + buf->p += 4; + } +} + +static void buf_put_int64(struct cbuf *buf, u64 val) +{ + if (buf_check_size(buf, 8)) { + *(__le64 *)buf->p = cpu_to_le64(val); + buf->p += 8; + } +} + +static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) +{ + char *ret; + + ret = NULL; + if (buf_check_size(buf, slen + 2)) { + buf_put_int16(buf, slen); + ret = buf->p; + memcpy(buf->p, s, slen); + buf->p += slen; + } + + return ret; +} + +static inline void buf_put_string(struct cbuf *buf, const char *s) +{ + buf_put_stringn(buf, s, strlen(s)); +} + +static u8 buf_get_int8(struct cbuf *buf) +{ + u8 ret = 0; + + if (buf_check_size(buf, 1)) { + ret = buf->p[0]; + buf->p++; + } + + return ret; +} + +static u16 buf_get_int16(struct cbuf *buf) +{ + u16 ret = 0; + + if (buf_check_size(buf, 2)) { + ret = le16_to_cpu(*(__le16 *)buf->p); + buf->p += 2; + } + + return ret; +} + +static u32 buf_get_int32(struct cbuf *buf) +{ + u32 ret = 0; + + if (buf_check_size(buf, 4)) { + ret = le32_to_cpu(*(__le32 *)buf->p); + buf->p += 4; + } + + return ret; +} + +static u64 buf_get_int64(struct cbuf *buf) +{ + u64 ret = 0; + + if (buf_check_size(buf, 8)) { + ret = le64_to_cpu(*(__le64 *)buf->p); + buf->p += 8; + } + + return ret; +} + +static void buf_get_str(struct cbuf *buf, struct p9_str *vstr) +{ + vstr->len = buf_get_int16(buf); + if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) { + vstr->str = buf->p; + buf->p += vstr->len; + } else { + vstr->len = 0; + vstr->str = NULL; + } +} + +static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid) +{ + qid->type = buf_get_int8(bufp); + qid->version = buf_get_int32(bufp); + qid->path = buf_get_int64(bufp); +} + +/** + * p9_size_wstat - calculate the size of a variable length stat struct + * @stat: metadata (stat) structure + * @dotu: non-zero if 9P2000.u + * + */ + +static int p9_size_wstat(struct p9_wstat *wstat, int dotu) +{ + int size = 0; + + if (wstat == NULL) { + P9_EPRINTK(KERN_ERR, "p9_size_stat: got a NULL stat pointer\n"); + return 0; + } + + size = /* 2 + *//* size[2] */ + 2 + /* type[2] */ + 4 + /* dev[4] */ + 1 + /* qid.type[1] */ + 4 + /* qid.vers[4] */ + 8 + /* qid.path[8] */ + 4 + /* mode[4] */ + 4 + /* atime[4] */ + 4 + /* mtime[4] */ + 8 + /* length[8] */ + 8; /* minimum sum of string lengths */ + + if (wstat->name) + size += strlen(wstat->name); + if (wstat->uid) + size += strlen(wstat->uid); + if (wstat->gid) + size += strlen(wstat->gid); + if (wstat->muid) + size += strlen(wstat->muid); + + if (dotu) { + size += 4 + /* n_uid[4] */ + 4 + /* n_gid[4] */ + 4 + /* n_muid[4] */ + 2; /* string length of extension[4] */ + if (wstat->extension) + size += strlen(wstat->extension); + } + + return size; +} + +/** + * buf_get_stat - safely decode a recieved metadata (stat) structure + * @bufp: buffer to deserialize + * @stat: metadata (stat) structure + * @dotu: non-zero if 9P2000.u + * + */ + +static void +buf_get_stat(struct cbuf *bufp, struct p9_stat *stat, int dotu) +{ + stat->size = buf_get_int16(bufp); + stat->type = buf_get_int16(bufp); + stat->dev = buf_get_int32(bufp); + stat->qid.type = buf_get_int8(bufp); + stat->qid.version = buf_get_int32(bufp); + stat->qid.path = buf_get_int64(bufp); + stat->mode = buf_get_int32(bufp); + stat->atime = buf_get_int32(bufp); + stat->mtime = buf_get_int32(bufp); + stat->length = buf_get_int64(bufp); + buf_get_str(bufp, &stat->name); + buf_get_str(bufp, &stat->uid); + buf_get_str(bufp, &stat->gid); + buf_get_str(bufp, &stat->muid); + + if (dotu) { + buf_get_str(bufp, &stat->extension); + stat->n_uid = buf_get_int32(bufp); + stat->n_gid = buf_get_int32(bufp); + stat->n_muid = buf_get_int32(bufp); + } +} + +/** + * p9_deserialize_stat - decode a received metadata structure + * @buf: buffer to deserialize + * @buflen: length of received buffer + * @stat: metadata structure to decode into + * @dotu: non-zero if 9P2000.u + * + * Note: stat will point to the buf region. + */ + +int +p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat, + int dotu) +{ + struct cbuf buffer; + struct cbuf *bufp = &buffer; + unsigned char *p; + + buf_init(bufp, buf, buflen); + p = bufp->p; + buf_get_stat(bufp, stat, dotu); + + if (buf_check_overflow(bufp)) + return 0; + else + return bufp->p - p; +} +EXPORT_SYMBOL(p9_deserialize_stat); + +/** + * deserialize_fcall - unmarshal a response + * @buf: recieved buffer + * @buflen: length of received buffer + * @rcall: fcall structure to populate + * @rcalllen: length of fcall structure to populate + * @dotu: non-zero if 9P2000.u + * + */ + +int +p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *rcall, + int dotu) +{ + + struct cbuf buffer; + struct cbuf *bufp = &buffer; + int i = 0; + + buf_init(bufp, buf, buflen); + + rcall->size = buf_get_int32(bufp); + rcall->id = buf_get_int8(bufp); + rcall->tag = buf_get_int16(bufp); + + P9_DPRINTK(P9_DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, + rcall->id, rcall->tag); + + switch (rcall->id) { + default: + P9_EPRINTK(KERN_ERR, "unknown message type: %d\n", rcall->id); + return -EPROTO; + case P9_RVERSION: + rcall->params.rversion.msize = buf_get_int32(bufp); + buf_get_str(bufp, &rcall->params.rversion.version); + break; + case P9_RFLUSH: + break; + case P9_RATTACH: + rcall->params.rattach.qid.type = buf_get_int8(bufp); + rcall->params.rattach.qid.version = buf_get_int32(bufp); + rcall->params.rattach.qid.path = buf_get_int64(bufp); + break; + case P9_RWALK: + rcall->params.rwalk.nwqid = buf_get_int16(bufp); + if (rcall->params.rwalk.nwqid > P9_MAXWELEM) { + P9_EPRINTK(KERN_ERR, + "Rwalk with more than %d qids: %d\n", + P9_MAXWELEM, rcall->params.rwalk.nwqid); + return -EPROTO; + } + + for (i = 0; i < rcall->params.rwalk.nwqid; i++) + buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]); + break; + case P9_ROPEN: + buf_get_qid(bufp, &rcall->params.ropen.qid); + rcall->params.ropen.iounit = buf_get_int32(bufp); + break; + case P9_RCREATE: + buf_get_qid(bufp, &rcall->params.rcreate.qid); + rcall->params.rcreate.iounit = buf_get_int32(bufp); + break; + case P9_RREAD: + rcall->params.rread.count = buf_get_int32(bufp); + rcall->params.rread.data = bufp->p; + buf_check_size(bufp, rcall->params.rread.count); + break; + case P9_RWRITE: + rcall->params.rwrite.count = buf_get_int32(bufp); + break; + case P9_RCLUNK: + break; + case P9_RREMOVE: + break; + case P9_RSTAT: + buf_get_int16(bufp); + buf_get_stat(bufp, &rcall->params.rstat.stat, dotu); + break; + case P9_RWSTAT: + break; + case P9_RERROR: + buf_get_str(bufp, &rcall->params.rerror.error); + if (dotu) + rcall->params.rerror.errno = buf_get_int16(bufp); + break; + } + + if (buf_check_overflow(bufp)) { + P9_DPRINTK(P9_DEBUG_ERROR, "buffer overflow\n"); + return -EIO; + } + + return bufp->p - bufp->sp; +} +EXPORT_SYMBOL(p9_deserialize_fcall); + +static inline void p9_put_int8(struct cbuf *bufp, u8 val, u8 * p) +{ + *p = val; + buf_put_int8(bufp, val); +} + +static inline void p9_put_int16(struct cbuf *bufp, u16 val, u16 * p) +{ + *p = val; + buf_put_int16(bufp, val); +} + +static inline void p9_put_int32(struct cbuf *bufp, u32 val, u32 * p) +{ + *p = val; + buf_put_int32(bufp, val); +} + +static inline void p9_put_int64(struct cbuf *bufp, u64 val, u64 * p) +{ + *p = val; + buf_put_int64(bufp, val); +} + +static void +p9_put_str(struct cbuf *bufp, char *data, struct p9_str *str) +{ + int len; + char *s; + + if (data) + len = strlen(data); + else + len = 0; + + s = buf_put_stringn(bufp, data, len); + if (str) { + str->len = len; + str->str = s; + } +} + +static int +p9_put_data(struct cbuf *bufp, const char *data, int count, + unsigned char **pdata) +{ + *pdata = buf_alloc(bufp, count); + memmove(*pdata, data, count); + return count; +} + +static int +p9_put_user_data(struct cbuf *bufp, const char __user *data, int count, + unsigned char **pdata) +{ + *pdata = buf_alloc(bufp, count); + return copy_from_user(*pdata, data, count); +} + +static void +p9_put_wstat(struct cbuf *bufp, struct p9_wstat *wstat, + struct p9_stat *stat, int statsz, int dotu) +{ + p9_put_int16(bufp, statsz, &stat->size); + p9_put_int16(bufp, wstat->type, &stat->type); + p9_put_int32(bufp, wstat->dev, &stat->dev); + p9_put_int8(bufp, wstat->qid.type, &stat->qid.type); + p9_put_int32(bufp, wstat->qid.version, &stat->qid.version); + p9_put_int64(bufp, wstat->qid.path, &stat->qid.path); + p9_put_int32(bufp, wstat->mode, &stat->mode); + p9_put_int32(bufp, wstat->atime, &stat->atime); + p9_put_int32(bufp, wstat->mtime, &stat->mtime); + p9_put_int64(bufp, wstat->length, &stat->length); + + p9_put_str(bufp, wstat->name, &stat->name); + p9_put_str(bufp, wstat->uid, &stat->uid); + p9_put_str(bufp, wstat->gid, &stat->gid); + p9_put_str(bufp, wstat->muid, &stat->muid); + + if (dotu) { + p9_put_str(bufp, wstat->extension, &stat->extension); + p9_put_int32(bufp, wstat->n_uid, &stat->n_uid); + p9_put_int32(bufp, wstat->n_gid, &stat->n_gid); + p9_put_int32(bufp, wstat->n_muid, &stat->n_muid); + } +} + +static struct p9_fcall * +p9_create_common(struct cbuf *bufp, u32 size, u8 id) +{ + struct p9_fcall *fc; + + size += 4 + 1 + 2; /* size[4] id[1] tag[2] */ + fc = kmalloc(sizeof(struct p9_fcall) + size, GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + fc->sdata = (char *)fc + sizeof(*fc); + + buf_init(bufp, (char *)fc->sdata, size); + p9_put_int32(bufp, size, &fc->size); + p9_put_int8(bufp, id, &fc->id); + p9_put_int16(bufp, P9_NOTAG, &fc->tag); + + return fc; +} + +void p9_set_tag(struct p9_fcall *fc, u16 tag) +{ + fc->tag = tag; + *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag); +} +EXPORT_SYMBOL(p9_set_tag); + +struct p9_fcall *p9_create_tversion(u32 msize, char *version) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 2 + strlen(version); /* msize[4] version[s] */ + fc = p9_create_common(bufp, size, P9_TVERSION); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, msize, &fc->params.tversion.msize); + p9_put_str(bufp, version, &fc->params.tversion.version); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tversion); + +struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + /* afid[4] uname[s] aname[s] */ + size = 4 + 2 + strlen(uname) + 2 + strlen(aname); + fc = p9_create_common(bufp, size, P9_TAUTH); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, afid, &fc->params.tauth.afid); + p9_put_str(bufp, uname, &fc->params.tauth.uname); + p9_put_str(bufp, aname, &fc->params.tauth.aname); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tauth); + +struct p9_fcall * +p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + /* fid[4] afid[4] uname[s] aname[s] */ + size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); + fc = p9_create_common(bufp, size, P9_TATTACH); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.tattach.fid); + p9_put_int32(bufp, afid, &fc->params.tattach.afid); + p9_put_str(bufp, uname, &fc->params.tattach.uname); + p9_put_str(bufp, aname, &fc->params.tattach.aname); + +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tattach); + +struct p9_fcall *p9_create_tflush(u16 oldtag) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 2; /* oldtag[2] */ + fc = p9_create_common(bufp, size, P9_TFLUSH); + if (IS_ERR(fc)) + goto error; + + p9_put_int16(bufp, oldtag, &fc->params.tflush.oldtag); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tflush); + +struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname, + char **wnames) +{ + int i, size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + if (nwname > P9_MAXWELEM) { + P9_DPRINTK(P9_DEBUG_ERROR, "nwname > %d\n", P9_MAXWELEM); + return NULL; + } + + size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */ + for (i = 0; i < nwname; i++) { + size += 2 + strlen(wnames[i]); /* wname[s] */ + } + + fc = p9_create_common(bufp, size, P9_TWALK); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.twalk.fid); + p9_put_int32(bufp, newfid, &fc->params.twalk.newfid); + p9_put_int16(bufp, nwname, &fc->params.twalk.nwname); + for (i = 0; i < nwname; i++) { + p9_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]); + } + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_twalk); + +struct p9_fcall *p9_create_topen(u32 fid, u8 mode) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 1; /* fid[4] mode[1] */ + fc = p9_create_common(bufp, size, P9_TOPEN); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.topen.fid); + p9_put_int8(bufp, mode, &fc->params.topen.mode); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_topen); + +struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, + char *extension, int dotu) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + /* fid[4] name[s] perm[4] mode[1] */ + size = 4 + 2 + strlen(name) + 4 + 1; + if (dotu) { + size += 2 + /* extension[s] */ + (extension == NULL ? 0 : strlen(extension)); + } + + fc = p9_create_common(bufp, size, P9_TCREATE); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.tcreate.fid); + p9_put_str(bufp, name, &fc->params.tcreate.name); + p9_put_int32(bufp, perm, &fc->params.tcreate.perm); + p9_put_int8(bufp, mode, &fc->params.tcreate.mode); + if (dotu) + p9_put_str(bufp, extension, &fc->params.tcreate.extension); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tcreate); + +struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */ + fc = p9_create_common(bufp, size, P9_TREAD); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.tread.fid); + p9_put_int64(bufp, offset, &fc->params.tread.offset); + p9_put_int32(bufp, count, &fc->params.tread.count); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tread); + +struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count, + const char *data) +{ + int size, err; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + /* fid[4] offset[8] count[4] data[count] */ + size = 4 + 8 + 4 + count; + fc = p9_create_common(bufp, size, P9_TWRITE); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.twrite.fid); + p9_put_int64(bufp, offset, &fc->params.twrite.offset); + p9_put_int32(bufp, count, &fc->params.twrite.count); + err = p9_put_data(bufp, data, count, &fc->params.twrite.data); + if (err) { + kfree(fc); + fc = ERR_PTR(err); + } + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_twrite); + +struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count, + const char __user *data) +{ + int size, err; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + /* fid[4] offset[8] count[4] data[count] */ + size = 4 + 8 + 4 + count; + fc = p9_create_common(bufp, size, P9_TWRITE); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.twrite.fid); + p9_put_int64(bufp, offset, &fc->params.twrite.offset); + p9_put_int32(bufp, count, &fc->params.twrite.count); + err = p9_put_user_data(bufp, data, count, &fc->params.twrite.data); + if (err) { + kfree(fc); + fc = ERR_PTR(err); + } + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_twrite_u); + +struct p9_fcall *p9_create_tclunk(u32 fid) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = p9_create_common(bufp, size, P9_TCLUNK); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.tclunk.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tclunk); + +struct p9_fcall *p9_create_tremove(u32 fid) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = p9_create_common(bufp, size, P9_TREMOVE); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.tremove.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tremove); + +struct p9_fcall *p9_create_tstat(u32 fid) +{ + int size; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = p9_create_common(bufp, size, P9_TSTAT); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.tstat.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_tstat); + +struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat, + int dotu) +{ + int size, statsz; + struct p9_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + statsz = p9_size_wstat(wstat, dotu); + size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */ + fc = p9_create_common(bufp, size, P9_TWSTAT); + if (IS_ERR(fc)) + goto error; + + p9_put_int32(bufp, fid, &fc->params.twstat.fid); + buf_put_int16(bufp, statsz + 2); + p9_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, dotu); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } +error: + return fc; +} +EXPORT_SYMBOL(p9_create_twstat); diff --git a/net/9p/error.c b/net/9p/error.c new file mode 100644 index 000000000000..ab2458b6c903 --- /dev/null +++ b/net/9p/error.c @@ -0,0 +1,240 @@ +/* + * linux/fs/9p/error.c + * + * Error string handling + * + * Plan 9 uses error strings, Unix uses error numbers. These functions + * try to help manage that and provide for dynamically adding error + * mappings. + * + * Copyright (C) 2004 by Eric Van Hensbergen + * Copyright (C) 2002 by Ron Minnich + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include +#include +#include + +struct errormap { + char *name; + int val; + + int namelen; + struct hlist_node list; +}; + +#define ERRHASHSZ 32 +static struct hlist_head hash_errmap[ERRHASHSZ]; + +/* FixMe - reduce to a reasonable size */ +static struct errormap errmap[] = { + {"Operation not permitted", EPERM}, + {"wstat prohibited", EPERM}, + {"No such file or directory", ENOENT}, + {"directory entry not found", ENOENT}, + {"file not found", ENOENT}, + {"Interrupted system call", EINTR}, + {"Input/output error", EIO}, + {"No such device or address", ENXIO}, + {"Argument list too long", E2BIG}, + {"Bad file descriptor", EBADF}, + {"Resource temporarily unavailable", EAGAIN}, + {"Cannot allocate memory", ENOMEM}, + {"Permission denied", EACCES}, + {"Bad address", EFAULT}, + {"Block device required", ENOTBLK}, + {"Device or resource busy", EBUSY}, + {"File exists", EEXIST}, + {"Invalid cross-device link", EXDEV}, + {"No such device", ENODEV}, + {"Not a directory", ENOTDIR}, + {"Is a directory", EISDIR}, + {"Invalid argument", EINVAL}, + {"Too many open files in system", ENFILE}, + {"Too many open files", EMFILE}, + {"Text file busy", ETXTBSY}, + {"File too large", EFBIG}, + {"No space left on device", ENOSPC}, + {"Illegal seek", ESPIPE}, + {"Read-only file system", EROFS}, + {"Too many links", EMLINK}, + {"Broken pipe", EPIPE}, + {"Numerical argument out of domain", EDOM}, + {"Numerical result out of range", ERANGE}, + {"Resource deadlock avoided", EDEADLK}, + {"File name too long", ENAMETOOLONG}, + {"No locks available", ENOLCK}, + {"Function not implemented", ENOSYS}, + {"Directory not empty", ENOTEMPTY}, + {"Too many levels of symbolic links", ELOOP}, + {"No message of desired type", ENOMSG}, + {"Identifier removed", EIDRM}, + {"No data available", ENODATA}, + {"Machine is not on the network", ENONET}, + {"Package not installed", ENOPKG}, + {"Object is remote", EREMOTE}, + {"Link has been severed", ENOLINK}, + {"Communication error on send", ECOMM}, + {"Protocol error", EPROTO}, + {"Bad message", EBADMSG}, + {"File descriptor in bad state", EBADFD}, + {"Streams pipe error", ESTRPIPE}, + {"Too many users", EUSERS}, + {"Socket operation on non-socket", ENOTSOCK}, + {"Message too long", EMSGSIZE}, + {"Protocol not available", ENOPROTOOPT}, + {"Protocol not supported", EPROTONOSUPPORT}, + {"Socket type not supported", ESOCKTNOSUPPORT}, + {"Operation not supported", EOPNOTSUPP}, + {"Protocol family not supported", EPFNOSUPPORT}, + {"Network is down", ENETDOWN}, + {"Network is unreachable", ENETUNREACH}, + {"Network dropped connection on reset", ENETRESET}, + {"Software caused connection abort", ECONNABORTED}, + {"Connection reset by peer", ECONNRESET}, + {"No buffer space available", ENOBUFS}, + {"Transport endpoint is already connected", EISCONN}, + {"Transport endpoint is not connected", ENOTCONN}, + {"Cannot send after transport endpoint shutdown", ESHUTDOWN}, + {"Connection timed out", ETIMEDOUT}, + {"Connection refused", ECONNREFUSED}, + {"Host is down", EHOSTDOWN}, + {"No route to host", EHOSTUNREACH}, + {"Operation already in progress", EALREADY}, + {"Operation now in progress", EINPROGRESS}, + {"Is a named type file", EISNAM}, + {"Remote I/O error", EREMOTEIO}, + {"Disk quota exceeded", EDQUOT}, +/* errors from fossil, vacfs, and u9fs */ + {"fid unknown or out of range", EBADF}, + {"permission denied", EACCES}, + {"file does not exist", ENOENT}, + {"authentication failed", ECONNREFUSED}, + {"bad offset in directory read", ESPIPE}, + {"bad use of fid", EBADF}, + {"wstat can't convert between files and directories", EPERM}, + {"directory is not empty", ENOTEMPTY}, + {"file exists", EEXIST}, + {"file already exists", EEXIST}, + {"file or directory already exists", EEXIST}, + {"fid already in use", EBADF}, + {"file in use", ETXTBSY}, + {"i/o error", EIO}, + {"file already open for I/O", ETXTBSY}, + {"illegal mode", EINVAL}, + {"illegal name", ENAMETOOLONG}, + {"not a directory", ENOTDIR}, + {"not a member of proposed group", EPERM}, + {"not owner", EACCES}, + {"only owner can change group in wstat", EACCES}, + {"read only file system", EROFS}, + {"no access to special file", EPERM}, + {"i/o count too large", EIO}, + {"unknown group", EINVAL}, + {"unknown user", EINVAL}, + {"bogus wstat buffer", EPROTO}, + {"exclusive use file already open", EAGAIN}, + {"corrupted directory entry", EIO}, + {"corrupted file entry", EIO}, + {"corrupted block label", EIO}, + {"corrupted meta data", EIO}, + {"illegal offset", EINVAL}, + {"illegal path element", ENOENT}, + {"root of file system is corrupted", EIO}, + {"corrupted super block", EIO}, + {"protocol botch", EPROTO}, + {"file system is full", ENOSPC}, + {"file is in use", EAGAIN}, + {"directory entry is not allocated", ENOENT}, + {"file is read only", EROFS}, + {"file has been removed", EIDRM}, + {"only support truncation to zero length", EPERM}, + {"cannot remove root", EPERM}, + {"file too big", EFBIG}, + {"venti i/o error", EIO}, + /* these are not errors */ + {"u9fs rhostsauth: no authentication required", 0}, + {"u9fs authnone: no authentication required", 0}, + {NULL, -1} +}; + +/** + * p9_error_init - preload + * @errstr: error string + * + */ + +int p9_error_init(void) +{ + struct errormap *c; + int bucket; + + /* initialize hash table */ + for (bucket = 0; bucket < ERRHASHSZ; bucket++) + INIT_HLIST_HEAD(&hash_errmap[bucket]); + + /* load initial error map into hash table */ + for (c = errmap; c->name != NULL; c++) { + c->namelen = strlen(c->name); + bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; + INIT_HLIST_NODE(&c->list); + hlist_add_head(&c->list, &hash_errmap[bucket]); + } + + return 1; +} +EXPORT_SYMBOL(p9_error_init); + +/** + * errstr2errno - convert error string to error number + * @errstr: error string + * + */ + +int p9_errstr2errno(char *errstr, int len) +{ + int errno; + struct hlist_node *p; + struct errormap *c; + int bucket; + + errno = 0; + p = NULL; + c = NULL; + bucket = jhash(errstr, len, 0) % ERRHASHSZ; + hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + if (c->namelen == len && !memcmp(c->name, errstr, len)) { + errno = c->val; + break; + } + } + + if (errno == 0) { + /* TODO: if error isn't found, add it dynamically */ + errstr[len] = 0; + printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__, + errstr); + errno = 1; + } + + return -errno; +} +EXPORT_SYMBOL(p9_errstr2errno); diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c new file mode 100644 index 000000000000..b1ae8ec57d54 --- /dev/null +++ b/net/9p/fcprint.c @@ -0,0 +1,358 @@ +/* + * net/9p/fcprint.c + * + * Print 9P call. + * + * Copyright (C) 2005 by Latchesar Ionkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ +#include +#include +#include +#include +#include + +#ifdef CONFIG_NET_9P_DEBUG + +static int +p9_printqid(char *buf, int buflen, struct p9_qid *q) +{ + int n; + char b[10]; + + n = 0; + if (q->type & P9_QTDIR) + b[n++] = 'd'; + if (q->type & P9_QTAPPEND) + b[n++] = 'a'; + if (q->type & P9_QTAUTH) + b[n++] = 'A'; + if (q->type & P9_QTEXCL) + b[n++] = 'l'; + if (q->type & P9_QTTMP) + b[n++] = 't'; + if (q->type & P9_QTSYMLINK) + b[n++] = 'L'; + b[n] = '\0'; + + return scnprintf(buf, buflen, "(%.16llx %x %s)", + (long long int) q->path, q->version, b); +} + +static int +p9_printperm(char *buf, int buflen, int perm) +{ + int n; + char b[15]; + + n = 0; + if (perm & P9_DMDIR) + b[n++] = 'd'; + if (perm & P9_DMAPPEND) + b[n++] = 'a'; + if (perm & P9_DMAUTH) + b[n++] = 'A'; + if (perm & P9_DMEXCL) + b[n++] = 'l'; + if (perm & P9_DMTMP) + b[n++] = 't'; + if (perm & P9_DMDEVICE) + b[n++] = 'D'; + if (perm & P9_DMSOCKET) + b[n++] = 'S'; + if (perm & P9_DMNAMEDPIPE) + b[n++] = 'P'; + if (perm & P9_DMSYMLINK) + b[n++] = 'L'; + b[n] = '\0'; + + return scnprintf(buf, buflen, "%s%03o", b, perm&077); +} + +static int +p9_printstat(char *buf, int buflen, struct p9_stat *st, int extended) +{ + int n; + + n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len, + st->name.str, st->uid.len, st->uid.str); + if (extended) + n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid); + + n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str); + if (extended) + n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid); + + n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str); + if (extended) + n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid); + + n += scnprintf(buf+n, buflen-n, " q "); + n += p9_printqid(buf+n, buflen-n, &st->qid); + n += scnprintf(buf+n, buflen-n, " m "); + n += p9_printperm(buf+n, buflen-n, st->mode); + n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld", + st->atime, st->mtime, (long long int) st->length); + + if (extended) + n += scnprintf(buf+n, buflen-n, " ext '%.*s'", + st->extension.len, st->extension.str); + + return n; +} + +static int +p9_dumpdata(char *buf, int buflen, u8 *data, int datalen) +{ + int i, n; + + i = n = 0; + while (i < datalen) { + n += scnprintf(buf + n, buflen - n, "%02x", data[i]); + if (i%4 == 3) + n += scnprintf(buf + n, buflen - n, " "); + if (i%32 == 31) + n += scnprintf(buf + n, buflen - n, "\n"); + + i++; + } + n += scnprintf(buf + n, buflen - n, "\n"); + + return n; +} + +static int +p9_printdata(char *buf, int buflen, u8 *data, int datalen) +{ + return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16); +} + +int +p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) +{ + int i, ret, type, tag; + + if (!fc) + return scnprintf(buf, buflen, ""); + + type = fc->id; + tag = fc->tag; + + ret = 0; + switch (type) { + case P9_TVERSION: + ret += scnprintf(buf+ret, buflen-ret, + "Tversion tag %u msize %u version '%.*s'", tag, + fc->params.tversion.msize, + fc->params.tversion.version.len, + fc->params.tversion.version.str); + break; + + case P9_RVERSION: + ret += scnprintf(buf+ret, buflen-ret, + "Rversion tag %u msize %u version '%.*s'", tag, + fc->params.rversion.msize, + fc->params.rversion.version.len, + fc->params.rversion.version.str); + break; + + case P9_TAUTH: + ret += scnprintf(buf+ret, buflen-ret, + "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag, + fc->params.tauth.afid, fc->params.tauth.uname.len, + fc->params.tauth.uname.str, fc->params.tauth.aname.len, + fc->params.tauth.aname.str); + break; + + case P9_RAUTH: + ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag); + p9_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid); + break; + + case P9_TATTACH: + ret += scnprintf(buf+ret, buflen-ret, + "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", tag, + fc->params.tattach.fid, fc->params.tattach.afid, + fc->params.tattach.uname.len, fc->params.tattach.uname.str, + fc->params.tattach.aname.len, fc->params.tattach.aname.str); + break; + + case P9_RATTACH: + ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", + tag); + p9_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid); + break; + + case P9_RERROR: + ret += scnprintf(buf+ret, buflen-ret, + "Rerror tag %u ename '%.*s'", tag, + fc->params.rerror.error.len, + fc->params.rerror.error.str); + if (extended) + ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n", + fc->params.rerror.errno); + break; + + case P9_TFLUSH: + ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u", + tag, fc->params.tflush.oldtag); + break; + + case P9_RFLUSH: + ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag); + break; + + case P9_TWALK: + ret += scnprintf(buf+ret, buflen-ret, + "Twalk tag %u fid %d newfid %d nwname %d", tag, + fc->params.twalk.fid, fc->params.twalk.newfid, + fc->params.twalk.nwname); + for (i = 0; i < fc->params.twalk.nwname; i++) + ret += scnprintf(buf+ret, buflen-ret, " '%.*s'", + fc->params.twalk.wnames[i].len, + fc->params.twalk.wnames[i].str); + break; + + case P9_RWALK: + ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d", + tag, fc->params.rwalk.nwqid); + for (i = 0; i < fc->params.rwalk.nwqid; i++) + ret += p9_printqid(buf+ret, buflen-ret, + &fc->params.rwalk.wqids[i]); + break; + + case P9_TOPEN: + ret += scnprintf(buf+ret, buflen-ret, + "Topen tag %u fid %d mode %d", tag, + fc->params.topen.fid, fc->params.topen.mode); + break; + + case P9_ROPEN: + ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag); + ret += p9_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid); + ret += scnprintf(buf+ret, buflen-ret, " iounit %d", + fc->params.ropen.iounit); + break; + + case P9_TCREATE: + ret += scnprintf(buf+ret, buflen-ret, + "Tcreate tag %u fid %d name '%.*s' perm ", tag, + fc->params.tcreate.fid, fc->params.tcreate.name.len, + fc->params.tcreate.name.str); + + ret += p9_printperm(buf+ret, buflen-ret, + fc->params.tcreate.perm); + ret += scnprintf(buf+ret, buflen-ret, " mode %d", + fc->params.tcreate.mode); + break; + + case P9_RCREATE: + ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag); + ret += p9_printqid(buf+ret, buflen-ret, + &fc->params.rcreate.qid); + ret += scnprintf(buf+ret, buflen-ret, " iounit %d", + fc->params.rcreate.iounit); + break; + + case P9_TREAD: + ret += scnprintf(buf+ret, buflen-ret, + "Tread tag %u fid %d offset %lld count %u", tag, + fc->params.tread.fid, + (long long int) fc->params.tread.offset, + fc->params.tread.count); + break; + + case P9_RREAD: + ret += scnprintf(buf+ret, buflen-ret, + "Rread tag %u count %u data ", tag, + fc->params.rread.count); + ret += p9_printdata(buf+ret, buflen-ret, fc->params.rread.data, + fc->params.rread.count); + break; + + case P9_TWRITE: + ret += scnprintf(buf+ret, buflen-ret, + "Twrite tag %u fid %d offset %lld count %u data ", + tag, fc->params.twrite.fid, + (long long int) fc->params.twrite.offset, + fc->params.twrite.count); + ret += p9_printdata(buf+ret, buflen-ret, fc->params.twrite.data, + fc->params.twrite.count); + break; + + case P9_RWRITE: + ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u", + tag, fc->params.rwrite.count); + break; + + case P9_TCLUNK: + ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d", + tag, fc->params.tclunk.fid); + break; + + case P9_RCLUNK: + ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag); + break; + + case P9_TREMOVE: + ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d", + tag, fc->params.tremove.fid); + break; + + case P9_RREMOVE: + ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag); + break; + + case P9_TSTAT: + ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d", + tag, fc->params.tstat.fid); + break; + + case P9_RSTAT: + ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag); + ret += p9_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat, + extended); + break; + + case P9_TWSTAT: + ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ", + tag, fc->params.twstat.fid); + ret += p9_printstat(buf+ret, buflen-ret, + &fc->params.twstat.stat, extended); + break; + + case P9_RWSTAT: + ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag); + break; + + default: + ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type); + break; + } + + return ret; +} + +#else +int +p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) +{ + return 0; +} +EXPORT_SYMBOL(p9_printfcall); +#endif /* CONFIG_NET_9P_DEBUG */ diff --git a/net/9p/mod.c b/net/9p/mod.c new file mode 100644 index 000000000000..4f9e1d2ac257 --- /dev/null +++ b/net/9p/mod.c @@ -0,0 +1,85 @@ +/* + * net/9p/9p.c + * + * 9P entry point + * + * Copyright (C) 2007 by Latchesar Ionkov + * Copyright (C) 2004 by Eric Van Hensbergen + * Copyright (C) 2002 by Ron Minnich + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include + +#ifdef CONFIG_NET_9P_DEBUG +unsigned int p9_debug_level = 0; /* feature-rific global debug level */ +EXPORT_SYMBOL(p9_debug_level); +module_param_named(debug, p9_debug_level, uint, 0); +MODULE_PARM_DESC(debug, "9P debugging level"); +#endif + +extern int p9_mux_global_init(void); +extern void p9_mux_global_exit(void); +extern int p9_sysctl_register(void); +extern void p9_sysctl_unregister(void); + +/** + * v9fs_init - Initialize module + * + */ +static int __init init_p9(void) +{ + int ret; + + p9_error_init(); + printk(KERN_INFO "Installing 9P2000 support\n"); + ret = p9_mux_global_init(); + if (ret) { + printk(KERN_WARNING "9p: starting mux failed\n"); + return ret; + } + + ret = p9_sysctl_register(); + if (ret) { + printk(KERN_WARNING "9p: registering sysctl failed\n"); + return ret; + } + + return ret; +} + +/** + * v9fs_init - shutdown module + * + */ + +static void __exit exit_p9(void) +{ + p9_sysctl_unregister(); + p9_mux_global_exit(); +} + +module_init(init_p9) +module_exit(exit_p9) + +MODULE_AUTHOR("Latchesar Ionkov "); +MODULE_AUTHOR("Eric Van Hensbergen "); +MODULE_AUTHOR("Ron Minnich "); +MODULE_LICENSE("GPL"); diff --git a/net/9p/mux.c b/net/9p/mux.c new file mode 100644 index 000000000000..c3aa87bc8b97 --- /dev/null +++ b/net/9p/mux.c @@ -0,0 +1,1050 @@ +/* + * net/9p/mux.c + * + * Protocol Multiplexer + * + * Copyright (C) 2004 by Eric Van Hensbergen + * Copyright (C) 2004-2005 by Latchesar Ionkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ERREQFLUSH 1 +#define SCHED_TIMEOUT 10 +#define MAXPOLLWADDR 2 + +enum { + Rworksched = 1, /* read work scheduled or running */ + Rpending = 2, /* can read */ + Wworksched = 4, /* write work scheduled or running */ + Wpending = 8, /* can write */ +}; + +enum { + None, + Flushing, + Flushed, +}; + +struct p9_mux_poll_task; + +struct p9_req { + spinlock_t lock; /* protect request structure */ + int tag; + struct p9_fcall *tcall; + struct p9_fcall *rcall; + int err; + p9_conn_req_callback cb; + void *cba; + int flush; + struct list_head req_list; +}; + +struct p9_conn { + spinlock_t lock; /* protect lock structure */ + struct list_head mux_list; + struct p9_mux_poll_task *poll_task; + int msize; + unsigned char *extended; + struct p9_transport *trans; + struct p9_idpool *tagpool; + int err; + wait_queue_head_t equeue; + struct list_head req_list; + struct list_head unsent_req_list; + struct p9_fcall *rcall; + int rpos; + char *rbuf; + int wpos; + int wsize; + char *wbuf; + wait_queue_t poll_wait[MAXPOLLWADDR]; + wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; + poll_table pt; + struct work_struct rq; + struct work_struct wq; + unsigned long wsched; +}; + +struct p9_mux_poll_task { + struct task_struct *task; + struct list_head mux_list; + int muxnum; +}; + +struct p9_mux_rpc { + struct p9_conn *m; + int err; + struct p9_fcall *tcall; + struct p9_fcall *rcall; + wait_queue_head_t wqueue; +}; + +static int p9_poll_proc(void *); +static void p9_read_work(struct work_struct *work); +static void p9_write_work(struct work_struct *work); +static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, + poll_table * p); +static u16 p9_mux_get_tag(struct p9_conn *); +static void p9_mux_put_tag(struct p9_conn *, u16); + +static DEFINE_MUTEX(p9_mux_task_lock); +static struct workqueue_struct *p9_mux_wq; + +static int p9_mux_num; +static int p9_mux_poll_task_num; +static struct p9_mux_poll_task p9_mux_poll_tasks[100]; + +int p9_mux_global_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) + p9_mux_poll_tasks[i].task = NULL; + + p9_mux_wq = create_workqueue("v9fs"); + if (!p9_mux_wq) { + printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); + return -ENOMEM; + } + + return 0; +} + +void p9_mux_global_exit(void) +{ + destroy_workqueue(p9_mux_wq); +} + +/** + * p9_mux_calc_poll_procs - calculates the number of polling procs + * based on the number of mounted v9fs filesystems. + * + * The current implementation returns sqrt of the number of mounts. + */ +static int p9_mux_calc_poll_procs(int muxnum) +{ + int n; + + if (p9_mux_poll_task_num) + n = muxnum / p9_mux_poll_task_num + + (muxnum % p9_mux_poll_task_num ? 1 : 0); + else + n = 1; + + if (n > ARRAY_SIZE(p9_mux_poll_tasks)) + n = ARRAY_SIZE(p9_mux_poll_tasks); + + return n; +} + +static int p9_mux_poll_start(struct p9_conn *m) +{ + int i, n; + struct p9_mux_poll_task *vpt, *vptlast; + struct task_struct *pproc; + + P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num, + p9_mux_poll_task_num); + mutex_lock(&p9_mux_task_lock); + + n = p9_mux_calc_poll_procs(p9_mux_num + 1); + if (n > p9_mux_poll_task_num) { + for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { + if (p9_mux_poll_tasks[i].task == NULL) { + vpt = &p9_mux_poll_tasks[i]; + P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n", + vpt); + pproc = kthread_create(p9_poll_proc, vpt, + "v9fs-poll"); + + if (!IS_ERR(pproc)) { + vpt->task = pproc; + INIT_LIST_HEAD(&vpt->mux_list); + vpt->muxnum = 0; + p9_mux_poll_task_num++; + wake_up_process(vpt->task); + } + break; + } + } + + if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) + P9_DPRINTK(P9_DEBUG_ERROR, + "warning: no free poll slots\n"); + } + + n = (p9_mux_num + 1) / p9_mux_poll_task_num + + ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0); + + vptlast = NULL; + for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { + vpt = &p9_mux_poll_tasks[i]; + if (vpt->task != NULL) { + vptlast = vpt; + if (vpt->muxnum < n) { + P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); + list_add(&m->mux_list, &vpt->mux_list); + vpt->muxnum++; + m->poll_task = vpt; + memset(&m->poll_waddr, 0, + sizeof(m->poll_waddr)); + init_poll_funcptr(&m->pt, p9_pollwait); + break; + } + } + } + + if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) { + if (vptlast == NULL) + return -ENOMEM; + + P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); + list_add(&m->mux_list, &vptlast->mux_list); + vptlast->muxnum++; + m->poll_task = vptlast; + memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); + init_poll_funcptr(&m->pt, p9_pollwait); + } + + p9_mux_num++; + mutex_unlock(&p9_mux_task_lock); + + return 0; +} + +static void p9_mux_poll_stop(struct p9_conn *m) +{ + int i; + struct p9_mux_poll_task *vpt; + + mutex_lock(&p9_mux_task_lock); + vpt = m->poll_task; + list_del(&m->mux_list); + for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { + if (m->poll_waddr[i] != NULL) { + remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); + m->poll_waddr[i] = NULL; + } + } + vpt->muxnum--; + if (!vpt->muxnum) { + P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt); + kthread_stop(vpt->task); + vpt->task = NULL; + p9_mux_poll_task_num--; + } + p9_mux_num--; + mutex_unlock(&p9_mux_task_lock); +} + +/** + * p9_conn_create - allocate and initialize the per-session mux data + * Creates the polling task if this is the first session. + * + * @trans - transport structure + * @msize - maximum message size + * @extended - pointer to the extended flag + */ +struct p9_conn *p9_conn_create(struct p9_transport *trans, int msize, + unsigned char *extended) +{ + int i, n; + struct p9_conn *m, *mtmp; + + P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, msize); + m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL); + if (!m) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&m->lock); + INIT_LIST_HEAD(&m->mux_list); + m->msize = msize; + m->extended = extended; + m->trans = trans; + m->tagpool = p9_idpool_create(); + if (!m->tagpool) { + kfree(m); + return ERR_PTR(PTR_ERR(m->tagpool)); + } + + m->err = 0; + init_waitqueue_head(&m->equeue); + INIT_LIST_HEAD(&m->req_list); + INIT_LIST_HEAD(&m->unsent_req_list); + m->rcall = NULL; + m->rpos = 0; + m->rbuf = NULL; + m->wpos = m->wsize = 0; + m->wbuf = NULL; + INIT_WORK(&m->rq, p9_read_work); + INIT_WORK(&m->wq, p9_write_work); + m->wsched = 0; + memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); + m->poll_task = NULL; + n = p9_mux_poll_start(m); + if (n) + return ERR_PTR(n); + + n = trans->poll(trans, &m->pt); + if (n & POLLIN) { + P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); + set_bit(Rpending, &m->wsched); + } + + if (n & POLLOUT) { + P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); + set_bit(Wpending, &m->wsched); + } + + for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { + if (IS_ERR(m->poll_waddr[i])) { + p9_mux_poll_stop(m); + mtmp = (void *)m->poll_waddr; /* the error code */ + kfree(m); + m = mtmp; + break; + } + } + + return m; +} +EXPORT_SYMBOL(p9_conn_create); + +/** + * p9_mux_destroy - cancels all pending requests and frees mux resources + */ +void p9_conn_destroy(struct p9_conn *m) +{ + P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, + m->mux_list.prev, m->mux_list.next); + p9_conn_cancel(m, -ECONNRESET); + + if (!list_empty(&m->req_list)) { + /* wait until all processes waiting on this session exit */ + P9_DPRINTK(P9_DEBUG_MUX, + "mux %p waiting for empty request queue\n", m); + wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); + P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m, + list_empty(&m->req_list)); + } + + p9_mux_poll_stop(m); + m->trans = NULL; + p9_idpool_destroy(m->tagpool); + kfree(m); +} +EXPORT_SYMBOL(p9_conn_destroy); + +/** + * p9_pollwait - called by files poll operation to add v9fs-poll task + * to files wait queue + */ +static void +p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, + poll_table * p) +{ + int i; + struct p9_conn *m; + + m = container_of(p, struct p9_conn, pt); + for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) + if (m->poll_waddr[i] == NULL) + break; + + if (i >= ARRAY_SIZE(m->poll_waddr)) { + P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); + return; + } + + m->poll_waddr[i] = wait_address; + + if (!wait_address) { + P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n"); + m->poll_waddr[i] = ERR_PTR(-EIO); + return; + } + + init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); + add_wait_queue(wait_address, &m->poll_wait[i]); +} + +/** + * p9_poll_mux - polls a mux and schedules read or write works if necessary + */ +static void p9_poll_mux(struct p9_conn *m) +{ + int n; + + if (m->err < 0) + return; + + n = m->trans->poll(m->trans, NULL); + if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { + P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n); + if (n >= 0) + n = -ECONNRESET; + p9_conn_cancel(m, n); + } + + if (n & POLLIN) { + set_bit(Rpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); + if (!test_and_set_bit(Rworksched, &m->wsched)) { + P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); + queue_work(p9_mux_wq, &m->rq); + } + } + + if (n & POLLOUT) { + set_bit(Wpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); + if ((m->wsize || !list_empty(&m->unsent_req_list)) + && !test_and_set_bit(Wworksched, &m->wsched)) { + P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); + queue_work(p9_mux_wq, &m->wq); + } + } +} + +/** + * p9_poll_proc - polls all v9fs transports for new events and queues + * the appropriate work to the work queue + */ +static int p9_poll_proc(void *a) +{ + struct p9_conn *m, *mtmp; + struct p9_mux_poll_task *vpt; + + vpt = a; + P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + + list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { + p9_poll_mux(m); + } + + P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n"); + schedule_timeout(SCHED_TIMEOUT * HZ); + } + + __set_current_state(TASK_RUNNING); + P9_DPRINTK(P9_DEBUG_MUX, "finish\n"); + return 0; +} + +/** + * p9_write_work - called when a transport can send some data + */ +static void p9_write_work(struct work_struct *work) +{ + int n, err; + struct p9_conn *m; + struct p9_req *req; + + m = container_of(work, struct p9_conn, wq); + + if (m->err < 0) { + clear_bit(Wworksched, &m->wsched); + return; + } + + if (!m->wsize) { + if (list_empty(&m->unsent_req_list)) { + clear_bit(Wworksched, &m->wsched); + return; + } + + spin_lock(&m->lock); +again: + req = list_entry(m->unsent_req_list.next, struct p9_req, + req_list); + list_move_tail(&req->req_list, &m->req_list); + if (req->err == ERREQFLUSH) + goto again; + + m->wbuf = req->tcall->sdata; + m->wsize = req->tcall->size; + m->wpos = 0; + spin_unlock(&m->lock); + } + + P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, + m->wsize); + clear_bit(Wpending, &m->wsched); + err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); + P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Wworksched, &m->wsched); + return; + } + + if (err <= 0) + goto error; + + m->wpos += err; + if (m->wpos == m->wsize) + m->wpos = m->wsize = 0; + + if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLOUT) { + P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); + queue_work(p9_mux_wq, &m->wq); + } else + clear_bit(Wworksched, &m->wsched); + } else + clear_bit(Wworksched, &m->wsched); + + return; + +error: + p9_conn_cancel(m, err); + clear_bit(Wworksched, &m->wsched); +} + +static void process_request(struct p9_conn *m, struct p9_req *req) +{ + int ecode; + struct p9_str *ename; + + if (!req->err && req->rcall->id == P9_RERROR) { + ecode = req->rcall->params.rerror.errno; + ename = &req->rcall->params.rerror.error; + + P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len, + ename->str); + + if (*m->extended) + req->err = -ecode; + + if (!req->err) { + req->err = p9_errstr2errno(ename->str, ename->len); + + if (!req->err) { /* string match failed */ + PRINT_FCALL_ERROR("unknown error", req->rcall); + } + + if (!req->err) + req->err = -ESERVERFAULT; + } + } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { + P9_DPRINTK(P9_DEBUG_ERROR, + "fcall mismatch: expected %d, got %d\n", + req->tcall->id + 1, req->rcall->id); + if (!req->err) + req->err = -EIO; + } +} + +/** + * p9_read_work - called when there is some data to be read from a transport + */ +static void p9_read_work(struct work_struct *work) +{ + int n, err; + struct p9_conn *m; + struct p9_req *req, *rptr, *rreq; + struct p9_fcall *rcall; + char *rbuf; + + m = container_of(work, struct p9_conn, rq); + + if (m->err < 0) + return; + + rcall = NULL; + P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); + + if (!m->rcall) { + m->rcall = + kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL); + if (!m->rcall) { + err = -ENOMEM; + goto error; + } + + m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); + m->rpos = 0; + } + + clear_bit(Rpending, &m->wsched); + err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); + P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Rworksched, &m->wsched); + return; + } + + if (err <= 0) + goto error; + + m->rpos += err; + while (m->rpos > 4) { + n = le32_to_cpu(*(__le32 *) m->rbuf); + if (n >= m->msize) { + P9_DPRINTK(P9_DEBUG_ERROR, + "requested packet size too big: %d\n", n); + err = -EIO; + goto error; + } + + if (m->rpos < n) + break; + + err = + p9_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended); + if (err < 0) { + goto error; + } + +#ifdef CONFIG_NET_9P_DEBUG + if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { + char buf[150]; + + p9_printfcall(buf, sizeof(buf), m->rcall, + *m->extended); + printk(KERN_NOTICE ">>> %p %s\n", m, buf); + } +#endif + + rcall = m->rcall; + rbuf = m->rbuf; + if (m->rpos > n) { + m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize, + GFP_KERNEL); + if (!m->rcall) { + err = -ENOMEM; + goto error; + } + + m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); + memmove(m->rbuf, rbuf + n, m->rpos - n); + m->rpos -= n; + } else { + m->rcall = NULL; + m->rbuf = NULL; + m->rpos = 0; + } + + P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, + rcall->id, rcall->tag); + + req = NULL; + spin_lock(&m->lock); + list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { + if (rreq->tag == rcall->tag) { + req = rreq; + if (req->flush != Flushing) + list_del(&req->req_list); + break; + } + } + spin_unlock(&m->lock); + + if (req) { + req->rcall = rcall; + process_request(m, req); + + if (req->flush != Flushing) { + if (req->cb) + (*req->cb) (req, req->cba); + else + kfree(req->rcall); + + wake_up(&m->equeue); + } + } else { + if (err >= 0 && rcall->id != P9_RFLUSH) + P9_DPRINTK(P9_DEBUG_ERROR, + "unexpected response mux %p id %d tag %d\n", + m, rcall->id, rcall->tag); + kfree(rcall); + } + } + + if (!list_empty(&m->req_list)) { + if (test_and_clear_bit(Rpending, &m->wsched)) + n = POLLIN; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLIN) { + P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); + queue_work(p9_mux_wq, &m->rq); + } else + clear_bit(Rworksched, &m->wsched); + } else + clear_bit(Rworksched, &m->wsched); + + return; + +error: + p9_conn_cancel(m, err); + clear_bit(Rworksched, &m->wsched); +} + +/** + * p9_send_request - send 9P request + * The function can sleep until the request is scheduled for sending. + * The function can be interrupted. Return from the function is not + * a guarantee that the request is sent successfully. Can return errors + * that can be retrieved by PTR_ERR macros. + * + * @m: mux data + * @tc: request to be sent + * @cb: callback function to call when response is received + * @cba: parameter to pass to the callback function + */ +static struct p9_req *p9_send_request(struct p9_conn *m, + struct p9_fcall *tc, + p9_conn_req_callback cb, void *cba) +{ + int n; + struct p9_req *req; + + P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, + tc, tc->id); + if (m->err < 0) + return ERR_PTR(m->err); + + req = kmalloc(sizeof(struct p9_req), GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); + + if (tc->id == P9_TVERSION) + n = P9_NOTAG; + else + n = p9_mux_get_tag(m); + + if (n < 0) + return ERR_PTR(-ENOMEM); + + p9_set_tag(tc, n); + +#ifdef CONFIG_NET_9P_DEBUG + if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { + char buf[150]; + + p9_printfcall(buf, sizeof(buf), tc, *m->extended); + printk(KERN_NOTICE "<<< %p %s\n", m, buf); + } +#endif + + spin_lock_init(&req->lock); + req->tag = n; + req->tcall = tc; + req->rcall = NULL; + req->err = 0; + req->cb = cb; + req->cba = cba; + req->flush = None; + + spin_lock(&m->lock); + list_add_tail(&req->req_list, &m->unsent_req_list); + spin_unlock(&m->lock); + + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) + queue_work(p9_mux_wq, &m->wq); + + return req; +} + +static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req) +{ + p9_mux_put_tag(m, req->tag); + kfree(req); +} + +static void p9_mux_flush_cb(struct p9_req *freq, void *a) +{ + p9_conn_req_callback cb; + int tag; + struct p9_conn *m; + struct p9_req *req, *rreq, *rptr; + + m = a; + P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, + freq->tcall, freq->rcall, freq->err, + freq->tcall->params.tflush.oldtag); + + spin_lock(&m->lock); + cb = NULL; + tag = freq->tcall->params.tflush.oldtag; + req = NULL; + list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { + if (rreq->tag == tag) { + req = rreq; + list_del(&req->req_list); + break; + } + } + spin_unlock(&m->lock); + + if (req) { + spin_lock(&req->lock); + req->flush = Flushed; + spin_unlock(&req->lock); + + if (req->cb) + (*req->cb) (req, req->cba); + else + kfree(req->rcall); + + wake_up(&m->equeue); + } + + kfree(freq->tcall); + kfree(freq->rcall); + p9_mux_free_request(m, freq); +} + +static int +p9_mux_flush_request(struct p9_conn *m, struct p9_req *req) +{ + struct p9_fcall *fc; + struct p9_req *rreq, *rptr; + + P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); + + /* if a response was received for a request, do nothing */ + spin_lock(&req->lock); + if (req->rcall || req->err) { + spin_unlock(&req->lock); + P9_DPRINTK(P9_DEBUG_MUX, + "mux %p req %p response already received\n", m, req); + return 0; + } + + req->flush = Flushing; + spin_unlock(&req->lock); + + spin_lock(&m->lock); + /* if the request is not sent yet, just remove it from the list */ + list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { + if (rreq->tag == req->tag) { + P9_DPRINTK(P9_DEBUG_MUX, + "mux %p req %p request is not sent yet\n", m, req); + list_del(&rreq->req_list); + req->flush = Flushed; + spin_unlock(&m->lock); + if (req->cb) + (*req->cb) (req, req->cba); + return 0; + } + } + spin_unlock(&m->lock); + + clear_thread_flag(TIF_SIGPENDING); + fc = p9_create_tflush(req->tag); + p9_send_request(m, fc, p9_mux_flush_cb, m); + return 1; +} + +static void +p9_conn_rpc_cb(struct p9_req *req, void *a) +{ + struct p9_mux_rpc *r; + + P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a); + r = a; + r->rcall = req->rcall; + r->err = req->err; + + if (req->flush != None && !req->err) + r->err = -ERESTARTSYS; + + wake_up(&r->wqueue); +} + +/** + * p9_mux_rpc - sends 9P request and waits until a response is available. + * The function can be interrupted. + * @m: mux data + * @tc: request to be sent + * @rc: pointer where a pointer to the response is stored + */ +int +p9_conn_rpc(struct p9_conn *m, struct p9_fcall *tc, + struct p9_fcall **rc) +{ + int err, sigpending; + unsigned long flags; + struct p9_req *req; + struct p9_mux_rpc r; + + r.err = 0; + r.tcall = tc; + r.rcall = NULL; + r.m = m; + init_waitqueue_head(&r.wqueue); + + if (rc) + *rc = NULL; + + sigpending = 0; + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } + + req = p9_send_request(m, tc, p9_conn_rpc_cb, &r); + if (IS_ERR(req)) { + err = PTR_ERR(req); + P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); + return err; + } + + err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); + if (r.err < 0) + err = r.err; + + if (err == -ERESTARTSYS && m->trans->status == Connected + && m->err == 0) { + if (p9_mux_flush_request(m, req)) { + /* wait until we get response of the flush message */ + do { + clear_thread_flag(TIF_SIGPENDING); + err = wait_event_interruptible(r.wqueue, + r.rcall || r.err); + } while (!r.rcall && !r.err && err == -ERESTARTSYS && + m->trans->status == Connected && !m->err); + + err = -ERESTARTSYS; + } + sigpending = 1; + } + + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + + if (rc) + *rc = r.rcall; + else + kfree(r.rcall); + + p9_mux_free_request(m, req); + if (err > 0) + err = -EIO; + + return err; +} +EXPORT_SYMBOL(p9_conn_rpc); + +#ifdef P9_NONBLOCK +/** + * p9_conn_rpcnb - sends 9P request without waiting for response. + * @m: mux data + * @tc: request to be sent + * @cb: callback function to be called when response arrives + * @cba: value to pass to the callback function + */ +int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, + p9_conn_req_callback cb, void *a) +{ + int err; + struct p9_req *req; + + req = p9_send_request(m, tc, cb, a); + if (IS_ERR(req)) { + err = PTR_ERR(req); + P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); + return PTR_ERR(req); + } + + P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); + return 0; +} +EXPORT_SYMBOL(p9_conn_rpcnb); +#endif /* P9_NONBLOCK */ + +/** + * p9_conn_cancel - cancel all pending requests with error + * @m: mux data + * @err: error code + */ +void p9_conn_cancel(struct p9_conn *m, int err) +{ + struct p9_req *req, *rtmp; + LIST_HEAD(cancel_list); + + P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); + m->err = err; + spin_lock(&m->lock); + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + list_move(&req->req_list, &cancel_list); + } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + list_move(&req->req_list, &cancel_list); + } + spin_unlock(&m->lock); + + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + list_del(&req->req_list); + if (!req->err) + req->err = err; + + if (req->cb) + (*req->cb) (req, req->cba); + else + kfree(req->rcall); + } + + wake_up(&m->equeue); +} +EXPORT_SYMBOL(p9_conn_cancel); + +static u16 p9_mux_get_tag(struct p9_conn *m) +{ + int tag; + + tag = p9_idpool_get(m->tagpool); + if (tag < 0) + return P9_NOTAG; + else + return (u16) tag; +} + +static void p9_mux_put_tag(struct p9_conn *m, u16 tag) +{ + if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool)) + p9_idpool_put(tag, m->tagpool); +} diff --git a/net/9p/sysctl.c b/net/9p/sysctl.c new file mode 100644 index 000000000000..e7fe706ab95a --- /dev/null +++ b/net/9p/sysctl.c @@ -0,0 +1,86 @@ +/* + * net/9p/sysctl.c + * + * 9P sysctl interface + * + * Copyright (C) 2007 by Latchesar Ionkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include +#include +#include + +enum { + P9_SYSCTL_NET = 487, + P9_SYSCTL_DEBUG = 1, +}; + +static ctl_table p9_table[] = { +#ifdef CONFIG_NET_9P_DEBUG + { + .ctl_name = P9_SYSCTL_DEBUG, + .procname = "debug", + .data = &p9_debug_level, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +#endif + { .ctl_name = 0 }, +}; + +static ctl_table p9_net_table[] = { + { + .ctl_name = P9_SYSCTL_NET, + .procname = "9p", + .maxlen = 0, + .mode = 0555, + .child = p9_table, + }, + { .ctl_name = 0 }, +}; + +static ctl_table p9_ctl_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .maxlen = 0, + .mode = 0555, + .child = p9_net_table, + }, + { .ctl_name = 0 }, +}; + +static struct ctl_table_header *p9_table_header; + +int __init p9_sysctl_register(void) +{ + p9_table_header = register_sysctl_table(p9_ctl_table); + if (!p9_table_header) + return -ENOMEM; + + return 0; +} + +void __exit p9_sysctl_unregister(void) +{ + unregister_sysctl_table(p9_table_header); +} diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c new file mode 100644 index 000000000000..fd636e94358f --- /dev/null +++ b/net/9p/trans_fd.c @@ -0,0 +1,363 @@ +/* + * linux/fs/9p/trans_fd.c + * + * Fd transport layer. Includes deprecated socket layer. + * + * Copyright (C) 2006 by Russ Cox + * Copyright (C) 2004-2005 by Latchesar Ionkov + * Copyright (C) 2004-2005 by Eric Van Hensbergen + * Copyright (C) 1997-2002 by Ron Minnich + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define P9_PORT 564 + +struct p9_trans_fd { + struct file *rd; + struct file *wr; +}; + +static int p9_socket_open(struct p9_transport *trans, struct socket *csocket); +static int p9_fd_open(struct p9_transport *trans, int rfd, int wfd); +static int p9_fd_read(struct p9_transport *trans, void *v, int len); +static int p9_fd_write(struct p9_transport *trans, void *v, int len); +static unsigned int p9_fd_poll(struct p9_transport *trans, + struct poll_table_struct *pt); +static void p9_fd_close(struct p9_transport *trans); + +struct p9_transport *p9_trans_create_tcp(const char *addr, int port) +{ + int err; + struct p9_transport *trans; + struct socket *csocket; + struct sockaddr_in sin_server; + + csocket = NULL; + trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + sin_server.sin_family = AF_INET; + sin_server.sin_addr.s_addr = in_aton(addr); + sin_server.sin_port = htons(port); + sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + + if (!csocket) { + P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); + err = -EIO; + goto error; + } + + err = csocket->ops->connect(csocket, + (struct sockaddr *)&sin_server, + sizeof(struct sockaddr_in), 0); + if (err < 0) { + P9_EPRINTK(KERN_ERR, + "p9_trans_tcp: problem connecting socket to %s\n", + addr); + goto error; + } + + err = p9_socket_open(trans, csocket); + if (err < 0) + goto error; + + return trans; + +error: + if (csocket) + sock_release(csocket); + + kfree(trans); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_trans_create_tcp); + +struct p9_transport *p9_trans_create_unix(const char *addr) +{ + int err; + struct socket *csocket; + struct sockaddr_un sun_server; + struct p9_transport *trans; + + csocket = NULL; + trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + if (strlen(addr) > UNIX_PATH_MAX) { + P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", + addr); + err = -ENAMETOOLONG; + goto error; + } + + sun_server.sun_family = PF_UNIX; + strcpy(sun_server.sun_path, addr); + sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, + sizeof(struct sockaddr_un) - 1, 0); + if (err < 0) { + P9_EPRINTK(KERN_ERR, + "p9_trans_unix: problem connecting socket: %s: %d\n", + addr, err); + goto error; + } + + err = p9_socket_open(trans, csocket); + if (err < 0) + goto error; + + return trans; + +error: + if (csocket) + sock_release(csocket); + + kfree(trans); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_trans_create_unix); + +struct p9_transport *p9_trans_create_fd(int rfd, int wfd) +{ + int err; + struct p9_transport *trans; + + if (rfd == ~0 || wfd == ~0) { + printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + return ERR_PTR(-ENOPROTOOPT); + } + + trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + err = p9_fd_open(trans, rfd, wfd); + if (err < 0) + goto error; + + return trans; + +error: + kfree(trans); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_trans_create_fd); + +static int p9_socket_open(struct p9_transport *trans, struct socket *csocket) +{ + int fd, ret; + + csocket->sk->sk_allocation = GFP_NOIO; + fd = sock_map_fd(csocket); + if (fd < 0) { + P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); + return fd; + } + + ret = p9_fd_open(trans, fd, fd); + if (ret < 0) { + P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n"); + sockfd_put(csocket); + return ret; + } + + ((struct p9_trans_fd *)trans->priv)->rd->f_flags |= O_NONBLOCK; + + return 0; +} + +static int p9_fd_open(struct p9_transport *trans, int rfd, int wfd) +{ + struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), + GFP_KERNEL); + if (!ts) + return -ENOMEM; + + ts->rd = fget(rfd); + ts->wr = fget(wfd); + if (!ts->rd || !ts->wr) { + if (ts->rd) + fput(ts->rd); + if (ts->wr) + fput(ts->wr); + kfree(ts); + return -EIO; + } + + trans->priv = ts; + trans->status = Connected; + + return 0; +} + +/** + * p9_fd_read- read from a fd + * @v9ses: session information + * @v: buffer to receive data into + * @len: size of receive buffer + * + */ +static int p9_fd_read(struct p9_transport *trans, void *v, int len) +{ + int ret; + struct p9_trans_fd *ts = NULL; + + if (trans && trans->status != Disconnected) + ts = trans->priv; + + if (!ts) + return -EREMOTEIO; + + if (!(ts->rd->f_flags & O_NONBLOCK)) + P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); + + ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + trans->status = Disconnected; + return ret; +} + +/** + * p9_fd_write - write to a socket + * @v9ses: session information + * @v: buffer to send data from + * @len: size of send buffer + * + */ +static int p9_fd_write(struct p9_transport *trans, void *v, int len) +{ + int ret; + mm_segment_t oldfs; + struct p9_trans_fd *ts = NULL; + + if (trans && trans->status != Disconnected) + ts = trans->priv; + + if (!ts) + return -EREMOTEIO; + + if (!(ts->wr->f_flags & O_NONBLOCK)) + P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); + + oldfs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); + set_fs(oldfs); + + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + trans->status = Disconnected; + return ret; +} + +static unsigned int +p9_fd_poll(struct p9_transport *trans, struct poll_table_struct *pt) +{ + int ret, n; + struct p9_trans_fd *ts = NULL; + mm_segment_t oldfs; + + if (trans && trans->status == Connected) + ts = trans->priv; + + if (!ts) + return -EREMOTEIO; + + if (!ts->rd->f_op || !ts->rd->f_op->poll) + return -EIO; + + if (!ts->wr->f_op || !ts->wr->f_op->poll) + return -EIO; + + oldfs = get_fs(); + set_fs(get_ds()); + + ret = ts->rd->f_op->poll(ts->rd, pt); + if (ret < 0) + goto end; + + if (ts->rd != ts->wr) { + n = ts->wr->f_op->poll(ts->wr, pt); + if (n < 0) { + ret = n; + goto end; + } + ret = (ret & ~POLLOUT) | (n & ~POLLIN); + } + +end: + set_fs(oldfs); + return ret; +} + +/** + * p9_sock_close - shutdown socket + * @trans: private socket structure + * + */ +static void p9_fd_close(struct p9_transport *trans) +{ + struct p9_trans_fd *ts; + + if (!trans) + return; + + ts = xchg(&trans->priv, NULL); + + if (!ts) + return; + + trans->status = Disconnected; + if (ts->rd) + fput(ts->rd); + if (ts->wr) + fput(ts->wr); + kfree(ts); +} + diff --git a/net/9p/util.c b/net/9p/util.c new file mode 100644 index 000000000000..22077b79395d --- /dev/null +++ b/net/9p/util.c @@ -0,0 +1,125 @@ +/* + * net/9p/util.c + * + * This file contains some helper functions + * + * Copyright (C) 2007 by Latchesar Ionkov + * Copyright (C) 2004 by Eric Van Hensbergen + * Copyright (C) 2002 by Ron Minnich + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct p9_idpool { + struct semaphore lock; + struct idr pool; +}; + +struct p9_idpool *p9_idpool_create(void) +{ + struct p9_idpool *p; + + p = kmalloc(sizeof(struct p9_idpool), GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + init_MUTEX(&p->lock); + idr_init(&p->pool); + + return p; +} +EXPORT_SYMBOL(p9_idpool_create); + +void p9_idpool_destroy(struct p9_idpool *p) +{ + idr_destroy(&p->pool); + kfree(p); +} +EXPORT_SYMBOL(p9_idpool_destroy); + +/** + * p9_idpool_get - allocate numeric id from pool + * @p - pool to allocate from + * + * XXX - This seems to be an awful generic function, should it be in idr.c with + * the lock included in struct idr? + */ + +int p9_idpool_get(struct p9_idpool *p) +{ + int i = 0; + int error; + +retry: + if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) + return 0; + + if (down_interruptible(&p->lock) == -EINTR) { + P9_EPRINTK(KERN_WARNING, "Interrupted while locking\n"); + return -1; + } + + /* no need to store exactly p, we just need something non-null */ + error = idr_get_new(&p->pool, p, &i); + up(&p->lock); + + if (error == -EAGAIN) + goto retry; + else if (error) + return -1; + + return i; +} +EXPORT_SYMBOL(p9_idpool_get); + +/** + * p9_idpool_put - release numeric id from pool + * @p - pool to allocate from + * + * XXX - This seems to be an awful generic function, should it be in idr.c with + * the lock included in struct idr? + */ + +void p9_idpool_put(int id, struct p9_idpool *p) +{ + if (down_interruptible(&p->lock) == -EINTR) { + P9_EPRINTK(KERN_WARNING, "Interrupted while locking\n"); + return; + } + idr_remove(&p->pool, id); + up(&p->lock); +} +EXPORT_SYMBOL(p9_idpool_put); + +/** + * p9_idpool_check - check if the specified id is available + * @id - id to check + * @p - pool + */ +int p9_idpool_check(int id, struct p9_idpool *p) +{ + return idr_find(&p->pool, id) != NULL; +} +EXPORT_SYMBOL(p9_idpool_check); diff --git a/net/Kconfig b/net/Kconfig index f3de72978ab6..cdba08ca2efe 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -227,6 +227,7 @@ source "net/ieee80211/Kconfig" endmenu source "net/rfkill/Kconfig" +source "net/9p/Kconfig" endif # if NET endmenu # Networking diff --git a/net/Makefile b/net/Makefile index a87a88963432..bbe7d2a41486 100644 --- a/net/Makefile +++ b/net/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ +obj-$(CONFIG_NET_9P) += 9p/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o -- cgit v1.2.3 From e46662be7fddde3464bf208317542c2f8df13d0b Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Wed, 11 Jul 2007 15:13:54 -0600 Subject: net/9p: change net/9p module name to 9pnet Change module name of net/9p module from 9p.ko to 9pnet.ko. fs/9p module already uses 9p.ko name. Signed-off-by: Latchesar Ionkov --- net/9p/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/Makefile b/net/9p/Makefile index ac46cb91900d..85b3a7838acf 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,6 +1,6 @@ -obj-$(CONFIG_NET_9P) := 9p.o +obj-$(CONFIG_NET_9P) := 9pnet.o -9p-objs := \ +9pnet-objs := \ mod.o \ trans_fd.o \ mux.o \ @@ -10,4 +10,4 @@ obj-$(CONFIG_NET_9P) := 9p.o fcprint.o \ util.o \ -9p-$(CONFIG_SYSCTL) += sysctl.o +9pnet-$(CONFIG_SYSCTL) += sysctl.o -- cgit v1.2.3 From 1d6b5602381524c339af2c2fdfe42ad0a01464a4 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Wed, 11 Jul 2007 15:14:46 -0600 Subject: net/9p: set error to EREMOTEIO if trans->write returns zero If trans->write returns 0, p9_write_work goes through the error path, but sets the error code to zero. This patch sets the error code to EREMOTEIO if trans->write returns zero value. Signed-off-by: Latchesar Ionkov --- net/9p/mux.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/mux.c b/net/9p/mux.c index c3aa87bc8b97..acb038810f39 100644 --- a/net/9p/mux.c +++ b/net/9p/mux.c @@ -505,8 +505,12 @@ again: return; } - if (err <= 0) + if (err < 0) + goto error; + else if (err == 0) { + err = -EREMOTEIO; goto error; + } m->wpos += err; if (m->wpos == m->wsize) -- cgit v1.2.3 From 0af8887ebf4556a76680a61b0bb156d934702c63 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 13 Jul 2007 16:47:58 -0500 Subject: 9p: fix a race condition bug in umount which caused a segfault umounting partitions after heavy activity would sometimes trigger a segmentation violation. This fix appears to remove that problem. Fix originally provided by Latchesar Ionkov. Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index bb2b8a3af196..cb170750337c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -127,12 +127,12 @@ void p9_client_destroy(struct p9_client *clnt) clnt->trans = NULL; } - if (clnt->fidpool) - p9_idpool_destroy(clnt->fidpool); - list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) p9_fid_destroy(fid); + if (clnt->fidpool) + p9_idpool_destroy(clnt->fidpool); + kfree(clnt); } EXPORT_SYMBOL(p9_client_destroy); -- cgit v1.2.3 From e6c9116d1dc984cb7ecf1b0fe26ca4a8ab36bb57 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jul 2007 18:50:15 -0700 Subject: [RFKILL]: fix net/rfkill/rfkill-input.c bug on 64-bit systems Subject: [patch] net/input: fix net/rfkill/rfkill-input.c bug on 64-bit systems this recent commit: commit cf4328cd949c2086091c62c5685f1580fe9b55e4 Author: Ivo van Doorn Date: Mon May 7 00:34:20 2007 -0700 [NET]: rfkill: add support for input key to control wireless radio added this 64-bit bug: .... unsigned int flags; spin_lock_irqsave(&task->lock, flags); .... irq 'flags' must be unsigned long, not unsigned int. The -rt tree has strict checks about this on 64-bit so this triggered a build failure. Signed-off-by: Ingo Molnar Signed-off-by: David S. Miller --- net/rfkill/rfkill-input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index e5c840c30284..230e35c59786 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -55,7 +55,7 @@ static void rfkill_task_handler(struct work_struct *work) static void rfkill_schedule_toggle(struct rfkill_task *task) { - unsigned int flags; + unsigned long flags; spin_lock_irqsave(&task->lock, flags); -- cgit v1.2.3 From 24023451c8df726692e2f52288a20870d13b501f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:51:31 -0700 Subject: [NET]: Add net_device change_rx_mode callback Currently the set_multicast_list (and set_rx_mode) callbacks are responsible for configuring the device according to the IFF_PROMISC, IFF_MULTICAST and IFF_ALLMULTI flags and the mc_list (and uc_list in case of set_rx_mode). These callbacks can be invoked from BH context without the rtnl_mutex by dev_mc_add/dev_mc_delete, which makes reading the device flags and promiscous/allmulti count racy. For real hardware drivers that just commit all changes to the hardware this is not a real problem since the stack guarantees to call them for every change, so at least the final call will not race and commit the correct configuration to the hardware. For software devices that want to synchronize promiscous and multicast state to an underlying device however this can cause corruption of the underlying device's flags or promisc/allmulti counts. When the software device is concurrently put in promiscous or allmulti mode while set_multicast_list is invoked from bottem half context, the device might synchronize the change to the underlying device without holding the rtnl_mutex, which races with concurrent changes to the underlying device. Add a dev->change_rx_flags hook that is invoked when any of the flags that affect rx filtering change (under the rtnl_mutex), which allows drivers to perform synchronization immediately and only synchronize the address lists in set_multicast_list/set_rx_mode. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 17 ++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 79cc3dab4be7..f193aba30384 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -516,6 +516,9 @@ struct net_device void *saddr, unsigned len); int (*rebuild_header)(struct sk_buff *skb); +#define HAVE_CHANGE_RX_FLAGS + void (*change_rx_flags)(struct net_device *dev, + int flags); #define HAVE_SET_RX_MODE void (*set_rx_mode)(struct net_device *dev); #define HAVE_MULTICAST diff --git a/net/core/dev.c b/net/core/dev.c index 96443055324e..59ec811d2b54 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2521,6 +2521,8 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; else @@ -2535,6 +2537,9 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current->audit_context)); + + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_PROMISC); } } @@ -2573,11 +2578,16 @@ void dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + dev->flags |= IFF_ALLMULTI; if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; - if (dev->flags ^ old_flags) + if (dev->flags ^ old_flags) { + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); + } } /* @@ -2778,6 +2788,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) int ret, changes; int old_flags = dev->flags; + ASSERT_RTNL(); + /* * Set the flags on our device. */ @@ -2792,6 +2804,9 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ + if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + dev->change_rx_flags(dev, IFF_MULTICAST); + dev_set_rx_mode(dev); /* -- cgit v1.2.3 From a0a400d79e3dd7843e7e81baa3ef2957bdc292d0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:52:02 -0700 Subject: [NET]: dev_mcast: add multicast list synchronization helpers The method drivers currently use to synchronize multicast lists is not very pretty: - walk the multicast list - search each entry on a copy of the previous list - if new add to lower device - walk the copy of the previous list - search each entry on the current list - if removed delete from lower device - copy entire list This patch adds a new field to struct dev_addr_list to store the synchronization state and adds two helper functions for synchronization and cleanup. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++ net/core/dev_mcast.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f193aba30384..e5af458ab04b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -190,6 +190,7 @@ struct dev_addr_list struct dev_addr_list *next; u8 da_addr[MAX_ADDR_LEN]; u8 da_addrlen; + u8 da_synced; int da_users; int da_gusers; }; @@ -1103,6 +1104,8 @@ extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); +extern int dev_mc_sync(struct net_device *to, struct net_device *from); +extern void dev_mc_unsync(struct net_device *to, struct net_device *from); extern void dev_mc_discard(struct net_device *dev); extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index aa38100601fb..235a2a8a0d05 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -102,6 +102,81 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) return err; } +/** + * dev_mc_sync - Synchronize device's multicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_multicast_list + * function of layered software devices. + */ +int dev_mc_sync(struct net_device *to, struct net_device *from) +{ + struct dev_addr_list *da; + int err = 0; + + netif_tx_lock_bh(to); + for (da = from->mc_list; da != NULL; da = da->next) { + if (!da->da_synced) { + err = __dev_addr_add(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + if (err < 0) + break; + da->da_synced = 1; + da->da_users++; + } else if (da->da_users == 1) { + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } + } + if (!err) + __dev_set_rx_mode(to); + netif_tx_unlock_bh(to); + + return err; +} +EXPORT_SYMBOL(dev_mc_sync); + + +/** + * dev_mc_unsync - Remove synchronized addresses from the destination + * device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_mc_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. + */ +void dev_mc_unsync(struct net_device *to, struct net_device *from) +{ + struct dev_addr_list *da; + + netif_tx_lock_bh(from); + netif_tx_lock_bh(to); + + for (da = from->mc_list; da != NULL; da = da->next) { + if (!da->da_synced) + continue; + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + da->da_synced = 0; + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } + __dev_set_rx_mode(to); + + netif_tx_unlock_bh(to); + netif_tx_unlock_bh(from); +} +EXPORT_SYMBOL(dev_mc_unsync); + /* * Discard multicast list when a device is downed */ -- cgit v1.2.3 From 6c78dcbd47a68a7d25d2bee7a6c74b9136cb5fde Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:52:56 -0700 Subject: [VLAN]: Fix promiscous/allmulti synchronization races The set_multicast_list function may be called without holding the rtnl mutex, resulting in races when changing the underlying device's promiscous and allmulti state. Use the change_rx_mode hook, which is always invoked under the rtnl. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 -- net/8021q/vlan.c | 1 + net/8021q/vlan.h | 1 + net/8021q/vlan_dev.c | 38 ++++++++++++++++++++------------------ 4 files changed, 22 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 61a57dc2ac99..7f71df4c952f 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -132,8 +132,6 @@ struct vlan_dev_info { * made, in order to feed the right changes down * to the real hardware... */ - int old_allmulti; /* similar to above. */ - int old_promiscuity; /* similar to above. */ struct net_device *real_dev; /* the underlying device/interface */ unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index abb9900edb3f..39bdcc25c150 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -373,6 +373,7 @@ void vlan_setup(struct net_device *new_dev) new_dev->open = vlan_dev_open; new_dev->stop = vlan_dev_stop; new_dev->set_multicast_list = vlan_dev_set_multicast_list; + new_dev->change_rx_flags = vlan_change_rx_flags; new_dev->destructor = free_netdev; new_dev->do_ioctl = vlan_dev_ioctl; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 62ce1c519aab..7df5b2935579 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -69,6 +69,7 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev, u32 flag, short flag_val); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); +void vlan_change_rx_flags(struct net_device *dev, int change); void vlan_dev_set_multicast_list(struct net_device *vlan_dev); int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index d4a62d1b52b4..dec7e62b2e10 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -712,6 +712,11 @@ int vlan_dev_open(struct net_device *dev) } memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(real_dev, 1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(real_dev, 1); + return 0; } @@ -721,6 +726,11 @@ int vlan_dev_stop(struct net_device *dev) vlan_flush_mc_list(dev); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(real_dev, -1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(real_dev, -1); + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); @@ -754,34 +764,26 @@ int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } +void vlan_change_rx_flags(struct net_device *dev, int change) +{ + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); +} + /** Taken from Gleb + Lennert's VLAN code, and modified... */ void vlan_dev_set_multicast_list(struct net_device *vlan_dev) { struct dev_mc_list *dmi; struct net_device *real_dev; - int inc; if (vlan_dev && (vlan_dev->priv_flags & IFF_802_1Q_VLAN)) { /* Then it's a real vlan device, as far as we can tell.. */ real_dev = VLAN_DEV_INFO(vlan_dev)->real_dev; - /* compare the current promiscuity to the last promisc we had.. */ - inc = vlan_dev->promiscuity - VLAN_DEV_INFO(vlan_dev)->old_promiscuity; - if (inc) { - printk(KERN_INFO "%s: dev_set_promiscuity(master, %d)\n", - vlan_dev->name, inc); - dev_set_promiscuity(real_dev, inc); /* found in dev.c */ - VLAN_DEV_INFO(vlan_dev)->old_promiscuity = vlan_dev->promiscuity; - } - - inc = vlan_dev->allmulti - VLAN_DEV_INFO(vlan_dev)->old_allmulti; - if (inc) { - printk(KERN_INFO "%s: dev_set_allmulti(master, %d)\n", - vlan_dev->name, inc); - dev_set_allmulti(real_dev, inc); /* dev.c */ - VLAN_DEV_INFO(vlan_dev)->old_allmulti = vlan_dev->allmulti; - } - /* looking for addresses to add to master's list */ for (dmi = vlan_dev->mc_list; dmi != NULL; dmi = dmi->next) { if (vlan_should_add_mc(dmi, VLAN_DEV_INFO(vlan_dev)->old_mc_list)) { -- cgit v1.2.3 From 56addd6eeeb4e11f5a0af7093ca078e0f29140e0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:53:28 -0700 Subject: [VLAN]: Use multicast list synchronization helpers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 5 -- net/8021q/vlan_dev.c | 131 +----------------------------------------------- 2 files changed, 2 insertions(+), 134 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 7f71df4c952f..f8443fdb124a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -127,11 +127,6 @@ struct vlan_dev_info { * like DHCP that use packet-filtering and don't understand * 802.1Q */ - struct dev_mc_list *old_mc_list; /* old multi-cast list for the VLAN interface.. - * we save this so we can tell what changes were - * made, in order to feed the right changes down - * to the real hardware... - */ struct net_device *real_dev; /* the underlying device/interface */ unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index dec7e62b2e10..4d2aa4dd42ac 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -612,90 +612,6 @@ void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) *result = VLAN_DEV_INFO(dev)->vlan_id; } -static inline int vlan_dmi_equals(struct dev_mc_list *dmi1, - struct dev_mc_list *dmi2) -{ - return ((dmi1->dmi_addrlen == dmi2->dmi_addrlen) && - (memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0)); -} - -/** dmi is a single entry into a dev_mc_list, a single node. mc_list is - * an entire list, and we'll iterate through it. - */ -static int vlan_should_add_mc(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) -{ - struct dev_mc_list *idmi; - - for (idmi = mc_list; idmi != NULL; ) { - if (vlan_dmi_equals(dmi, idmi)) { - if (dmi->dmi_users > idmi->dmi_users) - return 1; - else - return 0; - } else { - idmi = idmi->next; - } - } - - return 1; -} - -static inline void vlan_destroy_mc_list(struct dev_mc_list *mc_list) -{ - struct dev_mc_list *dmi = mc_list; - struct dev_mc_list *next; - - while(dmi) { - next = dmi->next; - kfree(dmi); - dmi = next; - } -} - -static void vlan_copy_mc_list(struct dev_mc_list *mc_list, struct vlan_dev_info *vlan_info) -{ - struct dev_mc_list *dmi, *new_dmi; - - vlan_destroy_mc_list(vlan_info->old_mc_list); - vlan_info->old_mc_list = NULL; - - for (dmi = mc_list; dmi != NULL; dmi = dmi->next) { - new_dmi = kmalloc(sizeof(*new_dmi), GFP_ATOMIC); - if (new_dmi == NULL) { - printk(KERN_ERR "vlan: cannot allocate memory. " - "Multicast may not work properly from now.\n"); - return; - } - - /* Copy whole structure, then make new 'next' pointer */ - *new_dmi = *dmi; - new_dmi->next = vlan_info->old_mc_list; - vlan_info->old_mc_list = new_dmi; - } -} - -static void vlan_flush_mc_list(struct net_device *dev) -{ - struct dev_mc_list *dmi = dev->mc_list; - - while (dmi) { - printk(KERN_DEBUG "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from vlan interface\n", - dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - dmi = dev->mc_list; - } - - /* dev->mc_list is NULL by the time we get here. */ - vlan_destroy_mc_list(VLAN_DEV_INFO(dev)->old_mc_list); - VLAN_DEV_INFO(dev)->old_mc_list = NULL; -} - int vlan_dev_open(struct net_device *dev) { struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); @@ -724,8 +640,7 @@ int vlan_dev_stop(struct net_device *dev) { struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; - vlan_flush_mc_list(dev); - + dev_mc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); if (dev->flags & IFF_PROMISC) @@ -777,47 +692,5 @@ void vlan_change_rx_flags(struct net_device *dev, int change) /** Taken from Gleb + Lennert's VLAN code, and modified... */ void vlan_dev_set_multicast_list(struct net_device *vlan_dev) { - struct dev_mc_list *dmi; - struct net_device *real_dev; - - if (vlan_dev && (vlan_dev->priv_flags & IFF_802_1Q_VLAN)) { - /* Then it's a real vlan device, as far as we can tell.. */ - real_dev = VLAN_DEV_INFO(vlan_dev)->real_dev; - - /* looking for addresses to add to master's list */ - for (dmi = vlan_dev->mc_list; dmi != NULL; dmi = dmi->next) { - if (vlan_should_add_mc(dmi, VLAN_DEV_INFO(vlan_dev)->old_mc_list)) { - dev_mc_add(real_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - printk(KERN_DEBUG "%s: add %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address to master interface\n", - vlan_dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - } - } - - /* looking for addresses to delete from master's list */ - for (dmi = VLAN_DEV_INFO(vlan_dev)->old_mc_list; dmi != NULL; dmi = dmi->next) { - if (vlan_should_add_mc(dmi, vlan_dev->mc_list)) { - /* if we think we should add it to the new list, then we should really - * delete it from the real list on the underlying device. - */ - dev_mc_delete(real_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - printk(KERN_DEBUG "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from master interface\n", - vlan_dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - } - } - - /* save multicast list */ - vlan_copy_mc_list(vlan_dev->mc_list, VLAN_DEV_INFO(vlan_dev)); - } + dev_mc_sync(VLAN_DEV_INFO(vlan_dev)->real_dev, vlan_dev); } -- cgit v1.2.3 From b863ceb7ddcea8c55fcf1d7b2ac591d50aa7ed53 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:55:06 -0700 Subject: [NET]: Add macvlan driver Add macvlan driver, which allows to create virtual ethernet devices based on MAC address. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- MAINTAINERS | 6 + drivers/net/Kconfig | 10 + drivers/net/Makefile | 1 + drivers/net/macvlan.c | 496 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_macvlan.h | 9 + include/linux/netdevice.h | 2 + net/core/dev.c | 26 +++ 7 files changed, 550 insertions(+) create mode 100644 drivers/net/macvlan.c create mode 100644 include/linux/if_macvlan.h (limited to 'net') diff --git a/MAINTAINERS b/MAINTAINERS index 845fbf4478b3..360eb581953b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2330,6 +2330,12 @@ W: http://linuxwireless.org/ T: git kernel.org:/pub/scm/linux/kernel/git/jbenc/mac80211.git S: Maintained +MACVLAN DRIVER +P: Patrick McHardy +M: kaber@trash.net +L: netdev@vger.kernel.org +S: Maintained + MARVELL YUKON / SYSKONNECT DRIVER P: Mirko Lindner M: mlindner@syskonnect.de diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ba314adf68b8..d17d64eb7065 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -82,6 +82,16 @@ config BONDING To compile this driver as a module, choose M here: the module will be called bonding. +config MACVLAN + tristate "MAC-VLAN support (EXPERIMENTAL)" + depends on EXPERIMENTAL + ---help--- + This allows one to create virtual interfaces that map packets to + or from specific MAC addresses to a particular interface. + + To compile this driver as a module, choose M here: the module + will be called macvlan. + config EQUALIZER tristate "EQL (serial line load balancing) support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index a2241e6e1457..c26b8674213c 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -128,6 +128,7 @@ obj-$(CONFIG_SLHC) += slhc.o obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_IFB) += ifb.o +obj-$(CONFIG_MACVLAN) += macvlan.o obj-$(CONFIG_DE600) += de600.o obj-$(CONFIG_DE620) += de620.o obj-$(CONFIG_LANCE) += lance.o diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c new file mode 100644 index 000000000000..dc74d006e01f --- /dev/null +++ b/drivers/net/macvlan.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2007 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * The code this is based on carried the following copyright notice: + * --- + * (C) Copyright 2001-2006 + * Alex Zeffertt, Cambridge Broadband Ltd, ajz@cambridgebroadband.com + * Re-worked by Ben Greear + * --- + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MACVLAN_HASH_SIZE (1 << BITS_PER_BYTE) + +struct macvlan_port { + struct net_device *dev; + struct hlist_head vlan_hash[MACVLAN_HASH_SIZE]; + struct list_head vlans; +}; + +struct macvlan_dev { + struct net_device *dev; + struct list_head list; + struct hlist_node hlist; + struct macvlan_port *port; + struct net_device *lowerdev; +}; + + +static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, + const unsigned char *addr) +{ + struct macvlan_dev *vlan; + struct hlist_node *n; + + hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[addr[5]], hlist) { + if (!compare_ether_addr(vlan->dev->dev_addr, addr)) + return vlan; + } + return NULL; +} + +static void macvlan_broadcast(struct sk_buff *skb, + const struct macvlan_port *port) +{ + const struct ethhdr *eth = eth_hdr(skb); + const struct macvlan_dev *vlan; + struct hlist_node *n; + struct net_device *dev; + struct sk_buff *nskb; + unsigned int i; + + for (i = 0; i < MACVLAN_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[i], hlist) { + dev = vlan->dev; + if (unlikely(!(dev->flags & IFF_UP))) + continue; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb == NULL) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + continue; + } + + dev->stats.rx_bytes += skb->len + ETH_HLEN; + dev->stats.rx_packets++; + dev->stats.multicast++; + dev->last_rx = jiffies; + + nskb->dev = dev; + if (!compare_ether_addr(eth->h_dest, dev->broadcast)) + nskb->pkt_type = PACKET_BROADCAST; + else + nskb->pkt_type = PACKET_MULTICAST; + + netif_rx(nskb); + } + } +} + +/* called under rcu_read_lock() from netif_receive_skb */ +static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) +{ + const struct ethhdr *eth = eth_hdr(skb); + const struct macvlan_port *port; + const struct macvlan_dev *vlan; + struct net_device *dev; + + port = rcu_dereference(skb->dev->macvlan_port); + if (port == NULL) + return skb; + + if (is_multicast_ether_addr(eth->h_dest)) { + macvlan_broadcast(skb, port); + return skb; + } + + vlan = macvlan_hash_lookup(port, eth->h_dest); + if (vlan == NULL) + return skb; + + dev = vlan->dev; + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + return NULL; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb == NULL) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + return NULL; + } + + dev->stats.rx_bytes += skb->len + ETH_HLEN; + dev->stats.rx_packets++; + dev->last_rx = jiffies; + + skb->dev = dev; + skb->pkt_type = PACKET_HOST; + + netif_rx(skb); + return NULL; +} + +static int macvlan_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct macvlan_dev *vlan = netdev_priv(dev); + unsigned int len = skb->len; + int ret; + + skb->dev = vlan->lowerdev; + ret = dev_queue_xmit(skb); + + if (likely(ret == NET_XMIT_SUCCESS)) { + dev->stats.tx_packets++; + dev->stats.tx_bytes += len; + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } + return NETDEV_TX_OK; +} + +static int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, void *saddr, + unsigned len) +{ + const struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *lowerdev = vlan->lowerdev; + + return lowerdev->hard_header(skb, lowerdev, type, daddr, + saddr ? : dev->dev_addr, len); +} + +static int macvlan_open(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_port *port = vlan->port; + struct net_device *lowerdev = vlan->lowerdev; + int err; + + err = dev_unicast_add(lowerdev, dev->dev_addr, ETH_ALEN); + if (err < 0) + return err; + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, 1); + + hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[dev->dev_addr[5]]); + return 0; +} + +static int macvlan_stop(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *lowerdev = vlan->lowerdev; + + dev_mc_unsync(lowerdev, dev); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, -1); + + dev_unicast_delete(lowerdev, dev->dev_addr, ETH_ALEN); + + hlist_del_rcu(&vlan->hlist); + synchronize_rcu(); + return 0; +} + +static void macvlan_change_rx_flags(struct net_device *dev, int change) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *lowerdev = vlan->lowerdev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); +} + +static void macvlan_set_multicast_list(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + dev_mc_sync(vlan->lowerdev, dev); +} + +static int macvlan_change_mtu(struct net_device *dev, int new_mtu) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + if (new_mtu < 68 || vlan->lowerdev->mtu < new_mtu) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +/* + * macvlan network devices have devices nesting below it and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key macvlan_netdev_xmit_lock_key; + +#define MACVLAN_FEATURES \ + (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ + NETIF_F_TSO_ECN | NETIF_F_TSO6) + +#define MACVLAN_STATE_MASK \ + ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) + +static int macvlan_init(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + const struct net_device *lowerdev = vlan->lowerdev; + + dev->state = (dev->state & ~MACVLAN_STATE_MASK) | + (lowerdev->state & MACVLAN_STATE_MASK); + dev->features = lowerdev->features & MACVLAN_FEATURES; + dev->iflink = lowerdev->ifindex; + + lockdep_set_class(&dev->_xmit_lock, &macvlan_netdev_xmit_lock_key); + return 0; +} + +static void macvlan_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + snprintf(drvinfo->driver, 32, "macvlan"); + snprintf(drvinfo->version, 32, "0.1"); +} + +static u32 macvlan_ethtool_get_rx_csum(struct net_device *dev) +{ + const struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *lowerdev = vlan->lowerdev; + + if (lowerdev->ethtool_ops->get_rx_csum == NULL) + return 0; + return lowerdev->ethtool_ops->get_rx_csum(lowerdev); +} + +static const struct ethtool_ops macvlan_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_rx_csum = macvlan_ethtool_get_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .get_tso = ethtool_op_get_tso, + .get_ufo = ethtool_op_get_ufo, + .get_sg = ethtool_op_get_sg, + .get_drvinfo = macvlan_ethtool_get_drvinfo, +}; + +static void macvlan_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->init = macvlan_init; + dev->open = macvlan_open; + dev->stop = macvlan_stop; + dev->change_mtu = macvlan_change_mtu; + dev->change_rx_flags = macvlan_change_rx_flags; + dev->set_multicast_list = macvlan_set_multicast_list; + dev->hard_header = macvlan_hard_header; + dev->hard_start_xmit = macvlan_hard_start_xmit; + dev->destructor = free_netdev; + dev->ethtool_ops = &macvlan_ethtool_ops; + dev->tx_queue_len = 0; +} + +static int macvlan_port_create(struct net_device *dev) +{ + struct macvlan_port *port; + unsigned int i; + + if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) + return -EINVAL; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (port == NULL) + return -ENOMEM; + + port->dev = dev; + INIT_LIST_HEAD(&port->vlans); + for (i = 0; i < MACVLAN_HASH_SIZE; i++) + INIT_HLIST_HEAD(&port->vlan_hash[i]); + rcu_assign_pointer(dev->macvlan_port, port); + return 0; +} + +static void macvlan_port_destroy(struct net_device *dev) +{ + struct macvlan_port *port = dev->macvlan_port; + + rcu_assign_pointer(dev->macvlan_port, NULL); + synchronize_rcu(); + kfree(port); +} + +static void macvlan_transfer_operstate(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + const struct net_device *lowerdev = vlan->lowerdev; + + if (lowerdev->operstate == IF_OPER_DORMANT) + netif_dormant_on(dev); + else + netif_dormant_off(dev); + + if (netif_carrier_ok(lowerdev)) { + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); + } else { + if (netif_carrier_ok(lowerdev)) + netif_carrier_off(dev); + } +} + +static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + +static int macvlan_newlink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_port *port; + struct net_device *lowerdev; + int err; + + if (!tb[IFLA_LINK]) + return -EINVAL; + + lowerdev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK])); + if (lowerdev == NULL) + return -ENODEV; + + if (!tb[IFLA_MTU]) + dev->mtu = lowerdev->mtu; + else if (dev->mtu > lowerdev->mtu) + return -EINVAL; + + if (!tb[IFLA_ADDRESS]) + random_ether_addr(dev->dev_addr); + + if (lowerdev->macvlan_port == NULL) { + err = macvlan_port_create(lowerdev); + if (err < 0) + return err; + } + port = lowerdev->macvlan_port; + + vlan->lowerdev = lowerdev; + vlan->dev = dev; + vlan->port = port; + + err = register_netdevice(dev); + if (err < 0) + return err; + + list_add_tail(&vlan->list, &port->vlans); + macvlan_transfer_operstate(dev); + return 0; +} + +static void macvlan_dellink(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_port *port = vlan->port; + + list_del(&vlan->list); + unregister_netdevice(dev); + + if (list_empty(&port->vlans)) + macvlan_port_destroy(dev); +} + +static struct rtnl_link_ops macvlan_link_ops __read_mostly = { + .kind = "macvlan", + .priv_size = sizeof(struct macvlan_dev), + .setup = macvlan_setup, + .validate = macvlan_validate, + .newlink = macvlan_newlink, + .dellink = macvlan_dellink, +}; + +static int macvlan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct macvlan_dev *vlan, *next; + struct macvlan_port *port; + + port = dev->macvlan_port; + if (port == NULL) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGE: + list_for_each_entry(vlan, &port->vlans, list) + macvlan_transfer_operstate(vlan->dev); + break; + case NETDEV_FEAT_CHANGE: + list_for_each_entry(vlan, &port->vlans, list) { + vlan->dev->features = dev->features & MACVLAN_FEATURES; + netdev_features_change(vlan->dev); + } + break; + case NETDEV_UNREGISTER: + list_for_each_entry_safe(vlan, next, &port->vlans, list) + macvlan_dellink(vlan->dev); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block macvlan_notifier_block __read_mostly = { + .notifier_call = macvlan_device_event, +}; + +static int __init macvlan_init_module(void) +{ + int err; + + register_netdevice_notifier(&macvlan_notifier_block); + macvlan_handle_frame_hook = macvlan_handle_frame; + + err = rtnl_link_register(&macvlan_link_ops); + if (err < 0) + goto err1; + return 0; +err1: + macvlan_handle_frame_hook = macvlan_handle_frame; + unregister_netdevice_notifier(&macvlan_notifier_block); + return err; +} + +static void __exit macvlan_cleanup_module(void) +{ + rtnl_link_unregister(&macvlan_link_ops); + macvlan_handle_frame_hook = NULL; + unregister_netdevice_notifier(&macvlan_notifier_block); +} + +module_init(macvlan_init_module); +module_exit(macvlan_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Driver for MAC address based VLANs"); +MODULE_ALIAS_RTNL_LINK("macvlan"); diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h new file mode 100644 index 000000000000..0d9d7ea2c1cc --- /dev/null +++ b/include/linux/if_macvlan.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_IF_MACVLAN_H +#define _LINUX_IF_MACVLAN_H + +#ifdef __KERNEL__ + +extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_IF_MACVLAN_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e5af458ab04b..322b5eae57dd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -564,6 +564,8 @@ struct net_device /* bridge stuff */ struct net_bridge_port *br_port; + /* macvlan */ + struct macvlan_port *macvlan_port; /* class/net/name entry */ struct device dev; diff --git a/net/core/dev.c b/net/core/dev.c index 59ec811d2b54..13a0d9f6da54 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,6 +98,7 @@ #include #include #include +#include #include #include #include @@ -1813,6 +1814,28 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) #endif +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; +EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); + +static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, + struct net_device *orig_dev) +{ + if (skb->dev->macvlan_port == NULL) + return skb; + + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + return macvlan_handle_frame_hook(skb); +} +#else +#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) +#endif + #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions @@ -1918,6 +1941,9 @@ ncls: #endif skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto out; + skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; -- cgit v1.2.3 From a7ecfc866578e665e20004a2f5fff5b73e8be3bc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:56:30 -0700 Subject: [VLAN]: Fix memset length Fix sizeof(ETH_ALEN) Introduced by my rtnl_link patches. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 39bdcc25c150..cda936b77d22 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -377,7 +377,7 @@ void vlan_setup(struct net_device *new_dev) new_dev->destructor = free_netdev; new_dev->do_ioctl = vlan_dev_ioctl; - memset(new_dev->broadcast, 0, sizeof(ETH_ALEN)); + memset(new_dev->broadcast, 0, ETH_ALEN); } static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev) -- cgit v1.2.3 From b3b0b681b12478a7afa7d1f3d58be96830e16c7d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 14 Jul 2007 18:57:19 -0700 Subject: [TCP]: tcp probe add back ssthresh field Sangtae noticed the ssthresh got missed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 86624fabc4bf..f37d5928921a 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -111,6 +111,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->snd_una = tp->snd_una; p->snd_cwnd = tp->snd_cwnd; p->snd_wnd = tp->snd_wnd; + p->ssthresh = tcp_current_ssthresh(sk); p->srtt = tp->srtt >> 3; tcp_probe.head = (tcp_probe.head + 1) % bufsize; -- cgit v1.2.3 From acd159b6b5828175be6b9ccccd9b054239ec63e9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 14 Jul 2007 19:00:59 -0700 Subject: [INET_SOCK]: make net/ipv4/inet_timewait_sock.c:__inet_twsk_kill() static This patch makes the needlessly global __inet_twsk_kill() static. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 3 --- net/ipv4/inet_timewait_sock.c | 5 ++--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 09a2532699b2..47d52b2414db 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -209,9 +209,6 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state); -extern void __inet_twsk_kill(struct inet_timewait_sock *tw, - struct inet_hashinfo *hashinfo); - extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a73cf93cee36..2586df09b9b6 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -14,7 +14,8 @@ #include /* Must be called with locally disabled BHs. */ -void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) +static void __inet_twsk_kill(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; @@ -47,8 +48,6 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi inet_twsk_put(tw); } -EXPORT_SYMBOL_GPL(__inet_twsk_kill); - /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it -- cgit v1.2.3 From da7de31cc50796a53593785d4508b7b7ffa9a9b2 Mon Sep 17 00:00:00 2001 From: Jennifer Hunt Date: Sat, 14 Jul 2007 19:03:00 -0700 Subject: [AF_IUCV]: Improve description of IUCV and AFIUCV configuration options. Signed-off-by: Jennifer Hunt Signed-off-by: Ursula Braun >braunu@de.ibm.com> Acked-by: Frank Pavlic Signed-off-by: David S. Miller --- net/iucv/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig index f8fcc3d10327..16ce9cd4f39e 100644 --- a/net/iucv/Kconfig +++ b/net/iucv/Kconfig @@ -1,13 +1,13 @@ config IUCV - tristate "IUCV support (VM only)" + tristate "IUCV support (S390 - z/VM only)" depends on S390 help - Select this option if you want to use inter-user communication under - VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast + Select this option if you want to use inter-user communication + under VM or VIF. If you run on z/VM, say "Y" to enable a fast communication link between VM guests. config AFIUCV - tristate "AF_IUCV support (VM only)" + tristate "AF_IUCV support (S390 - z/VM only)" depends on IUCV help Select this option if you want to use inter-user communication under -- cgit v1.2.3 From 13fdc9a74df0fec70f421c6891e184ed8c3b9088 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Sat, 14 Jul 2007 19:03:41 -0700 Subject: [AF_IUCV]: Avoid deadlock between iucv_path_connect and tasklet. An iucv deadlock may occur, where one CPU is spinning on the iucv_table_lock for iucv_tasklet_fn(), while another CPU is holding the iucv_table_lock for an iucv_path_connect() and is waiting for the first CPU in an smp_call_function. Solution: replace spin_lock in iucv_tasklet_fn by spin_trylock and reschedule tasklet in case of non-granted lock. Signed-off-by: Ursula Braun Acked-by: Frank Pavlic Signed-off-by: David S. Miller --- net/iucv/iucv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index b7333061016d..ad5150b8dfa9 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1494,7 +1494,10 @@ static void iucv_tasklet_fn(unsigned long ignored) struct iucv_irq_list *p, *n; /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ - spin_lock(&iucv_table_lock); + if (!spin_trylock(&iucv_table_lock)) { + tasklet_schedule(&iucv_tasklet); + return; + } iucv_active_cpu = smp_processor_id(); spin_lock_irq(&iucv_queue_lock); -- cgit v1.2.3 From febca281f677a775c61cd0572c2f35e4ead9e7d5 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Sat, 14 Jul 2007 19:04:25 -0700 Subject: [AF_IUCV]: Add lock when updating accept_q The accept_queue of an af_iucv socket will be corrupted, if adding and deleting of entries in this queue occurs at the same time (connect request from one client, while accept call is processed for another client). Solution: add locking when updating accept_q Signed-off-by: Ursula Braun Acked-by: Frank Pavlic Signed-off-by: David S. Miller --- include/net/iucv/af_iucv.h | 1 + net/iucv/af_iucv.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index f9bd11be1891..b6c468cd7f5b 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -60,6 +60,7 @@ struct iucv_sock { char dst_user_id[8]; char dst_name[8]; struct list_head accept_q; + spinlock_t accept_q_lock; struct sock *parent; struct iucv_path *path; struct sk_buff_head send_skb_q; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index d9e9ddb8eac5..53ae14c35f70 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -219,6 +219,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) sock_init_data(sock, sk); INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); + spin_lock_init(&iucv_sk(sk)->accept_q_lock); skb_queue_head_init(&iucv_sk(sk)->send_skb_q); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; @@ -274,15 +275,25 @@ void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) void iucv_accept_enqueue(struct sock *parent, struct sock *sk) { + unsigned long flags; + struct iucv_sock *par = iucv_sk(parent); + sock_hold(sk); - list_add_tail(&iucv_sk(sk)->accept_q, &iucv_sk(parent)->accept_q); + spin_lock_irqsave(&par->accept_q_lock, flags); + list_add_tail(&iucv_sk(sk)->accept_q, &par->accept_q); + spin_unlock_irqrestore(&par->accept_q_lock, flags); iucv_sk(sk)->parent = parent; parent->sk_ack_backlog++; } void iucv_accept_unlink(struct sock *sk) { + unsigned long flags; + struct iucv_sock *par = iucv_sk(iucv_sk(sk)->parent); + + spin_lock_irqsave(&par->accept_q_lock, flags); list_del_init(&iucv_sk(sk)->accept_q); + spin_unlock_irqrestore(&par->accept_q_lock, flags); iucv_sk(sk)->parent->sk_ack_backlog--; iucv_sk(sk)->parent = NULL; sock_put(sk); @@ -298,8 +309,8 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) lock_sock(sk); if (sk->sk_state == IUCV_CLOSED) { - release_sock(sk); iucv_accept_unlink(sk); + release_sock(sk); continue; } @@ -879,6 +890,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Find out if this path belongs to af_iucv. */ read_lock(&iucv_sk_list.lock); iucv = NULL; + sk = NULL; sk_for_each(sk, node, &iucv_sk_list.head) if (sk->sk_state == IUCV_LISTEN && !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { -- cgit v1.2.3 From 6460d948f3ebf7d5040328a60a0ab7221f69945b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 14 Jul 2007 19:07:52 -0700 Subject: [NET]: Add ethtool support for NETIF_F_IPV6_CSUM devices. Add ethtool utility function to set or clear IPV6_CSUM feature flag. Modify tg3.c and bnx2.c to use this function when doing ethtool -K to change tx checksum. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 2 +- drivers/net/tg3.c | 2 +- include/linux/ethtool.h | 1 + net/core/ethtool.c | 12 ++++++++++++ 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 4e5e1cb2adc1..d23861c8658c 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -6218,7 +6218,7 @@ bnx2_set_tx_csum(struct net_device *dev, u32 data) struct bnx2 *bp = netdev_priv(dev); if (CHIP_NUM(bp) == CHIP_NUM_5709) - return (ethtool_op_set_tx_hw_csum(dev, data)); + return (ethtool_op_set_tx_ipv6_csum(dev, data)); else return (ethtool_op_set_tx_csum(dev, data)); } diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 32e4037dcb50..5ee14764fd74 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -8318,7 +8318,7 @@ static int tg3_set_tx_csum(struct net_device *dev, u32 data) if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) - ethtool_op_set_tx_hw_csum(dev, data); + ethtool_op_set_tx_ipv6_csum(dev, data); else ethtool_op_set_tx_csum(dev, data); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f2d248f8cc92..3a632244f31b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -265,6 +265,7 @@ u32 ethtool_op_get_link(struct net_device *dev); u32 ethtool_op_get_tx_csum(struct net_device *dev); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data); u32 ethtool_op_get_sg(struct net_device *dev); int ethtool_op_set_sg(struct net_device *dev, u32 data); u32 ethtool_op_get_tso(struct net_device *dev); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8d5e5a09b576..0b531e98ec33 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -52,6 +52,17 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) return 0; } + +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + else + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + + return 0; +} + u32 ethtool_op_get_sg(struct net_device *dev) { return (dev->features & NETIF_F_SG) != 0; @@ -980,5 +991,6 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); -- cgit v1.2.3 From d87d8469e2dd19a3a134b99f78288d41854c614b Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 14 Jul 2007 20:44:23 -0700 Subject: [NETFILTER]: nf_conntrack: Increment error count on parsing IPv4 header Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 -- net/netfilter/nf_conntrack_core.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 89e20ab494b8..b5c4bb54691e 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -131,8 +131,6 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, */ if ((protoff < 0) || (protoff > (*pskb)->len)) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3d1411012a2c..b730413738a6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -625,6 +625,8 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { pr_debug("not prepared to track yet or error occured\n"); + NF_CT_STAT_INC_ATOMIC(error); + NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } -- cgit v1.2.3 From ffc30690480bdd337e4914302b926d24870b56b2 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 14 Jul 2007 20:44:50 -0700 Subject: [NETFILTER]: nf_conntrack: make l3proto->prepare() generic and renames it The icmp[v6] l4proto modules parse headers in ICMP[v6] error to get tuple. But they have to find the offset to transport protocol header before that. Their processings are almost same as prepare() of l3proto modules. This makes prepare() more generic to simplify icmp[v6] l4proto module later. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 2 +- include/net/netfilter/nf_conntrack_l3proto.h | 6 +++--- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 23 +++++++++++++--------- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 27 +++++++++++++++----------- net/netfilter/nf_conntrack_core.c | 5 +++-- net/netfilter/nf_conntrack_l3proto_generic.c | 7 +++---- 6 files changed, 40 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index b4b6049e01fa..5a8965904377 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -7,7 +7,7 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; -extern int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, +extern int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len); extern int nf_ct_frag6_init(void); diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 890752d7f673..e3708a698d8b 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -58,11 +58,11 @@ struct nf_conntrack_l3proto /* * Called before tracking. - * *dataoff: offset of protocol header (TCP, UDP,...) in *pskb + * *dataoff: offset of protocol header (TCP, UDP,...) in skb * *protonum: protocol number */ - int (*prepare)(struct sk_buff **pskb, unsigned int hooknum, - unsigned int *dataoff, u_int8_t *protonum); + int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum); int (*tuple_to_nfattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 3c5629938487..ee29f4e9eac2 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -78,21 +78,26 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) return skb; } -static int -ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, - u_int8_t *protonum) +static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { + struct iphdr _iph, *iph; + + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (iph == NULL) + return -NF_DROP; + /* Never happen */ - if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) { + if (iph->frag_off & htons(IP_OFFSET)) { if (net_ratelimit()) { - printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", - ip_hdr(*pskb)->protocol, hooknum); + printk(KERN_ERR "ipv4_get_l4proto: Frag of proto %u\n", + iph->protocol); } return -NF_DROP; } - *dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb); - *protonum = ip_hdr(*pskb)->protocol; + *dataoff = nhoff + (iph->ihl << 2); + *protonum = iph->protocol; return NF_ACCEPT; } @@ -407,7 +412,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .invert_tuple = ipv4_invert_tuple, .print_tuple = ipv4_print_tuple, .print_conntrack = ipv4_print_conntrack, - .prepare = ipv4_prepare, + .get_l4proto = ipv4_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nfattr = ipv4_tuple_to_nfattr, .nfattr_to_tuple = ipv4_nfattr_to_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b5c4bb54691e..9b7eaaadc67e 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -86,7 +86,7 @@ static int ipv6_print_conntrack(struct seq_file *s, * - Note also special handling of AUTH header. Thanks to IPsec wizards. */ -int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, +int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len) { u8 nexthdr = *nexthdrp; @@ -117,19 +117,24 @@ int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, return start; } -static int -ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, - u_int8_t *protonum) +static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { - unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; - unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; - int protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, - (*pskb)->len - extoff); + unsigned int extoff = nhoff + sizeof(struct ipv6hdr); + unsigned char pnum; + int protoff; + + if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), + &pnum, sizeof(pnum)) != 0) { + pr_debug("ip6_conntrack_core: can't get nexthdr\n"); + return -NF_ACCEPT; + } + protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); /* - * (protoff == (*pskb)->len) mean that the packet doesn't have no data + * (protoff == skb->len) mean that the packet doesn't have no data * except of IPv6 & ext headers. but it's tracked anyway. - YK */ - if ((protoff < 0) || (protoff > (*pskb)->len)) { + if ((protoff < 0) || (protoff > skb->len)) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); return -NF_ACCEPT; } @@ -375,7 +380,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, .print_conntrack = ipv6_print_conntrack, - .prepare = ipv6_prepare, + .get_l4proto = ipv6_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nfattr = ipv6_tuple_to_nfattr, .nfattr_to_tuple = ipv6_nfattr_to_tuple, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b730413738a6..5b194e3c4e25 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -622,8 +622,9 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) /* rcu_read_lock()ed by nf_hook_slow */ l3proto = __nf_ct_l3proto_find((u_int16_t)pf); - - if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { + ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb), + &dataoff, &protonum); + if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); NF_CT_STAT_INC_ATOMIC(error); NF_CT_STAT_INC_ATOMIC(invalid); diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index b1bfa207a850..0691642b7724 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -61,9 +61,8 @@ static int generic_print_conntrack(struct seq_file *s, return 0; } -static int -generic_prepare(struct sk_buff **pskb, unsigned int hooknum, - unsigned int *dataoff, u_int8_t *protonum) +static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { /* Never track !!! */ return -NF_ACCEPT; @@ -77,6 +76,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = { .invert_tuple = generic_invert_tuple, .print_tuple = generic_print_tuple, .print_conntrack = generic_print_conntrack, - .prepare = generic_prepare, + .get_l4proto = generic_get_l4proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); -- cgit v1.2.3 From e2a3123fbe58da9fd3f35cd242087896ace6049f Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 14 Jul 2007 20:45:14 -0700 Subject: [NETFILTER]: nf_conntrack: Introduces nf_ct_get_tuplepr and uses it nf_ct_get_tuple() requires the offset to transport header and that bothers callers such as icmp[v6] l4proto modules. This introduces new function to simplify them. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 4 +++ net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 32 +++++--------------- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 42 +++++--------------------- net/netfilter/nf_conntrack_core.c | 30 ++++++++++++++++++ 4 files changed, 49 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d4f02eb0c66c..810020ec345d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -186,6 +186,10 @@ extern void nf_conntrack_hash_insert(struct nf_conn *ct); extern void nf_conntrack_flush(void); +extern int nf_ct_get_tuplepr(const struct sk_buff *skb, + unsigned int nhoff, + u_int16_t l3num, + struct nf_conntrack_tuple *tuple); extern int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 0fe8fb0466ef..b8b79992797e 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -136,40 +136,22 @@ icmp_error_message(struct sk_buff *skb, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; - struct { - struct icmphdr icmp; - struct iphdr ip; - } _in, *inside; struct nf_conntrack_l4proto *innerproto; struct nf_conntrack_tuple_hash *h; - int dataoff; NF_CT_ASSERT(skb->nfct == NULL); - /* Not enough header? */ - inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in); - if (inside == NULL) - return -NF_ACCEPT; - - /* Ignore ICMP's containing fragments (shouldn't happen) */ - if (inside->ip.frag_off & htons(IP_OFFSET)) { - pr_debug("icmp_error_message: fragment of proto %u\n", - inside->ip.protocol); + /* Are they talking about one of our connections? */ + if (!nf_ct_get_tuplepr(skb, + skb_network_offset(skb) + ip_hdrlen(skb) + + sizeof(struct icmphdr), + PF_INET, &origtuple)) { + pr_debug("icmp_error_message: failed to get tuple\n"); return -NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ - innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - - dataoff = ip_hdrlen(skb) + sizeof(inside->icmp); - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, - inside->ip.protocol, &origtuple, - &nf_conntrack_l3proto_ipv4, innerproto)) { - pr_debug("icmp_error_message: ! get_tuple p=%u", - inside->ip.protocol); - return -NF_ACCEPT; - } + innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9defc7e14554..0fca7e8abeb7 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -136,49 +136,23 @@ icmpv6_error_message(struct sk_buff *skb, { struct nf_conntrack_tuple intuple, origtuple; struct nf_conntrack_tuple_hash *h; - struct icmp6hdr _hdr, *hp; - unsigned int inip6off; struct nf_conntrack_l4proto *inproto; - u_int8_t inprotonum; - unsigned int inprotoff; NF_CT_ASSERT(skb->nfct == NULL); - hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr); - if (hp == NULL) { - pr_debug("icmpv6_error: Can't get ICMPv6 hdr.\n"); - return -NF_ACCEPT; - } - - inip6off = icmp6off + sizeof(_hdr); - if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr), - &inprotonum, sizeof(inprotonum)) != 0) { - pr_debug("icmpv6_error: Can't get nexthdr in inner IPv6 " - "header.\n"); - return -NF_ACCEPT; - } - inprotoff = nf_ct_ipv6_skip_exthdr(skb, - inip6off + sizeof(struct ipv6hdr), - &inprotonum, - skb->len - inip6off - - sizeof(struct ipv6hdr)); - - if ((inprotoff > skb->len) || (inprotonum == NEXTHDR_FRAGMENT)) { - pr_debug("icmpv6_error: Can't get protocol header in ICMPv6 " - "payload.\n"); - return -NF_ACCEPT; - } - - /* rcu_read_lock()ed by nf_hook_slow */ - inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum); - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, - &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { + if (!nf_ct_get_tuplepr(skb, + skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct icmp6hdr), + PF_INET6, &origtuple)) { pr_debug("icmpv6_error: Can't get tuple\n"); return -NF_ACCEPT; } + /* rcu_read_lock()ed by nf_hook_slow */ + inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); + /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ if (!nf_ct_invert_tuple(&intuple, &origtuple, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5b194e3c4e25..8cce814f6bee 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -113,6 +113,36 @@ nf_ct_get_tuple(const struct sk_buff *skb, } EXPORT_SYMBOL_GPL(nf_ct_get_tuple); +int nf_ct_get_tuplepr(const struct sk_buff *skb, + unsigned int nhoff, + u_int16_t l3num, + struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_l4proto *l4proto; + unsigned int protoff; + u_int8_t protonum; + int ret; + + rcu_read_lock(); + + l3proto = __nf_ct_l3proto_find(l3num); + ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum); + if (ret != NF_ACCEPT) { + rcu_read_unlock(); + return 0; + } + + l4proto = __nf_ct_l4proto_find(l3num, protonum); + + ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, + l3proto, l4proto); + + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); + int nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig, -- cgit v1.2.3 From 130e7a83d7ec8c5c673225e0fa8ea37b1ed507a5 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 14 Jul 2007 20:45:41 -0700 Subject: [NETFILTER]: nf_conntrack: Don't track locally generated special ICMP error The conntrack assigned to locally generated ICMP error is usually the one assigned to the original packet which has caused the error. But if the original packet is handled as invalid by nf_conntrack, no conntrack is assigned to the original packet. Then nf_ct_attach() cannot assign any conntrack to the ICMP error packet. In that case the current nf_conntrack_icmp assigns appropriate conntrack to it. But the current code mistakes the direction of the packet. As a result, NAT code mistakes the address to be mangled. To fix the bug, this changes nf_conntrack_icmp not to assign conntrack to such ICMP error. Actually no address is necessary to be mangled in this case. Spotted by Jordan Russell. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index b8b79992797e..f96573304f5b 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -165,25 +165,13 @@ icmp_error_message(struct sk_buff *skb, h = nf_conntrack_find_get(&innertuple); if (!h) { - /* Locally generated ICMPs will match inverted if they - haven't been SNAT'ed yet */ - /* FIXME: NAT code has to handle half-done double NAT --RR */ - if (hooknum == NF_IP_LOCAL_OUT) - h = nf_conntrack_find_get(&origtuple); - - if (!h) { - pr_debug("icmp_error_message: no match\n"); - return -NF_ACCEPT; - } - - /* Reverse direction from that found */ - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } else { - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + pr_debug("icmp_error_message: no match\n"); + return -NF_ACCEPT; } + if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; -- cgit v1.2.3 From a887c1c148ffb3eb1c193e9869ca5297c6e22078 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 20:46:15 -0700 Subject: [NETFILTER]: Lower *tables printk severity Lower ip6tables, arptables and ebtables printk severity similar to Dan Aloni's patch for iptables. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/netfilter/ebtables.c | 4 ++-- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ac9984f98e59..4169a2a89a39 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1525,14 +1525,14 @@ static int __init ebtables_init(void) if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) return ret; - printk(KERN_NOTICE "Ebtables v2.0 registered\n"); + printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; } static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); - printk(KERN_NOTICE "Ebtables v2.0 unregistered\n"); + printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e981232942a1..d1149aba9351 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1184,7 +1184,7 @@ static int __init arp_tables_init(void) if (ret < 0) goto err4; - printk("arp_tables: (C) 2002 David S. Miller\n"); + printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n"); return 0; err4: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 254c769b750a..aeda617246b7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1497,7 +1497,7 @@ static int __init ip6_tables_init(void) if (ret < 0) goto err5; - printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); + printk(KERN_INFO "ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; err5: -- cgit v1.2.3 From 370786f9cfd430cb424f00ce4110e75bb1b95a19 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 14 Jul 2007 20:47:26 -0700 Subject: [NETFILTER]: x_tables: add connlimit match ipt_connlimit has been sitting in POM-NG for a long time. Here is a new shiny xt_connlimit with: * xtables'ified * will request the layer3 module (previously it hotdropped every packet when it was not loaded) * fixed: there was a deadlock in case of an OOM condition * support for any layer4 protocol (e.g. UDP/SCTP) * using jhash, as suggested by Eric Dumazet * ipv6 support Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_connlimit.h | 17 ++ net/netfilter/Kconfig | 7 + net/netfilter/Makefile | 1 + net/netfilter/xt_connlimit.c | 313 +++++++++++++++++++++++++++++++++ 4 files changed, 338 insertions(+) create mode 100644 include/linux/netfilter/xt_connlimit.h create mode 100644 net/netfilter/xt_connlimit.c (limited to 'net') diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h new file mode 100644 index 000000000000..90ae8b474cb8 --- /dev/null +++ b/include/linux/netfilter/xt_connlimit.h @@ -0,0 +1,17 @@ +#ifndef _XT_CONNLIMIT_H +#define _XT_CONNLIMIT_H + +struct xt_connlimit_data; + +struct xt_connlimit_info { + union { + u_int32_t v4_mask; + u_int32_t v6_mask[4]; + }; + unsigned int limit, inverse; + + /* this needs to be at the end */ + struct xt_connlimit_data *data __attribute__((aligned(8))); +}; + +#endif /* _XT_CONNLIMIT_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index df5e8dab871d..9415b9a5dba6 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -423,6 +423,13 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLIMIT + tristate '"connlimit" match support"' + depends on NETFILTER_XTABLES + ---help--- + This match allows you to match against the number of parallel + connections to a server per client IP address (or address block). + config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 58b4245a1723..3e4a16aeb04e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c new file mode 100644 index 000000000000..3335dd5be962 --- /dev/null +++ b/net/netfilter/xt_connlimit.c @@ -0,0 +1,313 @@ +/* + * netfilter module to limit the number of parallel tcp + * connections per IP address. + * (c) 2000 Gerd Knorr + * Nov 2002: Martin Bene : + * only ignore TIME_WAIT or gone connections + * Copyright © Jan Engelhardt , 2007 + * + * based on ... + * + * Kernel module to match connection tracking information. + * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* we will save the tuples of all connections we care about */ +struct xt_connlimit_conn { + struct list_head list; + struct nf_conntrack_tuple tuple; +}; + +struct xt_connlimit_data { + struct list_head iphash[256]; + spinlock_t lock; +}; + +static u_int32_t connlimit_rnd; +static bool connlimit_rnd_inited; + +static inline unsigned int connlimit_iphash(u_int32_t addr) +{ + if (unlikely(!connlimit_rnd_inited)) { + get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); + connlimit_rnd_inited = true; + } + return jhash_1word(addr, connlimit_rnd) & 0xFF; +} + +static inline unsigned int +connlimit_iphash6(const union nf_conntrack_address *addr, + const union nf_conntrack_address *mask) +{ + union nf_conntrack_address res; + unsigned int i; + + if (unlikely(!connlimit_rnd_inited)) { + get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); + connlimit_rnd_inited = true; + } + + for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) + res.ip6[i] = addr->ip6[i] & mask->ip6[i]; + + return jhash2(res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF; +} + +static inline bool already_closed(const struct nf_conn *conn) +{ + u_int16_t proto = conn->tuplehash[0].tuple.dst.protonum; + + if (proto == IPPROTO_TCP) + return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT; + else + return 0; +} + +static inline unsigned int +same_source_net(const union nf_conntrack_address *addr, + const union nf_conntrack_address *mask, + const union nf_conntrack_address *u3, unsigned int family) +{ + if (family == AF_INET) { + return (addr->ip & mask->ip) == (u3->ip & mask->ip); + } else { + union nf_conntrack_address lh, rh; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) { + lh.ip6[i] = addr->ip6[i] & mask->ip6[i]; + rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; + } + + return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)) == 0; + } +} + +static int count_them(struct xt_connlimit_data *data, + const struct nf_conntrack_tuple *tuple, + const union nf_conntrack_address *addr, + const union nf_conntrack_address *mask, + const struct xt_match *match) +{ + struct nf_conntrack_tuple_hash *found; + struct xt_connlimit_conn *conn; + struct xt_connlimit_conn *tmp; + struct nf_conn *found_ct; + struct list_head *hash; + bool addit = true; + int matches = 0; + + + if (match->family == AF_INET6) + hash = &data->iphash[connlimit_iphash6(addr, mask)]; + else + hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; + + read_lock_bh(&nf_conntrack_lock); + + /* check the saved connections */ + list_for_each_entry_safe(conn, tmp, hash, list) { + found = __nf_conntrack_find(&conn->tuple, NULL); + found_ct = NULL; + + if (found != NULL) + found_ct = nf_ct_tuplehash_to_ctrack(found); + + if (found_ct != NULL && + nf_ct_tuple_equal(&conn->tuple, tuple) && + !already_closed(found_ct)) + /* + * Just to be sure we have it only once in the list. + * We should not see tuples twice unless someone hooks + * this into a table without "-p tcp --syn". + */ + addit = false; + + if (found == NULL) { + /* this one is gone */ + list_del(&conn->list); + kfree(conn); + continue; + } + + if (already_closed(found_ct)) { + /* + * we do not care about connections which are + * closed already -> ditch it + */ + list_del(&conn->list); + kfree(conn); + continue; + } + + if (same_source_net(addr, mask, &conn->tuple.src.u3, + match->family)) + /* same source network -> be counted! */ + ++matches; + } + + read_unlock_bh(&nf_conntrack_lock); + + if (addit) { + /* save the new connection in our list */ + conn = kzalloc(sizeof(*conn), GFP_ATOMIC); + if (conn == NULL) + return -ENOMEM; + conn->tuple = *tuple; + list_add(&conn->list, hash); + ++matches; + } + + return matches; +} + +static bool connlimit_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, int offset, + unsigned int protoff, bool *hotdrop) +{ + const struct xt_connlimit_info *info = matchinfo; + union nf_conntrack_address addr, mask; + struct nf_conntrack_tuple tuple; + const struct nf_conntrack_tuple *tuple_ptr = &tuple; + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + int connections; + + ct = nf_ct_get(skb, &ctinfo); + if (ct != NULL) + tuple_ptr = &ct->tuplehash[0].tuple; + else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + match->family, &tuple)) + goto hotdrop; + + if (match->family == AF_INET6) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); + memcpy(&mask.ip6, info->v6_mask, sizeof(info->v6_mask)); + } else { + const struct iphdr *iph = ip_hdr(skb); + addr.ip = iph->saddr; + mask.ip = info->v4_mask; + } + + spin_lock_bh(&info->data->lock); + connections = count_them(info->data, tuple_ptr, &addr, &mask, match); + spin_unlock_bh(&info->data->lock); + + if (connections < 0) { + /* kmalloc failed, drop it entirely */ + *hotdrop = true; + return false; + } + + return (connections > info->limit) ^ info->inverse; + + hotdrop: + *hotdrop = true; + return false; +} + +static bool connlimit_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) +{ + struct xt_connlimit_info *info = matchinfo; + unsigned int i; + + if (nf_ct_l3proto_try_module_get(match->family) < 0) { + printk(KERN_WARNING "cannot load conntrack support for " + "address family %u\n", match->family); + return false; + } + + /* init private data */ + info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); + if (info->data == NULL) { + nf_ct_l3proto_module_put(match->family); + return false; + } + + spin_lock_init(&info->data->lock); + for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) + INIT_LIST_HEAD(&info->data->iphash[i]); + + return true; +} + +static void connlimit_destroy(const struct xt_match *match, void *matchinfo) +{ + struct xt_connlimit_info *info = matchinfo; + struct xt_connlimit_conn *conn; + struct xt_connlimit_conn *tmp; + struct list_head *hash = info->data->iphash; + unsigned int i; + + nf_ct_l3proto_module_put(match->family); + + for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { + list_for_each_entry_safe(conn, tmp, &hash[i], list) { + list_del(&conn->list); + kfree(conn); + } + } + + kfree(info->data); +} + +static struct xt_match connlimit_reg[] __read_mostly = { + { + .name = "connlimit", + .family = AF_INET, + .checkentry = connlimit_check, + .match = connlimit_match, + .matchsize = sizeof(struct xt_connlimit_info), + .destroy = connlimit_destroy, + .me = THIS_MODULE, + }, + { + .name = "connlimit", + .family = AF_INET6, + .checkentry = connlimit_check, + .match = connlimit_match, + .matchsize = sizeof(struct xt_connlimit_info), + .destroy = connlimit_destroy, + .me = THIS_MODULE, + }, +}; + +static int __init xt_connlimit_init(void) +{ + return xt_register_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg)); +} + +static void __exit xt_connlimit_exit(void) +{ + xt_unregister_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg)); +} + +module_init(xt_connlimit_init); +module_exit(xt_connlimit_exit); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_DESCRIPTION("netfilter xt_connlimit match module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_connlimit"); +MODULE_ALIAS("ip6t_connlimit"); -- cgit v1.2.3 From 61075af51f252913401c41fbe94075b46c94e9f1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 20:48:19 -0700 Subject: [NETFILTER]: nf_conntrack: mark protocols __read_mostly Also remove two unnecessary EXPORT_SYMBOLs and move the nf_conntrack_l3proto_ipv4 declaration to the correct file. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 2 ++ include/net/netfilter/nf_conntrack_l3proto.h | 2 -- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 3 +-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 +--- net/netfilter/nf_conntrack_l3proto_generic.c | 2 +- net/netfilter/nf_conntrack_proto_generic.c | 2 +- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udp.c | 4 ++-- 12 files changed, 15 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 3ed4e14970c5..7a671603fca6 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -12,6 +12,8 @@ /* Returns new sk_buff, or NULL */ struct sk_buff *nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb); +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; + extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e3708a698d8b..3c58a2c4df28 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -89,8 +89,6 @@ extern struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); /* Existing built-in protocols */ -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; static inline struct nf_conntrack_l3proto * diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ee29f4e9eac2..64552afd01cb 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -405,7 +405,7 @@ static struct nf_sockopt_ops so_getorigdst = { .get = &getorigdst, }; -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .l3proto = PF_INET, .name = "ipv4", .pkt_to_tuple = ipv4_pkt_to_tuple, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index f96573304f5b..6593fd2c5b10 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -312,7 +312,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_ICMP, @@ -338,4 +338,3 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = #endif #endif }; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_icmp); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 9b7eaaadc67e..36df2218b669 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -373,7 +373,7 @@ static int ipv6_nfattr_to_tuple(struct nfattr *tb[], } #endif -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .l3proto = PF_INET6, .name = "ipv6", .pkt_to_tuple = ipv6_pkt_to_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 0fca7e8abeb7..ab154fb90018 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -276,7 +276,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_ICMPV6, @@ -297,5 +297,3 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = .ctl_table = icmpv6_sysctl_table, #endif }; - -EXPORT_SYMBOL(nf_conntrack_l4proto_icmpv6); diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index 0691642b7724..991c52c9a28b 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -69,7 +69,7 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } -struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 6faf1bed7224..d8b501878d9f 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -98,7 +98,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = +struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .l4proto = 0, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 771c4c29936e..bdbead8a7a83 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -261,7 +261,7 @@ static void gre_destroy(struct nf_conn *ct) } /* protocol helper struct */ -static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_GRE, .name = "gre", diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index debfe61378a1..04192acc7c40 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -601,7 +601,7 @@ static struct ctl_table sctp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -622,7 +622,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { #endif }; -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, .name = "sctp", diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 1c8206e6560a..87ad3ccf8aff 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1372,7 +1372,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_TCP, @@ -1401,7 +1401,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_TCP, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3620ecc095fd..13d94a025723 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -191,7 +191,7 @@ static struct ctl_table udp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDP, @@ -218,7 +218,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, -- cgit v1.2.3 From 59eecdfb166f6846ae356ddc744abed5820ad965 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 20:48:44 -0700 Subject: [NETFILTER]: nf_conntrack: UDPLITE support Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 10 ++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_proto_udplite.c | 266 +++++++++++++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 net/netfilter/nf_conntrack_proto_udplite.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9415b9a5dba6..3ac39f1ec775 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -102,6 +102,16 @@ config NF_CT_PROTO_SCTP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NF_CT_PROTO_UDPLITE + tristate 'UDP-Lite protocol connection tracking support (EXPERIMENTAL)' + depends on EXPERIMENTAL && NF_CONNTRACK + help + With this option enabled, the layer 3 independent connection + tracking code will be able to do state tracking on UDP-Lite + connections. + + To compile it as a module, choose M here. If unsure, say N. + config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3e4a16aeb04e..0c054bf27973 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o # SCTP protocol connection tracking obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c new file mode 100644 index 000000000000..93e747b5396e --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -0,0 +1,266 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * (C) 2007 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static unsigned int nf_ct_udplite_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_udplite_timeout_stream __read_mostly = 180*HZ; + +static int udplite_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return 0; + + tuple->src.u.udp.port = hp->source; + tuple->dst.u.udp.port = hp->dest; + return 1; +} + +static int udplite_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u.udp.port = orig->dst.u.udp.port; + tuple->dst.u.udp.port = orig->src.u.udp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int udplite_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); +} + +/* Print out the private part of the conntrack. */ +static int udplite_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, and may modify conntracktype */ +static int udplite_packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + /* If we've seen traffic both ways, this is some kind of UDP + stream. Extend timeout. */ + if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { + nf_ct_refresh_acct(conntrack, ctinfo, skb, + nf_ct_udplite_timeout_stream); + /* Also, more likely to be important, and not a probe */ + if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + nf_conntrack_event_cache(IPCT_STATUS, skb); + } else + nf_ct_refresh_acct(conntrack, ctinfo, skb, + nf_ct_udplite_timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int udplite_new(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + return 1; +} + +static int udplite_error(struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + unsigned int udplen = skb->len - dataoff; + struct udphdr _hdr, *hdr; + unsigned int cscov; + + /* Header is too small? */ + hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hdr == NULL) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: short packet "); + return -NF_ACCEPT; + } + + cscov = ntohs(hdr->len); + if (cscov == 0) + cscov = udplen; + else if (cscov < sizeof(*hdr) || cscov > udplen) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: invalid checksum coverage "); + return -NF_ACCEPT; + } + + /* UDPLITE mandates checksums */ + if (!hdr->check) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: checksum missing "); + return -NF_ACCEPT; + } + + /* Checksum invalid? Ignore. */ + if (nf_conntrack_checksum && !skb_csum_unnecessary(skb) && + ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || + (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING))) { + if (pf == PF_INET) { + struct iphdr *iph = ip_hdr(skb); + + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + udplen, IPPROTO_UDPLITE, 0); + } else { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + __wsum hsum = skb_checksum(skb, 0, dataoff, 0); + + skb->csum = ~csum_unfold( + csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + udplen, IPPROTO_UDPLITE, + csum_sub(0, hsum))); + } + + skb->ip_summed = CHECKSUM_NONE; + if (__skb_checksum_complete_head(skb, dataoff + cscov)) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: bad UDPLite " + "checksum "); + return -NF_ACCEPT; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + return NF_ACCEPT; +} + +#ifdef CONFIG_SYSCTL +static unsigned int udplite_sysctl_table_users; +static struct ctl_table_header *udplite_sysctl_header; +static struct ctl_table udplite_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_udplite_timeout", + .data = &nf_ct_udplite_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_udplite_timeout_stream", + .data = &nf_ct_udplite_timeout_stream, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#endif /* CONFIG_SYSCTL */ + +static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +{ + .l3proto = PF_INET, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .pkt_to_tuple = udplite_pkt_to_tuple, + .invert_tuple = udplite_invert_tuple, + .print_tuple = udplite_print_tuple, + .print_conntrack = udplite_print_conntrack, + .packet = udplite_packet, + .new = udplite_new, + .error = udplite_error, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &udplite_sysctl_table_users, + .ctl_table_header = &udplite_sysctl_header, + .ctl_table = udplite_sysctl_table, +#endif +}; + +static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +{ + .l3proto = PF_INET6, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .pkt_to_tuple = udplite_pkt_to_tuple, + .invert_tuple = udplite_invert_tuple, + .print_tuple = udplite_print_tuple, + .print_conntrack = udplite_print_conntrack, + .packet = udplite_packet, + .new = udplite_new, + .error = udplite_error, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &udplite_sysctl_table_users, + .ctl_table_header = &udplite_sysctl_header, + .ctl_table = udplite_sysctl_table, +#endif +}; + +static int __init nf_conntrack_proto_udplite_init(void) +{ + int err; + + err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite4); + if (err < 0) + goto err1; + err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite6); + if (err < 0) + goto err2; + return 0; +err2: + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4); +err1: + return err; +} + +static void __exit nf_conntrack_proto_udplite_exit(void) +{ + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4); +} + +module_init(nf_conntrack_proto_udplite_init); +module_exit(nf_conntrack_proto_udplite_exit); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 0621ed2e4edbe2f6f83dafbf85eecefae7aaf2e8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 20:49:26 -0700 Subject: [NET_SCHED]: Revert "avoid transmit softirq on watchdog wakeup" optimization As noticed by Ranko Zivojnovic , calling qdisc_run from the timer handler can result in deadlock: > CPU#0 > > qdisc_watchdog() fires and gets dev->queue_lock > qdisc_run()...qdisc_restart()... > -> releases dev->queue_lock and enters dev_hard_start_xmit() > > CPU#1 > > tc del qdisc dev ... > qdisc_graft()...dev_graft_qdisc()...dev_deactivate()... > -> grabs dev->queue_lock ... > > qdisc_reset()...{cbq,hfsc,htb,netem,tbf}_reset()...qdisc_watchdog_cancel()... > -> hrtimer_cancel() - waiting for the qdisc_watchdog() to exit, while still > holding dev->queue_lock > > CPU#0 > > dev_hard_start_xmit() returns ... > -> wants to get dev->queue_lock(!) > > DEADLOCK! The entire optimization is a bit questionable IMO, it moves potentially large parts of NET_TX_SOFTIRQ work to TIMER_SOFTIRQ/HRTIMER_SOFTIRQ, which kind of defeats the separation of them. Signed-off-by: Patrick McHardy Acked-by: Ranko Zivojnovic Signed-off-by: David S. Miller --- net/sched/sch_api.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d92ea26982c5..4fd0beca9450 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -278,11 +278,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - if (spin_trylock(&dev->queue_lock)) { - qdisc_run(dev); - spin_unlock(&dev->queue_lock); - } else - netif_schedule(dev); + netif_schedule(dev); return HRTIMER_NORESTART; } -- cgit v1.2.3 From 1b1ac759d7c6bba6e5f4731ef6ea720b6636e27c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 14 Jul 2007 20:51:44 -0700 Subject: [IPV4]: Cleanup call to __neigh_lookup() Back in the times of Linux 2.2, negative values for the creat parameter of __neigh_lookup() had a particular meaning, but no longer, so we should pass 1 instead. Signed-off-by: Jean Delvare Signed-off-by: David S. Miller --- net/ipv4/arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index e00767e8ebd9..9ab9d534fbac 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -885,7 +885,7 @@ static int arp_process(struct sk_buff *skb) if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && inet_addr_type(sip) == RTN_UNICAST) - n = __neigh_lookup(&arp_tbl, &sip, dev, -1); + n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } if (n) { -- cgit v1.2.3 From f13ec93fba60d339dc1663eb47b2fb801225d2d2 Mon Sep 17 00:00:00 2001 From: Dmitry Butskoy Date: Sat, 14 Jul 2007 23:53:08 -0700 Subject: [IPV6]: MSG_ERRQUEUE messages do not pass to connected raw sockets From: Dmitry Butskoy Taken from http://bugzilla.kernel.org/show_bug.cgi?id=8747 Problem Description: It is related to the possibility to obtain MSG_ERRQUEUE messages from the udp and raw sockets, both connected and unconnected. There is a little typo in net/ipv6/icmp.c code, which prevents such messages to be delivered to the errqueue of the correspond raw socket, when the socket is CONNECTED. The typo is due to swap of local/remote addresses. Consider __raw_v6_lookup() function from net/ipv6/raw.c. When a raw socket is looked up usual way, it is something like: sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); where "daddr" is a destination address of the incoming packet (IOW our local address), "saddr" is a source address of the incoming packet (the remote end). But when the raw socket is looked up for some icmp error report, in net/ipv6/icmp.c:icmpv6_notify() , daddr/saddr are obtained from the echoed fragment of the "bad" packet, i.e. "daddr" is the original destination address of that packet, "saddr" is our local address. Hence, for icmpv6_notify() must use "saddr, daddr" in its arguments, not "daddr, saddr" ... Steps to reproduce: Create some raw socket, connect it to an address, and cause some error situation: f.e. set ttl=1 where the remote address is more than 1 hop to reach. Set IPV6_RECVERR . Then send something and wait for the error (f.e. poll() with POLLERR|POLLIN). You should receive "time exceeded" icmp message (because of "ttl=1"), but the socket do not receive it. If you do not connect your raw socket, you will receive MSG_ERRQUEUE successfully. (The reason is that for unconnected socket there are no actual checks for local/remote addresses). Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4765a29f98a8..6a6714d154ed 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -604,7 +604,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) read_lock(&raw_v6_lock); if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { - while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, + while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr, IP6CB(skb)->iif))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); -- cgit v1.2.3 From b0188d4dbe5f4285372dd033acf7c92a97006629 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:01:25 -0700 Subject: [NET_SCHED]: sch_atm: Lindent Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 468 ++++++++++++++++++++++++++-------------------------- 1 file changed, 232 insertions(+), 236 deletions(-) (limited to 'net') diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 54b92d22796c..9b458c403fa0 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -2,7 +2,6 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - #include #include #include @@ -11,12 +10,11 @@ #include #include #include -#include /* for fput */ +#include /* for fput */ #include #include - -extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ +extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ #if 0 /* control */ #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) @@ -30,7 +28,6 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ #define D2PRINTK(format,args...) #endif - /* * The ATM queuing discipline provides a framework for invoking classifiers * (aka "filters"), which in turn select classes of this queuing discipline. @@ -52,16 +49,15 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ * - should lock the flow while there is data in the queue (?) */ - #define PRIV(sch) qdisc_priv(sch) #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) - struct atm_flow_data { - struct Qdisc *q; /* FIFO, TBF, etc. */ + struct Qdisc *q; /* FIFO, TBF, etc. */ struct tcf_proto *filter_list; - struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ - void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); /* chaining */ + struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ + void (*old_pop)(struct atm_vcc *vcc, + struct sk_buff * skb); /* chaining */ struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ u32 classid; /* x:y type ID */ @@ -82,76 +78,74 @@ struct atm_qdisc_data { struct tasklet_struct task; /* requeue tasklet */ }; - /* ------------------------- Class/flow operations ------------------------- */ - -static int find_flow(struct atm_qdisc_data *qdisc,struct atm_flow_data *flow) +static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow) { struct atm_flow_data *walk; - DPRINTK("find_flow(qdisc %p,flow %p)\n",qdisc,flow); + DPRINTK("find_flow(qdisc %p,flow %p)\n", qdisc, flow); for (walk = qdisc->flows; walk; walk = walk->next) - if (walk == flow) return 1; + if (walk == flow) + return 1; DPRINTK("find_flow: not found\n"); return 0; } - -static __inline__ struct atm_flow_data *lookup_flow(struct Qdisc *sch, - u32 classid) +static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; for (flow = p->flows; flow; flow = flow->next) - if (flow->classid == classid) break; + if (flow->classid == classid) + break; return flow; } - -static int atm_tc_graft(struct Qdisc *sch,unsigned long arg, - struct Qdisc *new,struct Qdisc **old) +static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) arg; - - DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",sch, - p,flow,new,old); - if (!find_flow(p,flow)) return -EINVAL; - if (!new) new = &noop_qdisc; - *old = xchg(&flow->q,new); - if (*old) qdisc_reset(*old); + struct atm_flow_data *flow = (struct atm_flow_data *)arg; + + DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", + sch, p, flow, new, old); + if (!find_flow(p, flow)) + return -EINVAL; + if (!new) + new = &noop_qdisc; + *old = xchg(&flow->q, new); + if (*old) + qdisc_reset(*old); return 0; } - -static struct Qdisc *atm_tc_leaf(struct Qdisc *sch,unsigned long cl) +static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) { - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; - DPRINTK("atm_tc_leaf(sch %p,flow %p)\n",sch,flow); + DPRINTK("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); return flow ? flow->q : NULL; } - -static unsigned long atm_tc_get(struct Qdisc *sch,u32 classid) +static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid) { - struct atm_qdisc_data *p __attribute__((unused)) = PRIV(sch); + struct atm_qdisc_data *p __maybe_unused = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); - flow = lookup_flow(sch,classid); - if (flow) flow->ref++; - DPRINTK("atm_tc_get: flow %p\n",flow); - return (unsigned long) flow; + DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid); + flow = lookup_flow(sch, classid); + if (flow) + flow->ref++; + DPRINTK("atm_tc_get: flow %p\n", flow); + return (unsigned long)flow; } - static unsigned long atm_tc_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) + unsigned long parent, u32 classid) { - return atm_tc_get(sch,classid); + return atm_tc_get(sch, classid); } /* @@ -159,72 +153,75 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch, * requested (atm_tc_destroy, etc.). The assumption here is that we never drop * anything that still seems to be in use. */ - static void atm_tc_put(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; struct atm_flow_data **prev; - DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); - if (--flow->ref) return; + DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); + if (--flow->ref) + return; DPRINTK("atm_tc_put: destroying\n"); for (prev = &p->flows; *prev; prev = &(*prev)->next) - if (*prev == flow) break; + if (*prev == flow) + break; if (!*prev) { - printk(KERN_CRIT "atm_tc_put: class %p not found\n",flow); + printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow); return; } *prev = flow->next; - DPRINTK("atm_tc_put: qdisc %p\n",flow->q); + DPRINTK("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); tcf_destroy_chain(flow->filter_list); if (flow->sock) { DPRINTK("atm_tc_put: f_count %d\n", - file_count(flow->sock->file)); + file_count(flow->sock->file)); flow->vcc->pop = flow->old_pop; sockfd_put(flow->sock); } - if (flow->excess) atm_tc_put(sch,(unsigned long) flow->excess); - if (flow != &p->link) kfree(flow); + if (flow->excess) + atm_tc_put(sch, (unsigned long)flow->excess); + if (flow != &p->link) + kfree(flow); /* * If flow == &p->link, the qdisc no longer works at this point and * needs to be removed. (By the caller of atm_tc_put.) */ } - -static void sch_atm_pop(struct atm_vcc *vcc,struct sk_buff *skb) +static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb) { struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; - D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n",vcc,skb,p); - VCC2FLOW(vcc)->old_pop(vcc,skb); + D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); + VCC2FLOW(vcc)->old_pop(vcc, skb); tasklet_schedule(&p->task); } static const u8 llc_oui_ip[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ + 0xaa, /* DSAP: non-ISO */ + 0xaa, /* SSAP: non-ISO */ + 0x03, /* Ctrl: Unnumbered Information Command PDU */ + 0x00, /* OUI: EtherType */ 0x00, 0x00, - 0x08, 0x00 }; /* Ethertype IP (0800) */ + 0x08, 0x00 +}; /* Ethertype IP (0800) */ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct rtattr **tca, unsigned long *arg) + struct rtattr **tca, unsigned long *arg) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) *arg; + struct atm_flow_data *flow = (struct atm_flow_data *)*arg; struct atm_flow_data *excess = NULL; - struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *opt = tca[TCA_OPTIONS - 1]; struct rtattr *tb[TCA_ATM_MAX]; struct socket *sock; - int fd,error,hdr_len; + int fd, error, hdr_len; void *hdr; DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," - "flow %p,opt %p)\n",sch,p,classid,parent,flow,opt); + "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); /* * The concept of parents doesn't apply for this qdisc. */ @@ -237,33 +234,36 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, * class needs to be removed and a new one added. (This may be changed * later.) */ - if (flow) return -EBUSY; + if (flow) + return -EBUSY; if (opt == NULL || rtattr_parse_nested(tb, TCA_ATM_MAX, opt)) return -EINVAL; - if (!tb[TCA_ATM_FD-1] || RTA_PAYLOAD(tb[TCA_ATM_FD-1]) < sizeof(fd)) + if (!tb[TCA_ATM_FD - 1] || RTA_PAYLOAD(tb[TCA_ATM_FD - 1]) < sizeof(fd)) return -EINVAL; - fd = *(int *) RTA_DATA(tb[TCA_ATM_FD-1]); - DPRINTK("atm_tc_change: fd %d\n",fd); - if (tb[TCA_ATM_HDR-1]) { - hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR-1]); - hdr = RTA_DATA(tb[TCA_ATM_HDR-1]); - } - else { + fd = *(int *)RTA_DATA(tb[TCA_ATM_FD - 1]); + DPRINTK("atm_tc_change: fd %d\n", fd); + if (tb[TCA_ATM_HDR - 1]) { + hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR - 1]); + hdr = RTA_DATA(tb[TCA_ATM_HDR - 1]); + } else { hdr_len = RFC1483LLC_LEN; - hdr = NULL; /* default LLC/SNAP for IP */ + hdr = NULL; /* default LLC/SNAP for IP */ } - if (!tb[TCA_ATM_EXCESS-1]) excess = NULL; + if (!tb[TCA_ATM_EXCESS - 1]) + excess = NULL; else { - if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS-1]) != sizeof(u32)) + if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS - 1]) != sizeof(u32)) return -EINVAL; - excess = (struct atm_flow_data *) atm_tc_get(sch, - *(u32 *) RTA_DATA(tb[TCA_ATM_EXCESS-1])); - if (!excess) return -ENOENT; + excess = (struct atm_flow_data *) + atm_tc_get(sch, *(u32 *)RTA_DATA(tb[TCA_ATM_EXCESS - 1])); + if (!excess) + return -ENOENT; } DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n", - opt->rta_type,RTA_PAYLOAD(opt),hdr_len); - if (!(sock = sockfd_lookup(fd,&error))) return error; /* f_count++ */ - DPRINTK("atm_tc_change: f_count %d\n",file_count(sock->file)); + opt->rta_type, RTA_PAYLOAD(opt), hdr_len); + if (!(sock = sockfd_lookup(fd, &error))) + return error; /* f_count++ */ + DPRINTK("atm_tc_change: f_count %d\n", file_count(sock->file)); if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { error = -EPROTOTYPE; goto err_out; @@ -276,37 +276,37 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -EINVAL; goto err_out; } - if (find_flow(p,flow)) { + if (find_flow(p, flow)) { error = -EEXIST; goto err_out; } - } - else { + } else { int i; unsigned long cl; for (i = 1; i < 0x8000; i++) { - classid = TC_H_MAKE(sch->handle,0x8000 | i); - if (!(cl = atm_tc_get(sch,classid))) break; - atm_tc_put(sch,cl); + classid = TC_H_MAKE(sch->handle, 0x8000 | i); + if (!(cl = atm_tc_get(sch, classid))) + break; + atm_tc_put(sch, cl); } } - DPRINTK("atm_tc_change: new id %x\n",classid); - flow = kmalloc(sizeof(struct atm_flow_data)+hdr_len,GFP_KERNEL); - DPRINTK("atm_tc_change: flow %p\n",flow); + DPRINTK("atm_tc_change: new id %x\n", classid); + flow = kmalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); + DPRINTK("atm_tc_change: flow %p\n", flow); if (!flow) { error = -ENOBUFS; goto err_out; } - memset(flow,0,sizeof(*flow)); + memset(flow, 0, sizeof(*flow)); flow->filter_list = NULL; - if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,classid))) + if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) flow->q = &noop_qdisc; - DPRINTK("atm_tc_change: qdisc %p\n",flow->q); + DPRINTK("atm_tc_change: qdisc %p\n", flow->q); flow->sock = sock; - flow->vcc = ATM_SD(sock); /* speedup */ + flow->vcc = ATM_SD(sock); /* speedup */ flow->vcc->user_back = flow; - DPRINTK("atm_tc_change: vcc %p\n",flow->vcc); + DPRINTK("atm_tc_change: vcc %p\n", flow->vcc); flow->old_pop = flow->vcc->pop; flow->parent = p; flow->vcc->pop = sch_atm_pop; @@ -317,50 +317,53 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, p->link.next = flow; flow->hdr_len = hdr_len; if (hdr) - memcpy(flow->hdr,hdr,hdr_len); + memcpy(flow->hdr, hdr, hdr_len); else - memcpy(flow->hdr,llc_oui_ip,sizeof(llc_oui_ip)); - *arg = (unsigned long) flow; + memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip)); + *arg = (unsigned long)flow; return 0; err_out: - if (excess) atm_tc_put(sch,(unsigned long) excess); + if (excess) + atm_tc_put(sch, (unsigned long)excess); sockfd_put(sock); return error; } - -static int atm_tc_delete(struct Qdisc *sch,unsigned long arg) +static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) arg; + struct atm_flow_data *flow = (struct atm_flow_data *)arg; - DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); - if (!find_flow(PRIV(sch),flow)) return -EINVAL; - if (flow->filter_list || flow == &p->link) return -EBUSY; + DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); + if (!find_flow(PRIV(sch), flow)) + return -EINVAL; + if (flow->filter_list || flow == &p->link) + return -EBUSY; /* * Reference count must be 2: one for "keepalive" (set at class * creation), and one for the reference held when calling delete. */ if (flow->ref < 2) { - printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n",flow->ref); + printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); return -EINVAL; } - if (flow->ref > 2) return -EBUSY; /* catch references via excess, etc.*/ - atm_tc_put(sch,arg); + if (flow->ref > 2) + return -EBUSY; /* catch references via excess, etc. */ + atm_tc_put(sch, arg); return 0; } - -static void atm_tc_walk(struct Qdisc *sch,struct qdisc_walker *walker) +static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker); - if (walker->stop) return; + DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); + if (walker->stop) + return; for (flow = p->flows; flow; flow = flow->next) { if (walker->count >= walker->skip) - if (walker->fn(sch,(unsigned long) flow,walker) < 0) { + if (walker->fn(sch, (unsigned long)flow, walker) < 0) { walker->stop = 1; break; } @@ -368,73 +371,74 @@ static void atm_tc_walk(struct Qdisc *sch,struct qdisc_walker *walker) } } - -static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch,unsigned long cl) +static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; - DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); + DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); return flow ? &flow->filter_list : &p->link.filter_list; } - /* --------------------------- Qdisc operations ---------------------------- */ - -static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) +static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = NULL ; /* @@@ */ + struct atm_flow_data *flow = NULL; /* @@@ */ struct tcf_result res; int result; int ret = NET_XMIT_POLICED; - D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); - result = TC_POLICE_OK; /* be nice to gcc */ + D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + result = TC_POLICE_OK; /* be nice to gcc */ if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *) atm_tc_get(sch,skb->priority))) + !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) for (flow = p->flows; flow; flow = flow->next) if (flow->filter_list) { - result = tc_classify(skb,flow->filter_list, - &res); - if (result < 0) continue; - flow = (struct atm_flow_data *) res.class; - if (!flow) flow = lookup_flow(sch,res.classid); + result = tc_classify(skb, flow->filter_list, + &res); + if (result < 0) + continue; + flow = (struct atm_flow_data *)res.class; + if (!flow) + flow = lookup_flow(sch, res.classid); break; } - if (!flow) flow = &p->link; + if (!flow) + flow = &p->link; else { if (flow->vcc) ATM_SKB(skb)->atm_options = flow->vcc->atm_options; - /*@@@ looks good ... but it's not supposed to work :-)*/ + /*@@@ looks good ... but it's not supposed to work :-) */ #ifdef CONFIG_NET_CLS_POLICE switch (result) { - case TC_POLICE_SHOT: - kfree_skb(skb); - break; - case TC_POLICE_RECLASSIFY: - if (flow->excess) flow = flow->excess; - else { - ATM_SKB(skb)->atm_options |= - ATM_ATMOPT_CLP; - break; - } - /* fall through */ - case TC_POLICE_OK: - /* fall through */ - default: + case TC_POLICE_SHOT: + kfree_skb(skb); + break; + case TC_POLICE_RECLASSIFY: + if (flow->excess) + flow = flow->excess; + else { + ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; break; + } + /* fall through */ + case TC_POLICE_OK: + /* fall through */ + default: + break; } #endif } if ( #ifdef CONFIG_NET_CLS_POLICE - result == TC_POLICE_SHOT || + result == TC_POLICE_SHOT || #endif - (ret = flow->q->enqueue(skb,flow->q)) != 0) { + (ret = flow->q->enqueue(skb, flow->q)) != 0) { sch->qstats.drops++; - if (flow) flow->qstats.drops++; + if (flow) + flow->qstats.drops++; return ret; } sch->bstats.bytes += skb->len; @@ -458,7 +462,6 @@ static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) return NET_XMIT_BYPASS; } - /* * Dequeue packets and send them over ATM. Note that we quite deliberately * avoid checking net_device's flow control here, simply because sch_atm @@ -466,167 +469,163 @@ static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) * non-ATM interfaces. */ - static void sch_atm_dequeue(unsigned long data) { - struct Qdisc *sch = (struct Qdisc *) data; + struct Qdisc *sch = (struct Qdisc *)data; struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; struct sk_buff *skb; - D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n",sch,p); + D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); for (flow = p->link.next; flow; flow = flow->next) /* * If traffic is properly shaped, this won't generate nasty * little bursts. Otherwise, it may ... (but that's okay) */ while ((skb = flow->q->dequeue(flow->q))) { - if (!atm_may_send(flow->vcc,skb->truesize)) { - (void) flow->q->ops->requeue(skb,flow->q); + if (!atm_may_send(flow->vcc, skb->truesize)) { + (void)flow->q->ops->requeue(skb, flow->q); break; } - D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow); + D2PRINTK("atm_tc_dequeue: sending on class %p\n", flow); /* remove any LL header somebody else has attached */ skb_pull(skb, skb_network_offset(skb)); if (skb_headroom(skb) < flow->hdr_len) { struct sk_buff *new; - new = skb_realloc_headroom(skb,flow->hdr_len); + new = skb_realloc_headroom(skb, flow->hdr_len); dev_kfree_skb(skb); - if (!new) continue; + if (!new) + continue; skb = new; } D2PRINTK("sch_atm_dequeue: ip %p, data %p\n", skb_network_header(skb), skb->data); ATM_SKB(skb)->vcc = flow->vcc; - memcpy(skb_push(skb,flow->hdr_len),flow->hdr, - flow->hdr_len); + memcpy(skb_push(skb, flow->hdr_len), flow->hdr, + flow->hdr_len); atomic_add(skb->truesize, &sk_atm(flow->vcc)->sk_wmem_alloc); /* atm.atm_options are already set by atm_tc_enqueue */ - (void) flow->vcc->send(flow->vcc,skb); + flow->vcc->send(flow->vcc, skb); } } - static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct sk_buff *skb; - D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n",sch,p); + D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); tasklet_schedule(&p->task); skb = p->link.q->dequeue(p->link.q); - if (skb) sch->q.qlen--; + if (skb) + sch->q.qlen--; return skb; } - -static int atm_tc_requeue(struct sk_buff *skb,struct Qdisc *sch) +static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); int ret; - D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); - ret = p->link.q->ops->requeue(skb,p->link.q); + D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + ret = p->link.q->ops->requeue(skb, p->link.q); if (!ret) { - sch->q.qlen++; - sch->qstats.requeues++; - } else { + sch->q.qlen++; + sch->qstats.requeues++; + } else { sch->qstats.drops++; p->link.qstats.drops++; } return ret; } - static unsigned int atm_tc_drop(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; unsigned int len; - DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n",sch,p); + DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); for (flow = p->flows; flow; flow = flow->next) if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) return len; return 0; } - -static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt) +static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt) { struct atm_qdisc_data *p = PRIV(sch); - DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); + DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); p->flows = &p->link; - if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops, - sch->handle))) + if (!(p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle))) p->link.q = &noop_qdisc; - DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q); + DPRINTK("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); p->link.filter_list = NULL; p->link.vcc = NULL; p->link.sock = NULL; p->link.classid = sch->handle; p->link.ref = 1; p->link.next = NULL; - tasklet_init(&p->task,sch_atm_dequeue,(unsigned long) sch); + tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; } - static void atm_tc_reset(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n",sch,p); - for (flow = p->flows; flow; flow = flow->next) qdisc_reset(flow->q); + DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); + for (flow = p->flows; flow; flow = flow->next) + qdisc_reset(flow->q); sch->q.qlen = 0; } - static void atm_tc_destroy(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p); + DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); /* races ? */ while ((flow = p->flows)) { tcf_destroy_chain(flow->filter_list); flow->filter_list = NULL; if (flow->ref > 1) - printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow, - flow->ref); - atm_tc_put(sch,(unsigned long) flow); + printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, + flow->ref); + atm_tc_put(sch, (unsigned long)flow); if (p->flows == flow) { printk(KERN_ERR "atm_destroy: putting flow %p didn't " - "kill it\n",flow); - p->flows = flow->next; /* brute force */ + "kill it\n", flow); + p->flows = flow->next; /* brute force */ break; } } tasklet_kill(&p->task); } - static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) + struct sk_buff *skb, struct tcmsg *tcm) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", - sch,p,flow,skb,tcm); - if (!find_flow(p,flow)) return -EINVAL; + sch, p, flow, skb, tcm); + if (!find_flow(p, flow)) + return -EINVAL; tcm->tcm_handle = flow->classid; tcm->tcm_info = flow->q->handle; - rta = (struct rtattr *) b; - RTA_PUT(skb,TCA_OPTIONS,0,NULL); - RTA_PUT(skb,TCA_ATM_HDR,flow->hdr_len,flow->hdr); + rta = (struct rtattr *)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + RTA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr); if (flow->vcc) { struct sockaddr_atmpvc pvc; int state; @@ -635,16 +634,16 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; pvc.sap_addr.vci = flow->vcc->vci; - RTA_PUT(skb,TCA_ATM_ADDR,sizeof(pvc),&pvc); + RTA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc); state = ATM_VF2VS(flow->vcc->flags); - RTA_PUT(skb,TCA_ATM_STATE,sizeof(state),&state); + RTA_PUT(skb, TCA_ATM_STATE, sizeof(state), &state); } if (flow->excess) - RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(u32),&flow->classid); + RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(u32), &flow->classid); else { static u32 zero; - RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero); + RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero); } rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; @@ -655,9 +654,9 @@ rtattr_failure: } static int atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) + struct gnet_dump *d) { - struct atm_flow_data *flow = (struct atm_flow_data *) arg; + struct atm_flow_data *flow = (struct atm_flow_data *)arg; flow->qstats.qlen = flow->q->q.qlen; @@ -674,38 +673,35 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) } static struct Qdisc_class_ops atm_class_ops = { - .graft = atm_tc_graft, - .leaf = atm_tc_leaf, - .get = atm_tc_get, - .put = atm_tc_put, - .change = atm_tc_change, - .delete = atm_tc_delete, - .walk = atm_tc_walk, - .tcf_chain = atm_tc_find_tcf, - .bind_tcf = atm_tc_bind_filter, - .unbind_tcf = atm_tc_put, - .dump = atm_tc_dump_class, - .dump_stats = atm_tc_dump_class_stats, + .graft = atm_tc_graft, + .leaf = atm_tc_leaf, + .get = atm_tc_get, + .put = atm_tc_put, + .change = atm_tc_change, + .delete = atm_tc_delete, + .walk = atm_tc_walk, + .tcf_chain = atm_tc_find_tcf, + .bind_tcf = atm_tc_bind_filter, + .unbind_tcf = atm_tc_put, + .dump = atm_tc_dump_class, + .dump_stats = atm_tc_dump_class_stats, }; static struct Qdisc_ops atm_qdisc_ops = { - .next = NULL, - .cl_ops = &atm_class_ops, - .id = "atm", - .priv_size = sizeof(struct atm_qdisc_data), - .enqueue = atm_tc_enqueue, - .dequeue = atm_tc_dequeue, - .requeue = atm_tc_requeue, - .drop = atm_tc_drop, - .init = atm_tc_init, - .reset = atm_tc_reset, - .destroy = atm_tc_destroy, - .change = NULL, - .dump = atm_tc_dump, - .owner = THIS_MODULE, + .cl_ops = &atm_class_ops, + .id = "atm", + .priv_size = sizeof(struct atm_qdisc_data), + .enqueue = atm_tc_enqueue, + .dequeue = atm_tc_dequeue, + .requeue = atm_tc_requeue, + .drop = atm_tc_drop, + .init = atm_tc_init, + .reset = atm_tc_reset, + .destroy = atm_tc_destroy, + .dump = atm_tc_dump, + .owner = THIS_MODULE, }; - static int __init atm_init(void) { return register_qdisc(&atm_qdisc_ops); -- cgit v1.2.3 From 9210080445b0c51a73b488750a26eb17177d8684 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:01:49 -0700 Subject: [NET_SCHED]: sch_atm: act_api support Handle act_api classification results. The ATM scheduler behaves slightly different than other schedulers in that it only handles policer results for successful classifications, this behaviour is retained for the act_api case. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 9b458c403fa0..ccee10dae66d 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -411,11 +411,21 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (flow->vcc) ATM_SKB(skb)->atm_options = flow->vcc->atm_options; /*@@@ looks good ... but it's not supposed to work :-) */ -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NET_XMIT_SUCCESS; + case TC_ACT_SHOT: + kfree_skb(skb); + goto drop; + } +#elif defined(CONFIG_NET_CLS_POLICE) switch (result) { case TC_POLICE_SHOT: kfree_skb(skb); - break; + goto drop; case TC_POLICE_RECLASSIFY: if (flow->excess) flow = flow->excess; @@ -431,11 +441,8 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) } #endif } - if ( -#ifdef CONFIG_NET_CLS_POLICE - result == TC_POLICE_SHOT || -#endif - (ret = flow->q->enqueue(skb, flow->q)) != 0) { + if ((ret = flow->q->enqueue(skb, flow->q)) != 0) { +drop: __maybe_unused sch->qstats.drops++; if (flow) flow->qstats.drops++; -- cgit v1.2.3 From f6853e2df3de82c1dac8f62ddcf3a8dfa302419e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:02:10 -0700 Subject: [NET_SCHED]: sch_dsmark: act_api support Handle act_api classification results. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_dsmark.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 4d2c233a8611..2d7e891e6b0d 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -237,25 +237,34 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) D2PRINTK("result %d class 0x%04x\n", result, res.classid); switch (result) { -#ifdef CONFIG_NET_CLS_POLICE - case TC_POLICE_SHOT: - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_POLICED; +#ifdef CONFIG_NET_CLS_ACT + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NET_XMIT_SUCCESS; + case TC_ACT_SHOT: + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_BYPASS; +#elif defined(CONFIG_NET_CLS_POLICE) + case TC_POLICE_SHOT: + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_POLICED; #if 0 - case TC_POLICE_RECLASSIFY: - /* FIXME: what to do here ??? */ + case TC_POLICE_RECLASSIFY: + /* FIXME: what to do here ??? */ #endif #endif - case TC_POLICE_OK: - skb->tc_index = TC_H_MIN(res.classid); - break; - case TC_POLICE_UNSPEC: - /* fall through */ - default: - if (p->default_index != NO_DEFAULT_INDEX) - skb->tc_index = p->default_index; - break; + case TC_POLICE_OK: + skb->tc_index = TC_H_MIN(res.classid); + break; + case TC_POLICE_UNSPEC: + /* fall through */ + default: + if (p->default_index != NO_DEFAULT_INDEX) + skb->tc_index = p->default_index; + break; } } -- cgit v1.2.3 From 73ca4918fbb98311421259d82ef4ab44feeace43 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:02:31 -0700 Subject: [NET_SCHED]: act_api: qdisc internal reclassify support The behaviour of NET_CLS_POLICE for TC_POLICE_RECLASSIFY was to return it to the qdisc, which could handle it internally or ignore it. With NET_CLS_ACT however, tc_classify starts over at the first classifier and never returns it to the qdisc. This makes it impossible to support qdisc-internal reclassification, which in turn makes it impossible to remove the old NET_CLS_POLICE code without breaking compatibility since we have two qdiscs (CBQ and ATM) that support this. This patch adds a tc_classify_compat function that handles reclassification the old way and changes CBQ and ATM to use it. This again is of course not fully backwards compatible with the previous NET_CLS_ACT behaviour. Unfortunately there is no way to fully maintain compatibility *and* support qdisc internal reclassification with NET_CLS_ACT, but this seems like the better choice over keeping the two incompatible options around forever. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 4 ++- include/net/sch_generic.h | 2 +- net/sched/sch_api.c | 67 +++++++++++++++++++++++++++-------------------- net/sched/sch_atm.c | 11 ++++++-- net/sched/sch_cbq.c | 39 ++++++++++++++------------- net/sched/sch_tbf.c | 2 +- 6 files changed, 73 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 5754d53d9efc..9e22526e80e7 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -89,8 +89,10 @@ static inline void qdisc_run(struct net_device *dev) __qdisc_run(dev); } +extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res); extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, - struct tcf_result *res); + struct tcf_result *res); /* Calculate maximal size of packet seen by hard_start_xmit routine of this device. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1b8e35197ebe..0153cd9d1b8d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -290,7 +290,7 @@ static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) { sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) goto drop; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4fd0beca9450..13c09bc32aa3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1145,47 +1145,57 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) to this qdisc, (optionally) tests for protocol and asks specific classifiers. */ +int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + __be16 protocol = skb->protocol; + int err = 0; + + for (; tp; tp = tp->next) { + if ((tp->protocol == protocol || + tp->protocol == htons(ETH_P_ALL)) && + (err = tp->classify(skb, tp, res)) >= 0) { +#ifdef CONFIG_NET_CLS_ACT + if (err != TC_ACT_RECLASSIFY && skb->tc_verd) + skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); +#endif + return err; + } + } + return -1; +} +EXPORT_SYMBOL(tc_classify_compat); + int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { int err = 0; - __be16 protocol = skb->protocol; + __be16 protocol; #ifdef CONFIG_NET_CLS_ACT struct tcf_proto *otp = tp; reclassify: #endif protocol = skb->protocol; - for ( ; tp; tp = tp->next) { - if ((tp->protocol == protocol || - tp->protocol == htons(ETH_P_ALL)) && - (err = tp->classify(skb, tp, res)) >= 0) { + err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT - if ( TC_ACT_RECLASSIFY == err) { - __u32 verd = (__u32) G_TC_VERD(skb->tc_verd); - tp = otp; - - if (MAX_REC_LOOP < verd++) { - printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n", - tp->prio&0xffff, ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd); - goto reclassify; - } else { - if (skb->tc_verd) - skb->tc_verd = SET_TC_VERD(skb->tc_verd,0); - return err; - } -#else - - return err; -#endif + if (err == TC_ACT_RECLASSIFY) { + u32 verd = G_TC_VERD(skb->tc_verd); + tp = otp; + + if (verd++ >= MAX_REC_LOOP) { + printk("rule prio %u protocol %02x reclassify loop, " + "packet dropped\n", + tp->prio&0xffff, ntohs(tp->protocol)); + return TC_ACT_SHOT; } - + skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); + goto reclassify; } - return -1; +#endif + return err; } +EXPORT_SYMBOL(tc_classify); void tcf_destroy(struct tcf_proto *tp) { @@ -1252,4 +1262,3 @@ EXPORT_SYMBOL(qdisc_get_rtab); EXPORT_SYMBOL(qdisc_put_rtab); EXPORT_SYMBOL(register_qdisc); EXPORT_SYMBOL(unregister_qdisc); -EXPORT_SYMBOL(tc_classify); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ccee10dae66d..37ae6d1deb14 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -396,8 +396,9 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) for (flow = p->flows; flow; flow = flow->next) if (flow->filter_list) { - result = tc_classify(skb, flow->filter_list, - &res); + result = tc_classify_compat(skb, + flow->filter_list, + &res); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; @@ -420,6 +421,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) case TC_ACT_SHOT: kfree_skb(skb); goto drop; + case TC_POLICE_RECLASSIFY: + if (flow->excess) + flow = flow->excess; + else + ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; + break; } #elif defined(CONFIG_NET_CLS_POLICE) switch (result) { diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index b184c3545145..77381f1c6541 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -82,7 +82,7 @@ struct cbq_class unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ unsigned char ovl_strategy; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) unsigned char police; #endif @@ -154,7 +154,7 @@ struct cbq_sched_data struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes with backlog */ -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) struct cbq_class *rx_class; #endif struct cbq_class *tx_class; @@ -196,7 +196,7 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid) return NULL; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) static struct cbq_class * cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) @@ -247,7 +247,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. */ - if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0) + if (!head->filter_list || + (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) goto fallback; if ((cl = (void*)res.class) == NULL) { @@ -267,6 +268,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; + case TC_ACT_RECLASSIFY: + return cbq_reclassify(skb, cl); } #elif defined(CONFIG_NET_CLS_POLICE) switch (result) { @@ -389,7 +392,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; struct cbq_class *cl = cbq_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) q->rx_class = cl; #endif if (cl == NULL) { @@ -399,7 +402,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) cl->q->__parent = sch; #endif if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { @@ -434,7 +437,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) cbq_mark_toplevel(q, cl); -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) q->rx_class = cl; cl->q->__parent = sch; #endif @@ -670,7 +673,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { @@ -1364,7 +1367,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) return 0; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p) { cl->police = p->police; @@ -1532,7 +1535,7 @@ rtattr_failure: return -1; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); @@ -1558,7 +1561,7 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) cbq_dump_rate(skb, cl) < 0 || cbq_dump_wrr(skb, cl) < 0 || cbq_dump_ovl(skb, cl) < 0 || -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) cbq_dump_police(skb, cl) < 0 || #endif cbq_dump_fopt(skb, cl) < 0) @@ -1653,7 +1656,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->classid)) == NULL) return -ENOBUFS; } else { -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (cl->police == TC_POLICE_RECLASSIFY) new->reshape_fail = cbq_reshape_fail; #endif @@ -1718,7 +1721,7 @@ cbq_destroy(struct Qdisc* sch) struct cbq_class *cl; unsigned h; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) q->rx_class = NULL; #endif /* @@ -1747,7 +1750,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) struct cbq_class *cl = (struct cbq_class*)arg; if (--cl->refcnt == 0) { -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) struct cbq_sched_data *q = qdisc_priv(sch); spin_lock_bh(&sch->dev->queue_lock); @@ -1795,7 +1798,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt)) return -EINVAL; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (tb[TCA_CBQ_POLICE-1] && RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police)) return -EINVAL; @@ -1838,7 +1841,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1931,7 +1934,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->overlimit = cbq_ovl_classic; if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1975,7 +1978,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) q->tx_class = NULL; q->tx_borrowed = NULL; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (q->rx_class == cl) q->rx_class = NULL; #endif diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 22e431dace54..b8b3345cede8 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -125,7 +125,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) if (skb->len > q->max_size) { sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) #endif kfree_skb(skb); -- cgit v1.2.3 From c3bc7cff8fddb6ff9715be8bfc3d911378c4d69d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:03:05 -0700 Subject: [NET_SCHED]: Kill CONFIG_NET_CLS_POLICE The NET_CLS_ACT option is now a full replacement for NET_CLS_POLICE, remove the old code. The config option will be kept around to select the equivalent NET_CLS_ACT options for a short time to allow easier upgrades. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/act_api.h | 30 ------ include/net/pkt_cls.h | 8 -- include/net/sch_generic.h | 2 +- net/sched/Kconfig | 8 +- net/sched/Makefile | 1 - net/sched/act_police.c | 246 +++------------------------------------------- net/sched/cls_api.c | 40 -------- net/sched/cls_u32.c | 3 - net/sched/sch_atm.c | 19 +--- net/sched/sch_cbq.c | 45 ++++----- net/sched/sch_dsmark.c | 13 +-- net/sched/sch_hfsc.c | 3 - net/sched/sch_htb.c | 3 - net/sched/sch_ingress.c | 19 ---- net/sched/sch_tbf.c | 2 +- 15 files changed, 38 insertions(+), 404 deletions(-) (limited to 'net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 2f0273feabd3..68b4eaf7719d 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -121,34 +121,4 @@ extern int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, in extern int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); extern int tcf_action_copy_stats (struct sk_buff *,struct tc_action *, int); #endif /* CONFIG_NET_CLS_ACT */ - -extern int tcf_police(struct sk_buff *skb, struct tcf_police *p); -extern void tcf_police_destroy(struct tcf_police *p); -extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est); -extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p); -extern int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p); - -static inline int -tcf_police_release(struct tcf_police *p, int bind) -{ - int ret = 0; -#ifdef CONFIG_NET_CLS_ACT - if (p) { - if (bind) - p->tcf_bindcnt--; - - p->tcf_refcnt--; - if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { - tcf_police_destroy(p); - ret = 1; - } - } -#else - if (p && --p->tcf_refcnt == 0) - tcf_police_destroy(p); - -#endif /* CONFIG_NET_CLS_ACT */ - return ret; -} - #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 6c29920cbe29..7968b1d66369 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -65,8 +65,6 @@ struct tcf_exts { #ifdef CONFIG_NET_CLS_ACT struct tc_action *action; -#elif defined CONFIG_NET_CLS_POLICE - struct tcf_police *police; #endif }; @@ -91,8 +89,6 @@ tcf_exts_is_predicative(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT return !!exts->action; -#elif defined CONFIG_NET_CLS_POLICE - return !!exts->police; #else return 0; #endif @@ -129,11 +125,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, #ifdef CONFIG_NET_CLS_ACT if (exts->action) return tcf_action_exec(skb, exts->action, res); -#elif defined CONFIG_NET_CLS_POLICE - if (exts->police) - return tcf_police(skb, exts->police); #endif - return 0; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0153cd9d1b8d..8a67f24cbe02 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -290,7 +290,7 @@ static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) { sch->qstats.drops++; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) goto drop; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index b4662888bdbd..d3f7c3f9407a 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -472,12 +472,12 @@ config NET_ACT_SIMP config NET_CLS_POLICE bool "Traffic Policing (obsolete)" - depends on NET_CLS_ACT!=y + select NET_CLS_ACT + select NET_ACT_POLICE ---help--- Say Y here if you want to do traffic policing, i.e. strict - bandwidth limiting. This option is obsoleted by the traffic - policer implemented as action, it stays here for compatibility - reasons. + bandwidth limiting. This option is obsolete and just selects + the option replacing it. It will be removed in the future. config NET_CLS_IND bool "Incoming device classification" diff --git a/net/sched/Makefile b/net/sched/Makefile index 020767a204d4..b67c36f65cf2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o obj-$(CONFIG_NET_CLS_ACT) += act_api.o obj-$(CONFIG_NET_ACT_POLICE) += act_police.o -obj-$(CONFIG_NET_CLS_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o diff --git a/net/sched/act_police.c b/net/sched/act_police.c index d20403890877..bf90e60f8411 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -50,7 +50,6 @@ struct tc_police_compat /* Each policer is serialized by its individual spinlock */ -#ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { @@ -96,9 +95,8 @@ rtattr_failure: nlmsg_trim(skb, r); goto done; } -#endif -void tcf_police_destroy(struct tcf_police *p) +static void tcf_police_destroy(struct tcf_police *p) { unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); struct tcf_common **p1p; @@ -121,7 +119,6 @@ void tcf_police_destroy(struct tcf_police *p) BUG_TRAP(0); } -#ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { @@ -247,10 +244,19 @@ failure: static int tcf_act_police_cleanup(struct tc_action *a, int bind) { struct tcf_police *p = a->priv; + int ret = 0; - if (p != NULL) - return tcf_police_release(p, bind); - return 0; + if (p != NULL) { + if (bind) + p->tcf_bindcnt--; + + p->tcf_refcnt--; + if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { + tcf_police_destroy(p); + ret = 1; + } + } + return ret; } static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, @@ -372,229 +378,3 @@ police_cleanup_module(void) module_init(police_init_module); module_exit(police_cleanup_module); - -#else /* CONFIG_NET_CLS_ACT */ - -static struct tcf_common *tcf_police_lookup(u32 index) -{ - struct tcf_hashinfo *hinfo = &police_hash_info; - struct tcf_common *p; - - read_lock(hinfo->lock); - for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; - p = p->tcfc_next) { - if (p->tcfc_index == index) - break; - } - read_unlock(hinfo->lock); - - return p; -} - -static u32 tcf_police_new_index(void) -{ - u32 *idx_gen = &police_idx_gen; - u32 val = *idx_gen; - - do { - if (++val == 0) - val = 1; - } while (tcf_police_lookup(val)); - - return (*idx_gen = val); -} - -struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) -{ - unsigned int h; - struct tcf_police *police; - struct rtattr *tb[TCA_POLICE_MAX]; - struct tc_police *parm; - int size; - - if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) - return NULL; - - if (tb[TCA_POLICE_TBF-1] == NULL) - return NULL; - size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); - if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) - return NULL; - - parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); - - if (parm->index) { - struct tcf_common *pc; - - pc = tcf_police_lookup(parm->index); - if (pc) { - police = to_police(pc); - police->tcf_refcnt++; - return police; - } - } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (unlikely(!police)) - return NULL; - - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - if (parm->rate.rate) { - police->tcfp_R_tab = - qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); - if (police->tcfp_R_tab == NULL) - goto failure; - if (parm->peakrate.rate) { - police->tcfp_P_tab = - qdisc_get_rtab(&parm->peakrate, - tb[TCA_POLICE_PEAKRATE-1]); - if (police->tcfp_P_tab == NULL) - goto failure; - } - } - if (tb[TCA_POLICE_RESULT-1]) { - if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) - goto failure; - police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); - } - if (tb[TCA_POLICE_AVRATE-1]) { - if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) - goto failure; - police->tcfp_ewma_rate = - *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); - } - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<tcfp_R_tab->rate.cell_log; - } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); - police->tcfp_t_c = psched_get_time(); - police->tcf_index = parm->index ? parm->index : - tcf_police_new_index(); - police->tcf_action = parm->action; - if (est) - gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, - &police->tcf_lock, est); - h = tcf_hash(police->tcf_index, POL_TAB_MASK); - write_lock_bh(&police_lock); - police->tcf_next = tcf_police_ht[h]; - tcf_police_ht[h] = &police->common; - write_unlock_bh(&police_lock); - return police; - -failure: - if (police->tcfp_R_tab) - qdisc_put_rtab(police->tcfp_R_tab); - kfree(police); - return NULL; -} - -int tcf_police(struct sk_buff *skb, struct tcf_police *police) -{ - psched_time_t now; - long toks; - long ptoks = 0; - - spin_lock(&police->tcf_lock); - - police->tcf_bstats.bytes += skb->len; - police->tcf_bstats.packets++; - - if (police->tcfp_ewma_rate && - police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; - } - if (skb->len <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { - spin_unlock(&police->tcf_lock); - return police->tcfp_result; - } - - now = psched_get_time(); - toks = psched_tdiff_bounded(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { - ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, skb->len); - } - toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) - toks = police->tcfp_burst; - toks -= L2T(police, skb->len); - if ((toks|ptoks) >= 0) { - police->tcfp_t_c = now; - police->tcfp_toks = toks; - police->tcfp_ptoks = ptoks; - spin_unlock(&police->tcf_lock); - return police->tcfp_result; - } - } - - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; -} -EXPORT_SYMBOL(tcf_police); - -int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_police opt; - - opt.index = police->tcf_index; - opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = police->tcfp_burst; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; - else - memset(&opt.rate, 0, sizeof(opt.rate)); - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; - else - memset(&opt.peakrate, 0, sizeof(opt.peakrate)); - RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (police->tcfp_result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), - &police->tcfp_result); - if (police->tcfp_ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); - return skb->len; - -rtattr_failure: - nlmsg_trim(skb, b); - return -1; -} - -int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) -{ - struct gnet_dump d; - - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, &police->tcf_lock, - &d) < 0) - goto errout; - - if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || - gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || - gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) - goto errout; - - if (gnet_stats_finish_copy(&d) < 0) - goto errout; - - return 0; - -errout: - return -1; -} - -#endif /* CONFIG_NET_CLS_ACT */ diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 36b72aab1bde..5f0fbca7393f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -458,11 +458,6 @@ tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) tcf_action_destroy(exts->action, TCA_ACT_UNBIND); exts->action = NULL; } -#elif defined CONFIG_NET_CLS_POLICE - if (exts->police) { - tcf_police_release(exts->police, TCA_ACT_UNBIND); - exts->police = NULL; - } #endif } @@ -496,17 +491,6 @@ tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb, exts->action = act; } } -#elif defined CONFIG_NET_CLS_POLICE - if (map->police && tb[map->police-1]) { - struct tcf_police *p; - - p = tcf_police_locate(tb[map->police-1], rate_tlv); - if (p == NULL) - return -EINVAL; - - exts->police = p; - } else if (map->action && tb[map->action-1]) - return -EOPNOTSUPP; #else if ((map->action && tb[map->action-1]) || (map->police && tb[map->police-1])) @@ -529,15 +513,6 @@ tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, if (act) tcf_action_destroy(act, TCA_ACT_UNBIND); } -#elif defined CONFIG_NET_CLS_POLICE - if (src->police) { - struct tcf_police *p; - tcf_tree_lock(tp); - p = xchg(&dst->police, src->police); - tcf_tree_unlock(tp); - if (p) - tcf_police_release(p, TCA_ACT_UNBIND); - } #endif } @@ -566,17 +541,6 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } } -#elif defined CONFIG_NET_CLS_POLICE - if (map->police && exts->police) { - struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); - - RTA_PUT(skb, map->police, 0, NULL); - - if (tcf_police_dump(skb, exts->police) < 0) - goto rtattr_failure; - - p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; - } #endif return 0; rtattr_failure: __attribute__ ((unused)) @@ -591,10 +555,6 @@ tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, if (exts->action) if (tcf_action_copy_stats(skb, exts->action, 1) < 0) goto rtattr_failure; -#elif defined CONFIG_NET_CLS_POLICE - if (exts->police) - if (tcf_police_dump_stats(skb, exts->police) < 0) - goto rtattr_failure; #endif return 0; rtattr_failure: __attribute__ ((unused)) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 77961e2314dc..8dbe36912ecb 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -782,9 +782,6 @@ static int __init init_u32(void) #ifdef CONFIG_CLS_U32_PERF printk(" Performance counters on\n"); #endif -#ifdef CONFIG_NET_CLS_POLICE - printk(" OLD policer on \n"); -#endif #ifdef CONFIG_NET_CLS_IND printk(" input device check on \n"); #endif diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 37ae6d1deb14..417ec8fb7f1a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -428,26 +428,9 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; break; } -#elif defined(CONFIG_NET_CLS_POLICE) - switch (result) { - case TC_POLICE_SHOT: - kfree_skb(skb); - goto drop; - case TC_POLICE_RECLASSIFY: - if (flow->excess) - flow = flow->excess; - else { - ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; - break; - } - /* fall through */ - case TC_POLICE_OK: - /* fall through */ - default: - break; - } #endif } + if ((ret = flow->q->enqueue(skb, flow->q)) != 0) { drop: __maybe_unused sch->qstats.drops++; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 77381f1c6541..e38c2839b25c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -82,7 +82,7 @@ struct cbq_class unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ unsigned char ovl_strategy; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT unsigned char police; #endif @@ -154,7 +154,7 @@ struct cbq_sched_data struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes with backlog */ -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT struct cbq_class *rx_class; #endif struct cbq_class *tx_class; @@ -196,7 +196,7 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid) return NULL; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT static struct cbq_class * cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) @@ -271,15 +271,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); } -#elif defined(CONFIG_NET_CLS_POLICE) - switch (result) { - case TC_POLICE_RECLASSIFY: - return cbq_reclassify(skb, cl); - case TC_POLICE_SHOT: - return NULL; - default: - break; - } #endif if (cl->level == 0) return cl; @@ -392,7 +383,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; struct cbq_class *cl = cbq_classify(skb, sch, &ret); -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT q->rx_class = cl; #endif if (cl == NULL) { @@ -402,7 +393,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT cl->q->__parent = sch; #endif if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { @@ -437,7 +428,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) cbq_mark_toplevel(q, cl); -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT q->rx_class = cl; cl->q->__parent = sch; #endif @@ -672,9 +663,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) return HRTIMER_NORESTART; } - -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) - +#ifdef CONFIG_NET_CLS_ACT static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { int len = skb->len; @@ -1367,7 +1356,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) return 0; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p) { cl->police = p->police; @@ -1535,7 +1524,7 @@ rtattr_failure: return -1; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); @@ -1561,7 +1550,7 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) cbq_dump_rate(skb, cl) < 0 || cbq_dump_wrr(skb, cl) < 0 || cbq_dump_ovl(skb, cl) < 0 || -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT cbq_dump_police(skb, cl) < 0 || #endif cbq_dump_fopt(skb, cl) < 0) @@ -1656,7 +1645,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->classid)) == NULL) return -ENOBUFS; } else { -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (cl->police == TC_POLICE_RECLASSIFY) new->reshape_fail = cbq_reshape_fail; #endif @@ -1721,7 +1710,7 @@ cbq_destroy(struct Qdisc* sch) struct cbq_class *cl; unsigned h; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT q->rx_class = NULL; #endif /* @@ -1750,7 +1739,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) struct cbq_class *cl = (struct cbq_class*)arg; if (--cl->refcnt == 0) { -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT struct cbq_sched_data *q = qdisc_priv(sch); spin_lock_bh(&sch->dev->queue_lock); @@ -1798,7 +1787,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt)) return -EINVAL; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1] && RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police)) return -EINVAL; @@ -1841,7 +1830,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1934,7 +1923,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->overlimit = cbq_ovl_classic; if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1978,7 +1967,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) q->tx_class = NULL; q->tx_borrowed = NULL; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (q->rx_class == cl) q->rx_class = NULL; #endif diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 2d7e891e6b0d..60f89199e3da 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -246,21 +246,10 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) kfree_skb(skb); sch->qstats.drops++; return NET_XMIT_BYPASS; -#elif defined(CONFIG_NET_CLS_POLICE) - case TC_POLICE_SHOT: - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_POLICED; -#if 0 - case TC_POLICE_RECLASSIFY: - /* FIXME: what to do here ??? */ -#endif #endif - case TC_POLICE_OK: + case TC_ACT_OK: skb->tc_index = TC_H_MIN(res.classid); break; - case TC_POLICE_UNSPEC: - /* fall through */ default: if (p->default_index != NO_DEFAULT_INDEX) skb->tc_index = p->default_index; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 874452c41a01..55e7e4530f43 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1174,9 +1174,6 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_SHOT: return NULL; } -#elif defined(CONFIG_NET_CLS_POLICE) - if (result == TC_POLICE_SHOT) - return NULL; #endif if ((cl = (struct hfsc_class *)res.class) == NULL) { if ((cl = hfsc_find_class(res.classid, sch)) == NULL) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index b417a95df322..246a2f9765f1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -249,9 +249,6 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_SHOT: return NULL; } -#elif defined(CONFIG_NET_CLS_POLICE) - if (result == TC_POLICE_SHOT) - return HTB_DIRECT; #endif if ((cl = (void *)res.class) == NULL) { if (res.classid == sch->handle) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index cd0aab6a2a7c..51f16b0af198 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -164,30 +164,11 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch) result = TC_ACT_OK; break; } -/* backward compat */ -#else -#ifdef CONFIG_NET_CLS_POLICE - switch (result) { - case TC_POLICE_SHOT: - result = NF_DROP; - sch->qstats.drops++; - break; - case TC_POLICE_RECLASSIFY: /* DSCP remarking here ? */ - case TC_POLICE_OK: - case TC_POLICE_UNSPEC: - default: - sch->bstats.packets++; - sch->bstats.bytes += skb->len; - result = NF_ACCEPT; - break; - } - #else D2PRINTK("Overriding result to ACCEPT\n"); result = NF_ACCEPT; sch->bstats.packets++; sch->bstats.bytes += skb->len; -#endif #endif return result; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index b8b3345cede8..8c2639af4c6a 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -125,7 +125,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) if (skb->len > q->max_size) { sch->qstats.drops++; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) #endif kfree_skb(skb); -- cgit v1.2.3 From 063ed369c97f8de4cce23bf93bebd7ffacb542ff Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sun, 15 Jul 2007 00:16:35 -0700 Subject: [IPV6]: Call inet6addr_chain notifiers on link down Currently if the link is brought down via ip link or ifconfig down, the inet6addr_chain notifiers are not called even though all the addresses are removed from the interface. This caused SCTP to add duplicate addresses to it's list. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 24424c3b7dc0..06012920912a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2475,6 +2475,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_unlock_bh(&idev->lock); __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); in6_ifa_put(ifa); write_lock_bh(&idev->lock); -- cgit v1.2.3 From 0a9f2a467d8dacaf7e97469dba99ed2d07287d80 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 15 Jul 2007 00:19:29 -0700 Subject: [TCP]: Verify the presence of RETRANS bit when leaving FRTO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For yet unknown reason, something cleared SACKED_RETRANS bit underneath FRTO. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 69f9f1ef3ef6..4e5884ac8f29 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1398,7 +1398,9 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) * waiting for the first ACK and did not get it)... */ if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) { - tp->retrans_out += tcp_skb_pcount(skb); + /* For some reason this R-bit might get cleared? */ + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out += tcp_skb_pcount(skb); /* ...enter this if branch just for the first segment */ flag |= FLAG_DATA_ACKED; } else { -- cgit v1.2.3 From ececfdee1cc287123149c801af201e41c7c3cc84 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Jul 2007 21:01:12 +0100 Subject: fallout from constified seq_operations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- net/atm/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/proc.c b/net/atm/proc.c index 88154da62cd3..99fc1fe950ee 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -110,7 +110,7 @@ static inline void *vcc_walk(struct vcc_state *state, loff_t l) } static int __vcc_seq_open(struct inode *inode, struct file *file, - int family, struct seq_operations *ops) + int family, const struct seq_operations *ops) { struct vcc_state *state; struct seq_file *seq; -- cgit v1.2.3 From 09561f44c75bc462ae86590b9c089d01c4e94a74 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 15 Jul 2007 23:37:18 -0700 Subject: authgss build fix Recent breakage.. net/sunrpc/auth_gss/auth_gss.c:1002: warning: implicit declaration of function 'lock_kernel' net/sunrpc/auth_gss/auth_gss.c:1004: warning: implicit declaration of function 'unlock_kernel' Cc: Trond Myklebust Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sunrpc/auth_gss/auth_gss.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index baf4096d52d4..abfda33bac64 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 4a19542e5f694cd408a32c3d9dc593ba9366e2d7 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 15 Jul 2007 23:40:34 -0700 Subject: O_CLOEXEC for SCM_RIGHTS Part two in the O_CLOEXEC saga: adding support for file descriptors received through Unix domain sockets. The patch is once again pretty minimal, it introduces a new flag for recvmsg and passes it just like the existing MSG_CMSG_COMPAT flag. I think this bit is not used otherwise but the networking people will know better. This new flag is not recognized by recvfrom and recv. These functions cannot be used for that purpose and the asymmetry this introduces is not worse than the already existing MSG_CMSG_COMPAT situations. The patch must be applied on the patch which introduced O_CLOEXEC. It has to remove static from the new get_unused_fd_flags function but since scm.c cannot live in a module the function still hasn't to be exported. Here's a test program to make sure the code works. It's so much longer than the actual patch... #include #include #include #include #include #include #include #include #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 #endif #ifndef MSG_CMSG_CLOEXEC # define MSG_CMSG_CLOEXEC 0x40000000 #endif int main (int argc, char *argv[]) { if (argc > 1) { int fd = atol (argv[1]); printf ("child: fd = %d\n", fd); if (fcntl (fd, F_GETFD) == 0 || errno != EBADF) { puts ("file descriptor valid in child"); return 1; } return 0; } struct sockaddr_un sun; strcpy (sun.sun_path, "./testsocket"); sun.sun_family = AF_UNIX; char databuf[] = "hello"; struct iovec iov[1]; iov[0].iov_base = databuf; iov[0].iov_len = sizeof (databuf); union { struct cmsghdr hdr; char bytes[CMSG_SPACE (sizeof (int))]; } buf; struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1, .msg_control = buf.bytes, .msg_controllen = sizeof (buf) }; struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN (sizeof (int)); msg.msg_controllen = cmsg->cmsg_len; pid_t child = fork (); if (child == -1) error (1, errno, "fork"); if (child == 0) { int sock = socket (PF_UNIX, SOCK_STREAM, 0); if (sock < 0) error (1, errno, "socket"); if (bind (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0) error (1, errno, "bind"); if (listen (sock, SOMAXCONN) < 0) error (1, errno, "listen"); int conn = accept (sock, NULL, NULL); if (conn == -1) error (1, errno, "accept"); *(int *) CMSG_DATA (cmsg) = sock; if (sendmsg (conn, &msg, MSG_NOSIGNAL) < 0) error (1, errno, "sendmsg"); return 0; } /* For a test suite this should be more robust like a barrier in shared memory. */ sleep (1); int sock = socket (PF_UNIX, SOCK_STREAM, 0); if (sock < 0) error (1, errno, "socket"); if (connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0) error (1, errno, "connect"); unlink (sun.sun_path); *(int *) CMSG_DATA (cmsg) = -1; if (recvmsg (sock, &msg, MSG_CMSG_CLOEXEC) < 0) error (1, errno, "recvmsg"); int fd = *(int *) CMSG_DATA (cmsg); if (fd == -1) error (1, 0, "no descriptor received"); char fdname[20]; snprintf (fdname, sizeof (fdname), "%d", fd); execl ("/proc/self/exe", argv[0], fdname, NULL); puts ("execl failed"); return 1; } [akpm@linux-foundation.org: Fix fastcall inconsistency noted by Michael Buesch] [akpm@linux-foundation.org: build fix] Signed-off-by: Ulrich Drepper Cc: Ingo Molnar Cc: Michael Buesch Cc: Michael Kerrisk Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/open.c | 2 +- include/linux/file.h | 1 + include/linux/socket.h | 3 +++ net/compat.c | 3 ++- net/core/scm.c | 3 ++- net/socket.c | 4 +--- 6 files changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/fs/open.c b/fs/open.c index e6991c1b5874..be6a457f4226 100644 --- a/fs/open.c +++ b/fs/open.c @@ -855,7 +855,7 @@ EXPORT_SYMBOL(dentry_open); /* * Find an empty file descriptor entry, and mark it busy. */ -static int get_unused_fd_flags(int flags) +int get_unused_fd_flags(int flags) { struct files_struct * files = current->files; int fd, error; diff --git a/include/linux/file.h b/include/linux/file.h index a59001e9ea58..0114fbc78061 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -73,6 +73,7 @@ extern struct file * FASTCALL(fget_light(unsigned int fd, int *fput_needed)); extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag)); extern void put_filp(struct file *); extern int get_unused_fd(void); +extern int get_unused_fd_flags(int flags); extern void FASTCALL(put_unused_fd(unsigned int fd)); struct kmem_cache; diff --git a/include/linux/socket.h b/include/linux/socket.h index fe195c97a89d..f852e1afd65a 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -253,6 +253,9 @@ struct ucred { #define MSG_EOF MSG_FIN +#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file + descriptor received through + SCM_RIGHTS */ #if defined(CONFIG_COMPAT) #define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */ #else diff --git a/net/compat.c b/net/compat.c index 9a0f5f2b90c8..d74d82155d78 100644 --- a/net/compat.c +++ b/net/compat.c @@ -276,7 +276,8 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) err = security_file_receive(fp[i]); if (err) break; - err = get_unused_fd(); + err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & kmsg->msg_flags + ? O_CLOEXEC : 0); if (err < 0) break; new_fd = err; diff --git a/net/core/scm.c b/net/core/scm.c index 292ad8d5ad76..44c4ec2c8769 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -228,7 +228,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) err = security_file_receive(fp[i]); if (err) break; - err = get_unused_fd(); + err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags + ? O_CLOEXEC : 0); if (err < 0) break; new_fd = err; diff --git a/net/socket.c b/net/socket.c index f4530196a70a..b71114250046 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1939,9 +1939,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, total_len = err; cmsg_ptr = (unsigned long)msg_sys.msg_control; - msg_sys.msg_flags = 0; - if (MSG_CMSG_COMPAT & flags) - msg_sys.msg_flags = MSG_CMSG_COMPAT; + msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; -- cgit v1.2.3 From 522ed7767e800cff6c650ec64b0ee0677303119c Mon Sep 17 00:00:00 2001 From: Miloslav Trmac Date: Sun, 15 Jul 2007 23:40:56 -0700 Subject: Audit: add TTY input auditing Add TTY input auditing, used to audit system administrator's actions. This is required by various security standards such as DCID 6/3 and PCI to provide non-repudiation of administrator's actions and to allow a review of past actions if the administrator seems to overstep their duties or if the system becomes misconfigured for unknown reasons. These requirements do not make it necessary to audit TTY output as well. Compared to an user-space keylogger, this approach records TTY input using the audit subsystem, correlated with other audit events, and it is completely transparent to the user-space application (e.g. the console ioctls still work). TTY input auditing works on a higher level than auditing all system calls within the session, which would produce an overwhelming amount of mostly useless audit events. Add an "audit_tty" attribute, inherited across fork (). Data read from TTYs by process with the attribute is sent to the audit subsystem by the kernel. The audit netlink interface is extended to allow modifying the audit_tty attribute, and to allow sending explanatory audit events from user-space (for example, a shell might send an event containing the final command, after the interactive command-line editing and history expansion is performed, which might be difficult to decipher from the TTY input alone). Because the "audit_tty" attribute is inherited across fork (), it would be set e.g. for sshd restarted within an audited session. To prevent this, the audit_tty attribute is cleared when a process with no open TTY file descriptors (e.g. after daemon startup) opens a TTY. See https://www.redhat.com/archives/linux-audit/2007-June/msg00000.html for a more detailed rationale document for an older version of this patch. [akpm@linux-foundation.org: build fix] Signed-off-by: Miloslav Trmac Cc: Al Viro Cc: Alan Cox Cc: Paul Fulghum Cc: Casey Schaufler Cc: Steve Grubb Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/Makefile | 1 + drivers/char/n_tty.c | 20 ++- drivers/char/tty_audit.c | 345 +++++++++++++++++++++++++++++++++++++++++++ drivers/char/tty_io.c | 14 +- include/linux/audit.h | 11 ++ include/linux/sched.h | 4 + include/linux/tty.h | 33 +++++ kernel/audit.c | 96 +++++++++++- kernel/audit.h | 1 - kernel/auditsc.c | 3 - kernel/exit.c | 2 + kernel/fork.c | 3 + net/netlabel/netlabel_user.c | 4 - security/selinux/nlmsgtab.c | 2 + 14 files changed, 518 insertions(+), 21 deletions(-) create mode 100644 drivers/char/tty_audit.c (limited to 'net') diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 2f56ecc035aa..f2996a95eb07 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -15,6 +15,7 @@ obj-y += misc.o obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o consolemap.o \ consolemap_deftbl.o selection.o keyboard.o obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o +obj-$(CONFIG_AUDIT) += tty_audit.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_ESPSERIAL) += esp.o obj-$(CONFIG_MVME147_SCC) += generic_serial.o vme_scc.o diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index 371631f4bfb9..038056911934 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #include #include @@ -78,6 +80,13 @@ static inline void free_buf(unsigned char *buf) free_page((unsigned long) buf); } +static inline int tty_put_user(struct tty_struct *tty, unsigned char x, + unsigned char __user *ptr) +{ + tty_audit_add_data(tty, &x, 1); + return put_user(x, ptr); +} + /** * n_tty_set__room - receive space * @tty: terminal @@ -1153,6 +1162,7 @@ static int copy_from_read_buf(struct tty_struct *tty, if (n) { retval = copy_to_user(*b, &tty->read_buf[tty->read_tail], n); n -= retval; + tty_audit_add_data(tty, &tty->read_buf[tty->read_tail], n); spin_lock_irqsave(&tty->read_lock, flags); tty->read_tail = (tty->read_tail + n) & (N_TTY_BUF_SIZE-1); tty->read_cnt -= n; @@ -1279,7 +1289,7 @@ do_it_again: break; cs = tty->link->ctrl_status; tty->link->ctrl_status = 0; - if (put_user(cs, b++)) { + if (tty_put_user(tty, cs, b++)) { retval = -EFAULT; b--; break; @@ -1321,7 +1331,7 @@ do_it_again: /* Deal with packet mode. */ if (tty->packet && b == buf) { - if (put_user(TIOCPKT_DATA, b++)) { + if (tty_put_user(tty, TIOCPKT_DATA, b++)) { retval = -EFAULT; b--; break; @@ -1352,15 +1362,17 @@ do_it_again: spin_unlock_irqrestore(&tty->read_lock, flags); if (!eol || (c != __DISABLED_CHAR)) { - if (put_user(c, b++)) { + if (tty_put_user(tty, c, b++)) { retval = -EFAULT; b--; break; } nr--; } - if (eol) + if (eol) { + tty_audit_push(tty); break; + } } if (retval) break; diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c new file mode 100644 index 000000000000..d222012c1b0c --- /dev/null +++ b/drivers/char/tty_audit.c @@ -0,0 +1,345 @@ +/* + * Creating audit events from TTY input. + * + * Copyright (C) 2007 Red Hat, Inc. All rights reserved. This copyrighted + * material is made available to anyone wishing to use, modify, copy, or + * redistribute it subject to the terms and conditions of the GNU General + * Public License v.2. + * + * Authors: Miloslav Trmac + */ + +#include +#include +#include + +struct tty_audit_buf { + atomic_t count; + struct mutex mutex; /* Protects all data below */ + int major, minor; /* The TTY which the data is from */ + unsigned icanon:1; + size_t valid; + unsigned char *data; /* Allocated size N_TTY_BUF_SIZE */ +}; + +static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor, + int icanon) +{ + struct tty_audit_buf *buf; + + buf = kmalloc(sizeof (*buf), GFP_KERNEL); + if (!buf) + goto err; + if (PAGE_SIZE != N_TTY_BUF_SIZE) + buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL); + else + buf->data = (unsigned char *)__get_free_page(GFP_KERNEL); + if (!buf->data) + goto err_buf; + atomic_set(&buf->count, 1); + mutex_init(&buf->mutex); + buf->major = major; + buf->minor = minor; + buf->icanon = icanon; + buf->valid = 0; + return buf; + +err_buf: + kfree(buf); +err: + return NULL; +} + +static void tty_audit_buf_free(struct tty_audit_buf *buf) +{ + WARN_ON(buf->valid != 0); + if (PAGE_SIZE != N_TTY_BUF_SIZE) + kfree(buf->data); + else + free_page((unsigned long)buf->data); + kfree(buf); +} + +static void tty_audit_buf_put(struct tty_audit_buf *buf) +{ + if (atomic_dec_and_test(&buf->count)) + tty_audit_buf_free(buf); +} + +/** + * tty_audit_buf_push - Push buffered data out + * + * Generate an audit message from the contents of @buf, which is owned by + * @tsk with @loginuid. @buf->mutex must be locked. + */ +static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, + struct tty_audit_buf *buf) +{ + struct audit_buffer *ab; + + if (buf->valid == 0) + return; + if (audit_enabled == 0) + return; + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_TTY); + if (ab) { + char name[sizeof(tsk->comm)]; + + audit_log_format(ab, "tty pid=%u uid=%u auid=%u major=%d " + "minor=%d comm=", tsk->pid, tsk->uid, + loginuid, buf->major, buf->minor); + get_task_comm(name, tsk); + audit_log_untrustedstring(ab, name); + audit_log_format(ab, " data="); + audit_log_n_untrustedstring(ab, buf->valid, buf->data); + audit_log_end(ab); + } + buf->valid = 0; +} + +/** + * tty_audit_buf_push_current - Push buffered data out + * + * Generate an audit message from the contents of @buf, which is owned by + * the current task. @buf->mutex must be locked. + */ +static void tty_audit_buf_push_current(struct tty_audit_buf *buf) +{ + tty_audit_buf_push(current, audit_get_loginuid(current->audit_context), + buf); +} + +/** + * tty_audit_exit - Handle a task exit + * + * Make sure all buffered data is written out and deallocate the buffer. + * Only needs to be called if current->signal->tty_audit_buf != %NULL. + */ +void tty_audit_exit(void) +{ + struct tty_audit_buf *buf; + + spin_lock_irq(¤t->sighand->siglock); + buf = current->signal->tty_audit_buf; + current->signal->tty_audit_buf = NULL; + spin_unlock_irq(¤t->sighand->siglock); + if (!buf) + return; + + mutex_lock(&buf->mutex); + tty_audit_buf_push_current(buf); + mutex_unlock(&buf->mutex); + + tty_audit_buf_put(buf); +} + +/** + * tty_audit_fork - Copy TTY audit state for a new task + * + * Set up TTY audit state in @sig from current. @sig needs no locking. + */ +void tty_audit_fork(struct signal_struct *sig) +{ + spin_lock_irq(¤t->sighand->siglock); + sig->audit_tty = current->signal->audit_tty; + spin_unlock_irq(¤t->sighand->siglock); + sig->tty_audit_buf = NULL; +} + +/** + * tty_audit_push_task - Flush task's pending audit data + */ +void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid) +{ + struct tty_audit_buf *buf; + + spin_lock_irq(&tsk->sighand->siglock); + buf = tsk->signal->tty_audit_buf; + if (buf) + atomic_inc(&buf->count); + spin_unlock_irq(&tsk->sighand->siglock); + if (!buf) + return; + + mutex_lock(&buf->mutex); + tty_audit_buf_push(tsk, loginuid, buf); + mutex_unlock(&buf->mutex); + + tty_audit_buf_put(buf); +} + +/** + * tty_audit_buf_get - Get an audit buffer. + * + * Get an audit buffer for @tty, allocate it if necessary. Return %NULL + * if TTY auditing is disabled or out of memory. Otherwise, return a new + * reference to the buffer. + */ +static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty) +{ + struct tty_audit_buf *buf, *buf2; + + buf = NULL; + buf2 = NULL; + spin_lock_irq(¤t->sighand->siglock); + if (likely(!current->signal->audit_tty)) + goto out; + buf = current->signal->tty_audit_buf; + if (buf) { + atomic_inc(&buf->count); + goto out; + } + spin_unlock_irq(¤t->sighand->siglock); + + buf2 = tty_audit_buf_alloc(tty->driver->major, + tty->driver->minor_start + tty->index, + tty->icanon); + if (buf2 == NULL) { + audit_log_lost("out of memory in TTY auditing"); + return NULL; + } + + spin_lock_irq(¤t->sighand->siglock); + if (!current->signal->audit_tty) + goto out; + buf = current->signal->tty_audit_buf; + if (!buf) { + current->signal->tty_audit_buf = buf2; + buf = buf2; + buf2 = NULL; + } + atomic_inc(&buf->count); + /* Fall through */ + out: + spin_unlock_irq(¤t->sighand->siglock); + if (buf2) + tty_audit_buf_free(buf2); + return buf; +} + +/** + * tty_audit_add_data - Add data for TTY auditing. + * + * Audit @data of @size from @tty, if necessary. + */ +void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, + size_t size) +{ + struct tty_audit_buf *buf; + int major, minor; + + if (unlikely(size == 0)) + return; + + buf = tty_audit_buf_get(tty); + if (!buf) + return; + + mutex_lock(&buf->mutex); + major = tty->driver->major; + minor = tty->driver->minor_start + tty->index; + if (buf->major != major || buf->minor != minor + || buf->icanon != tty->icanon) { + tty_audit_buf_push_current(buf); + buf->major = major; + buf->minor = minor; + buf->icanon = tty->icanon; + } + do { + size_t run; + + run = N_TTY_BUF_SIZE - buf->valid; + if (run > size) + run = size; + memcpy(buf->data + buf->valid, data, run); + buf->valid += run; + data += run; + size -= run; + if (buf->valid == N_TTY_BUF_SIZE) + tty_audit_buf_push_current(buf); + } while (size != 0); + mutex_unlock(&buf->mutex); + tty_audit_buf_put(buf); +} + +/** + * tty_audit_push - Push buffered data out + * + * Make sure no audit data is pending for @tty on the current process. + */ +void tty_audit_push(struct tty_struct *tty) +{ + struct tty_audit_buf *buf; + + spin_lock_irq(¤t->sighand->siglock); + if (likely(!current->signal->audit_tty)) { + spin_unlock_irq(¤t->sighand->siglock); + return; + } + buf = current->signal->tty_audit_buf; + if (buf) + atomic_inc(&buf->count); + spin_unlock_irq(¤t->sighand->siglock); + + if (buf) { + int major, minor; + + major = tty->driver->major; + minor = tty->driver->minor_start + tty->index; + mutex_lock(&buf->mutex); + if (buf->major == major && buf->minor == minor) + tty_audit_buf_push_current(buf); + mutex_unlock(&buf->mutex); + tty_audit_buf_put(buf); + } +} + +/** + * tty_audit_opening - A TTY is being opened. + * + * As a special hack, tasks that close all their TTYs and open new ones + * are assumed to be system daemons (e.g. getty) and auditing is + * automatically disabled for them. + */ +void tty_audit_opening(void) +{ + int disable; + + disable = 1; + spin_lock_irq(¤t->sighand->siglock); + if (current->signal->audit_tty == 0) + disable = 0; + spin_unlock_irq(¤t->sighand->siglock); + if (!disable) + return; + + task_lock(current); + if (current->files) { + struct fdtable *fdt; + unsigned i; + + /* + * We don't take a ref to the file, so we must hold ->file_lock + * instead. + */ + spin_lock(¤t->files->file_lock); + fdt = files_fdtable(current->files); + for (i = 0; i < fdt->max_fds; i++) { + struct file *filp; + + filp = fcheck_files(current->files, i); + if (filp && is_tty(filp)) { + disable = 0; + break; + } + } + spin_unlock(¤t->files->file_lock); + } + task_unlock(current); + if (!disable) + return; + + spin_lock_irq(¤t->sighand->siglock); + current->signal->audit_tty = 0; + spin_unlock_irq(¤t->sighand->siglock); +} diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index fde69e589ca7..de37ebc3a4cf 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1503,6 +1503,15 @@ int tty_hung_up_p(struct file * filp) EXPORT_SYMBOL(tty_hung_up_p); +/** + * is_tty - checker whether file is a TTY + */ +int is_tty(struct file *filp) +{ + return filp->f_op->read == tty_read + || filp->f_op->read == hung_up_tty_read; +} + static void session_clear_tty(struct pid *session) { struct task_struct *p; @@ -2673,6 +2682,7 @@ got_driver: __proc_set_tty(current, tty); spin_unlock_irq(¤t->sighand->siglock); mutex_unlock(&tty_mutex); + tty_audit_opening(); return 0; } @@ -2735,8 +2745,10 @@ static int ptmx_open(struct inode * inode, struct file * filp) check_tty_count(tty, "tty_open"); retval = ptm_driver->open(tty, filp); - if (!retval) + if (!retval) { + tty_audit_opening(); return 0; + } out1: release_dev(filp); return retval; diff --git a/include/linux/audit.h b/include/linux/audit.h index fccc6e50298a..8ca7ca0b47f0 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -63,9 +63,12 @@ #define AUDIT_ADD_RULE 1011 /* Add syscall filtering rule */ #define AUDIT_DEL_RULE 1012 /* Delete syscall filtering rule */ #define AUDIT_LIST_RULES 1013 /* List syscall filtering rules */ +#define AUDIT_TTY_GET 1014 /* Get TTY auditing status */ +#define AUDIT_TTY_SET 1015 /* Set TTY auditing status */ #define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */ #define AUDIT_USER_AVC 1107 /* We filter this differently */ +#define AUDIT_USER_TTY 1124 /* Non-ICANON TTY input meaning */ #define AUDIT_LAST_USER_MSG 1199 #define AUDIT_FIRST_USER_MSG2 2100 /* More user space messages */ #define AUDIT_LAST_USER_MSG2 2999 @@ -92,6 +95,7 @@ #define AUDIT_KERNEL_OTHER 1316 /* For use by 3rd party modules */ #define AUDIT_FD_PAIR 1317 /* audit record for pipe/socketpair */ #define AUDIT_OBJ_PID 1318 /* ptrace target */ +#define AUDIT_TTY 1319 /* Input on an administrative TTY */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -289,6 +293,10 @@ struct audit_status { __u32 backlog; /* messages waiting in queue */ }; +struct audit_tty_status { + __u32 enabled; /* 1 = enabled, 0 = disabled */ +}; + /* audit_rule_data supports filter rules with both integer and string * fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and * AUDIT_LIST_RULES requests. @@ -515,11 +523,13 @@ extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, struct dentry *dentry, struct vfsmount *vfsmnt); +extern void audit_log_lost(const char *message); /* Private API (for audit.c only) */ extern int audit_filter_user(struct netlink_skb_parms *cb, int type); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int uid, int seq, void *data, size_t datasz, uid_t loginuid, u32 sid); +extern int audit_enabled; #else #define audit_log(c,g,t,f,...) do { ; } while (0) #define audit_log_start(c,g,t) ({ NULL; }) @@ -530,6 +540,7 @@ extern int audit_receive_filter(int type, int pid, int uid, int seq, #define audit_log_untrustedstring(a,s) do { ; } while (0) #define audit_log_n_untrustedstring(a,n,s) do { ; } while (0) #define audit_log_d_path(b,p,d,v) do { ; } while (0) +#define audit_enabled 0 #endif #endif #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 3cffc1204663..b579624477f4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -529,6 +529,10 @@ struct signal_struct { #ifdef CONFIG_TASKSTATS struct taskstats *stats; #endif +#ifdef CONFIG_AUDIT + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; +#endif }; /* Context switch must be unlocked if interrupts are to be enabled */ diff --git a/include/linux/tty.h b/include/linux/tty.h index deaba9ec5004..691a1748d9d2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -178,6 +178,7 @@ struct tty_bufhead { #define L_IEXTEN(tty) _L_FLAG((tty),IEXTEN) struct device; +struct signal_struct; /* * Where all of the state associated with a tty is kept while the tty * is open. Since the termios state should be kept even if the tty @@ -310,6 +311,7 @@ extern void tty_hangup(struct tty_struct * tty); extern void tty_vhangup(struct tty_struct * tty); extern void tty_unhangup(struct file *filp); extern int tty_hung_up_p(struct file * filp); +extern int is_tty(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); extern void disassociate_ctty(int priv); @@ -347,6 +349,37 @@ extern int tty_write_lock(struct tty_struct *tty, int ndelay); /* n_tty.c */ extern struct tty_ldisc tty_ldisc_N_TTY; +/* tty_audit.c */ +#ifdef CONFIG_AUDIT +extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, + size_t size); +extern void tty_audit_exit(void); +extern void tty_audit_fork(struct signal_struct *sig); +extern void tty_audit_push(struct tty_struct *tty); +extern void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid); +extern void tty_audit_opening(void); +#else +static inline void tty_audit_add_data(struct tty_struct *tty, + unsigned char *data, size_t size) +{ +} +static inline void tty_audit_exit(void) +{ +} +static inline void tty_audit_fork(struct signal_struct *sig) +{ +} +static inline void tty_audit_push(struct tty_struct *tty) +{ +} +static inline void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid) +{ +} +static inline void tty_audit_opening(void) +{ +} +#endif + /* tty_ioctl.c */ extern int n_tty_ioctl(struct tty_struct * tty, struct file * file, unsigned int cmd, unsigned long arg); diff --git a/kernel/audit.c b/kernel/audit.c index d13276d41410..5ce8851facf7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -58,6 +58,7 @@ #include #include #include +#include #include "audit.h" @@ -423,6 +424,31 @@ static int kauditd_thread(void *dummy) return 0; } +static int audit_prepare_user_tty(pid_t pid, uid_t loginuid) +{ + struct task_struct *tsk; + int err; + + read_lock(&tasklist_lock); + tsk = find_task_by_pid(pid); + err = -ESRCH; + if (!tsk) + goto out; + err = 0; + + spin_lock_irq(&tsk->sighand->siglock); + if (!tsk->signal->audit_tty) + err = -EPERM; + spin_unlock_irq(&tsk->sighand->siglock); + if (err) + goto out; + + tty_audit_push_task(tsk, loginuid); +out: + read_unlock(&tasklist_lock); + return err; +} + int audit_send_list(void *_dest) { struct audit_netlink_list *dest = _dest; @@ -511,6 +537,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) case AUDIT_DEL: case AUDIT_DEL_RULE: case AUDIT_SIGNAL_INFO: + case AUDIT_TTY_GET: + case AUDIT_TTY_SET: if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) err = -EPERM; break; @@ -622,6 +650,11 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = audit_filter_user(&NETLINK_CB(skb), msg_type); if (err == 1) { err = 0; + if (msg_type == AUDIT_USER_TTY) { + err = audit_prepare_user_tty(pid, loginuid); + if (err) + break; + } ab = audit_log_start(NULL, GFP_KERNEL, msg_type); if (ab) { audit_log_format(ab, @@ -638,8 +671,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) " subj=%s", ctx); kfree(ctx); } - audit_log_format(ab, " msg='%.1024s'", - (char *)data); + if (msg_type != AUDIT_USER_TTY) + audit_log_format(ab, " msg='%.1024s'", + (char *)data); + else { + int size; + + audit_log_format(ab, " msg="); + size = nlmsg_len(nlh); + audit_log_n_untrustedstring(ab, size, + data); + } audit_set_pid(ab, pid); audit_log_end(ab); } @@ -730,6 +772,45 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 0, 0, sig_data, sizeof(*sig_data) + len); kfree(sig_data); break; + case AUDIT_TTY_GET: { + struct audit_tty_status s; + struct task_struct *tsk; + + read_lock(&tasklist_lock); + tsk = find_task_by_pid(pid); + if (!tsk) + err = -ESRCH; + else { + spin_lock_irq(&tsk->sighand->siglock); + s.enabled = tsk->signal->audit_tty != 0; + spin_unlock_irq(&tsk->sighand->siglock); + } + read_unlock(&tasklist_lock); + audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0, + &s, sizeof(s)); + break; + } + case AUDIT_TTY_SET: { + struct audit_tty_status *s; + struct task_struct *tsk; + + if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) + return -EINVAL; + s = data; + if (s->enabled != 0 && s->enabled != 1) + return -EINVAL; + read_lock(&tasklist_lock); + tsk = find_task_by_pid(pid); + if (!tsk) + err = -ESRCH; + else { + spin_lock_irq(&tsk->sighand->siglock); + tsk->signal->audit_tty = s->enabled != 0; + spin_unlock_irq(&tsk->sighand->siglock); + } + read_unlock(&tasklist_lock); + break; + } default: err = -EINVAL; break; @@ -1185,7 +1266,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen, } /** - * audit_log_n_unstrustedstring - log a string that may contain random characters + * audit_log_n_untrustedstring - log a string that may contain random characters * @ab: audit_buffer * @len: lenth of string (not including trailing null) * @string: string to be logged @@ -1201,25 +1282,24 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen, const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, const char *string) { - const unsigned char *p = string; + const unsigned char *p; - while (*p) { + for (p = string; p < (const unsigned char *)string + len && *p; p++) { if (*p == '"' || *p < 0x21 || *p > 0x7f) { audit_log_hex(ab, string, len); return string + len + 1; } - p++; } audit_log_n_string(ab, len, string); return p + 1; } /** - * audit_log_unstrustedstring - log a string that may contain random characters + * audit_log_untrustedstring - log a string that may contain random characters * @ab: audit_buffer * @string: string to be logged * - * Same as audit_log_n_unstrustedstring(), except that strlen is used to + * Same as audit_log_n_untrustedstring(), except that strlen is used to * determine string length. */ const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) diff --git a/kernel/audit.h b/kernel/audit.h index 815d6f5c04ee..95877435c347 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -115,7 +115,6 @@ extern struct sk_buff * audit_make_reply(int pid, int seq, int type, extern void audit_send_reply(int pid, int seq, int type, int done, int multi, void *payload, int size); -extern void audit_log_lost(const char *message); extern void audit_panic(const char *message); struct audit_netlink_list { diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e36481ed61b4..7ccc3da30a91 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -71,9 +71,6 @@ extern struct list_head audit_filter_list[]; -/* No syscall auditing will take place unless audit_enabled != 0. */ -extern int audit_enabled; - /* AUDIT_NAMES is the number of slots we reserve in the audit_context * for saving names from getname(). */ #define AUDIT_NAMES 20 diff --git a/kernel/exit.c b/kernel/exit.c index 64a5263c8c7b..57626692cd90 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -965,6 +965,8 @@ fastcall NORET_TYPE void do_exit(long code) if (unlikely(tsk->compat_robust_list)) compat_exit_robust_list(tsk); #endif + if (group_dead) + tty_audit_exit(); if (unlikely(tsk->audit_context)) audit_free(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 344d693fdc78..4015912aaac2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -897,6 +898,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts } acct_init_pacct(&sig->pacct); + tty_audit_fork(sig); + return 0; } diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 42f12bd65964..89dcc485653b 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -46,10 +46,6 @@ #include "netlabel_cipso_v4.h" #include "netlabel_user.h" -/* do not do any auditing if audit_enabled == 0, see kernel/audit.c for - * details */ -extern int audit_enabled; - /* * NetLabel NETLINK Setup Functions */ diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index ccfe8755735e..eddc7b420109 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -110,6 +110,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] = { AUDIT_DEL_RULE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_USER, NETLINK_AUDIT_SOCKET__NLMSG_RELAY }, { AUDIT_SIGNAL_INFO, NETLINK_AUDIT_SOCKET__NLMSG_READ }, + { AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, + { AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, }; -- cgit v1.2.3 From 2e27afb300b56d83bb03fbfa68852b9c1e2920c6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 16 Jul 2007 14:31:08 -0700 Subject: Revert "[NET]: Fix races in net_rx_action vs netpoll." This reverts commit 29578624e354f56143d92510fff33a8b2aaa2c03. Ingo Molnar reports complete breakage with his e1000 card (no networking, card reports transmit timeouts), and bisected it down to this commit. Let's figure out what went wrong, but not keep breaking machines until we do. Cc: Ingo Molnar Cc: Olaf Kirch Cc: David Miller Signed-off-by: Linus Torvalds --- include/linux/netdevice.h | 10 ---------- net/core/netpoll.c | 8 -------- 2 files changed, 18 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 322b5eae57dd..da7a13c97eb8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -262,8 +262,6 @@ enum netdev_state_t __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_QDISC_RUNNING, - /* Set by the netpoll NAPI code */ - __LINK_STATE_POLL_LIST_FROZEN, }; @@ -1022,14 +1020,6 @@ static inline void netif_rx_complete(struct net_device *dev) { unsigned long flags; -#ifdef CONFIG_NETPOLL - /* Prevent race with netpoll - yes, this is a kludge. - * But at least it doesn't penalize the non-netpoll - * code path. */ - if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state)) - return; -#endif - local_irq_save(flags); __netif_rx_complete(dev); local_irq_restore(flags); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d1264e9a50a8..de1b26aa5720 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -124,13 +124,6 @@ static void poll_napi(struct netpoll *np) if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && npinfo->poll_owner != smp_processor_id() && spin_trylock(&npinfo->poll_lock)) { - /* When calling dev->poll from poll_napi, we may end up in - * netif_rx_complete. However, only the CPU to which the - * device was queued is allowed to remove it from poll_list. - * Setting POLL_LIST_FROZEN tells netif_rx_complete - * to leave the NAPI state alone. - */ - set_bit(__LINK_STATE_POLL_LIST_FROZEN, &np->dev->state); npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); @@ -138,7 +131,6 @@ static void poll_napi(struct netpoll *np) atomic_dec(&trapped); npinfo->rx_flags &= ~NETPOLL_RX_DROP; - clear_bit(__LINK_STATE_POLL_LIST_FROZEN, &np->dev->state); spin_unlock(&npinfo->poll_lock); } } -- cgit v1.2.3 From 8e1f936b73150f5095448a0fee6d4f30a1f9001d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 04:03:17 -0700 Subject: mm: clean up and kernelify shrinker registration I can never remember what the function to register to receive VM pressure is called. I have to trace down from __alloc_pages() to find it. It's called "set_shrinker()", and it needs Your Help. 1) Don't hide struct shrinker. It contains no magic. 2) Don't allocate "struct shrinker". It's not helpful. 3) Call them "register_shrinker" and "unregister_shrinker". 4) Call the function "shrink" not "shrinker". 5) Reduce the 17 lines of waffly comments to 13, but document it properly. Signed-off-by: Rusty Russell Cc: David Chinner Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 7 ++++++- fs/dquot.c | 7 ++++++- fs/inode.c | 7 ++++++- fs/mbcache.c | 9 ++++++--- fs/nfs/super.c | 10 ++++++---- fs/xfs/linux-2.6/xfs_buf.c | 14 ++++++-------- fs/xfs/quota/xfs_qm.c | 10 +++++++--- include/linux/mm.h | 38 +++++++++++++++++++++----------------- mm/vmscan.c | 42 +++++++++++------------------------------- net/sunrpc/auth.c | 11 ++++++----- 10 files changed, 81 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/fs/dcache.c b/fs/dcache.c index 0e73aa0a0e8b..cb9d05056b54 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -883,6 +883,11 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask) return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } +static struct shrinker dcache_shrinker = { + .shrink = shrink_dcache_memory, + .seeks = DEFAULT_SEEKS, +}; + /** * d_alloc - allocate a dcache entry * @parent: parent of entry to allocate @@ -2115,7 +2120,7 @@ static void __init dcache_init(unsigned long mempages) dentry_cache = KMEM_CACHE(dentry, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); - set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); + register_shrinker(&dcache_shrinker); /* Hash may have been set up in dcache_init_early */ if (!hashdist) diff --git a/fs/dquot.c b/fs/dquot.c index 8819d281500c..7e273151f589 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -538,6 +538,11 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; } +static struct shrinker dqcache_shrinker = { + .shrink = shrink_dqcache_memory, + .seeks = DEFAULT_SEEKS, +}; + /* * Put reference to dquot * NOTE: If you change this function please check whether dqput_blocks() works right... @@ -1870,7 +1875,7 @@ static int __init dquot_init(void) printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - set_shrinker(DEFAULT_SEEKS, shrink_dqcache_memory); + register_shrinker(&dqcache_shrinker); return 0; } diff --git a/fs/inode.c b/fs/inode.c index 47b87b071de3..320e088d0b28 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -462,6 +462,11 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask) return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } +static struct shrinker icache_shrinker = { + .shrink = shrink_icache_memory, + .seeks = DEFAULT_SEEKS, +}; + static void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. @@ -1385,7 +1390,7 @@ void __init inode_init(unsigned long mempages) SLAB_MEM_SPREAD), init_once, NULL); - set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); + register_shrinker(&icache_shrinker); /* Hash may have been set up in inode_init_early */ if (!hashdist) diff --git a/fs/mbcache.c b/fs/mbcache.c index deeb9dc062d9..fbb1d02f8791 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -100,7 +100,6 @@ struct mb_cache { static LIST_HEAD(mb_cache_list); static LIST_HEAD(mb_cache_lru_list); static DEFINE_SPINLOCK(mb_cache_spinlock); -static struct shrinker *mb_shrinker; static inline int mb_cache_indexes(struct mb_cache *cache) @@ -118,6 +117,10 @@ mb_cache_indexes(struct mb_cache *cache) static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); +static struct shrinker mb_cache_shrinker = { + .shrink = mb_cache_shrink_fn, + .seeks = DEFAULT_SEEKS, +}; static inline int __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) @@ -662,13 +665,13 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, static int __init init_mbcache(void) { - mb_shrinker = set_shrinker(DEFAULT_SEEKS, mb_cache_shrink_fn); + register_shrinker(&mb_cache_shrinker); return 0; } static void __exit exit_mbcache(void) { - remove_shrinker(mb_shrinker); + unregister_shrinker(&mb_cache_shrinker); } module_init(init_mbcache) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a2b1af89ca1a..adffe1615c51 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -300,7 +300,10 @@ static const struct super_operations nfs4_sops = { }; #endif -static struct shrinker *acl_shrinker; +static struct shrinker acl_shrinker = { + .shrink = nfs_access_cache_shrinker, + .seeks = DEFAULT_SEEKS, +}; /* * Register the NFS filesystems @@ -321,7 +324,7 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_2; #endif - acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker); + register_shrinker(&acl_shrinker); return 0; #ifdef CONFIG_NFS_V4 @@ -339,8 +342,7 @@ error_0: */ void __exit unregister_nfs_fs(void) { - if (acl_shrinker != NULL) - remove_shrinker(acl_shrinker); + unregister_shrinker(&acl_shrinker); #ifdef CONFIG_NFS_V4 unregister_filesystem(&nfs4_fs_type); nfs_unregister_sysctl(); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 2df63622354e..b0f0e58866de 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -35,10 +35,13 @@ #include static kmem_zone_t *xfs_buf_zone; -static struct shrinker *xfs_buf_shake; STATIC int xfsbufd(void *); STATIC int xfsbufd_wakeup(int, gfp_t); STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); +static struct shrinker xfs_buf_shake = { + .shrink = xfsbufd_wakeup, + .seeks = DEFAULT_SEEKS, +}; static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; @@ -1832,14 +1835,9 @@ xfs_buf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup); - if (!xfs_buf_shake) - goto out_destroy_xfsdatad_workqueue; - + register_shrinker(&xfs_buf_shake); return 0; - out_destroy_xfsdatad_workqueue: - destroy_workqueue(xfsdatad_workqueue); out_destroy_xfslogd_workqueue: destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: @@ -1854,7 +1852,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - remove_shrinker(xfs_buf_shake); + unregister_shrinker(&xfs_buf_shake); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 7def4c699343..2d274b23ade5 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -62,7 +62,6 @@ uint ndquot; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; -static struct shrinker *xfs_qm_shaker; static cred_t xfs_zerocr; @@ -78,6 +77,11 @@ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); STATIC int xfs_qm_shake(int, gfp_t); +static struct shrinker xfs_qm_shaker = { + .shrink = xfs_qm_shake, + .seeks = DEFAULT_SEEKS, +}; + #ifdef DEBUG extern mutex_t qcheck_lock; #endif @@ -149,7 +153,7 @@ xfs_Gqm_init(void) } else xqm->qm_dqzone = qm_dqzone; - xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake); + register_shrinker(&xfs_qm_shaker); /* * The t_dqinfo portion of transactions. @@ -181,7 +185,7 @@ xfs_qm_destroy( ASSERT(xqm != NULL); ASSERT(xqm->qm_nrefs == 0); - remove_shrinker(xfs_qm_shaker); + unregister_shrinker(&xfs_qm_shaker); hsize = xqm->qm_dqhashmask + 1; for (i = 0; i < hsize; i++) { xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); diff --git a/include/linux/mm.h b/include/linux/mm.h index 97d0cddfd223..4c482a3ee870 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -810,27 +810,31 @@ extern unsigned long do_mremap(unsigned long addr, unsigned long flags, unsigned long new_addr); /* - * Prototype to add a shrinker callback for ageable caches. - * - * These functions are passed a count `nr_to_scan' and a gfpmask. They should - * scan `nr_to_scan' objects, attempting to free them. + * A callback you can register to apply pressure to ageable caches. * - * The callback must return the number of objects which remain in the cache. + * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should + * look through the least-recently-used 'nr_to_scan' entries and + * attempt to free them up. It should return the number of objects + * which remain in the cache. If it returns -1, it means it cannot do + * any scanning at this time (eg. there is a risk of deadlock). * - * The callback will be passed nr_to_scan == 0 when the VM is querying the - * cache size, so a fastpath for that case is appropriate. - */ -typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask); - -/* - * Add an aging callback. The int is the number of 'seeks' it takes - * to recreate one of the objects that these functions age. + * The 'gfpmask' refers to the allocation we are currently trying to + * fulfil. + * + * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is + * querying the cache size, so a fastpath for that case is appropriate. */ +struct shrinker { + int (*shrink)(int nr_to_scan, gfp_t gfp_mask); + int seeks; /* seeks to recreate an obj */ -#define DEFAULT_SEEKS 2 -struct shrinker; -extern struct shrinker *set_shrinker(int, shrinker_t); -extern void remove_shrinker(struct shrinker *shrinker); + /* These are for internal use */ + struct list_head list; + long nr; /* objs pending delete */ +}; +#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ +extern void register_shrinker(struct shrinker *); +extern void unregister_shrinker(struct shrinker *); /* * Some shared mappigns will want the pages marked read-only diff --git a/mm/vmscan.c b/mm/vmscan.c index 1d9971d8924b..2225b7c9df85 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -70,17 +70,6 @@ struct scan_control { int order; }; -/* - * The list of shrinker callbacks used by to apply pressure to - * ageable caches. - */ -struct shrinker { - shrinker_t shrinker; - struct list_head list; - int seeks; /* seeks to recreate an obj */ - long nr; /* objs pending delete */ -}; - #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) #ifdef ARCH_HAS_PREFETCH @@ -123,34 +112,25 @@ static DECLARE_RWSEM(shrinker_rwsem); /* * Add a shrinker callback to be called from the vm */ -struct shrinker *set_shrinker(int seeks, shrinker_t theshrinker) +void register_shrinker(struct shrinker *shrinker) { - struct shrinker *shrinker; - - shrinker = kmalloc(sizeof(*shrinker), GFP_KERNEL); - if (shrinker) { - shrinker->shrinker = theshrinker; - shrinker->seeks = seeks; - shrinker->nr = 0; - down_write(&shrinker_rwsem); - list_add_tail(&shrinker->list, &shrinker_list); - up_write(&shrinker_rwsem); - } - return shrinker; + shrinker->nr = 0; + down_write(&shrinker_rwsem); + list_add_tail(&shrinker->list, &shrinker_list); + up_write(&shrinker_rwsem); } -EXPORT_SYMBOL(set_shrinker); +EXPORT_SYMBOL(register_shrinker); /* * Remove one */ -void remove_shrinker(struct shrinker *shrinker) +void unregister_shrinker(struct shrinker *shrinker) { down_write(&shrinker_rwsem); list_del(&shrinker->list); up_write(&shrinker_rwsem); - kfree(shrinker); } -EXPORT_SYMBOL(remove_shrinker); +EXPORT_SYMBOL(unregister_shrinker); #define SHRINK_BATCH 128 /* @@ -187,7 +167,7 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, list_for_each_entry(shrinker, &shrinker_list, list) { unsigned long long delta; unsigned long total_scan; - unsigned long max_pass = (*shrinker->shrinker)(0, gfp_mask); + unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask); delta = (4 * scanned) / shrinker->seeks; delta *= max_pass; @@ -215,8 +195,8 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, int shrink_ret; int nr_before; - nr_before = (*shrinker->shrinker)(0, gfp_mask); - shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask); + nr_before = (*shrinker->shrink)(0, gfp_mask); + shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask); if (shrink_ret == -1) break; if (shrink_ret < nr_before) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index aa55d0a03e6f..29a8ecc60928 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -543,17 +543,18 @@ rpcauth_uptodatecred(struct rpc_task *task) test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; } - -static struct shrinker *rpc_cred_shrinker; +static struct shrinker rpc_cred_shrinker = { + .shrink = rpcauth_cache_shrinker, + .seeks = DEFAULT_SEEKS, +}; void __init rpcauth_init_module(void) { rpc_init_authunix(); - rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker); + register_shrinker(&rpc_cred_shrinker); } void __exit rpcauth_remove_module(void) { - if (rpc_cred_shrinker != NULL) - remove_shrinker(rpc_cred_shrinker); + unregister_shrinker(&rpc_cred_shrinker); } -- cgit v1.2.3 From 831441862956fffa17b9801db37e6ea1650b0f69 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 Jul 2007 04:03:35 -0700 Subject: Freezer: make kernel threads nonfreezable by default Currently, the freezer treats all tasks as freezable, except for the kernel threads that explicitly set the PF_NOFREEZE flag for themselves. This approach is problematic, since it requires every kernel thread to either set PF_NOFREEZE explicitly, or call try_to_freeze(), even if it doesn't care for the freezing of tasks at all. It seems better to only require the kernel threads that want to or need to be frozen to use some freezer-related code and to remove any freezer-related code from the other (nonfreezable) kernel threads, which is done in this patch. The patch causes all kernel threads to be nonfreezable by default (ie. to have PF_NOFREEZE set by default) and introduces the set_freezable() function that should be called by the freezable kernel threads in order to unset PF_NOFREEZE. It also makes all of the currently freezable kernel threads call set_freezable(), so it shouldn't cause any (intentional) change of behaviour to appear. Additionally, it updates documentation to describe the freezing of tasks more accurately. [akpm@linux-foundation.org: build fixes] Signed-off-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Cc: Pavel Machek Cc: Oleg Nesterov Cc: Gautham R Shenoy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/freezing-of-tasks.txt | 160 ++++++++++++++++++++++++++++++ Documentation/power/kernel_threads.txt | 40 -------- Documentation/power/swsusp.txt | 18 +--- arch/i386/kernel/apm.c | 2 +- arch/i386/kernel/io_apic.c | 1 + drivers/block/loop.c | 8 +- drivers/block/pktcdvd.c | 1 + drivers/char/apm-emulation.c | 12 +-- drivers/char/hvc_console.c | 1 + drivers/edac/edac_mc.c | 1 + drivers/ieee1394/ieee1394_core.c | 3 +- drivers/ieee1394/nodemgr.c | 1 + drivers/input/gameport/gameport.c | 1 + drivers/input/serio/serio.c | 1 + drivers/input/touchscreen/ucb1400_ts.c | 1 + drivers/macintosh/therm_adt746x.c | 1 + drivers/macintosh/windfarm_core.c | 1 + drivers/md/md.c | 1 - drivers/media/dvb/dvb-core/dvb_frontend.c | 1 + drivers/media/video/cx88/cx88-tvaudio.c | 1 + drivers/media/video/msp3400-kthreads.c | 6 +- drivers/media/video/tvaudio.c | 2 +- drivers/media/video/video-buf-dvb.c | 1 + drivers/media/video/vivi.c | 1 + drivers/mfd/ucb1x00-ts.c | 1 + drivers/mmc/card/queue.c | 7 +- drivers/mtd/mtd_blkdevs.c | 3 +- drivers/mtd/ubi/wl.c | 1 + drivers/net/wireless/airo.c | 3 +- drivers/net/wireless/libertas/main.c | 1 + drivers/pcmcia/cs.c | 1 + drivers/pnp/pnpbios/core.c | 1 + drivers/scsi/libsas/sas_scsi_host.c | 3 +- drivers/scsi/scsi_error.c | 3 +- drivers/usb/atm/ueagle-atm.c | 1 + drivers/usb/core/hub.c | 1 + drivers/usb/gadget/file_storage.c | 3 + drivers/usb/storage/usb.c | 3 +- drivers/video/ps3fb.c | 1 + drivers/w1/w1.c | 1 + fs/cifs/cifsfs.c | 1 + fs/cifs/connect.c | 1 + fs/jffs2/background.c | 1 + fs/lockd/svc.c | 2 + fs/nfs/callback.c | 2 + fs/nfsd/nfssvc.c | 2 + fs/xfs/linux-2.6/xfs_super.c | 1 + include/linux/freezer.h | 14 +++ init/do_mounts_initrd.c | 7 +- kernel/audit.c | 1 + kernel/exit.c | 6 ++ kernel/fork.c | 2 +- kernel/rcutorture.c | 4 +- kernel/rtmutex-tester.c | 1 + kernel/sched.c | 3 - kernel/softirq.c | 3 +- kernel/softlockup.c | 2 +- kernel/workqueue.c | 4 +- mm/pdflush.c | 1 + mm/vmscan.c | 1 + net/bluetooth/bnep/core.c | 2 +- net/bluetooth/cmtp/core.c | 2 +- net/bluetooth/hidp/core.c | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/core/pktgen.c | 2 + 65 files changed, 256 insertions(+), 113 deletions(-) create mode 100644 Documentation/power/freezing-of-tasks.txt delete mode 100644 Documentation/power/kernel_threads.txt (limited to 'net') diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt new file mode 100644 index 000000000000..af1a282c71a3 --- /dev/null +++ b/Documentation/power/freezing-of-tasks.txt @@ -0,0 +1,160 @@ +Freezing of tasks + (C) 2007 Rafael J. Wysocki , GPL + +I. What is the freezing of tasks? + +The freezing of tasks is a mechanism by which user space processes and some +kernel threads are controlled during hibernation or system-wide suspend (on some +architectures). + +II. How does it work? + +There are four per-task flags used for that, PF_NOFREEZE, PF_FROZEN, TIF_FREEZE +and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have +PF_NOFREEZE unset (all user space processes and some kernel threads) are +regarded as 'freezable' and treated in a special way before the system enters a +suspend state as well as before a hibernation image is created (in what follows +we only consider hibernation, but the description also applies to suspend). + +Namely, as the first step of the hibernation procedure the function +freeze_processes() (defined in kernel/power/process.c) is called. It executes +try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and +sends a fake signal to each of them. A task that receives such a signal and has +TIF_FREEZE set, should react to it by calling the refrigerator() function +(defined in kernel/power/process.c), which sets the task's PF_FROZEN flag, +changes its state to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is +cleared for it. Then, we say that the task is 'frozen' and therefore the set of +functions handling this mechanism is called 'the freezer' (these functions are +defined in kernel/power/process.c and include/linux/freezer.h). User space +processes are generally frozen before kernel threads. + +It is not recommended to call refrigerator() directly. Instead, it is +recommended to use the try_to_freeze() function (defined in +include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the +task enter refrigerator() if the flag is set. + +For user space processes try_to_freeze() is called automatically from the +signal-handling code, but the freezable kernel threads need to call it +explicitly in suitable places. The code to do this may look like the following: + + do { + hub_events(); + wait_event_interruptible(khubd_wait, + !list_empty(&hub_event_list)); + try_to_freeze(); + } while (!signal_pending(current)); + +(from drivers/usb/core/hub.c::hub_thread()). + +If a freezable kernel thread fails to call try_to_freeze() after the freezer has +set TIF_FREEZE for it, the freezing of tasks will fail and the entire +hibernation operation will be cancelled. For this reason, freezable kernel +threads must call try_to_freeze() somewhere. + +After the system memory state has been restored from a hibernation image and +devices have been reinitialized, the function thaw_processes() is called in +order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that +have been frozen leave refrigerator() and continue running. + +III. Which kernel threads are freezable? + +Kernel threads are not freezable by default. However, a kernel thread may clear +PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE +directly is strongly discouraged). From this point it is regarded as freezable +and must call try_to_freeze() in a suitable place. + +IV. Why do we do that? + +Generally speaking, there is a couple of reasons to use the freezing of tasks: + +1. The principal reason is to prevent filesystems from being damaged after +hibernation. At the moment we have no simple means of checkpointing +filesystems, so if there are any modifications made to filesystem data and/or +metadata on disks, we cannot bring them back to the state from before the +modifications. At the same time each hibernation image contains some +filesystem-related information that must be consistent with the state of the +on-disk data and metadata after the system memory state has been restored from +the image (otherwise the filesystems will be damaged in a nasty way, usually +making them almost impossible to repair). We therefore freeze tasks that might +cause the on-disk filesystems' data and metadata to be modified after the +hibernation image has been created and before the system is finally powered off. +The majority of these are user space processes, but if any of the kernel threads +may cause something like this to happen, they have to be freezable. + +2. The second reason is to prevent user space processes and some kernel threads +from interfering with the suspending and resuming of devices. A user space +process running on a second CPU while we are suspending devices may, for +example, be troublesome and without the freezing of tasks we would need some +safeguards against race conditions that might occur in such a case. + +Although Linus Torvalds doesn't like the freezing of tasks, he said this in one +of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608): + +"RJW:> Why we freeze tasks at all or why we freeze kernel threads? + +Linus: In many ways, 'at all'. + +I _do_ realize the IO request queue issues, and that we cannot actually do +s2ram with some devices in the middle of a DMA. So we want to be able to +avoid *that*, there's no question about that. And I suspect that stopping +user threads and then waiting for a sync is practically one of the easier +ways to do so. + +So in practice, the 'at all' may become a 'why freeze kernel threads?' and +freezing user threads I don't find really objectionable." + +Still, there are kernel threads that may want to be freezable. For example, if +a kernel that belongs to a device driver accesses the device directly, it in +principle needs to know when the device is suspended, so that it doesn't try to +access it at that time. However, if the kernel thread is freezable, it will be +frozen before the driver's .suspend() callback is executed and it will be +thawed after the driver's .resume() callback has run, so it won't be accessing +the device while it's suspended. + +3. Another reason for freezing tasks is to prevent user space processes from +realizing that hibernation (or suspend) operation takes place. Ideally, user +space processes should not notice that such a system-wide operation has occurred +and should continue running without any problems after the restore (or resume +from suspend). Unfortunately, in the most general case this is quite difficult +to achieve without the freezing of tasks. Consider, for example, a process +that depends on all CPUs being online while it's running. Since we need to +disable nonboot CPUs during the hibernation, if this process is not frozen, it +may notice that the number of CPUs has changed and may start to work incorrectly +because of that. + +V. Are there any problems related to the freezing of tasks? + +Yes, there are. + +First of all, the freezing of kernel threads may be tricky if they depend one +on another. For example, if kernel thread A waits for a completion (in the +TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B +and B is frozen in the meantime, then A will be blocked until B is thawed, which +may be undesirable. That's why kernel threads are not freezable by default. + +Second, there are the following two problems related to the freezing of user +space processes: +1. Putting processes into an uninterruptible sleep distorts the load average. +2. Now that we have FUSE, plus the framework for doing device drivers in +userspace, it gets even more complicated because some userspace processes are +now doing the sorts of things that kernel threads do +(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html). + +The problem 1. seems to be fixable, although it hasn't been fixed so far. The +other one is more serious, but it seems that we can work around it by using +hibernation (and suspend) notifiers (in that case, though, we won't be able to +avoid the realization by the user space processes that the hibernation is taking +place). + +There are also problems that the freezing of tasks tends to expose, although +they are not directly related to it. For example, if request_firmware() is +called from a device driver's .resume() routine, it will timeout and eventually +fail, because the user land process that should respond to the request is frozen +at this point. So, seemingly, the failure is due to the freezing of tasks. +Suppose, however, that the firmware file is located on a filesystem accessible +only through another device that hasn't been resumed yet. In that case, +request_firmware() will fail regardless of whether or not the freezing of tasks +is used. Consequently, the problem is not really related to the freezing of +tasks, since it generally exists anyway. [The solution to this particular +problem is to keep the firmware in memory after it's loaded for the first time +and upload if from memory to the device whenever necessary.] diff --git a/Documentation/power/kernel_threads.txt b/Documentation/power/kernel_threads.txt deleted file mode 100644 index fb57784986b1..000000000000 --- a/Documentation/power/kernel_threads.txt +++ /dev/null @@ -1,40 +0,0 @@ -KERNEL THREADS - - -Freezer - -Upon entering a suspended state the system will freeze all -tasks. This is done by delivering pseudosignals. This affects -kernel threads, too. To successfully freeze a kernel thread -the thread has to check for the pseudosignal and enter the -refrigerator. Code to do this looks like this: - - do { - hub_events(); - wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list)); - try_to_freeze(); - } while (!signal_pending(current)); - -from drivers/usb/core/hub.c::hub_thread() - - -The Unfreezable - -Some kernel threads however, must not be frozen. The kernel must -be able to finish pending IO operations and later on be able to -write the memory image to disk. Kernel threads needed to do IO -must stay awake. Such threads must mark themselves unfreezable -like this: - - /* - * This thread doesn't need any user-level access, - * so get rid of all our resources. - */ - daemonize("usb-storage"); - - current->flags |= PF_NOFREEZE; - -from drivers/usb/storage/usb.c::usb_stor_control_thread() - -Such drivers are themselves responsible for staying quiet during -the actual snapshotting. diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index 152b510d1bbb..aea7e9209667 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -140,21 +140,11 @@ should be sent to the mailing list available through the suspend2 website, and not to the Linux Kernel Mailing List. We are working toward merging suspend2 into the mainline kernel. -Q: A kernel thread must voluntarily freeze itself (call 'refrigerator'). -I found some kernel threads that don't do it, and they don't freeze -so the system can't sleep. Is this a known behavior? - -A: All such kernel threads need to be fixed, one by one. Select the -place where the thread is safe to be frozen (no kernel semaphores -should be held at that point and it must be safe to sleep there), and -add: - - try_to_freeze(); - -If the thread is needed for writing the image to storage, you should -instead set the PF_NOFREEZE process flag when creating the thread (and -be very careful). +Q: What is the freezing of tasks and why are we using it? +A: The freezing of tasks is a mechanism by which user space processes and some +kernel threads are controlled during hibernation or system-wide suspend (on some +architectures). See freezing-of-tasks.txt for details. Q: What is the difference between "platform" and "shutdown"? diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 4112afe712b9..47001d50a083 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -222,6 +222,7 @@ #include #include #include +#include #include #include #include @@ -2311,7 +2312,6 @@ static int __init apm_init(void) remove_proc_entry("apm", NULL); return err; } - kapmd_task->flags |= PF_NOFREEZE; wake_up_process(kapmd_task); if (num_online_cpus() > 1 && !smp ) { diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 7f8b7af2b95f..21db8f56c9a1 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -667,6 +667,7 @@ static int balanced_irq(void *unused) set_pending_irq(i, cpumask_of_cpu(0)); } + set_freezable(); for ( ; ; ) { time_remaining = schedule_timeout_interruptible(time_remaining); try_to_freeze(); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4503290da407..06eaa11cbc2f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include /* for invalidate_bdev() */ #include @@ -600,13 +601,6 @@ static int loop_thread(void *data) struct loop_device *lo = data; struct bio *bio; - /* - * loop can be used in an encrypted device, - * hence, it mustn't be stopped at all - * because it could be indirectly used during suspension - */ - current->flags |= PF_NOFREEZE; - set_user_nice(current, -20); while (!kthread_should_stop() || lo->lo_bio) { diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7c294a40002e..31be33e4f119 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1593,6 +1593,7 @@ static int kcdrwd(void *foobar) long min_sleep_time, residue; set_user_nice(current, -20); + set_freezable(); for (;;) { DECLARE_WAITQUEUE(wait, current); diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c index 179c7a3b6e75..ec116df919d9 100644 --- a/drivers/char/apm-emulation.c +++ b/drivers/char/apm-emulation.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -329,13 +330,8 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) /* * Wait for the suspend/resume to complete. If there * are pending acknowledges, we wait here for them. - * - * Note: we need to ensure that the PM subsystem does - * not kick us out of the wait when it suspends the - * threads. */ flags = current->flags; - current->flags |= PF_NOFREEZE; wait_event(apm_suspend_waitqueue, as->suspend_state == SUSPEND_DONE); @@ -365,13 +361,8 @@ apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) /* * Wait for the suspend/resume to complete. If there * are pending acknowledges, we wait here for them. - * - * Note: we need to ensure that the PM subsystem does - * not kick us out of the wait when it suspends the - * threads. */ flags = current->flags; - current->flags |= PF_NOFREEZE; wait_event_interruptible(apm_suspend_waitqueue, as->suspend_state == SUSPEND_DONE); @@ -598,7 +589,6 @@ static int __init apm_init(void) kapmd_tsk = NULL; return ret; } - kapmd_tsk->flags |= PF_NOFREEZE; wake_up_process(kapmd_tsk); #ifdef CONFIG_PROC_FS diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index b3ab42e0dd4a..83c1151ec7a2 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -679,6 +679,7 @@ static int khvcd(void *unused) int poll_mask; struct hvc_struct *hp; + set_freezable(); __set_current_state(TASK_RUNNING); do { poll_mask = 0; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 7b622300d0e5..804875de5801 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -1906,6 +1906,7 @@ static void do_edac_check(void) static int edac_kernel_thread(void *arg) { + set_freezable(); while (!kthread_should_stop()) { do_edac_check(); diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c index 0fc8c6e559e4..ee45259573c8 100644 --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1128,8 +1129,6 @@ static int hpsbpkt_thread(void *__hi) struct list_head tmp; int may_schedule; - current->flags |= PF_NOFREEZE; - while (!kthread_should_stop()) { INIT_LIST_HEAD(&tmp); diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 51a12062ed36..2ffd53461db6 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -1699,6 +1699,7 @@ static int nodemgr_host_thread(void *__hi) unsigned int g, generation = 0; int i, reset_cycles = 0; + set_freezable(); /* Setup our device-model entries */ nodemgr_create_host_dev_files(host); diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index bd686a2a517d..20896d5e5f0e 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -445,6 +445,7 @@ static struct gameport *gameport_get_pending_child(struct gameport *parent) static int gameport_thread(void *nothing) { + set_freezable(); do { gameport_handle_event(); wait_event_interruptible(gameport_wait, diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index a8f3bc1dff22..372ca4931194 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -384,6 +384,7 @@ static struct serio *serio_get_pending_child(struct serio *parent) static int serio_thread(void *nothing) { + set_freezable(); do { serio_handle_event(); wait_event_interruptible(serio_wait, diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index f0cbcdb008ed..36f944019158 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -292,6 +292,7 @@ static int ucb1400_ts_thread(void *_ucb) sched_setscheduler(tsk, SCHED_FIFO, ¶m); + set_freezable(); while (!kthread_should_stop()) { unsigned int x, y, p; long timeout; diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index bd55e6ab99fc..f25685b9b7cf 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -335,6 +335,7 @@ static int monitor_task(void *arg) { struct thermostat* th = arg; + set_freezable(); while(!kthread_should_stop()) { try_to_freeze(); msleep_interruptible(2000); diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 4fcb245ba184..e18d265d5d33 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -92,6 +92,7 @@ static int wf_thread_func(void *data) DBG("wf: thread started\n"); + set_freezable(); while(!kthread_should_stop()) { if (time_after_eq(jiffies, next)) { wf_notify(WF_EVENT_TICK, NULL); diff --git a/drivers/md/md.c b/drivers/md/md.c index 33beaa7da085..9aefc4a023df 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4642,7 +4642,6 @@ static int md_thread(void * arg) * many dirty RAID5 blocks. */ - current->flags |= PF_NOFREEZE; allow_signal(SIGKILL); while (!kthread_should_stop()) { diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index f4e4ca2dcade..b6c7f6610ec5 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -523,6 +523,7 @@ static int dvb_frontend_thread(void *data) dvb_frontend_init(fe); + set_freezable(); while (1) { up(&fepriv->sem); /* is locked when we enter the thread... */ restart: diff --git a/drivers/media/video/cx88/cx88-tvaudio.c b/drivers/media/video/cx88/cx88-tvaudio.c index 259ea08e784f..1cc2d286a1cb 100644 --- a/drivers/media/video/cx88/cx88-tvaudio.c +++ b/drivers/media/video/cx88/cx88-tvaudio.c @@ -906,6 +906,7 @@ int cx88_audio_thread(void *data) u32 mode = 0; dprintk("cx88: tvaudio thread started\n"); + set_freezable(); for (;;) { msleep_interruptible(1000); if (kthread_should_stop()) diff --git a/drivers/media/video/msp3400-kthreads.c b/drivers/media/video/msp3400-kthreads.c index e1821eb82fb5..d5ee2629121e 100644 --- a/drivers/media/video/msp3400-kthreads.c +++ b/drivers/media/video/msp3400-kthreads.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -468,6 +469,7 @@ int msp3400c_thread(void *data) v4l_dbg(1, msp_debug, client, "msp3400 daemon started\n"); + set_freezable(); for (;;) { v4l_dbg(2, msp_debug, client, "msp3400 thread: sleep\n"); msp_sleep(state, -1); @@ -646,7 +648,7 @@ int msp3410d_thread(void *data) int val, i, std, count; v4l_dbg(1, msp_debug, client, "msp3410 daemon started\n"); - + set_freezable(); for (;;) { v4l_dbg(2, msp_debug, client, "msp3410 thread: sleep\n"); msp_sleep(state,-1); @@ -940,7 +942,7 @@ int msp34xxg_thread(void *data) int val, i; v4l_dbg(1, msp_debug, client, "msp34xxg daemon started\n"); - + set_freezable(); for (;;) { v4l_dbg(2, msp_debug, client, "msp34xxg thread: sleep\n"); msp_sleep(state, -1); diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index c9bf9dbc2ea3..9da338dc4f3b 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -271,7 +271,7 @@ static int chip_thread(void *data) struct CHIPDESC *desc = chiplist + chip->type; v4l_dbg(1, debug, &chip->c, "%s: thread started\n", chip->c.name); - + set_freezable(); for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop()) diff --git a/drivers/media/video/video-buf-dvb.c b/drivers/media/video/video-buf-dvb.c index fcc5467e7636..e617925ba31e 100644 --- a/drivers/media/video/video-buf-dvb.c +++ b/drivers/media/video/video-buf-dvb.c @@ -47,6 +47,7 @@ static int videobuf_dvb_thread(void *data) int err; dprintk("dvb thread started\n"); + set_freezable(); videobuf_read_start(&dvb->dvbq); for (;;) { diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c index f7e1d1910374..3ef4d0159c33 100644 --- a/drivers/media/video/vivi.c +++ b/drivers/media/video/vivi.c @@ -573,6 +573,7 @@ static int vivi_thread(void *data) dprintk(1,"thread started\n"); mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT); + set_freezable(); for (;;) { vivi_sleep(dma_q); diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 38e815a2e871..fdbaa776f249 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -209,6 +209,7 @@ static int ucb1x00_thread(void *_ts) DECLARE_WAITQUEUE(wait, tsk); int valid = 0; + set_freezable(); add_wait_queue(&ts->irq_wait, &wait); while (!kthread_should_stop()) { unsigned int x, y, p; diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 4fb2089dc690..b53dac8d1b69 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include @@ -44,11 +45,7 @@ static int mmc_queue_thread(void *d) struct mmc_queue *mq = d; struct request_queue *q = mq->queue; - /* - * Set iothread to ensure that we aren't put to sleep by - * the process freezing. We handle suspension ourselves. - */ - current->flags |= PF_MEMALLOC|PF_NOFREEZE; + current->flags |= PF_MEMALLOC; down(&mq->thread_sem); do { diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 51bc7e2f1f22..ef89780eb9d6 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,7 @@ static int mtd_blktrans_thread(void *arg) struct request_queue *rq = tr->blkcore_priv->rq; /* we might get involved when memory gets low, so use PF_MEMALLOC */ - current->flags |= PF_MEMALLOC | PF_NOFREEZE; + current->flags |= PF_MEMALLOC; spin_lock_irq(rq->queue_lock); while (!kthread_should_stop()) { diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 9ecaf77eca9e..ab2174a56bc2 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1346,6 +1346,7 @@ static int ubi_thread(void *u) ubi_msg("background thread \"%s\" started, PID %d", ubi->bgt_name, current->pid); + set_freezable(); for (;;) { int err; diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 1c54908fdc4c..ee1cc14db389 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -3086,7 +3086,8 @@ static int airo_thread(void *data) { struct net_device *dev = data; struct airo_info *ai = dev->priv; int locked; - + + set_freezable(); while(1) { /* make swsusp happy with our thread */ try_to_freeze(); diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 4a59306a3f05..9f366242c392 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -613,6 +613,7 @@ static int wlan_service_main_thread(void *data) init_waitqueue_entry(&wait, current); + set_freezable(); for (;;) { lbs_deb_thread( "main-thread 111: intcounter=%d " "currenttxskb=%p dnld_sent=%d\n", diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index 50cad3a59a6c..7c93a108f9b8 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -651,6 +651,7 @@ static int pccardd(void *__skt) add_wait_queue(&skt->thread_wait, &wait); complete(&skt->thread_done); + set_freezable(); for (;;) { unsigned long flags; unsigned int events; diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 3a201b77b963..03baf1c64a2e 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -160,6 +160,7 @@ static int pnp_dock_thread(void * unused) { static struct pnp_docking_station_info now; int docked = -1, d = 0; + set_freezable(); while (!unloading) { int status; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index d70ddfda93fc..9c5342e7a69c 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -40,6 +40,7 @@ #include #include +#include #include /* ---------- SCSI Host glue ---------- */ @@ -868,8 +869,6 @@ static int sas_queue_thread(void *_sas_ha) { struct sas_ha_struct *sas_ha = _sas_ha; - current->flags |= PF_NOFREEZE; - while (1) { set_current_state(TASK_INTERRUPTIBLE); schedule(); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 9adb64ac054c..8a525abda30f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1516,8 +1517,6 @@ int scsi_error_handler(void *data) { struct Scsi_Host *shost = data; - current->flags |= PF_NOFREEZE; - /* * We use TASK_INTERRUPTIBLE so that the thread is not * counted against the load average as a running process. diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index 4973e147bc79..8f046659b4e9 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -1168,6 +1168,7 @@ static int uea_kthread(void *data) struct uea_softc *sc = data; int ret = -EAGAIN; + set_freezable(); uea_enters(INS_TO_USBDEV(sc)); while (!kthread_should_stop()) { if (ret < 0 || sc->reset) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 50e79010401c..fd74c50b1804 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2728,6 +2728,7 @@ loop: static int hub_thread(void *__unused) { + set_freezable(); do { hub_events(); wait_event_interruptible(khubd_wait, diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 8712ef987179..be7a1bd2823b 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -3434,6 +3434,9 @@ static int fsg_main_thread(void *fsg_) allow_signal(SIGKILL); allow_signal(SIGUSR1); + /* Allow the thread to be frozen */ + set_freezable(); + /* Arrange for userspace references to be interpreted as kernel * pointers. That way we can pass a kernel pointer to a routine * that expects a __user pointer and it will work okay. */ diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index bef8bcd9bd98..28842d208bb0 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -311,8 +311,6 @@ static int usb_stor_control_thread(void * __us) struct Scsi_Host *host = us_to_host(us); int autopm_rc; - current->flags |= PF_NOFREEZE; - for(;;) { US_DEBUGP("*** thread sleeping.\n"); if(down_interruptible(&us->sema)) @@ -920,6 +918,7 @@ static int usb_stor_scan_thread(void * __us) printk(KERN_DEBUG "usb-storage: device found at %d\n", us->pusb_dev->devnum); + set_freezable(); /* Wait for the timeout to expire or for a disconnect */ if (delay_use > 0) { printk(KERN_DEBUG "usb-storage: waiting for device " diff --git a/drivers/video/ps3fb.c b/drivers/video/ps3fb.c index 08b7ffbbbbd8..3972aa8cf859 100644 --- a/drivers/video/ps3fb.c +++ b/drivers/video/ps3fb.c @@ -812,6 +812,7 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd, static int ps3fbd(void *arg) { + set_freezable(); while (!kthread_should_stop()) { try_to_freeze(); set_current_state(TASK_INTERRUPTIBLE); diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index f5c5b760ed7b..c6332108f1c5 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -805,6 +805,7 @@ static int w1_control(void *data) struct w1_master *dev, *n; int have_to_wait = 0; + set_freezable(); while (!kthread_should_stop() || have_to_wait) { have_to_wait = 0; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8b0cbf4a4ad0..bd0f2f2353ce 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -849,6 +849,7 @@ static int cifs_oplock_thread(void * dummyarg) __u16 netfid; int rc; + set_freezable(); do { if (try_to_freeze()) continue; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f4e92661b223..0a1b8bd1dfcb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -363,6 +363,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) GFP_KERNEL); } + set_freezable(); while (!kthread_should_stop()) { if (try_to_freeze()) continue; diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 0c82dfcfd246..143c5530caf3 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -81,6 +81,7 @@ static int jffs2_garbage_collect_thread(void *_c) set_user_nice(current, 10); + set_freezable(); for (;;) { allow_signal(SIGHUP); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 26809325469c..9fcdef50aad6 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -119,6 +120,7 @@ lockd(struct svc_rqst *rqstp) complete(&lockd_start_done); daemonize("lockd"); + set_freezable(); /* Process request with signals blocked, but allow SIGKILL. */ allow_signal(SIGKILL); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f309c8741a..a796be5051bf 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -67,6 +68,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) daemonize("nfsv4-svc"); /* Process request with signals blocked, but allow SIGKILL. */ allow_signal(SIGKILL); + set_freezable(); complete(&nfs_callback_info.started); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ff55950efb43..5c8192bcbced 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -432,6 +433,7 @@ nfsd(struct svc_rqst *rqstp) * dirty pages. */ current->flags |= PF_LESS_THROTTLE; + set_freezable(); /* * The main request loop diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 06894cf00b12..4528f9a3f304 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -562,6 +562,7 @@ xfssyncd( bhv_vfs_sync_work_t *work, *n; LIST_HEAD (tmp); + set_freezable(); timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); for (;;) { timeleft = schedule_timeout_interruptible(timeleft); diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 4631086f5060..2d38b1a74662 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -1,5 +1,8 @@ /* Freezer declarations */ +#ifndef FREEZER_H_INCLUDED +#define FREEZER_H_INCLUDED + #include #ifdef CONFIG_PM @@ -115,6 +118,14 @@ static inline int freezer_should_skip(struct task_struct *p) return !!(p->flags & PF_FREEZER_SKIP); } +/* + * Tell the freezer that the current task should be frozen by it + */ +static inline void set_freezable(void) +{ + current->flags &= ~PF_NOFREEZE; +} + #else static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } @@ -130,4 +141,7 @@ static inline int try_to_freeze(void) { return 0; } static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } +static inline void set_freezable(void) {} #endif + +#endif /* FREEZER_H_INCLUDED */ diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index b222ce9e1c8b..a6b4c0c08e13 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -56,12 +56,9 @@ static void __init handle_initrd(void) sys_chroot("."); pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); - if (pid > 0) { - while (pid != sys_wait4(-1, NULL, 0, NULL)) { - try_to_freeze(); + if (pid > 0) + while (pid != sys_wait4(-1, NULL, 0, NULL)) yield(); - } - } /* move initrd to rootfs' /old */ sys_fchdir(old_fd); diff --git a/kernel/audit.c b/kernel/audit.c index 5ce8851facf7..eb0f9165b401 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -392,6 +392,7 @@ static int kauditd_thread(void *dummy) { struct sk_buff *skb; + set_freezable(); while (!kthread_should_stop()) { skb = skb_dequeue(&audit_skb_queue); wake_up(&audit_backlog_wait); diff --git a/kernel/exit.c b/kernel/exit.c index 57626692cd90..e8af8d0c2483 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -387,6 +388,11 @@ void daemonize(const char *name, ...) * they would be locked into memory. */ exit_mm(current); + /* + * We don't want to have TIF_FREEZE set if the system-wide hibernation + * or suspend transition begins right now. + */ + current->flags |= PF_NOFREEZE; set_special_pids(1, 1); proc_clear_tty(current); diff --git a/kernel/fork.c b/kernel/fork.c index 7c5c5888e00a..ba39bdb2a7b8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -923,7 +923,7 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) { unsigned long new_flags = p->flags; - new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); + new_flags &= ~PF_SUPERPRIV; new_flags |= PF_FORKNOEXEC; if (!(clone_flags & CLONE_PTRACE)) p->ptrace = 0; diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 55ba82a85a66..ddff33247785 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -518,7 +519,6 @@ rcu_torture_writer(void *arg) VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); set_user_nice(current, 19); - current->flags |= PF_NOFREEZE; do { schedule_timeout_uninterruptible(1); @@ -558,7 +558,6 @@ rcu_torture_fakewriter(void *arg) VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); set_user_nice(current, 19); - current->flags |= PF_NOFREEZE; do { schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); @@ -589,7 +588,6 @@ rcu_torture_reader(void *arg) VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); set_user_nice(current, 19); - current->flags |= PF_NOFREEZE; do { idx = cur_ops->readlock(); diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 015fc633c96c..e3055ba69159 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -260,6 +260,7 @@ static int test_func(void *data) int ret; current->flags |= PF_MUTEX_TESTER; + set_freezable(); allow_signal(SIGHUP); for(;;) { diff --git a/kernel/sched.c b/kernel/sched.c index 1c8076676eb1..cb31fb4a1379 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4912,8 +4912,6 @@ static int migration_thread(void *data) struct migration_req *req; struct list_head *head; - try_to_freeze(); - spin_lock_irq(&rq->lock); if (cpu_is_offline(cpu)) { @@ -5147,7 +5145,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); if (IS_ERR(p)) return NOTIFY_BAD; - p->flags |= PF_NOFREEZE; kthread_bind(p, cpu); /* Must be high prio: stop_machine expects to yield to it. */ rq = task_rq_lock(p, &flags); diff --git a/kernel/softirq.c b/kernel/softirq.c index 8de267790166..0f546ddea43d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -488,8 +489,6 @@ void __init softirq_init(void) static int ksoftirqd(void * __bind_cpu) { - current->flags |= PF_NOFREEZE; - set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 0131e296ffb4..708d4882c0c3 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,6 @@ static int watchdog(void * __bind_cpu) struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; sched_setscheduler(current, SCHED_FIFO, ¶m); - current->flags |= PF_NOFREEZE; /* initialize timestamp */ touch_softlockup_watchdog(); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d7d3fa3072e5..1935302cc645 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -282,8 +282,8 @@ static int worker_thread(void *__cwq) struct cpu_workqueue_struct *cwq = __cwq; DEFINE_WAIT(wait); - if (!cwq->wq->freezeable) - current->flags |= PF_NOFREEZE; + if (cwq->wq->freezeable) + set_freezable(); set_user_nice(current, -5); diff --git a/mm/pdflush.c b/mm/pdflush.c index 8ce0900dc95c..8f6ee073c0e3 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -92,6 +92,7 @@ struct pdflush_work { static int __pdflush(struct pdflush_work *my_work) { current->flags |= PF_FLUSHER | PF_SWAPWRITE; + set_freezable(); my_work->fn = NULL; my_work->who = current; INIT_LIST_HEAD(&my_work->list); diff --git a/mm/vmscan.c b/mm/vmscan.c index 2225b7c9df85..d419e10e3daa 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1421,6 +1421,7 @@ static int kswapd(void *p) * trying to free the first piece of memory in the first place). */ tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + set_freezable(); order = 0; for ( ; ; ) { diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 1c8f4a0c5f43..1f78c3e336d8 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -474,7 +475,6 @@ static int bnep_session(void *arg) daemonize("kbnepd %s", dev->name); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&wait, current); add_wait_queue(sk->sk_sleep, &wait); diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 66bef1ccee2a..ca60a4517fd3 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -287,7 +288,6 @@ static int cmtp_session(void *arg) daemonize("kcmtpd_ctr_%d", session->num); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&wait, current); add_wait_queue(sk->sk_sleep, &wait); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 450eb0244bbf..64d89ca28847 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -547,7 +548,6 @@ static int hidp_session(void *arg) daemonize("khidpd_%04x%04x", vendor, product); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&ctrl_wait, current); init_waitqueue_entry(&intr_wait, current); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 52e04df323ea..bb7220770f2c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1940,7 +1941,6 @@ static int rfcomm_run(void *unused) daemonize("krfcommd"); set_user_nice(current, -10); - current->flags |= PF_NOFREEZE; BT_DBG(""); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 75215331b045..bca787fdbc51 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3465,6 +3465,8 @@ static int pktgen_thread_worker(void *arg) set_current_state(TASK_INTERRUPTIBLE); + set_freezable(); + while (!kthread_should_stop()) { pkt_dev = next_to_run(t); -- cgit v1.2.3 From c4170583f655dca5da32bd14173d6a93805fc48b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 17 Jul 2007 04:04:42 -0700 Subject: knfsd: nfsd4: store pseudoflavor in request Add a new field to the svc_rqst structure to record the pseudoflavor that the request was made with. For now we record the pseudoflavor but don't use it for anything. Signed-off-by: Andy Adamson Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/gss_api.h | 1 + include/linux/sunrpc/svc.h | 1 + net/sunrpc/auth_gss/gss_mech_switch.c | 14 ++++++++++++++ net/sunrpc/auth_gss/svcauth_gss.c | 2 ++ net/sunrpc/svcauth_unix.c | 3 +++ 5 files changed, 21 insertions(+) (limited to 'net') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index bbac101ac372..459c5fc11d51 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -58,6 +58,7 @@ u32 gss_unwrap( u32 gss_delete_sec_context( struct gss_ctx **ctx_id); +u32 gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 service); u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor); char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 129d50f2225c..705a90aa345e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -212,6 +212,7 @@ struct svc_rqst { struct svc_pool * rq_pool; /* thread pool */ struct svc_procedure * rq_procinfo; /* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ + u32 rq_flavor; /* pseudoflavor */ struct svc_cred rq_cred; /* auth info */ struct sk_buff * rq_skbuff; /* fast recv inet buffer */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 26872517ccf3..61801a069ff0 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -193,6 +193,20 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor); +u32 +gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) +{ + int i; + + for (i = 0; i < gm->gm_pf_num; i++) { + if (gm->gm_pfs[i].service == service) { + return gm->gm_pfs[i].pseudoflavor; + } + } + return RPC_AUTH_MAXFLAVOR; /* illegal value */ +} +EXPORT_SYMBOL(gss_svc_to_pseudoflavor); + u32 gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) { diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c094583386fd..7a3e1758bea1 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1131,6 +1131,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) } svcdata->rsci = rsci; cache_get(&rsci->h); + rqstp->rq_flavor = gss_svc_to_pseudoflavor( + rsci->mechctx->mech_type, gc->gc_svc); ret = SVC_OK; goto out; } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 07dcd20cbee4..d9fdf2e4d242 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -707,6 +708,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, RPC_AUTH_NULL); svc_putnl(resv, 0); + rqstp->rq_flavor = RPC_AUTH_NULL; return SVC_OK; } @@ -784,6 +786,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, RPC_AUTH_NULL); svc_putnl(resv, 0); + rqstp->rq_flavor = RPC_AUTH_UNIX; return SVC_OK; badcred: -- cgit v1.2.3 From 3ab4d8b1215d61736e2a9a26bea7cc2e6b029e3d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:46 -0700 Subject: knfsd: nfsd: set rq_client to ip-address-determined-domain We want it to be possible for users to restrict exports both by IP address and by pseudoflavor. The pseudoflavor information has previously been passed using special auth_domains stored in the rq_client field. After the preceding patch that stored the pseudoflavor in rq_pflavor, that's now superfluous; so now we use rq_client for the ip information, as auth_null and auth_unix do. However, we keep around the special auth_domain in the rq_gssclient field for backwards compatibility purposes, so we can still do upcalls using the old "gss/pseudoflavor" auth_domain if upcalls using the unix domain to give us an appropriate export. This allows us to continue supporting old mountd. In fact, for this first patch, we always use the "gss/pseudoflavor" auth_domain (and only it) if it is available; thus rq_client is ignored in the auth_gss case, and this patch on its own makes no change in behavior; that will be left to later patches. Note on idmap: I'm almost tempted to just replace the auth_domain in the idmap upcall by a dummy value--no version of idmapd has ever used it, and it's unlikely anyone really wants to perform idmapping differently depending on the where the client is (they may want to perform *credential* mapping differently, but that's a different matter--the idmapper just handles id's used in getattr and setattr). But I'm updating the idmapd code anyway, just out of general backwards-compatibility paranoia. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 15 +++++++++++---- fs/nfsd/nfs4idmap.c | 13 +++++++++++-- fs/nfsd/nfsfh.c | 2 -- include/linux/sunrpc/svc.h | 1 + include/linux/sunrpc/svcauth.h | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 21 ++++++++++++++++++--- net/sunrpc/svcauth_unix.c | 4 +++- 7 files changed, 45 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9b569af695ab..ac225a79376c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1237,21 +1237,28 @@ struct svc_export * rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, struct dentry *dentry) { - return exp_get_by_name(rqstp->rq_client, mnt, dentry, - &rqstp->rq_chandle); + struct auth_domain *clp; + + clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client; + return exp_get_by_name(clp, mnt, dentry, &rqstp->rq_chandle); } struct svc_export * rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) { - return exp_find(rqstp->rq_client, fsid_type, fsidv, - &rqstp->rq_chandle); + struct auth_domain *clp; + + clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client; + return exp_find(clp, fsid_type, fsidv, &rqstp->rq_chandle); } struct svc_export * rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, struct dentry *dentry) { + struct auth_domain *clp; + + clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client; return exp_parent(rqstp->rq_client, mnt, dentry, &rqstp->rq_chandle); } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 45aa21ce6784..2cf9a9a2d89c 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -587,6 +587,15 @@ idmap_lookup(struct svc_rqst *rqstp, return ret; } +static char * +rqst_authname(struct svc_rqst *rqstp) +{ + struct auth_domain *clp; + + clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client; + return clp->name; +} + static int idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) @@ -600,7 +609,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen return -EINVAL; memcpy(key.name, name, namelen); key.name[namelen] = '\0'; - strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); + strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); if (ret == -ENOENT) ret = -ESRCH; /* nfserr_badname */ @@ -620,7 +629,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) }; int ret; - strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); + strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); if (ret == -ENOENT) return sprintf(name, "%u", id); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 180e068ea064..22cb5be79ad0 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -120,8 +120,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) int data_left = fh->fh_size/4; error = nfserr_stale; - if (rqstp->rq_client == NULL) - goto out; if (rqstp->rq_vers > 2) error = nfserr_badhandle; if (rqstp->rq_vers == 4 && fh->fh_size == 0) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 705a90aa345e..8531a70da73d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -249,6 +249,7 @@ struct svc_rqst { */ /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ + struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ struct svc_cacherep * rq_cacherep; /* cache info */ struct knfsd_fh * rq_reffh; /* Referrence filehandle, used to * determine what device number diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index de92619b0826..22e1ef8e200e 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -127,6 +127,7 @@ extern struct auth_domain *auth_unix_lookup(struct in_addr addr); extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(void); extern void svcauth_unix_info_release(void *); +extern int svcauth_unix_set_client(struct svc_rqst *rqstp); static inline unsigned long hash_str(char *name, int bits) { diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 7a3e1758bea1..e4b3de08b040 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -913,10 +913,23 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) struct gss_svc_data *svcdata = rqstp->rq_auth_data; struct rsc *rsci = svcdata->rsci; struct rpc_gss_wire_cred *gc = &svcdata->clcred; + int stat; - rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc); - if (rqstp->rq_client == NULL) + /* + * A gss export can be specified either by: + * export *(sec=krb5,rw) + * or by + * export gss/krb5(rw) + * The latter is deprecated; but for backwards compatibility reasons + * the nfsd code will still fall back on trying it if the former + * doesn't work; so we try to make both available to nfsd, below. + */ + rqstp->rq_gssclient = find_gss_auth_domain(rsci->mechctx, gc->gc_svc); + if (rqstp->rq_gssclient == NULL) return SVC_DENIED; + stat = svcauth_unix_set_client(rqstp); + if (stat == SVC_DROP) + return stat; return SVC_OK; } @@ -1088,7 +1101,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, GSS_SEQ_WIN); if (svc_safe_putnetobj(resv, &rsip->out_token)) goto drop; - rqstp->rq_client = NULL; } goto complete; case RPC_GSS_PROC_DESTROY: @@ -1319,6 +1331,9 @@ out_err: if (rqstp->rq_client) auth_domain_put(rqstp->rq_client); rqstp->rq_client = NULL; + if (rqstp->rq_gssclient) + auth_domain_put(rqstp->rq_gssclient); + rqstp->rq_gssclient = NULL; if (rqstp->rq_cred.cr_group_info) put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d9fdf2e4d242..411479411b21 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -638,7 +638,7 @@ static int unix_gid_find(uid_t uid, struct group_info **gip, } } -static int +int svcauth_unix_set_client(struct svc_rqst *rqstp) { struct sockaddr_in *sin = svc_addr_in(rqstp); @@ -673,6 +673,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) return SVC_OK; } +EXPORT_SYMBOL(svcauth_unix_set_client); + static int svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) { -- cgit v1.2.3 From ae4c40b1d81f5299c04330306736b2f0f0539f4b Mon Sep 17 00:00:00 2001 From: Usha Ketineni Date: Tue, 17 Jul 2007 04:04:50 -0700 Subject: knfsd: rpc: add gss krb5 and spkm3 oid values Adds oid values to the gss_api mechanism structures. On the NFSV4 server side, these are required as part of the security triple (oid,qop,service) information being sent in the response of the SECINFO operation. Signed-off-by: Usha Ketineni Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sunrpc/auth_gss/gss_krb5_mech.c | 1 + net/sunrpc/auth_gss/gss_spkm3_mech.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 71b9daefdff3..9843eacef11d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -231,6 +231,7 @@ static struct pf_desc gss_kerberos_pfs[] = { static struct gss_api_mech gss_kerberos_mech = { .gm_name = "krb5", .gm_owner = THIS_MODULE, + .gm_oid = {9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"}, .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 577d590e755f..5deb4b6e4514 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -217,6 +217,7 @@ static struct pf_desc gss_spkm3_pfs[] = { static struct gss_api_mech gss_spkm3_mech = { .gm_name = "spkm3", .gm_owner = THIS_MODULE, + .gm_oid = {7, "\053\006\001\005\005\001\003"}, .gm_ops = &gss_spkm3_ops, .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs), .gm_pfs = gss_spkm3_pfs, -- cgit v1.2.3 From 4796f45740bc6f2e3e6cc14e7ed481b38bd0bd39 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:51 -0700 Subject: knfsd: nfsd4: secinfo handling without secinfo= option We could return some sort of error in the case where someone asks for secinfo on an export without the secinfo= option set--that'd be no worse than what we've been doing. But it's not really correct. So, hack up an approximate secinfo response in that case--it may not be complete, but it'll tell the client at least one acceptable security flavor. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4xdr.c | 30 +++++++++++++++++++++++++++--- include/linux/sunrpc/svcauth_gss.h | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 9 +++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 864498f8f2d9..b3d55c6747fd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -57,6 +57,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -2454,15 +2455,38 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, int nfserr, { int i = 0; struct svc_export *exp = secinfo->si_exp; + u32 nflavs; + struct exp_flavor_info *flavs; + struct exp_flavor_info def_flavs[2]; ENCODE_HEAD; if (nfserr) goto out; + if (exp->ex_nflavors) { + flavs = exp->ex_flavors; + nflavs = exp->ex_nflavors; + } else { /* Handling of some defaults in absence of real secinfo: */ + flavs = def_flavs; + if (exp->ex_client->flavour->flavour == RPC_AUTH_UNIX) { + nflavs = 2; + flavs[0].pseudoflavor = RPC_AUTH_UNIX; + flavs[1].pseudoflavor = RPC_AUTH_NULL; + } else if (exp->ex_client->flavour->flavour == RPC_AUTH_GSS) { + nflavs = 1; + flavs[0].pseudoflavor + = svcauth_gss_flavor(exp->ex_client); + } else { + nflavs = 1; + flavs[0].pseudoflavor + = exp->ex_client->flavour->flavour; + } + } + RESERVE_SPACE(4); - WRITE32(exp->ex_nflavors); + WRITE32(nflavs); ADJUST_ARGS(); - for (i = 0; i < exp->ex_nflavors; i++) { - u32 flav = exp->ex_flavors[i].pseudoflavor; + for (i = 0; i < nflavs; i++) { + u32 flav = flavs[i].pseudoflavor; struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); if (gm) { diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index 5a5db16ab660..417a1def56db 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -22,6 +22,7 @@ int gss_svc_init(void); void gss_svc_shutdown(void); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); +u32 svcauth_gss_flavor(struct auth_domain *dom); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index e4b3de08b040..490697542fc2 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -743,6 +743,15 @@ find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) static struct auth_ops svcauthops_gss; +u32 svcauth_gss_flavor(struct auth_domain *dom) +{ + struct gss_domain *gd = container_of(dom, struct gss_domain, h); + + return gd->pseudoflavor; +} + +EXPORT_SYMBOL(svcauth_gss_flavor); + int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { -- cgit v1.2.3 From 0929c2dd83317813425b937fbc0041013b8685ff Mon Sep 17 00:00:00 2001 From: Ranko Zivojnovic Date: Mon, 16 Jul 2007 18:28:32 -0700 Subject: [NET]: gen_estimator deadlock fix -Fixes ABBA deadlock noted by Patrick McHardy : > There is at least one ABBA deadlock, est_timer() does: > read_lock(&est_lock) > spin_lock(e->stats_lock) (which is dev->queue_lock) > > and qdisc_destroy calls htb_destroy under dev->queue_lock, which > calls htb_destroy_class, then gen_kill_estimator and this > write_locks est_lock. To fix the ABBA deadlock the rate estimators are now kept on an rcu list. -The est_lock changes the use from protecting the list to protecting the update to the 'bstat' pointer in order to avoid NULL dereferencing. -The 'interval' member of the gen_estimator structure removed as it is not needed. Signed-off-by: Ranko Zivojnovic Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 81 +++++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index cc84d8d8a3c7..590a767b029c 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -79,27 +79,27 @@ struct gen_estimator { - struct gen_estimator *next; + struct list_head list; struct gnet_stats_basic *bstats; struct gnet_stats_rate_est *rate_est; spinlock_t *stats_lock; - unsigned interval; int ewma_log; u64 last_bytes; u32 last_packets; u32 avpps; u32 avbps; + struct rcu_head e_rcu; }; struct gen_estimator_head { struct timer_list timer; - struct gen_estimator *list; + struct list_head list; }; static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; -/* Estimator array lock */ +/* Protects against NULL dereference */ static DEFINE_RWLOCK(est_lock); static void est_timer(unsigned long arg) @@ -107,13 +107,17 @@ static void est_timer(unsigned long arg) int idx = (int)arg; struct gen_estimator *e; - read_lock(&est_lock); - for (e = elist[idx].list; e; e = e->next) { + rcu_read_lock(); + list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; u32 npackets; u32 rate; spin_lock(e->stats_lock); + read_lock(&est_lock); + if (e->bstats == NULL) + goto skip; + nbytes = e->bstats->bytes; npackets = e->bstats->packets; rate = (nbytes - e->last_bytes)<<(7 - idx); @@ -125,12 +129,14 @@ static void est_timer(unsigned long arg) e->last_packets = npackets; e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; e->rate_est->pps = (e->avpps+0x1FF)>>10; +skip: + read_unlock(&est_lock); spin_unlock(e->stats_lock); } - if (elist[idx].list != NULL) + if (!list_empty(&elist[idx].list)) mod_timer(&elist[idx].timer, jiffies + ((HZ<interval = parm->interval + 2; + idx = parm->interval + 2; est->bstats = bstats; est->rate_est = rate_est; est->stats_lock = stats_lock; @@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; - est->next = elist[est->interval].list; - if (est->next == NULL) { - init_timer(&elist[est->interval].timer); - elist[est->interval].timer.data = est->interval; - elist[est->interval].timer.expires = jiffies + ((HZ<interval)/4); - elist[est->interval].timer.function = est_timer; - add_timer(&elist[est->interval].timer); + if (!elist[idx].timer.function) { + INIT_LIST_HEAD(&elist[idx].list); + setup_timer(&elist[idx].timer, est_timer, idx); } - write_lock_bh(&est_lock); - elist[est->interval].list = est; - write_unlock_bh(&est_lock); + + if (list_empty(&elist[idx].list)) + mod_timer(&elist[idx].timer, jiffies + ((HZ<list, &elist[idx].list); return 0; } +static void __gen_kill_estimator(struct rcu_head *head) +{ + struct gen_estimator *e = container_of(head, + struct gen_estimator, e_rcu); + kfree(e); +} + /** * gen_kill_estimator - remove a rate estimator * @bstats: basic statistics @@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, * * Removes the rate estimator specified by &bstats and &rate_est * and deletes the timer. + * + * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est) { int idx; - struct gen_estimator *est, **pest; + struct gen_estimator *e, *n; for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - int killed = 0; - pest = &elist[idx].list; - while ((est=*pest) != NULL) { - if (est->rate_est != rate_est || est->bstats != bstats) { - pest = &est->next; + + /* Skip non initialized indexes */ + if (!elist[idx].timer.function) + continue; + + list_for_each_entry_safe(e, n, &elist[idx].list, list) { + if (e->rate_est != rate_est || e->bstats != bstats) continue; - } write_lock_bh(&est_lock); - *pest = est->next; + e->bstats = NULL; write_unlock_bh(&est_lock); - kfree(est); - killed++; + list_del_rcu(&e->list); + call_rcu(&e->e_rcu, __gen_kill_estimator); } - if (killed && elist[idx].list == NULL) - del_timer(&elist[idx].timer); } } -- cgit v1.2.3 From 782f7956891c95e54c5b008b24ebf9e82fd84796 Mon Sep 17 00:00:00 2001 From: vignesh babu Date: Mon, 16 Jul 2007 18:30:36 -0700 Subject: [ATM]: Replacing kmalloc/memset combination with kzalloc. Signed-off-by: vignesh babu Signed-off-by: chas williams Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 417ec8fb7f1a..ddc4f2c54379 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -292,13 +292,12 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, } } DPRINTK("atm_tc_change: new id %x\n", classid); - flow = kmalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); + flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); DPRINTK("atm_tc_change: flow %p\n", flow); if (!flow) { error = -ENOBUFS; goto err_out; } - memset(flow, 0, sizeof(*flow)); flow->filter_list = NULL; if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) flow->q = &noop_qdisc; -- cgit v1.2.3 From b5492c4ed7a3fbbfa1a62db54e4934fd0b6a45ac Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Mon, 16 Jul 2007 18:31:24 -0700 Subject: [ATM]: Eliminate dead config variable CONFIG_BR2684_FAST_TRANS. Signed-off-by: chas williams Signed-off-by: David S. Miller --- net/atm/br2684.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index faa6aaf67563..c0f6861eefe3 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -460,11 +460,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb_pull(skb, plen); skb_set_mac_header(skb, -ETH_HLEN); skb->pkt_type = PACKET_HOST; -#ifdef CONFIG_BR2684_FAST_TRANS - skb->protocol = ((u16 *) skb->data)[-1]; -#else /* some protocols might require this: */ skb->protocol = br_type_trans(skb, net_dev); -#endif /* CONFIG_BR2684_FAST_TRANS */ #else skb_pull(skb, plen - ETH_HLEN); skb->protocol = eth_type_trans(skb, net_dev); -- cgit v1.2.3 From 16751347a060a10c09b11593bb179fd5b0240c04 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 16 Jul 2007 18:35:52 -0700 Subject: [TCP]: remove unused argument to cong_avoid op None of the existing TCP congestion controls use the rtt value pased in the ca_ops->cong_avoid interface. Which is lucky because seq_rtt could have been -1 when handling a duplicate ack. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++---- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cong.c | 3 +-- net/ipv4/tcp_cubic.c | 2 +- net/ipv4/tcp_highspeed.c | 2 +- net/ipv4/tcp_htcp.c | 2 +- net/ipv4/tcp_hybla.c | 4 ++-- net/ipv4/tcp_illinois.c | 2 +- net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_lp.c | 5 ++--- net/ipv4/tcp_scalable.c | 2 +- net/ipv4/tcp_vegas.c | 6 +++--- net/ipv4/tcp_veno.c | 6 +++--- net/ipv4/tcp_yeah.c | 2 +- 14 files changed, 24 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a8af9ae00177..8b404b1ef7c8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -652,8 +652,7 @@ struct tcp_congestion_ops { /* lower bound for congestion window (optional) */ u32 (*min_cwnd)(const struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, - u32 rtt, u32 in_flight, int good_ack); + void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight, int good_ack); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ @@ -684,8 +683,7 @@ extern void tcp_slow_start(struct tcp_sock *tp); extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); -extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, - u32 rtt, u32 in_flight, int flag); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag); extern u32 tcp_reno_min_cwnd(const struct sock *sk); extern struct tcp_congestion_ops tcp_reno; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index dd9ef65ad3ff..519de091a94d 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -137,7 +137,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int data_acked) + u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1260e52ad772..55fca1820c34 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -324,8 +324,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, - int flag) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ebfaac2f9f46..d17da30d82d6 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -270,7 +270,7 @@ static inline void measure_delay(struct sock *sk) } static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int data_acked) + u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 43d624e5043c..14a073d8b60f 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,7 +109,7 @@ static void hstcp_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4ba4a7ae0a85..632c05a75883 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void htcp_cong_avoid(struct sock *sk, u32 ack, s32 rtt, u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index e5be35117223..b3e55cf56171 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -85,7 +85,7 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -103,7 +103,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, return; if (!ca->hybla_en) - return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); if (ca->rho == 0) hybla_recalc_param(sk); diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index b2b2256d3b84..cc5de6f69d46 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -258,7 +258,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) /* * Increase window in response to successful acknowledgment. */ -static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4e5884ac8f29..fec8a7a4dbaf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2323,11 +2323,11 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, flag); } -static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int good) { const struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); + icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good); tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } @@ -2826,11 +2826,11 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); + tcp_cong_avoid(sk, ack, prior_in_flight, 0); tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) - tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); + tcp_cong_avoid(sk, ack, prior_in_flight, 1); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index e49836ce012e..80e140e3ec2d 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk) * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. */ -static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, - int flag) +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) - tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } /** diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 4624501e9680..be27a33a1c68 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,7 +15,7 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index e218a51ceced..914e0307f7af 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -163,13 +163,13 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); /* The key players are v_beg_snd_una and v_beg_snd_nxt. * @@ -228,7 +228,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } else { u32 rtt, target_cwnd, diff; diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index ec854cc5fad5..7a55ddf86032 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -115,13 +115,13 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) } static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); if (!veno->doing_veno_now) - return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); /* limited by applications */ if (!tcp_is_cwnd_limited(sk, in_flight)) @@ -132,7 +132,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough rtt samples to do the Veno * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } else { u32 rtt, target_cwnd; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 545ed237ab53..c04b7c6ec702 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -70,7 +70,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) } static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); -- cgit v1.2.3 From bd0bf0765ea1fba80d7085e1f0375ec045631dc1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 18 Jul 2007 01:55:52 -0700 Subject: [XFRM]: Fix crash introduced by struct dst_entry reordering XFRM expects xfrm_dst->u.next to be same pointer as dst->next, which was broken by the dst_entry reordering in commit 1e19e02c~, causing an oops in xfrm_bundle_ok when walking the bundle upwards. Kill xfrm_dst->u.next and change the only user to use dst->next instead. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ae959e950174..a5f80bfbaaa4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -585,7 +585,6 @@ static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ct struct xfrm_dst { union { - struct xfrm_dst *next; struct dst_entry dst; struct rtable rt; struct rt6_info rt6; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 157bfbd250ba..b48f06fc9fd9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2141,7 +2141,7 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (last == first) break; - last = last->u.next; + last = (struct xfrm_dst *)last->u.dst.next; last->child_mtu_cached = mtu; } -- cgit v1.2.3 From 99acaeb92fc2d52900f00b8e926d9ad81b6e93bb Mon Sep 17 00:00:00 2001 From: Gabriel Craciunescu Date: Wed, 18 Jul 2007 02:00:04 -0700 Subject: [PKT_SCHED]: Some typo fixes in net/sched/Kconfig Signed-off-by: Gabriel Craciunescu Signed-off-by: David S. Miller --- net/sched/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index d3f7c3f9407a..8a74cac0be8c 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -97,7 +97,7 @@ config NET_SCH_ATM select classes of this queuing discipline. Each class maps the flow(s) it is handling to a given virtual circuit. - See the top of ) for more details. + See the top of for more details. To compile this code as a module, choose M here: the module will be called sch_atm. @@ -137,7 +137,7 @@ config NET_SCH_SFQ tristate "Stochastic Fairness Queueing (SFQ)" ---help--- Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) - packet scheduling algorithm . + packet scheduling algorithm. See the top of for more details. @@ -306,7 +306,7 @@ config NET_CLS_RSVP6 is important for real time data such as streaming sound or video. Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests and you are using the IPv6. + on their RSVP requests and you are using the IPv6 protocol. To compile this code as a module, choose M here: the module will be called cls_rsvp6. -- cgit v1.2.3 From eb4965344965530411359891214cd6fcab483649 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2007 02:07:51 -0700 Subject: [NETLINK]: negative groups in netlink_setsockopt Reading netlink_setsockopt it's not immediately clear why there isn't a bug when you pass in negative numbers, the reason being that the >= comparison is really unsigned although 'val' is signed because nlk->ngroups is unsigned. Make 'val' unsigned too. [ Update the get_user() cast to match. --DaveM ] Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a3c8e692f493..641cfbc278d8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1012,13 +1012,14 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - int val = 0, err; + unsigned int val = 0; + int err; if (level != SOL_NETLINK) return -ENOPROTOOPT; if (optlen >= sizeof(int) && - get_user(val, (int __user *)optval)) + get_user(val, (unsigned int __user *)optval)) return -EFAULT; switch (optname) { -- cgit v1.2.3 From 456ad75c89cdb72e11dcdb6b0794802a6f50c8a3 Mon Sep 17 00:00:00 2001 From: Denis Cheng Date: Wed, 18 Jul 2007 02:10:54 -0700 Subject: [NET]: move dev_mc_discard from dev_mcast.c to dev.c Because this function is only called by unregister_netdevice, this moving could make this non-global function static, and also remove its declaration in netdevice.h; Any further, function __dev_addr_discard is also just called by dev_mc_discard and dev_unicast_discard, keeping this two functions both in one c file could make __dev_addr_discard also static and remove its declaration in netdevice.h; Futhermore, the sequential call to dev_unicast_discard and then dev_mc_discard in unregister_netdevice have a similar mechanism that: (netif_tx_lock_bh / __dev_addr_discard / netif_tx_unlock_bh), they should merged into one to eliminate duplicates in acquiring and releasing the dev->_xmit_lock, this would be done in my following patch. Signed-off-by: Denis Cheng Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- net/core/dev.c | 14 +++++++++++++- net/core/dev_mcast.c | 12 ------------ 3 files changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index da7a13c97eb8..9820ca1e45e2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1098,10 +1098,8 @@ extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern int dev_mc_sync(struct net_device *to, struct net_device *from); extern void dev_mc_unsync(struct net_device *to, struct net_device *from); -extern void dev_mc_discard(struct net_device *dev); extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); -extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 13a0d9f6da54..3ba63aaa3001 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2715,7 +2715,7 @@ int __dev_addr_add(struct dev_addr_list **list, int *count, return 0; } -void __dev_addr_discard(struct dev_addr_list **list) +static void __dev_addr_discard(struct dev_addr_list **list) { struct dev_addr_list *tmp; @@ -2785,6 +2785,18 @@ static void dev_unicast_discard(struct net_device *dev) netif_tx_unlock_bh(dev); } +/* + * Discard multicast list when a device is downed + */ + +static void dev_mc_discard(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_addr_discard(&dev->mc_list); + dev->mc_count = 0; + netif_tx_unlock_bh(dev); +} + unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 235a2a8a0d05..99aece1aeccf 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -177,18 +177,6 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) } EXPORT_SYMBOL(dev_mc_unsync); -/* - * Discard multicast list when a device is downed - */ - -void dev_mc_discard(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - __dev_addr_discard(&dev->mc_list); - dev->mc_count = 0; - netif_tx_unlock_bh(dev); -} - #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) { -- cgit v1.2.3 From 26cc2522cb6ebf0c1c736485e102e9654cde1145 Mon Sep 17 00:00:00 2001 From: Denis Cheng Date: Wed, 18 Jul 2007 02:12:03 -0700 Subject: [NET]: merge dev_unicast_discard and dev_mc_discard into one this two functions could share the dev->_xmit_lock acquired context. Signed-off-by: Denis Cheng Signed-off-by: David S. Miller --- net/core/dev.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3ba63aaa3001..17c9cbd77eb0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2777,23 +2777,16 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) } EXPORT_SYMBOL(dev_unicast_add); -static void dev_unicast_discard(struct net_device *dev) +static void dev_addr_discard(struct net_device *dev) { netif_tx_lock_bh(dev); + __dev_addr_discard(&dev->uc_list); dev->uc_count = 0; - netif_tx_unlock_bh(dev); -} -/* - * Discard multicast list when a device is downed - */ - -static void dev_mc_discard(struct net_device *dev) -{ - netif_tx_lock_bh(dev); __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; + netif_tx_unlock_bh(dev); } @@ -3751,8 +3744,7 @@ void unregister_netdevice(struct net_device *dev) /* * Flush the unicast and multicast chains */ - dev_unicast_discard(dev); - dev_mc_discard(dev); + dev_addr_discard(dev); if (dev->uninit) dev->uninit(dev); -- cgit v1.2.3 From 12972621c8a18465e3d20cc8e3006a8b7f7788df Mon Sep 17 00:00:00 2001 From: Denis Cheng Date: Wed, 18 Jul 2007 02:12:56 -0700 Subject: [NET]: move __dev_addr_discard adjacent to dev_addr_discard for readability Signed-off-by: Denis Cheng Signed-off-by: David S. Miller --- net/core/dev.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 17c9cbd77eb0..6357f54c8ff7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2715,20 +2715,6 @@ int __dev_addr_add(struct dev_addr_list **list, int *count, return 0; } -static void __dev_addr_discard(struct dev_addr_list **list) -{ - struct dev_addr_list *tmp; - - while (*list != NULL) { - tmp = *list; - *list = tmp->next; - if (tmp->da_users > tmp->da_gusers) - printk("__dev_addr_discard: address leakage! " - "da_users=%d\n", tmp->da_users); - kfree(tmp); - } -} - /** * dev_unicast_delete - Release secondary unicast address. * @dev: device @@ -2777,6 +2763,20 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) } EXPORT_SYMBOL(dev_unicast_add); +static void __dev_addr_discard(struct dev_addr_list **list) +{ + struct dev_addr_list *tmp; + + while (*list != NULL) { + tmp = *list; + *list = tmp->next; + if (tmp->da_users > tmp->da_gusers) + printk("__dev_addr_discard: address leakage! " + "da_users=%d\n", tmp->da_users); + kfree(tmp); + } +} + static void dev_addr_discard(struct net_device *dev) { netif_tx_lock_bh(dev); -- cgit v1.2.3 From 75a69ac6d66d2504ecbc4b46645fb0835a55a57c Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 18 Jul 2007 02:16:30 -0700 Subject: [IrDA]: Fix IrDA build failure When having built-in IrDA, we hit the following error: `irda_sysctl_unregister' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o `irda_proc_unregister' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o `irsock_cleanup' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o `irttp_cleanup' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o `iriap_cleanup' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o `irda_device_cleanup' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o `irlap_cleanup' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o `irlmp_cleanup' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o make[1]: *** [.tmp_vmlinux1] Error 1 make: *** [_all] Error 2 This is due to the irda_init fix recently added, where we call __exit routines from an __init one. It is a build failure that I didn't catch because it doesn't show up when building IrDA as a module. My apologies for that. The following patch fixes that failure and is against your net-2.6 tree. I hope it can make it to the merge window, and stable@kernel.org is CCed on this mail. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- net/irda/af_irda.c | 2 +- net/irda/irda_device.c | 4 ++-- net/irda/iriap.c | 2 +- net/irda/irlap.c | 2 +- net/irda/irlmp.c | 2 +- net/irda/irproc.c | 2 +- net/irda/irsysctl.c | 2 +- net/irda/irttp.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index dcd7e325b283..4c670cf6aefa 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2567,7 +2567,7 @@ int __init irsock_init(void) * Remove IrDA protocol * */ -void __exit irsock_cleanup(void) +void irsock_cleanup(void) { sock_unregister(PF_IRDA); proto_unregister(&irda_proto); diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 7b5def1ea633..435b563d29a6 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -95,14 +95,14 @@ int __init irda_device_init( void) return 0; } -static void __exit leftover_dongle(void *arg) +static void leftover_dongle(void *arg) { struct dongle_reg *reg = arg; IRDA_WARNING("IrDA: Dongle type %x not unregistered\n", reg->type); } -void __exit irda_device_cleanup(void) +void irda_device_cleanup(void) { IRDA_DEBUG(4, "%s()\n", __FUNCTION__); diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 774eb707940c..ee3889fa49ab 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -153,7 +153,7 @@ int __init iriap_init(void) * Initializes the IrIAP layer, called by the module cleanup code in * irmod.c */ -void __exit iriap_cleanup(void) +void iriap_cleanup(void) { irlmp_unregister_service(service_handle); diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 2fc9f518f89d..3d76aafdb2e5 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -95,7 +95,7 @@ int __init irlap_init(void) return 0; } -void __exit irlap_cleanup(void) +void irlap_cleanup(void) { IRDA_ASSERT(irlap != NULL, return;); diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 24a5e3f23778..7efa930ed684 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -116,7 +116,7 @@ int __init irlmp_init(void) * Remove IrLMP layer * */ -void __exit irlmp_cleanup(void) +void irlmp_cleanup(void) { /* Check for main structure */ IRDA_ASSERT(irlmp != NULL, return;); diff --git a/net/irda/irproc.c b/net/irda/irproc.c index d6f9aba5b9dc..181cb51b48a8 100644 --- a/net/irda/irproc.c +++ b/net/irda/irproc.c @@ -84,7 +84,7 @@ void __init irda_proc_register(void) * Unregister irda entry in /proc file system * */ -void __exit irda_proc_unregister(void) +void irda_proc_unregister(void) { int i; diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 2e968e7d8fea..957e04feb0f7 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -287,7 +287,7 @@ int __init irda_sysctl_register(void) * Unregister our sysctl interface * */ -void __exit irda_sysctl_unregister(void) +void irda_sysctl_unregister(void) { unregister_sysctl_table(irda_table_header); } diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 7f50832a2cd5..3d7ab03fb131 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -109,7 +109,7 @@ int __init irttp_init(void) * Called by module destruction/cleanup code * */ -void __exit irttp_cleanup(void) +void irttp_cleanup(void) { /* Check for main structure */ IRDA_ASSERT(irttp->magic == TTP_MAGIC, return;); -- cgit v1.2.3 From 3fd8f9e4b6c184d03d340bc86630f700de967fa8 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 18 Jul 2007 02:38:32 -0700 Subject: [NETFILTER]: xt_connlimit needs to depend on nf_conntrack With NF_CONNTRACK=n, NETFILTER_XT_MATCH_CONNLIMIT=m I get the following errors on current git: CC [M] net/netfilter/xt_connlimit.o In file included from net/netfilter/xt_connlimit.c:27: include/net/netfilter/nf_conntrack.h:100: error: field 'ct_general' has incomplete type include/net/netfilter/nf_conntrack.h: In function 'nf_ct_get': include/net/netfilter/nf_conntrack.h:164: error: 'const struct sk_buff' has no member named 'nfct' include/net/netfilter/nf_conntrack.h: In function 'nf_ct_put': include/net/netfilter/nf_conntrack.h:171: warning: implicit declaration of function 'nf_conntrack_put' include/net/netfilter/nf_conntrack.h: In function 'nf_ct_is_untracked': include/net/netfilter/nf_conntrack.h:253: error: 'const struct sk_buff' has no member named 'nfct' In file included from net/netfilter/xt_connlimit.c:28: include/net/netfilter/nf_conntrack_core.h: In function 'nf_conntrack_confirm': include/net/netfilter/nf_conntrack_core.h:68: error: 'struct sk_buff' has no member named 'nfct' Adding a dependency in Kconfig fixes this. Signed-off-by: Cornelia Huck Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3ac39f1ec775..3599770a2473 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -436,6 +436,7 @@ config NETFILTER_XT_MATCH_CONNBYTES config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' depends on NETFILTER_XTABLES + depends on NF_CONNTRACK ---help--- This match allows you to match against the number of parallel connections to a server per client IP address (or address block). -- cgit v1.2.3 From 1e66df3ee301209f4a38df097d7cc5cb9b367a3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: add kstrndup Add a kstrndup function, modelled on strndup. Like strndup this returns a string copied into its own allocated memory, but it copies no more than the specified number of bytes from the source. Remove private strndup() from irda code. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Andrew Morton Cc: Randy Dunlap Cc: YOSHIFUJI Hideaki Cc: Akinobu Mita Cc: Arnaldo Carvalho de Melo Cc: Al Viro Cc: Panagiotis Issaris Cc: Rene Scharfe --- include/linux/string.h | 1 + mm/util.c | 26 ++++++++++++++++++++++++-- net/irda/irias_object.c | 43 +++++-------------------------------------- 3 files changed, 30 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/include/linux/string.h b/include/linux/string.h index 7f2eb6a477f9..ee5e9ccc4aae 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -105,6 +105,7 @@ extern void * memchr(const void *,int,__kernel_size_t); #endif extern char *kstrdup(const char *s, gfp_t gfp); +extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); #ifdef __cplusplus diff --git a/mm/util.c b/mm/util.c index 78f3783bdcc8..bf340d806868 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,7 +6,6 @@ /** * kstrdup - allocate space for and copy an existing string - * * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory */ @@ -26,6 +25,30 @@ char *kstrdup(const char *s, gfp_t gfp) } EXPORT_SYMBOL(kstrdup); +/** + * kstrndup - allocate space for and copy an existing string + * @s: the string to duplicate + * @max: read at most @max chars from @s + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + */ +char *kstrndup(const char *s, size_t max, gfp_t gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strnlen(s, max); + buf = kmalloc_track_caller(len+1, gfp); + if (buf) { + memcpy(buf, s, len); + buf[len] = '\0'; + } + return buf; +} +EXPORT_SYMBOL(kstrndup); + /** * kmemdup - duplicate region of memory * @@ -80,7 +103,6 @@ EXPORT_SYMBOL(krealloc); /* * strndup_user - duplicate an existing string from user space - * * @s: The string to duplicate * @n: Maximum number of bytes to copy, including the trailing NUL. */ diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index 4adaae242b9e..cf302457097b 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -36,39 +36,6 @@ hashbin_t *irias_objects; */ struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}}; -/* - * Function strndup (str, max) - * - * My own kernel version of strndup! - * - * Faster, check boundary... Jean II - */ -static char *strndup(char *str, size_t max) -{ - char *new_str; - int len; - - /* Check string */ - if (str == NULL) - return NULL; - /* Check length, truncate */ - len = strlen(str); - if(len > max) - len = max; - - /* Allocate new string */ - new_str = kmalloc(len + 1, GFP_ATOMIC); - if (new_str == NULL) { - IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); - return NULL; - } - - /* Copy and truncate */ - memcpy(new_str, str, len); - new_str[len] = '\0'; - - return new_str; -} /* * Function ias_new_object (name, id) @@ -90,7 +57,7 @@ struct ias_object *irias_new_object( char *name, int id) } obj->magic = IAS_OBJECT_MAGIC; - obj->name = strndup(name, IAS_MAX_CLASSNAME); + obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC); if (!obj->name) { IRDA_WARNING("%s(), Unable to allocate name!\n", __FUNCTION__); @@ -360,7 +327,7 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); /* Insert value */ attrib->value = irias_new_integer_value(value); @@ -404,7 +371,7 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); attrib->value = irias_new_octseq_value( octets, len); if (!attrib->name || !attrib->value) { @@ -446,7 +413,7 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); attrib->value = irias_new_string_value(value); if (!attrib->name || !attrib->value) { @@ -506,7 +473,7 @@ struct ias_value *irias_new_string_value(char *string) value->type = IAS_STRING; value->charset = CS_ASCII; - value->t.string = strndup(string, IAS_MAX_STRING); + value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC); if (!value->t.string) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); kfree(value); -- cgit v1.2.3 From 86313c488a6848b7ec2ba04e74f25f79dd32a0b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: usermodehelper: Tidy up waiting Rather than using a tri-state integer for the wait flag in call_usermodehelper_exec, define a proper enum, and use that. I've preserved the integer values so that any callers I've missed should still work OK. Signed-off-by: Jeremy Fitzhardinge Cc: James Bottomley Cc: Randy Dunlap Cc: Christoph Hellwig Cc: Andi Kleen Cc: Paul Mackerras Cc: Johannes Berg Cc: Ralf Baechle Cc: Bjorn Helgaas Cc: Joel Becker Cc: Tony Luck Cc: Kay Sievers Cc: Srivatsa Vaddagiri Cc: Oleg Nesterov Cc: David Howells --- arch/i386/mach-voyager/voyager_thread.c | 2 +- arch/x86_64/kernel/mce.c | 2 +- drivers/macintosh/therm_pm72.c | 3 ++- drivers/macintosh/windfarm_core.c | 3 ++- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/pnp/pnpbios/core.c | 2 +- fs/ocfs2/heartbeat.c | 2 +- include/linux/kmod.h | 12 +++++++++--- kernel/cpuset.c | 2 +- kernel/kmod.c | 27 ++++++++++++++++----------- kernel/sys.c | 2 +- lib/kobject_uevent.c | 2 +- net/bridge/br_stp_if.c | 2 +- security/keys/request_key.c | 3 ++- 14 files changed, 40 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c index b4b24e0e45e1..f9d595338159 100644 --- a/arch/i386/mach-voyager/voyager_thread.c +++ b/arch/i386/mach-voyager/voyager_thread.c @@ -52,7 +52,7 @@ execute(const char *string) NULL, }; - if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) { + if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string, ret); } diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index aa1d15991794..f3fb8174559e 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -174,7 +174,7 @@ static void do_mce_trigger(void) if (events != atomic_read(&mce_logged) && trigger[0]) { /* Small race window, but should be harmless. */ atomic_set(&mce_logged, events); - call_usermodehelper(trigger, trigger_argv, NULL, -1); + call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); } } diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index dbb22403979f..3d90fc002097 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -1770,7 +1770,8 @@ static int call_critical_overtemp(void) "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - return call_usermodehelper(critical_overtemp_path, argv, envp, 0); + return call_usermodehelper(critical_overtemp_path, + argv, envp, UMH_WAIT_EXEC); } diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index e18d265d5d33..516d943227e2 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -80,7 +80,8 @@ int wf_critical_overtemp(void) "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - return call_usermodehelper(critical_overtemp_path, argv, envp, 0); + return call_usermodehelper(critical_overtemp_path, + argv, envp, UMH_WAIT_EXEC); } EXPORT_SYMBOL_GPL(wf_critical_overtemp); diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 84aa2117c0ee..355c6cf3d112 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -320,7 +320,7 @@ static int eppconfig(struct baycom_state *bc) sprintf(portarg, "%ld", bc->pdev->port->base); printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg); - return call_usermodehelper(eppconfig_path, argv, envp, 1); + return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC); } /* ---------------------------------------------------------------------- */ diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 03baf1c64a2e..ed112ee16012 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -147,7 +147,7 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info) info->location_id, info->serial, info->capabilities); envp[i] = NULL; - value = call_usermodehelper (argv [0], argv, envp, 0); + value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC); kfree (buf); kfree (envp); return 0; diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 352eb4a13f98..c4c36171240d 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -209,7 +209,7 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb) envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[2] = NULL; - ret = call_usermodehelper(argv[0], argv, envp, 1); + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (ret < 0) mlog_errno(ret); } diff --git a/include/linux/kmod.h b/include/linux/kmod.h index c4cbe59d9c67..5dc13848891b 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -51,15 +51,21 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, void call_usermodehelper_setcleanup(struct subprocess_info *info, void (*cleanup)(char **argv, char **envp)); +enum umh_wait { + UMH_NO_WAIT = -1, /* don't wait at all */ + UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */ + UMH_WAIT_PROC = 1, /* wait for the process to complete */ +}; + /* Actually execute the sub-process */ -int call_usermodehelper_exec(struct subprocess_info *info, int wait); +int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait); /* Free the subprocess_info. This is only needed if you're not going to call call_usermodehelper_exec */ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int -call_usermodehelper(char *path, char **argv, char **envp, int wait) +call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) { struct subprocess_info *info; @@ -71,7 +77,7 @@ call_usermodehelper(char *path, char **argv, char **envp, int wait) static inline int call_usermodehelper_keys(char *path, char **argv, char **envp, - struct key *session_keyring, int wait) + struct key *session_keyring, enum umh_wait wait) { struct subprocess_info *info; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b4796d850140..57e6448b171e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -516,7 +516,7 @@ static void cpuset_release_agent(const char *pathbuf) envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[i] = NULL; - call_usermodehelper(argv[0], argv, envp, 0); + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); kfree(pathbuf); } diff --git a/kernel/kmod.c b/kernel/kmod.c index d2dce71115d8..78d365c524ed 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -119,7 +119,7 @@ struct subprocess_info { char **argv; char **envp; struct key *ring; - int wait; + enum umh_wait wait; int retval; struct file *stdin; void (*cleanup)(char **argv, char **envp); @@ -225,7 +225,7 @@ static int wait_for_helper(void *data) sub_info->retval = ret; } - if (sub_info->wait < 0) + if (sub_info->wait == UMH_NO_WAIT) call_usermodehelper_freeinfo(sub_info); else complete(sub_info->complete); @@ -238,26 +238,31 @@ static void __call_usermodehelper(struct work_struct *work) struct subprocess_info *sub_info = container_of(work, struct subprocess_info, work); pid_t pid; - int wait = sub_info->wait; + enum umh_wait wait = sub_info->wait; /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. */ - if (wait) + if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT) pid = kernel_thread(wait_for_helper, sub_info, CLONE_FS | CLONE_FILES | SIGCHLD); else pid = kernel_thread(____call_usermodehelper, sub_info, CLONE_VFORK | SIGCHLD); - if (wait < 0) - return; + switch (wait) { + case UMH_NO_WAIT: + break; - if (pid < 0) { + case UMH_WAIT_PROC: + if (pid > 0) + break; sub_info->retval = pid; + /* FALLTHROUGH */ + + case UMH_WAIT_EXEC: complete(sub_info->complete); - } else if (!wait) - complete(sub_info->complete); + } } /** @@ -359,7 +364,7 @@ EXPORT_SYMBOL(call_usermodehelper_stdinpipe); * (ie. it runs with full root capabilities). */ int call_usermodehelper_exec(struct subprocess_info *sub_info, - int wait) + enum umh_wait wait) { DECLARE_COMPLETION_ONSTACK(done); int retval; @@ -378,7 +383,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, sub_info->wait = wait; queue_work(khelper_wq, &sub_info->work); - if (wait < 0) /* task has freed sub_info */ + if (wait == UMH_NO_WAIT) /* task has freed sub_info */ return 0; wait_for_completion(&done); retval = sub_info->retval; diff --git a/kernel/sys.c b/kernel/sys.c index aeded9ad66ce..18987c7f6add 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2327,7 +2327,7 @@ int orderly_poweroff(bool force) call_usermodehelper_setcleanup(info, argv_cleanup); - ret = call_usermodehelper_exec(info, -1); + ret = call_usermodehelper_exec(info, UMH_NO_WAIT); out: if (ret && force) { diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 12e311dc664c..bd5ecbbafab1 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -208,7 +208,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, argv [0] = uevent_helper; argv [1] = (char *)subsystem; argv [2] = NULL; - call_usermodehelper (argv[0], argv, envp, 0); + call_usermodehelper (argv[0], argv, envp, UMH_WAIT_EXEC); } exit: diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a786e7863200..1ea2f86f7683 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -125,7 +125,7 @@ static void br_stp_start(struct net_bridge *br) char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; - r = call_usermodehelper(BR_STP_PROG, argv, envp, 1); + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); if (r == 0) { br->stp_enabled = BR_USER_STP; printk(KERN_INFO "%s: userspace STP started\n", br->dev->name); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index f573ac189a0a..557500110a13 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -108,7 +108,8 @@ static int call_sbin_request_key(struct key *key, argv[i] = NULL; /* do it */ - ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1); + ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, + UMH_WAIT_PROC); error_link: key_put(keyring); -- cgit v1.2.3