From ebe0bbf06c9e03613bdcb6b5a704595a9344b7ff Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:19 -0700 Subject: [NETFILTER] nfnetlink: use highest bit of nfa_type to indicate nested TLV As Henrik Nordstrom pointed out, all our efforts with "split endian" (i.e. host byte order tags, net byte order values) are useless, unless a parser can determine whether an attribute is nested or not. This patch steals the highest bit of nfattr.nfa_type to indicate whether the data payload contains a nested nfattr (1) or not (0). This will break userspace compatibility, but luckily no kernel with nfnetlink was released so far. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1d5b10ae2399..f08e870100f4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -41,11 +41,15 @@ enum nfnetlink_groups { struct nfattr { u_int16_t nfa_len; - u_int16_t nfa_type; + u_int16_t nfa_type; /* we use 15 bits for the type, and the highest + * bit to indicate whether the payload is nested */ } __attribute__ ((packed)); -/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time - * to put this in a generic file */ +/* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from + * rtnetlink.h, it's time to put this in a generic file */ + +#define NFNL_NFA_NEST 0x8000 +#define NFA_TYPE(attr) ((attr)->nfa_type & 0x7fff) #define NFA_ALIGNTO 4 #define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) @@ -59,7 +63,7 @@ struct nfattr #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) #define NFA_NEST(skb, type) \ ({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ - NFA_PUT(skb, type, 0, NULL); \ + NFA_PUT(skb, (NFNL_NFA_NEST | type), 0, NULL); \ __start; }) #define NFA_NEST_END(skb, start) \ ({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ -- cgit v1.2.3 From b3a91d037a2575040f9b6a483f60c407a3d80368 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:36 -0700 Subject: [NETFILTER] nat: remove bogus structure member When 'rustynat' was merged in 2.6.12, the use of the "helper" pointer of struct ipt_nat_info was obsoleted, but the pointer not removed from the struct. This patch removes the pointer, thereby yet again shrinking struct ip_conntrack. Discovered-by: Rusty Russell Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index e201ec6e9905..41a107de17cf 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -58,10 +58,6 @@ extern rwlock_t ip_nat_lock; struct ip_nat_info { struct list_head bysource; - - /* Helper (NULL if none). */ - struct ip_nat_helper *helper; - struct ip_nat_seq seq[IP_CT_DIR_MAX]; }; -- cgit v1.2.3 From 5bbc243aafff9ad653dc7a9fa7bcaf0b4631355a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:54:01 -0700 Subject: [NETFILTER]: Add missing include to ip_conntrack_tuple.h Without this #include, __be16 is not defined and userspace programs will break. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 20e43f018b7c..3232db11a4e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -1,6 +1,8 @@ #ifndef _IP_CONNTRACK_TUPLE_H #define _IP_CONNTRACK_TUPLE_H +#include + /* A `tuple' is a structure containing the information to uniquely identify a connection. ie. if two packets have the same tuple, they are in the same connection; if not, they are not. -- cgit v1.2.3 From e1c73b78e3706bd3c336d4730a01dd4081dfb7ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 20:55:49 -0700 Subject: [NETFILTER] ctnetlink: add one nesting level for TCP state To keep consistency, the TCP private protocol information is nested attributes under CTA_PROTOINFO_TCP. This way the sequence of attributes to access the TCP state information looks like here below: CTA_PROTOINFO CTA_PROTOINFO_TCP CTA_PROTOINFO_TCP_STATE instead of: CTA_PROTOINFO CTA_PROTOINFO_TCP_STATE Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 9 ++++++++- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 5c55751c78e4..fb5511030185 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -70,11 +70,18 @@ enum ctattr_l4proto { enum ctattr_protoinfo { CTA_PROTOINFO_UNSPEC, - CTA_PROTOINFO_TCP_STATE, + CTA_PROTOINFO_TCP, __CTA_PROTOINFO_MAX }; #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) +enum ctattr_protoinfo_tcp { + CTA_PROTOINFO_TCP_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_TCP_MAX +}; +#define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1) + enum ctattr_counters { CTA_COUNTERS_UNSPEC, CTA_COUNTERS_PACKETS, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 121760d6cc50..75e27e65c28f 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -341,11 +341,15 @@ static int tcp_print_conntrack(struct seq_file *s, static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct) { + struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + read_lock_bh(&tcp_lock); NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), &ct->proto.tcp.state); read_unlock_bh(&tcp_lock); + NFA_NEST_END(skb, nest_parms); + return 0; nfattr_failure: -- cgit v1.2.3 From a051a8f7306476af0a74370ad56e793cb6c43bf7 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 21:21:10 -0700 Subject: [NETFILTER]: Use only 32bit counters for CONNTRACK_ACCT Initially we used 64bit counters for conntrack-based accounting, since we had no event mechanism to tell userspace that our counters are about to overflow. With nfnetlink_conntrack, we now have such a event mechanism and thus can save 16bytes per connection. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 6 ++++-- include/linux/netfilter_ipv4/ip_conntrack.h | 8 ++++++-- net/ipv4/netfilter/ip_conntrack_core.c | 13 ++++++++----- net/ipv4/netfilter/ip_conntrack_netlink.c | 8 ++++---- 4 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index fb5511030185..116fcaced909 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -84,8 +84,10 @@ enum ctattr_protoinfo_tcp { enum ctattr_counters { CTA_COUNTERS_UNSPEC, - CTA_COUNTERS_PACKETS, - CTA_COUNTERS_BYTES, + CTA_COUNTERS_PACKETS, /* old 64bit counters */ + CTA_COUNTERS_BYTES, /* old 64bit counters */ + CTA_COUNTERS32_PACKETS, + CTA_COUNTERS32_BYTES, __CTA_COUNTERS_MAX }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 4ced38736813..d078bb91d9e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -117,6 +117,10 @@ enum ip_conntrack_events /* NAT info */ IPCT_NATINFO_BIT = 10, IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), + + /* Counter highest bit has been set */ + IPCT_COUNTER_FILLING_BIT = 11, + IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), }; enum ip_conntrack_expect_events { @@ -192,8 +196,8 @@ do { \ struct ip_conntrack_counter { - u_int64_t packets; - u_int64_t bytes; + u_int32_t packets; + u_int32_t bytes; }; struct ip_conntrack_helper; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index ea65dd3e517a..07a80b56e8dc 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1119,7 +1119,7 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, unsigned long extra_jiffies, int do_acct) { - int do_event = 0; + int event = 0; IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); IP_NF_ASSERT(skb); @@ -1129,13 +1129,13 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - do_event = 1; + event = IPCT_REFRESH; } else { /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - do_event = 1; + event = IPCT_REFRESH; } } @@ -1144,14 +1144,17 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, ct->counters[CTINFO2DIR(ctinfo)].packets++; ct->counters[CTINFO2DIR(ctinfo)].bytes += ntohs(skb->nh.iph->tot_len); + if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) + || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) + event |= IPCT_COUNTER_FILLING; } #endif write_unlock_bh(&ip_conntrack_lock); /* must be unlocked when calling event cache */ - if (do_event) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + if (event) + ip_conntrack_event_cache(event, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index eade2749915a..06ed91ee8ace 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, struct nfattr *nest_count = NFA_NEST(skb, type); u_int64_t tmp; - tmp = cpu_to_be64(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); - tmp = cpu_to_be64(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); NFA_NEST_END(skb, nest_count); -- cgit v1.2.3 From 339231537506846cb232a2f0cc4a2c662b2d5b07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 21:23:28 -0700 Subject: [NETFILTER] ctnetlink: allow userspace to change TCP state This patch adds the ability of changing the state a TCP connection. I know that this must be used with care but it's required to provide a complete conntrack creation via conntrack_netlink. So I'll document this aspect on the upcoming docs. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- .../linux/netfilter_ipv4/ip_conntrack_protocol.h | 3 +++ net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 23 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index b6b99be8632a..2c76b879e3dc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -52,6 +52,9 @@ struct ip_conntrack_protocol int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct); + /* convert nfnetlink attributes to protoinfo */ + int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct); + int (*tuple_to_nfattr)(struct sk_buff *skb, const struct ip_conntrack_tuple *t); int (*nfattr_to_tuple)(struct nfattr *tb[], diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 75e27e65c28f..d6701cafbcc2 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -356,6 +356,28 @@ nfattr_failure: read_unlock_bh(&tcp_lock); return -1; } + +static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; + +nfattr_failure: + return -1; +} #endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) @@ -1127,6 +1149,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, #endif -- cgit v1.2.3 From eeb2b8560676e454ad37ee30b49bc7d897edc9be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 10 Oct 2005 21:25:23 -0700 Subject: [TWSK]: Grab the module refcount for timewait sockets This is required to avoid unloading a module that has active timewait sockets, such as DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 3 +++ net/ipv4/inet_timewait_sock.c | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4ade56ef3a4d..28f7b2103505 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -193,11 +194,13 @@ static inline u32 inet_rcv_saddr(const struct sock *sk) static inline void inet_twsk_put(struct inet_timewait_sock *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { + struct module *owner = tw->tw_prot->owner; #ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif kmem_cache_free(tw->tw_prot->twsk_slab, tw); + module_put(owner); } } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index f9076ef3a1a8..a010e9a68811 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -111,6 +111,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_prot = sk->sk_prot_creator; atomic_set(&tw->tw_refcnt, 1); inet_twsk_dead_node_init(tw); + __module_get(tw->tw_prot->owner); } return tw; -- cgit v1.2.3 From cbd27b8ced4b1888c93f69b4dd108a69ac4d733f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 12 Oct 2005 11:39:33 +1000 Subject: [PATCH] ppc32: Fix timekeeping Interestingly enough, ppc32 had broken timekeeping for ages... It worked, but probably drifted a bit more than could be explained by the actual bad precision of the timebase calibration. We discovered that recently when somebody figured out that the common code was using CLOCK_TICK_RATE to correct the timekeeing, and ppc32 had a completely bogus value for it. This patch turns it into something saner. Probably not as good as doing something based on the actual timebase frequency precision but I'll leave that sort of math to others. This at least makes it better for the common HZ values. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- include/asm-powerpc/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/timex.h b/include/asm-powerpc/timex.h index 51c5b316be55..c02d15aced91 100644 --- a/include/asm-powerpc/timex.h +++ b/include/asm-powerpc/timex.h @@ -10,7 +10,7 @@ #include #include -#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define CLOCK_TICK_RATE 1024000 /* Underlying HZ */ typedef unsigned long cycles_t; -- cgit v1.2.3 From d8e998c58a870770905495a1d45ebf7285b5b1c5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 12 Oct 2005 14:22:50 +1000 Subject: [PATCH] ppc32: Tell userland about lack of standard TB Glibc is about to get some new high precision timer stuff that relies on the standard timebase of the PPC architecture. However, some (rare & old) CPUs do not have such timebase and it is a bit annoying to have your stuff just crash because you are running on the wrong CPU... This exposes to userland a CPU feature bit that tells that the current processor doesn't have a standard timebase. It's negative logic so that glibc will still "just work" on older kernels (it will just be unhappy on those old CPUs but that doesn't really matter as distro tend to update glibc & kernel at the same time). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/ppc/kernel/cputable.c | 5 +++-- include/asm-ppc/cputable.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c index 546e1ea4cafa..6b76cf58d9e0 100644 --- a/arch/ppc/kernel/cputable.c +++ b/arch/ppc/kernel/cputable.c @@ -91,7 +91,7 @@ struct cpu_spec cpu_specs[] = { .cpu_features = CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE, .cpu_user_features = COMMON_PPC | PPC_FEATURE_601_INSTR | - PPC_FEATURE_UNIFIED_CACHE, + PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_601 @@ -745,7 +745,8 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "403GCX", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, .icache_bsize = 16, .dcache_bsize = 16, }, diff --git a/include/asm-ppc/cputable.h b/include/asm-ppc/cputable.h index 41d8f8425c04..e17c492c870b 100644 --- a/include/asm-ppc/cputable.h +++ b/include/asm-ppc/cputable.h @@ -24,6 +24,7 @@ #define PPC_FEATURE_HAS_SPE 0x00800000 #define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 #define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 #ifdef __KERNEL__ -- cgit v1.2.3 From a451e28c7642830d8b066e5a13de46934151ce3a Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 12 Oct 2005 19:58:12 +0100 Subject: [ARM] 3003/1: SSP channel map register updates for pxa2xx Patch from Liam Girdwood This patch updates the pxa2xx channel map registers definitions in pxa-regs.h Changes:- o Added description for SSP2 registers o Added definitions for SSP3 registers Signed-off-by:Liam Girdwood Signed-off-by: Russell King --- include/asm-arm/arch-pxa/pxa-regs.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 939d9e5020a0..13fa2deb4ddd 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -126,8 +126,8 @@ #define DRCMR12 __REG(0x40000130) /* Request to Channel Map Register for AC97 audio transmit Request */ #define DRCMR13 __REG(0x40000134) /* Request to Channel Map Register for SSP receive Request */ #define DRCMR14 __REG(0x40000138) /* Request to Channel Map Register for SSP transmit Request */ -#define DRCMR15 __REG(0x4000013c) /* Reserved */ -#define DRCMR16 __REG(0x40000140) /* Reserved */ +#define DRCMR15 __REG(0x4000013c) /* Request to Channel Map Register for SSP2 receive Request */ +#define DRCMR16 __REG(0x40000140) /* Request to Channel Map Register for SSP2 transmit Request */ #define DRCMR17 __REG(0x40000144) /* Request to Channel Map Register for ICP receive Request */ #define DRCMR18 __REG(0x40000148) /* Request to Channel Map Register for ICP transmit Request */ #define DRCMR19 __REG(0x4000014c) /* Request to Channel Map Register for STUART receive Request */ @@ -151,7 +151,8 @@ #define DRCMR37 __REG(0x40000194) /* Request to Channel Map Register for USB endpoint 13 Request */ #define DRCMR38 __REG(0x40000198) /* Request to Channel Map Register for USB endpoint 14 Request */ #define DRCMR39 __REG(0x4000019C) /* Reserved */ - +#define DRCMR66 __REG(0x40001108) /* Request to Channel Map Register for SSP3 receive Request */ +#define DRCMR67 __REG(0x4000110C) /* Request to Channel Map Register for SSP3 transmit Request */ #define DRCMR68 __REG(0x40001110) /* Request to Channel Map Register for Camera FIFO 0 Request */ #define DRCMR69 __REG(0x40001114) /* Request to Channel Map Register for Camera FIFO 1 Request */ #define DRCMR70 __REG(0x40001118) /* Request to Channel Map Register for Camera FIFO 2 Request */ -- cgit v1.2.3 From afb997c6163b33292d31a09d6aa5cbb03ffa5bf1 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 15:12:21 -0700 Subject: [NETPOLL]: wrong return for null netpoll_poll_lock() When netpoll is not being used, the macro that defines the removed routing netpoll_poll_lock defines the return as zero, but the real routine returns a `void *` Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/netpoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 5ade54a78dbb..ca5a8733000f 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -86,7 +86,7 @@ static inline void netpoll_poll_unlock(void *have) #else #define netpoll_rx(a) 0 -#define netpoll_poll_lock(a) 0 +#define netpoll_poll_lock(a) NULL #define netpoll_poll_unlock(a) #endif -- cgit v1.2.3 From 9153bd75f25ff3170f07fb9ac1fb0e718afc6fce Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 13 Oct 2005 16:46:35 +0100 Subject: [ARM] 3005/1: S3C2440 - add definition for s3c2440_set_dsc() call in hardware.h Patch from Ben Dooks include/asm-arm/arch-s3c2410/hardware.h was missing the definition for s3c2440_set_dsc() Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/hardware.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/asm-arm/arch-s3c2410/hardware.h b/include/asm-arm/arch-s3c2410/hardware.h index 48a39918a760..1c9de29cafef 100644 --- a/include/asm-arm/arch-s3c2410/hardware.h +++ b/include/asm-arm/arch-s3c2410/hardware.h @@ -92,6 +92,13 @@ extern unsigned int s3c2410_gpio_getpin(unsigned int pin); extern unsigned int s3c2410_modify_misccr(unsigned int clr, unsigned int chg); +#ifdef CONFIG_CPU_S3C2440 + +extern int s3c2440_set_dsc(unsigned int pin, unsigned int value); + +#endif /* CONFIG_CPU_S3C2440 */ + + #endif /* __ASSEMBLY__ */ #include -- cgit v1.2.3 From c8923c6b852d3a97c1faad0566e38fca330375a7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 14:41:23 -0700 Subject: [NETFILTER]: Fix OOPSes on machines with discontiguous cpu numbering. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original patch by Harald Welte, with feedback from Herbert Xu and testing by Sébastien Bernard. EBTABLES, ARP tables, and IP/IP6 tables all assume that cpus are numbered linearly. That is not necessarily true. This patch fixes that up by calculating the largest possible cpu number, and allocating enough per-cpu structure space given that. Signed-off-by: David S. Miller --- arch/cris/arch-v32/kernel/smp.c | 2 ++ arch/sh/kernel/smp.c | 3 +++ include/linux/cpumask.h | 12 ++++++++++++ net/bridge/netfilter/ebtables.c | 27 +++++++++++++++++---------- net/ipv4/netfilter/arp_tables.c | 14 +++++++++----- net/ipv4/netfilter/ip_tables.c | 17 +++++++++++------ net/ipv6/netfilter/ip6_tables.c | 16 +++++++++++----- 7 files changed, 65 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 2c5cae04a95c..957f551ba5ce 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -15,6 +15,7 @@ #include #include #include +#include #define IPI_SCHEDULE 1 #define IPI_CALL 2 @@ -28,6 +29,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; /* CPU masks */ cpumask_t cpu_online_map = CPU_MASK_NONE; cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +EXPORT_SYMBOL(phys_cpu_present_map); /* Variables used during SMP boot */ volatile int cpu_now_booting = 0; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 56a39d69e080..5ecefc02896a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,8 @@ struct sh_cpuinfo cpu_data[NR_CPUS]; extern void per_cpu_trap_init(void); cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + cpumask_t cpu_online_map; static atomic_t cpus_booted = ATOMIC_INIT(0); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b15826f6e3a2..fe9778301d07 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -392,4 +392,16 @@ extern cpumask_t cpu_present_map; #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) +/* Find the highest possible smp_processor_id() */ +static inline unsigned int highest_possible_processor_id(void) +{ + unsigned int cpu, highest = 0; + + for_each_cpu_mask(cpu, cpu_possible_map) + highest = cpu; + + return highest; +} + + #endif /* __LINUX_CPUMASK_H */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c4540144f0f4..f8ffbf6e2333 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -26,6 +26,7 @@ #include #include #include +#include #include /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -823,10 +824,11 @@ static int translate_table(struct ebt_replace *repl, /* this will get free'd in do_replace()/ebt_register_table() if an error occurs */ newinfo->chainstack = (struct ebt_chainstack **) - vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack)); + vmalloc((highest_possible_processor_id()+1) + * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack) return -ENOMEM; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { newinfo->chainstack[i] = vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack[i]) { @@ -895,9 +897,12 @@ static void get_counters(struct ebt_counter *oldcounters, /* counters of cpu 0 */ memcpy(counters, oldcounters, - sizeof(struct ebt_counter) * nentries); + sizeof(struct ebt_counter) * nentries); + /* add other counters to those of cpu 0 */ - for (cpu = 1; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { + if (cpu == 0) + continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) { counters[i].pcnt += counter_base[i].pcnt; @@ -929,7 +934,8 @@ static int do_replace(void __user *user, unsigned int len) BUGPRINT("Entries_size never zero\n"); return -EINVAL; } - countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(tmp.nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); if (!newinfo) @@ -1022,7 +1028,7 @@ static int do_replace(void __user *user, unsigned int len) vfree(table->entries); if (table->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->chainstack[i]); vfree(table->chainstack); } @@ -1040,7 +1046,7 @@ free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1132,7 +1138,8 @@ int ebt_register_table(struct ebt_table *table) return -EINVAL; } - countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(table->table->nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); ret = -ENOMEM; @@ -1186,7 +1193,7 @@ free_unlock: up(&ebt_mutex); free_chainstack: if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1209,7 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table) up(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->private->chainstack[i]); vfree(table->private->chainstack); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fa1634256680..a7969286e6e7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -716,8 +716,10 @@ static int translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -767,7 +769,7 @@ static void get_counters(const struct arpt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -885,7 +887,8 @@ static int do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1158,7 +1161,8 @@ int arpt_register_table(struct arpt_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) { ret = -ENOMEM; return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index eef99a1b5de6..75c27e92f6ab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -921,8 +922,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -943,7 +946,7 @@ replace_table(struct ipt_table *table, struct ipt_entry *table_base; unsigned int i; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2da514b16d95..b03e90649eb5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -950,8 +951,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -973,6 +976,7 @@ replace_table(struct ip6t_table *table, unsigned int i; for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -1019,7 +1023,7 @@ get_counters(const struct ip6t_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1153,7 +1157,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1467,7 +1472,8 @@ int ip6t_register_table(struct ip6t_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; -- cgit v1.2.3 From 51e8513615ed8202b22ba9a43b0c7376ea4f6868 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 21:10:08 -0700 Subject: [SPARC64]: Consolidate common PCI IOMMU init code. All the PCI controller drivers were doing the same thing setting up the IOMMU software state, put it all in one spot. Signed-off-by: David S. Miller --- arch/sparc64/kernel/pci_iommu.c | 54 ++++++++++++++++++++++++++++++++++--- arch/sparc64/kernel/pci_psycho.c | 44 ++++--------------------------- arch/sparc64/kernel/pci_sabre.c | 39 +++------------------------ arch/sparc64/kernel/pci_schizo.c | 57 +++------------------------------------- include/asm-sparc64/pbm.h | 2 +- 5 files changed, 63 insertions(+), 133 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 425c60cfea19..cdee7d9fed72 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -80,13 +80,59 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } -void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize) +void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask) { - int i; + unsigned long i, tsbbase, order; + + /* Setup initial software IOMMU state. */ + spin_lock_init(&iommu->lock); + iommu->ctx_lowest_free = 1; + iommu->page_table_map_base = dma_offset; + iommu->dma_addr_mask = dma_addr_mask; + + switch (tsbsize / (8 * 1024)) { + case 64: + iommu->page_table_sz_bits = 16; + break; + case 128: + iommu->page_table_sz_bits = 17; + break; + default: + prom_printf("PCI_IOMMU: Illegal TSB size %d\n", + tsbsize / (8 * 1024)); + prom_halt(); + break; + }; + + iommu->lowest_consistent_map = + 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); + + for (i = 0; i < PBM_NCLUSTERS; i++) { + iommu->alloc_info[i].flush = 0; + iommu->alloc_info[i].next = 0; + } - tsbsize /= sizeof(iopte_t); + /* Allocate and initialize the dummy page which we + * set inactive IO PTEs to point to. + */ + iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); + if (!iommu->dummy_page) { + prom_printf("PCI_IOMMU: Error, gfp(dummy_page) failed.\n"); + prom_halt(); + } + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); + iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); + + /* Now allocate and setup the IOMMU page table itself. */ + order = get_order(tsbsize); + tsbbase = __get_free_pages(GFP_KERNEL, order); + if (!tsbbase) { + prom_printf("PCI_IOMMU: Error, gfp(tsb) failed.\n"); + prom_halt(); + } + iommu->page_table = (iopte_t *)tsbbase; - for (i = 0; i < tsbsize; i++) + for (i = 0; i < tsbsize / sizeof(iopte_t); i++) iopte_make_dummy(iommu, &iommu->page_table[i]); } diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index 6ed1ef25e0ac..c03ed5f49d31 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -1207,13 +1207,9 @@ static void psycho_scan_bus(struct pci_controller_info *p) static void psycho_iommu_init(struct pci_controller_info *p) { struct pci_iommu *iommu = p->pbm_A.iommu; - unsigned long tsbbase, i; + unsigned long i; u64 control; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL; iommu->iommu_tsbbase = p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE; @@ -1240,40 +1236,10 @@ static void psycho_iommu_init(struct pci_controller_info *p) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - /* Using assumed page size 8K with 128K entries we need 1MB iommu page - * table (128K ioptes * 8 bytes per iopte). This is - * page order 7 on UltraSparc. - */ - tsbbase = __get_free_pages(GFP_KERNEL, get_order(IO_TSB_SIZE)); - if (!tsbbase) { - prom_printf("PSYCHO_IOMMU: Error, gfp(tsb) failed.\n"); - prom_halt(); - } - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_sz_bits = 17; - iommu->page_table_map_base = 0xc0000000; - iommu->dma_addr_mask = 0xffffffff; - pci_iommu_table_init(iommu, IO_TSB_SIZE); - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } - - psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, __pa(tsbbase)); + psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, + __pa(iommu->page_table)); control = psycho_read(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL); control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ); @@ -1281,7 +1247,7 @@ static void psycho_iommu_init(struct pci_controller_info *p) psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL, control); /* If necessary, hook us up for starfire IRQ translations. */ - if(this_is_starfire) + if (this_is_starfire) p->starfire_cookie = starfire_hookup(p->pbm_A.portid); else p->starfire_cookie = NULL; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index 0ee6bd5b9ac6..da8e1364194f 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -1267,13 +1267,9 @@ static void sabre_iommu_init(struct pci_controller_info *p, u32 dma_mask) { struct pci_iommu *iommu = p->pbm_A.iommu; - unsigned long tsbbase, i, order; + unsigned long i; u64 control; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL; iommu->iommu_tsbbase = p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE; @@ -1295,26 +1291,10 @@ static void sabre_iommu_init(struct pci_controller_info *p, /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, tsbsize * 1024 * 8, dvma_offset, dma_mask); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - tsbbase = __get_free_pages(GFP_KERNEL, order = get_order(tsbsize * 1024 * 8)); - if (!tsbbase) { - prom_printf("SABRE_IOMMU: Error, gfp(tsb) failed.\n"); - prom_halt(); - } - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_map_base = dvma_offset; - iommu->dma_addr_mask = dma_mask; - pci_iommu_table_init(iommu, PAGE_SIZE << order); - - sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, __pa(tsbbase)); + sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, + __pa(iommu->page_table)); control = sabre_read(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL); control &= ~(SABRE_IOMMUCTRL_TSBSZ | SABRE_IOMMUCTRL_TBWSZ); @@ -1322,11 +1302,9 @@ static void sabre_iommu_init(struct pci_controller_info *p, switch(tsbsize) { case 64: control |= SABRE_IOMMU_TSBSZ_64K; - iommu->page_table_sz_bits = 16; break; case 128: control |= SABRE_IOMMU_TSBSZ_128K; - iommu->page_table_sz_bits = 17; break; default: prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); @@ -1334,15 +1312,6 @@ static void sabre_iommu_init(struct pci_controller_info *p, break; } sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL, control); - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } } static void pbm_register_toplevel_resources(struct pci_controller_info *p, diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index cae5b61fe2f0..d8c4e0919b4e 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -1765,7 +1765,7 @@ static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm) static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) { struct pci_iommu *iommu = pbm->iommu; - unsigned long tsbbase, i, tagbase, database, order; + unsigned long i, tagbase, database; u32 vdma[2], dma_mask; u64 control; int err, tsbsize; @@ -1800,10 +1800,6 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) prom_halt(); }; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses, SCHIZO has iommu ctx flushing. */ iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL; iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE; @@ -1832,56 +1828,9 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - /* Using assumed page size 8K with 128K entries we need 1MB iommu page - * table (128K ioptes * 8 bytes per iopte). This is - * page order 7 on UltraSparc. - */ - order = get_order(tsbsize * 8 * 1024); - tsbbase = __get_free_pages(GFP_KERNEL, order); - if (!tsbbase) { - prom_printf("%s: Error, gfp(tsb) failed.\n", pbm->name); - prom_halt(); - } - - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_map_base = vdma[0]; - iommu->dma_addr_mask = dma_mask; - pci_iommu_table_init(iommu, PAGE_SIZE << order); - - switch (tsbsize) { - case 64: - iommu->page_table_sz_bits = 16; - break; - - case 128: - iommu->page_table_sz_bits = 17; - break; - - default: - prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); - prom_halt(); - break; - }; - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } - - schizo_write(iommu->iommu_tsbbase, __pa(tsbbase)); + schizo_write(iommu->iommu_tsbbase, __pa(iommu->page_table)); control = schizo_read(iommu->iommu_control); control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ); diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index 38bbbccb4068..c067407de0b0 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -102,7 +102,7 @@ struct pci_iommu { u32 dma_addr_mask; }; -extern void pci_iommu_table_init(struct pci_iommu *, int); +extern void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask); /* This describes a PCI bus module's streaming buffer. */ struct pci_strbuf { -- cgit v1.2.3 From 688cb30bdc3e398d97682a6a58f825821ee838c2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 22:15:24 -0700 Subject: [SPARC64]: Eliminate PCI IOMMU dma mapping size limit. The hairy fast allocator in the sparc64 PCI IOMMU code has a hard limit of 256 pages. Certain devices can exceed this when performing very large I/Os. So replace with a more simple allocator, based largely upon the arch/ppc64/kernel/iommu.c code. Signed-off-by: David S. Miller --- arch/sparc64/kernel/pci_iommu.c | 365 +++++++++++++++------------------------- include/asm-sparc64/pbm.h | 28 +-- 2 files changed, 145 insertions(+), 248 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index cdee7d9fed72..a11910be1013 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -49,12 +49,6 @@ static void __iommu_flushall(struct pci_iommu *iommu) /* Ensure completion of previous PIO writes. */ (void) pci_iommu_read(iommu->write_complete_reg); - - /* Now update everyone's flush point. */ - for (entry = 0; entry < PBM_NCLUSTERS; entry++) { - iommu->alloc_info[entry].flush = - iommu->alloc_info[entry].next; - } } #define IOPTE_CONSISTENT(CTX) \ @@ -80,9 +74,61 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } +/* Based largely upon the ppc64 iommu allocator. */ +static long pci_arena_alloc(struct pci_iommu *iommu, unsigned long npages) +{ + struct pci_iommu_arena *arena = &iommu->arena; + unsigned long n, i, start, end, limit; + int pass; + + limit = arena->limit; + start = arena->hint; + pass = 0; + +again: + n = find_next_zero_bit(arena->map, limit, start); + end = n + npages; + if (unlikely(end >= limit)) { + if (likely(pass < 1)) { + limit = start; + start = 0; + __iommu_flushall(iommu); + pass++; + goto again; + } else { + /* Scanned the whole thing, give up. */ + return -1; + } + } + + for (i = n; i < end; i++) { + if (test_bit(i, arena->map)) { + start = i + 1; + goto again; + } + } + + for (i = n; i < end; i++) + __set_bit(i, arena->map); + + arena->hint = end; + + return n; +} + +static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages) +{ + unsigned long i; + + for (i = base; i < (base + npages); i++) + __clear_bit(i, arena->map); +} + void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask) { - unsigned long i, tsbbase, order; + unsigned long i, tsbbase, order, sz, num_tsb_entries; + + num_tsb_entries = tsbsize / sizeof(iopte_t); /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); @@ -90,27 +136,16 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, iommu->page_table_map_base = dma_offset; iommu->dma_addr_mask = dma_addr_mask; - switch (tsbsize / (8 * 1024)) { - case 64: - iommu->page_table_sz_bits = 16; - break; - case 128: - iommu->page_table_sz_bits = 17; - break; - default: - prom_printf("PCI_IOMMU: Illegal TSB size %d\n", - tsbsize / (8 * 1024)); + /* Allocate and initialize the free area map. */ + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kmalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); prom_halt(); - break; - }; - - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; } + memset(iommu->arena.map, 0, sz); + iommu->arena.limit = num_tsb_entries; /* Allocate and initialize the dummy page which we * set inactive IO PTEs to point to. @@ -132,114 +167,24 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, } iommu->page_table = (iopte_t *)tsbbase; - for (i = 0; i < tsbsize / sizeof(iopte_t); i++) + for (i = 0; i < num_tsb_entries; i++) iopte_make_dummy(iommu, &iommu->page_table[i]); } -static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages) +static inline iopte_t *alloc_npages(struct pci_iommu *iommu, unsigned long npages) { - iopte_t *iopte, *limit, *first; - unsigned long cnum, ent, flush_point; - - cnum = 0; - while ((1UL << cnum) < npages) - cnum++; - iopte = (iommu->page_table + - (cnum << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - - if (cnum == 0) - limit = (iommu->page_table + - iommu->lowest_consistent_map); - else - limit = (iopte + - (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); + long entry; - iopte += ((ent = iommu->alloc_info[cnum].next) << cnum); - flush_point = iommu->alloc_info[cnum].flush; - - first = iopte; - for (;;) { - if (IOPTE_IS_DUMMY(iommu, iopte)) { - if ((iopte + (1 << cnum)) >= limit) - ent = 0; - else - ent = ent + 1; - iommu->alloc_info[cnum].next = ent; - if (ent == flush_point) - __iommu_flushall(iommu); - break; - } - iopte += (1 << cnum); - ent++; - if (iopte >= limit) { - iopte = (iommu->page_table + - (cnum << - (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - ent = 0; - } - if (ent == flush_point) - __iommu_flushall(iommu); - if (iopte == first) - goto bad; - } - - /* I've got your streaming cluster right here buddy boy... */ - return iopte; + entry = pci_arena_alloc(iommu, npages); + if (unlikely(entry < 0)) + return NULL; -bad: - printk(KERN_EMERG "pci_iommu: alloc_streaming_cluster of npages(%ld) failed!\n", - npages); - return NULL; + return iommu->page_table + entry; } -static void free_streaming_cluster(struct pci_iommu *iommu, dma_addr_t base, - unsigned long npages, unsigned long ctx) +static inline void free_npages(struct pci_iommu *iommu, dma_addr_t base, unsigned long npages) { - unsigned long cnum, ent; - - cnum = 0; - while ((1UL << cnum) < npages) - cnum++; - - ent = (base << (32 - IO_PAGE_SHIFT + PBM_LOGCLUSTERS - iommu->page_table_sz_bits)) - >> (32 + PBM_LOGCLUSTERS + cnum - iommu->page_table_sz_bits); - - /* If the global flush might not have caught this entry, - * adjust the flush point such that we will flush before - * ever trying to reuse it. - */ -#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y))) - if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush)) - iommu->alloc_info[cnum].flush = ent; -#undef between -} - -/* We allocate consistent mappings from the end of cluster zero. */ -static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long npages) -{ - iopte_t *iopte; - - iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)); - while (iopte > iommu->page_table) { - iopte--; - if (IOPTE_IS_DUMMY(iommu, iopte)) { - unsigned long tmp = npages; - - while (--tmp) { - iopte--; - if (!IOPTE_IS_DUMMY(iommu, iopte)) - break; - } - if (tmp == 0) { - u32 entry = (iopte - iommu->page_table); - - if (entry < iommu->lowest_consistent_map) - iommu->lowest_consistent_map = entry; - return iopte; - } - } - } - return NULL; + pci_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages); } static int iommu_alloc_ctx(struct pci_iommu *iommu) @@ -279,7 +224,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad struct pcidev_cookie *pcp; struct pci_iommu *iommu; iopte_t *iopte; - unsigned long flags, order, first_page, ctx; + unsigned long flags, order, first_page; void *ret; int npages; @@ -297,9 +242,10 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad iommu = pcp->pbm->iommu; spin_lock_irqsave(&iommu->lock, flags); - iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT); - if (iopte == NULL) { - spin_unlock_irqrestore(&iommu->lock, flags); + iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(iopte == NULL)) { free_pages(first_page, order); return NULL; } @@ -308,31 +254,15 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); ret = (void *) first_page; npages = size >> IO_PAGE_SHIFT; - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); first_page = __pa(first_page); while (npages--) { - iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) | + iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) | IOPTE_WRITE | (first_page & IOPTE_PAGE)); iopte++; first_page += IO_PAGE_SIZE; } - { - int i; - u32 daddr = *dma_addrp; - - npages = size >> IO_PAGE_SHIFT; - for (i = 0; i < npages; i++) { - pci_iommu_write(iommu->iommu_flush, daddr); - daddr += IO_PAGE_SIZE; - } - } - - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; } @@ -342,7 +272,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ struct pcidev_cookie *pcp; struct pci_iommu *iommu; iopte_t *iopte; - unsigned long flags, order, npages, i, ctx; + unsigned long flags, order, npages; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; pcp = pdev->sysdata; @@ -352,46 +282,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ spin_lock_irqsave(&iommu->lock, flags); - if ((iopte - iommu->page_table) == - iommu->lowest_consistent_map) { - iopte_t *walk = iopte + npages; - iopte_t *limit; - - limit = (iommu->page_table + - (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - while (walk < limit) { - if (!IOPTE_IS_DUMMY(iommu, walk)) - break; - walk++; - } - iommu->lowest_consistent_map = - (walk - iommu->page_table); - } - - /* Data for consistent mappings cannot enter the streaming - * buffers, so we only need to update the TSB. We flush - * the IOMMU here as well to prevent conflicts with the - * streaming mapping deferred tlb flush scheme. - */ - - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; - - for (i = 0; i < npages; i++, iopte++) - iopte_make_dummy(iommu, iopte); - - if (iommu->iommu_ctxflush) { - pci_iommu_write(iommu->iommu_ctxflush, ctx); - } else { - for (i = 0; i < npages; i++) { - u32 daddr = dvma + (i << IO_PAGE_SHIFT); - - pci_iommu_write(iommu->iommu_flush, daddr); - } - } - - iommu_free_ctx(iommu, ctx); + free_npages(iommu, dvma, npages); spin_unlock_irqrestore(&iommu->lock, flags); @@ -418,25 +309,27 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) + goto bad_no_ctx; oaddr = (unsigned long)ptr; npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); - base = alloc_streaming_cluster(iommu, npages); - if (base == NULL) + if (unlikely(!base)) goto bad; + bus_addr = (iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else @@ -447,12 +340,13 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE) iopte_val(*base) = iopte_protection | base_paddr; - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; bad: - spin_unlock_irqrestore(&iommu->lock, flags); + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); return PCI_DMA_ERROR_CODE; } @@ -527,10 +421,13 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int struct pci_iommu *iommu; struct pci_strbuf *strbuf; iopte_t *base; - unsigned long flags, npages, ctx; + unsigned long flags, npages, ctx, i; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + return; + } pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -556,13 +453,14 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int /* Step 1: Kick data out of streaming buffers if necessary. */ if (strbuf->strbuf_enabled) - pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, + npages, direction); - /* Step 2: Clear out first TSB entry. */ - iopte_make_dummy(iommu, base); + /* Step 2: Clear out TSB entries. */ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, - npages, ctx); + free_npages(iommu, bus_addr - iommu->page_table_map_base, npages); iommu_free_ctx(iommu, ctx); @@ -667,6 +565,8 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int pci_map_single(pdev, (page_address(sglist->page) + sglist->offset), sglist->length, direction); + if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE)) + return 0; sglist->dma_length = sglist->length; return 1; } @@ -675,21 +575,29 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) + goto bad_no_ctx; /* Step 1: Prepare scatter list. */ npages = prepare_sg(sglist, nelems); - /* Step 2: Allocate a cluster. */ + /* Step 2: Allocate a cluster and context, if necessary. */ spin_lock_irqsave(&iommu->lock, flags); - base = alloc_streaming_cluster(iommu, npages); + base = alloc_npages(iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + + spin_unlock_irqrestore(&iommu->lock, flags); + if (base == NULL) goto bad; - dma_base = iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT); + + dma_base = iommu->page_table_map_base + + ((base - iommu->page_table) << IO_PAGE_SHIFT); /* Step 3: Normalize DMA addresses. */ used = nelems; @@ -702,30 +610,28 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int } used = nelems - used; - /* Step 4: Choose a context if necessary. */ - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); - - /* Step 5: Create the mappings. */ + /* Step 4: Create the mappings. */ if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else iopte_protection = IOPTE_CONSISTENT(ctx); if (direction != PCI_DMA_TODEVICE) iopte_protection |= IOPTE_WRITE; - fill_sg (base, sglist, used, nelems, iopte_protection); + + fill_sg(base, sglist, used, nelems, iopte_protection); + #ifdef VERIFY_SG verify_sglist(sglist, nelems, base, npages); #endif - spin_unlock_irqrestore(&iommu->lock, flags); - return used; bad: - spin_unlock_irqrestore(&iommu->lock, flags); - return PCI_DMA_ERROR_CODE; + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); + return 0; } /* Unmap a set of streaming mode DMA translations. */ @@ -738,8 +644,10 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, unsigned long flags, ctx, i, npages; u32 bus_addr; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + } pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -751,7 +659,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, if (sglist[i].dma_length == 0) break; i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - + bus_addr) >> IO_PAGE_SHIFT; base = iommu->page_table + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); @@ -772,11 +681,11 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, if (strbuf->strbuf_enabled) pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); - /* Step 2: Clear out first TSB entry. */ - iopte_make_dummy(iommu, base); + /* Step 2: Clear out the TSB entries. */ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, - npages, ctx); + free_npages(iommu, bus_addr - iommu->page_table_map_base, npages); iommu_free_ctx(iommu, ctx); diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index c067407de0b0..dd35a2c7798a 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -27,23 +27,27 @@ * PCI bus. */ -#define PBM_LOGCLUSTERS 3 -#define PBM_NCLUSTERS (1 << PBM_LOGCLUSTERS) - struct pci_controller_info; /* This contains the software state necessary to drive a PCI * controller's IOMMU. */ +struct pci_iommu_arena { + unsigned long *map; + unsigned int hint; + unsigned int limit; +}; + struct pci_iommu { /* This protects the controller's IOMMU and all * streaming buffers underneath. */ spinlock_t lock; + struct pci_iommu_arena arena; + /* IOMMU page table, a linear array of ioptes. */ iopte_t *page_table; /* The page table itself. */ - int page_table_sz_bits; /* log2 of ow many pages does it map? */ /* Base PCI memory space address where IOMMU mappings * begin. @@ -62,12 +66,6 @@ struct pci_iommu { */ unsigned long write_complete_reg; - /* The lowest used consistent mapping entry. Since - * we allocate consistent maps out of cluster 0 this - * is relative to the beginning of closter 0. - */ - u32 lowest_consistent_map; - /* In order to deal with some buggy third-party PCI bridges that * do wrong prefetching, we never mark valid mappings as invalid. * Instead we point them at this dummy page. @@ -75,16 +73,6 @@ struct pci_iommu { unsigned long dummy_page; unsigned long dummy_page_pa; - /* If PBM_NCLUSTERS is ever decreased to 4 or lower, - * or if largest supported page_table_sz * 8K goes above - * 2GB, you must increase the size of the type of - * these counters. You have been duly warned. -DaveM - */ - struct { - u16 next; - u16 flush; - } alloc_info[PBM_NCLUSTERS]; - /* CTX allocation. */ unsigned long ctx_lowest_free; unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)]; -- cgit v1.2.3 From 6205d158d16d2619bf30f0aff47a8e09b07106e9 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 14 Oct 2005 12:24:24 +0100 Subject: [ARM] 3009/1: S3C2410 - io.h offsets too large for LDRH/STRH Patch from Ben Dooks The __inwc/__outwc calls are capable of creating LDRH and STRH instructions with offsets over 8bits as GCC does not have a constraint for an 8bit offset. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/io.h | 58 ++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-s3c2410/io.h b/include/asm-arm/arch-s3c2410/io.h index 418233a7ee6f..4bf272ed9add 100644 --- a/include/asm-arm/arch-s3c2410/io.h +++ b/include/asm-arm/arch-s3c2410/io.h @@ -9,7 +9,7 @@ * 06-Dec-1997 RMK Created. * 02-Sep-2003 BJD Modified for S3C2410 * 10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA - * + * 13-Oct-2005 BJD Fixed problems with LDRH/STRH offset range */ #ifndef __ASM_ARM_ARCH_IO_H @@ -97,7 +97,7 @@ DECLARE_IO(int,l,"") else \ __asm__ __volatile__( \ "strb %0, [%1, #0] @ outbc" \ - : : "r" (value), "r" ((port))); \ + : : "r" (value), "r" ((port))); \ }) #define __inbc(port) \ @@ -110,35 +110,61 @@ DECLARE_IO(int,l,"") else \ __asm__ __volatile__( \ "ldrb %0, [%1, #0] @ inbc" \ - : "=r" (result) : "r" ((port))); \ + : "=r" (result) : "r" ((port))); \ result; \ }) #define __outwc(value,port) \ ({ \ unsigned long v = value; \ - if (__PORT_PCIO((port))) \ - __asm__ __volatile__( \ - "strh %0, [%1, %2] @ outwc" \ - : : "r" (v), "r" (PCIO_BASE), "Jr" ((port))); \ - else \ + if (__PORT_PCIO((port))) { \ + if ((port) < 256 && (port) > -256) \ + __asm__ __volatile__( \ + "strh %0, [%1, %2] @ outwc" \ + : : "r" (v), "r" (PCIO_BASE), "Jr" ((port))); \ + else if ((port) > 0) \ + __asm__ __volatile__( \ + "strh %0, [%1, %2] @ outwc" \ + : : "r" (v), \ + "r" (PCIO_BASE + ((port) & ~0xff)), \ + "Jr" (((port) & 0xff))); \ + else \ + __asm__ __volatile__( \ + "strh %0, [%1, #0] @ outwc" \ + : : "r" (v), \ + "r" (PCIO_BASE + (port))); \ + } else \ __asm__ __volatile__( \ "strh %0, [%1, #0] @ outwc" \ - : : "r" (v), "r" ((port))); \ + : : "r" (v), "r" ((port))); \ }) #define __inwc(port) \ ({ \ unsigned short result; \ - if (__PORT_PCIO((port))) \ - __asm__ __volatile__( \ - "ldrh %0, [%1, %2] @ inwc" \ - : "=r" (result) : "r" (PCIO_BASE), "Jr" ((port))); \ - else \ + if (__PORT_PCIO((port))) { \ + if ((port) < 256 && (port) > -256 ) \ + __asm__ __volatile__( \ + "ldrh %0, [%1, %2] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE), \ + "Jr" ((port))); \ + else if ((port) > 0) \ + __asm__ __volatile__( \ + "ldrh %0, [%1, %2] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE + ((port) & ~0xff)), \ + "Jr" (((port) & 0xff))); \ + else \ + __asm__ __volatile__( \ + "ldrh %0, [%1, #0] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE + ((port)))); \ + } else \ __asm__ __volatile__( \ "ldrh %0, [%1, #0] @ inwc" \ - : "=r" (result) : "r" ((port))); \ - result; \ + : "=r" (result) : "r" ((port))); \ + result; \ }) #define __outlc(value,port) \ -- cgit v1.2.3 From cb38c569e5ecf9e922e66963b6da2751b4f13d81 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 14 Oct 2005 16:07:25 +0100 Subject: [ARM] 3011/1: pxafb: Add ability to set device parent + fix spitz compile error Patch from Richard Purdie Add a function to allow machines to set the parent of the pxa framebuffer device. This means the power up/down sequence can be controlled where required by the machine. Update spitz to use the new function, fixing a compile error. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/mach-pxa/generic.c | 5 +++++ arch/arm/mach-pxa/spitz.c | 2 +- include/asm-arm/arch-pxa/pxafb.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index d0660a8c4b70..d327c127eddb 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -208,6 +208,11 @@ static struct platform_device pxafb_device = { .resource = pxafb_resources, }; +void __init set_pxa_fb_parent(struct device *parent_dev) +{ + pxafb_device.dev.parent = parent_dev; +} + static struct platform_device ffuart_device = { .name = "pxa2xx-uart", .id = 0, diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 568afe3d6e1a..7eaeb24aae1e 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -328,7 +328,7 @@ static void __init common_init(void) platform_add_devices(devices, ARRAY_SIZE(devices)); pxa_set_mci_info(&spitz_mci_platform_data); - pxafb_device.dev.parent = &spitzssp_device.dev; + set_pxa_fb_parent(&spitzssp_device.dev); set_pxa_fb_info(&spitz_pxafb_info); } diff --git a/include/asm-arm/arch-pxa/pxafb.h b/include/asm-arm/arch-pxa/pxafb.h index 21c0e16dce5f..aba9b30f4249 100644 --- a/include/asm-arm/arch-pxa/pxafb.h +++ b/include/asm-arm/arch-pxa/pxafb.h @@ -66,4 +66,5 @@ struct pxafb_mach_info { }; void set_pxa_fb_info(struct pxafb_mach_info *hard_pxa_fb_info); +void set_pxa_fb_parent(struct device *parent_dev); unsigned long pxafb_get_hsync_time(struct device *dev); -- cgit v1.2.3 From e26148d934762b61133a64b6862f870624ff617d Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Fri, 14 Oct 2005 15:59:05 -0700 Subject: [PATCH] Fix copy-and-paste error in BSD accounting Fix copy and paste error in jiffies_to_AHZ conversion which leads to wrong BSD accounting information on alpha and ia64 when CONFIG_BSD_PROCESS_ACCT_V3 is turned on. Also update comment to match reorganised header files. Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acct.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acct.h b/include/linux/acct.h index 1993a3691768..19f70462b3be 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -162,13 +162,13 @@ typedef struct acct acct_t; #ifdef __KERNEL__ /* * Yet another set of HZ to *HZ helper functions. - * See for the original. + * See for the original. */ static inline u32 jiffies_to_AHZ(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0 - return x / (HZ / USER_HZ); + return x / (HZ / AHZ); #else u64 tmp = (u64)x * TICK_NSEC; do_div(tmp, (NSEC_PER_SEC / AHZ)); -- cgit v1.2.3 From 688ce17b8599abc548b406c00e4d18ae0dec954f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Oct 2005 00:17:33 -0700 Subject: [PATCH]: highest_possible_processor_id() has to be a macro ... otherwise, things like alpha and sparc64 break and break badly. They define cpu_possible_map to something else in smp.h *AFTER* having included cpumask.h. If that puppy is a macro, expansion will happen at the actual caller, when we'd already seen #define cpu_possible_map ... and we will get the right thing used. As an inline helper it will be tokenized before we get to that define and that's it; no matter what we define later, it won't affect anything. We get modules with dependency on cpu_possible_map instead of the right symbol (phys_cpu_present_map in case of sparc64), or outright link errors if they are built-in. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/cpumask.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe9778301d07..9bdba8169b41 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -393,15 +393,13 @@ extern cpumask_t cpu_present_map; #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) /* Find the highest possible smp_processor_id() */ -static inline unsigned int highest_possible_processor_id(void) -{ - unsigned int cpu, highest = 0; - - for_each_cpu_mask(cpu, cpu_possible_map) - highest = cpu; - - return highest; -} +#define highest_possible_processor_id() \ +({ \ + unsigned int cpu, highest = 0; \ + for_each_cpu_mask(cpu, cpu_possible_map) \ + highest = cpu; \ + highest; \ +}) #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3