From 20ae975dfd54de581287b2ca8a1ad97099ab0396 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 14 Sep 2005 20:52:37 -0700 Subject: [NETLINK]: Reserve a slot for NETLINK_GENERIC. As requested by Jamal. Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 7bbd25970c9e..bdebdc564506 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -20,6 +20,7 @@ #define NETLINK_IP6_FW 13 #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ +#define NETLINK_GENERIC 16 #define MAX_LINKS 32 -- cgit v1.2.3 From 87375ab47cd0ba04124c6d3fd80db5c368f5dcb6 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 14 Sep 2005 21:08:51 -0700 Subject: [IPVS]: ip_vs_ftp breaks connections using persistence ip_vs_ftp when loaded can create NAT connections with unknown client port for passive FTP. For such expectations we lookup with cport=0 on incoming packet but it matches the format of the persistence templates causing packets to other persistent virtual servers to be forwarded to real server without creating connection. Later the reply packets are treated as foreign and not SNAT-ed. This patch changes the connection lookup for packets from clients: * introduce IP_VS_CONN_F_TEMPLATE connection flag to mark the connection as template * create new connection lookup function just for templates - ip_vs_ct_in_get * make sure ip_vs_conn_in_get hits only connections with IP_VS_CONN_F_NO_CPORT flag set when s_port is 0. By this way we avoid returning template when looking for cport=0 (ftp) Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 +++ net/ipv4/ipvs/ip_vs_conn.c | 41 ++++++++++++++++++++++++++++++++++++++--- net/ipv4/ipvs/ip_vs_core.c | 16 ++++++++-------- net/ipv4/ipvs/ip_vs_sync.c | 20 ++++++++++++++------ 4 files changed, 63 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index e426641c519f..06b4235aa016 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -84,6 +84,7 @@ #define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ +#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ /* Move it to better place one day, for now keep it unique */ #define NFC_IPVS_PROPERTY 0x10000 @@ -739,6 +740,8 @@ enum { extern struct ip_vs_conn *ip_vs_conn_in_get (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); +extern struct ip_vs_conn *ip_vs_ct_in_get +(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); extern struct ip_vs_conn *ip_vs_conn_out_get (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 5994521fddc5..f828fa2eb7de 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (s_addr==cp->caddr && s_port==cp->cport && d_port==cp->vport && d_addr==cp->vaddr && + ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && protocol==cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +/* Get 
reference to connection template */ +struct ip_vs_conn *ip_vs_ct_in_get +(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +{ + unsigned hash; + struct ip_vs_conn *cp; + + hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + + ct_read_lock(hash); + + list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (s_addr==cp->caddr && s_port==cp->cport && + d_port==cp->vport && d_addr==cp->vaddr && + cp->flags & IP_VS_CONN_F_TEMPLATE && + protocol==cp->protocol) { + /* HIT */ + atomic_inc(&cp->refcnt); + goto out; + } + } + cp = NULL; + + out: + ct_read_unlock(hash); + + IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", + ip_vs_proto_name(protocol), + NIPQUAD(s_addr), ntohs(s_port), + NIPQUAD(d_addr), ntohs(d_port), + cp?"hit":"not hit"); + + return cp; +} /* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. @@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so increase the inactive connection counter because it is in TCP SYNRECV state (inactive) or other protocol inacive state */ @@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so decrease the inactconns or activeconns counter */ if (cp->flags & IP_VS_CONN_F_INACTIVE) { @@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void) ct_write_lock_bh(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) + if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3ac7eeca04ac..981cc3244ef2 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, ports[1]); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, iph->daddr, ports[1], dest->addr, dest->port, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; @@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * port zero template: */ if (svc->fwmark) - ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, + ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, htonl(svc->fwmark), 0); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, snet, 0, htonl(svc->fwmark), 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; diff --git 
a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 574d1f509b46..2e5ced3d8062 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); for (i=0; i<m->nr_conns; i++) { + unsigned flags; + s = (struct ip_vs_sync_conn *)p; - cp = ip_vs_conn_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + flags = ntohs(s->flags); + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + cp = ip_vs_conn_in_get(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport); + else + cp = ip_vs_ct_in_get(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport); if (!cp) { cp = ip_vs_conn_new(s->protocol, s->caddr, s->cport, s->vaddr, s->vport, s->daddr, s->dport, - ntohs(s->flags), NULL); + flags, NULL); if (!cp) { IP_VS_ERR("ip_vs_conn_new failed\n"); return; @@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } else if (!cp->dest) { /* it is an entry created by the synchronization */ cp->state = ntohs(s->state); - cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; + cp->flags = flags | IP_VS_CONN_F_HASHED; } /* Note that we don't touch its state and flags if it is a normal entry. */ - if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { + if (flags & IP_VS_CONN_F_SEQ_MASK) { opt = (struct ip_vs_sync_conn_options *)&s[1]; memcpy(&cp->in_seq, opt, sizeof(*opt)); p += FULL_CONN_SIZE; -- cgit v1.2.3 From fea2efe3bba15f0aa8f840fbe052699808187cb6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 15 Sep 2005 12:30:11 +0100 Subject: [ARM] Remove PFN_TO_NID for !DISCONTIGMEM Platform classes need not define PFN_TO_NID when DISCONTIGMEM is not selected. Signed-off-by: Russell King --- include/asm-arm/arch-aaec2000/memory.h | 4 ---- include/asm-arm/arch-iop3xx/memory.h | 2 -- include/asm-arm/arch-lh7a40x/memory.h | 4 ---- include/asm-arm/arch-omap/memory.h | 1 - include/asm-arm/arch-pxa/memory.h | 4 ---- include/asm-arm/arch-sa1100/memory.h | 4 ---- 6 files changed, 19 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-aaec2000/memory.h b/include/asm-arm/arch-aaec2000/memory.h index 681b6a6171a1..79c90813bc3e 100644 --- a/include/asm-arm/arch-aaec2000/memory.h +++ b/include/asm-arm/arch-aaec2000/memory.h @@ -64,10 +64,6 @@ #define NODE_MAX_MEM_SHIFT 26 #define NODE_MAX_MEM_SIZE (1 << NODE_MAX_MEM_SHIFT) -#else - -#define PFN_TO_NID(addr) (0) - #endif /* CONFIG_DISCONTIGMEM */ #endif /* __ASM_ARCH_MEMORY_H */ diff --git a/include/asm-arm/arch-iop3xx/memory.h b/include/asm-arm/arch-iop3xx/memory.h index dc4735cb0c10..45351f5cd904 100644 --- a/include/asm-arm/arch-iop3xx/memory.h +++ b/include/asm-arm/arch-iop3xx/memory.h @@ -36,6 +36,4 @@ #endif -#define PFN_TO_NID(addr) (0) - #endif diff --git a/include/asm-arm/arch-lh7a40x/memory.h b/include/asm-arm/arch-lh7a40x/memory.h index 7e2fea372663..c650e6feb9d5 100644 --- a/include/asm-arm/arch-lh7a40x/memory.h +++ b/include/asm-arm/arch-lh7a40x/memory.h @@ -85,10 +85,6 @@ (((unsigned long)(addr) & 0x01ffffff) >> PAGE_SHIFT) # endif -#else - -# define PFN_TO_NID(addr) (0) - #endif #endif diff --git a/include/asm-arm/arch-omap/memory.h b/include/asm-arm/arch-omap/memory.h index 84f81e315a25..ef32d61eec7a 100644 --- a/include/asm-arm/arch-omap/memory.h +++ b/include/asm-arm/arch-omap/memory.h @@ -86,6 +86,5 @@ #endif /* CONFIG_ARCH_OMAP1510 */ -#define PHYS_TO_NID(addr) (0) #endif diff --git a/include/asm-arm/arch-pxa/memory.h 
b/include/asm-arm/arch-pxa/memory.h index 217a80b820ff..58bad9748b5c 100644 --- a/include/asm-arm/arch-pxa/memory.h +++ b/include/asm-arm/arch-pxa/memory.h @@ -67,10 +67,6 @@ #define LOCAL_MAP_NR(addr) \ (((unsigned long)(addr) & 0x03ffffff) >> PAGE_SHIFT) -#else - -#define PFN_TO_NID(addr) (0) - #endif #endif diff --git a/include/asm-arm/arch-sa1100/memory.h b/include/asm-arm/arch-sa1100/memory.h index 32d3d5bde34d..8743ff5c1b23 100644 --- a/include/asm-arm/arch-sa1100/memory.h +++ b/include/asm-arm/arch-sa1100/memory.h @@ -99,10 +99,6 @@ __arch_adjust_zones(int node, unsigned long *size, unsigned long *holes) #define LOCAL_MAP_NR(addr) \ (((unsigned long)(addr) & 0x07ffffff) >> PAGE_SHIFT) -#else - -#define PFN_TO_NID(addr) (0) - #endif #endif -- cgit v1.2.3 From 917afce1000d978dfd3f07da5da9d864bc49c77e Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 15 Sep 2005 13:00:25 +0100 Subject: [ARM] 2911/1: ixp2000_reg_{read,write} accessors Patch from Lennert Buytenhek This patch: - changes the ixp2000_reg_write accessor to take a 'volatile void *' instead of a 'volatile unsigned long *', which then allows passing in a u32 * as first argument without being greeted with a warning; and - adds an ixp2000_reg_read accessor. We can then use these accessors in ixp2000 code to access on-chip peripherals, instead of directly dereferencing pointers. This is for use by the ixp2000 microengine driver which was recently announced on netdev. We can't use readl/writel on the ixp2000 since it is usually run in big-endian mode, and on big-endian platforms, readl/writel perform byteswapping. A future patch will remove the readback from ixp2000_reg_write, since it's not needed to prevent erratum #66, and add manual readbacks to the places that need them (writes are not synchronous since we map in device space using XCB=101 nowadays), such as interrupt disabling and GPIO manipulation. See also: http://lists.arm.linux.org.uk/pipermail/linux-arm-kernel/2005-February/027084.html Patch has been ACKed by Jeff Garzik. 
Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- include/asm-arm/arch-ixp2000/platform.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-ixp2000/platform.h b/include/asm-arm/arch-ixp2000/platform.h index c0caf3e3e6fd..6519498dbe25 100644 --- a/include/asm-arm/arch-ixp2000/platform.h +++ b/include/asm-arm/arch-ixp2000/platform.h @@ -31,20 +31,24 @@ #include <asm/system.h> /* Pickup local_irq_ functions */ -static inline void ixp2000_reg_write(volatile unsigned long *reg, unsigned long val) +static inline void ixp2000_reg_write(volatile void *reg, unsigned long val) { - volatile unsigned long dummy; + unsigned long dummy; unsigned long flags; local_irq_save(flags); - *reg = val; + *((volatile unsigned long *)reg) = val; barrier(); - dummy = *reg; + dummy = *((volatile unsigned long *)reg); local_irq_restore(flags); } #else -#define ixp2000_reg_write(reg, val) (*reg = val) +static inline void ixp2000_reg_write(volatile void *reg, unsigned long val) +{ + *((volatile unsigned long *)reg) = val; +} #endif /* IXDP2400 || IXDP2401 */ +#define ixp2000_reg_read(reg) (*((volatile unsigned long *)reg)) /* * Boards may multiplex different devices on the 2nd channel of -- cgit v1.2.3 From 44449bbf4b051ef7bbea648602f4e21658fe2354 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 15 Sep 2005 13:00:52 +0100 Subject: [ARM] 2909/1: remove IXP2000_PROD_ID Patch from Lennert Buytenhek The intel docs call it IXP2000_PRODUCT_ID, and we have a definition for IXP2000_PRODUCT_ID as well, so IXP2000_PROD_ID can go. It's only used in one place. Signed-off-by: Lennert Buytenhek Signed-off-by: Deepak Saxena Signed-off-by: Russell King --- include/asm-arm/arch-ixp2000/ixp2000-regs.h | 2 -- include/asm-arm/arch-ixp2000/platform.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-ixp2000/ixp2000-regs.h b/include/asm-arm/arch-ixp2000/ixp2000-regs.h index 75623f81ef75..32aece069869 100644 --- a/include/asm-arm/arch-ixp2000/ixp2000-regs.h +++ b/include/asm-arm/arch-ixp2000/ixp2000-regs.h @@ -370,8 +370,6 @@ #define GLOBAL_REG_BASE (IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a00) #define GLOBAL_REG(x) (volatile unsigned long*)(GLOBAL_REG_BASE | (x)) -#define IXP2000_PROD_ID GLOBAL_REG(0x00) - #define IXP2000_MAJ_PROD_TYPE_MASK 0x001F0000 #define IXP2000_MAJ_PROD_TYPE_IXP2000 0x00000000 #define IXP2000_MIN_PROD_TYPE_MASK 0x0000FF00 diff --git a/include/asm-arm/arch-ixp2000/platform.h b/include/asm-arm/arch-ixp2000/platform.h index 6519498dbe25..abdcf51bd283 100644 --- a/include/asm-arm/arch-ixp2000/platform.h +++ b/include/asm-arm/arch-ixp2000/platform.h @@ -88,7 +88,7 @@ void ixp2000_release_slowport(struct slowport_cfg *); */ static inline unsigned ixp2000_has_broken_slowport(void) { - unsigned long id = *IXP2000_PROD_ID; + unsigned long id = *IXP2000_PRODUCT_ID; unsigned long id_prod = id & (IXP2000_MAJ_PROD_TYPE_MASK | IXP2000_MIN_PROD_TYPE_MASK); return (((id_prod == -- cgit v1.2.3 From f29d245549aa38325c37716dbecea8d817c00274 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Thu, 15 Sep 2005 14:53:22 +0100 Subject: [ARM] 2913/1: PXA Poodle: Cleanup some unneeded code Patch from Richard Purdie This patch cleans up the PXA Poodle platform code removing an unneeded static iomap. It also corrects errors in the platform header file and adds a missing GPIO define. 
Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/mach-pxa/poodle.c | 31 ++++++++++--------------------- include/asm-arm/arch-pxa/poodle.h | 25 +++++++++++++------------ 2 files changed, 23 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 47cfb8bb8318..7c4a1cebfd6b 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -126,6 +126,15 @@ static void __init poodle_init(void) { int ret = 0; + /* setup sleep mode values */ + PWER = 0x00000002; + PFER = 0x00000000; + PRER = 0x00000002; + PGSR0 = 0x00008000; + PGSR1 = 0x003F0202; + PGSR2 = 0x0001C000; + PCFR |= PCFR_OPDE; + /* cpu initialize */ /* Pgsr Register */ PGSR0 = 0x0146dd80; @@ -171,32 +180,12 @@ static void __init fixup_poodle(struct machine_desc *desc, sharpsl_save_param(); } -static struct map_desc poodle_io_desc[] __initdata = { - /* virtual physical length */ - { 0xef800000, 0x00000000, 0x00800000, MT_DEVICE }, /* Boot Flash */ -}; - -static void __init poodle_map_io(void) -{ - pxa_map_io(); - iotable_init(poodle_io_desc, ARRAY_SIZE(poodle_io_desc)); - - /* setup sleep mode values */ - PWER = 0x00000002; - PFER = 0x00000000; - PRER = 0x00000002; - PGSR0 = 0x00008000; - PGSR1 = 0x003F0202; - PGSR2 = 0x0001C000; - PCFR |= PCFR_OPDE; -} - MACHINE_START(POODLE, "SHARP Poodle") .phys_ram = 0xa0000000, .phys_io = 0x40000000, .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, .fixup = fixup_poodle, - .map_io = poodle_map_io, + .map_io = pxa_map_io, .init_irq = pxa_init_irq, .timer = &pxa_timer, .init_machine = poodle_init, diff --git a/include/asm-arm/arch-pxa/poodle.h b/include/asm-arm/arch-pxa/poodle.h index 58bda9d571a5..6b5ac5144e70 100644 --- a/include/asm-arm/arch-pxa/poodle.h +++ b/include/asm-arm/arch-pxa/poodle.h @@ -37,24 +37,25 @@ #define POODLE_GPIO_nSD_DETECT (9) #define POODLE_GPIO_MAIN_BAT_LOW (13) #define POODLE_GPIO_BAT_COVER (13) +#define POODLE_GPIO_USB_PULLUP (20) #define POODLE_GPIO_ADC_TEMP_ON (21) #define POODLE_GPIO_BYPASS_ON (36) #define POODLE_GPIO_CHRG_ON (38) #define POODLE_GPIO_CHRG_FULL (16) /* PXA GPIOs */ -#define POODLE_IRQ_GPIO_ON_KEY IRQ_GPIO0 -#define POODLE_IRQ_GPIO_AC_IN IRQ_GPIO1 -#define POODLE_IRQ_GPIO_HP_IN IRQ_GPIO4 -#define POODLE_IRQ_GPIO_CO IRQ_GPIO16 -#define POODLE_IRQ_GPIO_TP_INT IRQ_GPIO5 -#define POODLE_IRQ_GPIO_WAKEUP IRQ_GPIO11 -#define POODLE_IRQ_GPIO_GA_INT IRQ_GPIO10 -#define POODLE_IRQ_GPIO_CF_IRQ IRQ_GPIO17 -#define POODLE_IRQ_GPIO_CF_CD IRQ_GPIO14 -#define POODLE_IRQ_GPIO_nSD_INT IRQ_GPIO8 -#define POODLE_IRQ_GPIO_nSD_DETECT IRQ_GPIO9 -#define POODLE_IRQ_GPIO_MAIN_BAT_LOW IRQ_GPIO13 +#define POODLE_IRQ_GPIO_ON_KEY IRQ_GPIO(0) +#define POODLE_IRQ_GPIO_AC_IN IRQ_GPIO(1) +#define POODLE_IRQ_GPIO_HP_IN IRQ_GPIO(4) +#define POODLE_IRQ_GPIO_CO IRQ_GPIO(16) +#define POODLE_IRQ_GPIO_TP_INT IRQ_GPIO(5) +#define POODLE_IRQ_GPIO_WAKEUP IRQ_GPIO(11) +#define POODLE_IRQ_GPIO_GA_INT IRQ_GPIO(10) +#define POODLE_IRQ_GPIO_CF_IRQ IRQ_GPIO(17) +#define POODLE_IRQ_GPIO_CF_CD IRQ_GPIO(14) +#define POODLE_IRQ_GPIO_nSD_INT IRQ_GPIO(8) +#define POODLE_IRQ_GPIO_nSD_DETECT IRQ_GPIO(9) +#define POODLE_IRQ_GPIO_MAIN_BAT_LOW IRQ_GPIO(13) /* SCOOP GPIOs */ #define POODLE_SCOOP_CHARGE_ON SCOOP_GPCR_PA11 -- cgit v1.2.3 From 1b3cb73f7306f97a68fa973dec9f3c3b68bd29cf Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 15 Sep 2005 15:17:59 +0100 Subject: [ARM] Tighten pfn_valid() test. 
Thomas Gleixner reported that mmaping and unmapping each physical page in turn eventually caused the kernel to oops. It appears that pfn_valid() in the discontigmem case was too simplistic for proper operation. Tighten the logic so we also check if the PFN is within the range of the selected memory node. Signed-off-by: Russell King --- include/asm-arm/memory.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/memory.h b/include/asm-arm/memory.h index e47bea7d1723..a8a933a775db 100644 --- a/include/asm-arm/memory.h +++ b/include/asm-arm/memory.h @@ -160,12 +160,25 @@ static inline __deprecated void *bus_to_virt(unsigned long x) #define page_to_pfn(page) \ (( (page) - page_zone(page)->zone_mem_map) \ + page_zone(page)->zone_start_pfn) + #define pfn_to_page(pfn) \ (PFN_TO_MAPBASE(pfn) + LOCAL_MAP_NR((pfn) << PAGE_SHIFT)) -#define pfn_valid(pfn) (PFN_TO_NID(pfn) < MAX_NUMNODES) + +#define pfn_valid(pfn) \ + ({ \ + unsigned int nid = PFN_TO_NID(pfn); \ + int valid = nid < MAX_NUMNODES; \ + if (valid) { \ + pg_data_t *node = NODE_DATA(nid); \ + valid = (pfn - node->node_start_pfn) < \ + node->node_spanned_pages; \ + } \ + valid; \ + }) #define virt_to_page(kaddr) \ (ADDR_TO_MAPBASE(kaddr) + LOCAL_MAP_NR(kaddr)) + #define virt_addr_valid(kaddr) (KVADDR_TO_NID(kaddr) < MAX_NUMNODES) /* -- cgit v1.2.3 From 17b14451fd2b187ddd6303726755a3af0a926b6c Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 15 Sep 2005 15:44:00 +0100 Subject: [PATCH] PATCH: remove function for non-PCI as requested Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 81 ++++++++++++++++++++++++++++------------------ include/linux/libata.h | 1 + 2 files changed, 50 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 5cc53cd9323e..72bdc91e148c 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -4122,6 +4122,53 @@ err_out: return 0; } +/** + * ata_host_set_remove - PCI layer callback for device removal + * @host_set: ATA host set that was removed + * + * Unregister all objects associated with this host set. Free those + * objects. + * + * LOCKING: + * Inherited from calling layer (may sleep). 
+ */ + + +void ata_host_set_remove(struct ata_host_set *host_set) +{ + struct ata_port *ap; + unsigned int i; + + for (i = 0; i < host_set->n_ports; i++) { + ap = host_set->ports[i]; + scsi_remove_host(ap->host); + } + + free_irq(host_set->irq, host_set); + + for (i = 0; i < host_set->n_ports; i++) { + ap = host_set->ports[i]; + + ata_scsi_release(ap->host); + + if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) { + struct ata_ioports *ioaddr = &ap->ioaddr; + + if (ioaddr->cmd_addr == 0x1f0) + release_region(0x1f0, 8); + else if (ioaddr->cmd_addr == 0x170) + release_region(0x170, 8); + } + + scsi_host_put(ap->host); + } + + if (host_set->ops->host_stop) + host_set->ops->host_stop(host_set); + + kfree(host_set); +} + /** * ata_scsi_release - SCSI layer callback hook for host unload * @host: libata host to be unloaded @@ -4462,39 +4509,8 @@ void ata_pci_remove_one (struct pci_dev *pdev) { struct device *dev = pci_dev_to_dev(pdev); struct ata_host_set *host_set = dev_get_drvdata(dev); - struct ata_port *ap; - unsigned int i; - - for (i = 0; i < host_set->n_ports; i++) { - ap = host_set->ports[i]; - - scsi_remove_host(ap->host); - } - - free_irq(host_set->irq, host_set); - - for (i = 0; i < host_set->n_ports; i++) { - ap = host_set->ports[i]; - - ata_scsi_release(ap->host); - - if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) { - struct ata_ioports *ioaddr = &ap->ioaddr; - - if (ioaddr->cmd_addr == 0x1f0) - release_region(0x1f0, 8); - else if (ioaddr->cmd_addr == 0x170) - release_region(0x170, 8); - } - - scsi_host_put(ap->host); - } - - if (host_set->ops->host_stop) - host_set->ops->host_stop(host_set); - - kfree(host_set); + ata_host_set_remove(host_set); pci_release_regions(pdev); pci_disable_device(pdev); dev_set_drvdata(dev, NULL); @@ -4564,6 +4580,7 @@ module_exit(ata_exit); EXPORT_SYMBOL_GPL(ata_std_bios_param); EXPORT_SYMBOL_GPL(ata_std_ports); EXPORT_SYMBOL_GPL(ata_device_add); +EXPORT_SYMBOL_GPL(ata_host_set_remove); EXPORT_SYMBOL_GPL(ata_sg_init); EXPORT_SYMBOL_GPL(ata_sg_init_one); EXPORT_SYMBOL_GPL(ata_qc_complete); diff --git a/include/linux/libata.h b/include/linux/libata.h index 022105c745fc..ceee1fc42c60 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -393,6 +393,7 @@ extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_i extern void ata_pci_remove_one (struct pci_dev *pdev); #endif /* CONFIG_PCI */ extern int ata_device_add(struct ata_probe_ent *ent); +extern void ata_host_set_remove(struct ata_host_set *host_set); extern int ata_scsi_detect(Scsi_Host_Template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); -- cgit v1.2.3 From 1832a5862f2e1b4e5835611ee14bc30a8ed3cad5 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 16 Sep 2005 11:01:14 +0200 Subject: [SCSI] change port speed definitions for scsi_transport_fc obviously FC Port Speeds in scsi_transport_fc.h are defined according to FC-HBA: #define FC_PORTSPEED_1GBIT 1 #define FC_PORTSPEED_2GBIT 2 #define FC_PORTSPEED_10GBIT 4 #define FC_PORTSPEED_4GBIT 8 Problem is, whoever invented FC-HBA did not care about FC-FS or FC-GS-x. Following FC-FS/FC-GS-x defintions of port speeds would look like: 1 GBit: 0x0001 2 GBit: 0x0002 4 GBit: 0x0004 10GBit: 0x0008 (and new in FC-LS: 8 Gbit: 0x0010 16GBit: 0x0020) I really appreciate if scsi_transport_fc.h would define port speeds according to FC-GS-x/FC-FS. 
Thus mapping of port speed capabilities to values defined in scsi_transport_fc.h can be avoided in the LLDD. Attached is a patch to change the definitions. Signed-off-by: James Bottomley --- include/scsi/scsi_transport_fc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 115db056dc6b..b0d445437372 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -103,8 +103,8 @@ enum fc_port_state { incapable of reporting */ #define FC_PORTSPEED_1GBIT 1 #define FC_PORTSPEED_2GBIT 2 -#define FC_PORTSPEED_10GBIT 4 -#define FC_PORTSPEED_4GBIT 8 +#define FC_PORTSPEED_4GBIT 4 +#define FC_PORTSPEED_10GBIT 8 #define FC_PORTSPEED_NOT_NEGOTIATED (1 << 15) /* Speed not established */ /* -- cgit v1.2.3 From a063cf5b7dde94d98f3f7c9f1c951e02c6564022 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Fri, 16 Sep 2005 19:32:53 +0200 Subject: [PATCH] Add PCI IDs for Sitecom DC-105 Sitecom DC-105 PCI work with hfc_pci HiSax driver Signed-off-by: Karsten Keil Signed-off-by: Linus Torvalds --- drivers/isdn/hisax/hfc_pci.c | 1 + include/linux/pci_ids.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 8337b0f26cc4..4866fc32d8d9 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -61,6 +61,7 @@ static const PCI_ENTRY id_list[] = {PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_DIGI_DF_M_E,"Digi International", "Digi DataFire Micro V (Europe)"}, {PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_DIGI_DF_M_IOM2_A,"Digi International", "Digi DataFire Micro V IOM2 (North America)"}, {PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_DIGI_DF_M_A,"Digi International", "Digi DataFire Micro V (North America)"}, + {PCI_VENDOR_ID_SITECOM, PCI_DEVICE_ID_SITECOM_DC105V2, "Sitecom Europe", "DC-105 ISDN PCI"}, {0, 0, NULL, NULL}, }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 72fe3385743c..b6ef7eb4bcbf 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2252,6 +2252,9 @@ #define PCI_VENDOR_ID_INFINICON 0x1820 +#define PCI_VENDOR_ID_SITECOM 0x182d +#define PCI_DEVICE_ID_SITECOM_DC105V2 0x3069 + #define PCI_VENDOR_ID_TOPSPIN 0x1867 #define PCI_VENDOR_ID_TDI 0x192E -- cgit v1.2.3 From 06168d8a10ceccced51380d683245b33474d428a Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Fri, 16 Sep 2005 19:34:17 +0200 Subject: [PATCH] cleanup whitespace in pci_ids.h Signed-off-by: Karsten Keil Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b6ef7eb4bcbf..486d1c1676bd 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2254,7 +2254,7 @@ #define PCI_VENDOR_ID_SITECOM 0x182d #define PCI_DEVICE_ID_SITECOM_DC105V2 0x3069 - + #define PCI_VENDOR_ID_TOPSPIN 0x1867 #define PCI_VENDOR_ID_TDI 0x192E -- cgit v1.2.3 From 67e6b629212fa9ffb7420e8a88a41806af637e28 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Sep 2005 16:58:40 -0700 Subject: [DCCP]: Introduce DCCP_SOCKOPT_SERVICE As discussed in the dccp@vger mailing list: Now applications have to use setsockopt(DCCP_SOCKOPT_SERVICE, service[s]), prior to calling listen() and connect(). An array of unsigned ints can be passed meaning that the listening sock accepts connection requests for several services. 
With this we can ditch struct sockaddr_dccp and use only sockaddr_in (and sockaddr_in6 in the future). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 40 ++++++++++++++++++------- net/dccp/dccp.h | 9 +++--- net/dccp/ipv4.c | 30 +++++++++++++++++-- net/dccp/minisocks.c | 6 ++-- net/dccp/output.c | 14 ++++----- net/dccp/proto.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 154 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 8bf4bacb5051..0e72708677e4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -4,16 +4,6 @@ #include <linux/types.h> #include <asm/byteorder.h> -/* Structure describing an Internet (DCCP) socket address. */ -struct sockaddr_dccp { - __u16 sdccp_family; /* Address family */ - __u16 sdccp_port; /* Port number */ - __u32 sdccp_addr; /* Internet address */ - __u32 sdccp_service; /* Service */ - /* Pad to size of `struct sockaddr': 16 bytes . */ - __u32 sdccp_pad; -}; - /** * struct dccp_hdr - generic part of DCCP packet header * @@ -188,6 +178,9 @@ enum { /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 +#define DCCP_SOCKOPT_SERVICE 2 + +#define DCCP_SERVICE_LIST_MAX_LEN 32 #ifdef __KERNEL__ @@ -382,6 +375,25 @@ enum dccp_role { DCCP_ROLE_SERVER, }; +struct dccp_service_list { + __u32 dccpsl_nr; + __u32 dccpsl_list[0]; +}; + +#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) + +static inline int dccp_list_has_service(const struct dccp_service_list *sl, + const u32 service) +{ + if (likely(sl != NULL)) { + u32 i = sl->dccpsl_nr; + while (i--) + if (sl->dccpsl_list[i] == service) + return 1; + } + return 0; +} + /** * struct dccp_sock - DCCP socket state * @@ -417,7 +429,8 @@ struct dccp_sock { __u64 dccps_gss; __u64 dccps_gsr; __u64 dccps_gar; - unsigned long dccps_service; + __u32 dccps_service; + struct dccp_service_list *dccps_service_list; struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; __u32 dccps_packet_size; @@ -443,6 +456,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } +static inline int dccp_service_not_initialized(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE; +} + static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 95c4630b3b18..be7a660b6b24 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -258,13 +258,12 @@ extern int dccp_v4_send_reset(struct sock *sk, extern void dccp_send_close(struct sock *sk, const int active); struct dccp_skb_cb { - __u8 dccpd_type; - __u8 dccpd_reset_code; - __u8 dccpd_service; - __u8 dccpd_ccval; + __u8 dccpd_type:4; + __u8 dccpd_ccval:4; + __u8 dccpd_reset_code; + __u16 dccpd_opt_len; __u64 dccpd_seq; __u64 dccpd_ack_seq; - int dccpd_opt_len; }; #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e09907d8b7da..94a440b2685b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -246,6 +246,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, dp->dccps_role = DCCP_ROLE_CLIENT; + if (dccp_service_not_initialized(sk)) + return -EPROTO; + if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -661,6 +664,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } +static inline int dccp_bad_service_code(const struct sock *sk, + const __u32 service) +{ + 
const struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_service == service) + return 0; + return !dccp_list_has_service(dp->dccps_service_list, service); +} + int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -669,6 +682,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __u32 saddr = skb->nh.iph->saddr; const __u32 daddr = skb->nh.iph->daddr; + const __u32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; struct dst_entry *dst = NULL; @@ -680,6 +694,10 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } /* * TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -722,9 +740,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * dccp_create_openreq_child. */ dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = service; if (dccp_v4_send_response(sk, req, dst)) goto drop_and_free; @@ -1284,6 +1302,7 @@ static int dccp_v4_init_sock(struct sock *sk) sk->sk_write_space = dccp_write_space; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; + dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; return 0; } @@ -1305,6 +1324,11 @@ static int dccp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(&dccp_hashinfo, sk); + if (dp->dccps_service_list != NULL) { + kfree(dp->dccps_service_list); + dp->dccps_service_list = NULL; + } + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 18461bc04cbe..933e10db1789 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -93,9 +93,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct inet_connection_sock *newicsk = inet_csk(sk); struct dccp_sock *newdp = dccp_sk(newsk); + newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackpkts = NULL; - newdp->dccps_role = DCCP_ROLE_SERVER; - newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + newdp->dccps_service_list = NULL; + newdp->dccps_service = dreq->dreq_service; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { diff --git a/net/dccp/output.c b/net/dccp/output.c index ea6d0e91e511..156b1d29a156 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -85,7 +85,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = - dcb->dccpd_service; + dp->dccps_service; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -270,6 +270,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, struct request_sock *req) { struct dccp_hdr *dh; + struct dccp_request_sock *dreq; const int dccp_header_size = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_response); @@ -285,8 +286,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb->dst = 
dst_clone(dst); skb->csum = 0; + dreq = dccp_rsk(req); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; + DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; dccp_insert_options(sk, skb); skb->h.raw = skb_push(skb, dccp_header_size); @@ -300,8 +302,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); + dccp_hdr_set_seq(dh, dreq->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); + dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, inet_rsk(req)->rmt_addr); @@ -397,9 +400,6 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, MAX_DCCP_HEADER); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - /* FIXME: set service to something meaningful, coming - * from userspace*/ - DCCP_SKB_CB(skb)->dccpd_service = 0; skb->csum = 0; skb_set_owner_w(skb, sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 18a0e69c9dc7..9bda2868eba6 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name); static inline int dccp_listen_start(struct sock *sk) { - dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; + struct dccp_sock *dp = dccp_sk(sk); + + dp->dccps_role = DCCP_ROLE_LISTEN; + /* + * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) + * before calling listen() + */ + if (dccp_service_not_initialized(sk)) + return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } +static int dccp_setsockopt_service(struct sock *sk, const u32 service, + char __user *optval, int optlen) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_service_list *sl = NULL; + + if (service == DCCP_SERVICE_INVALID_VALUE || + optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) + return -EINVAL; + + if (optlen > sizeof(service)) { + sl = kmalloc(optlen, GFP_KERNEL); + if (sl == NULL) + return -ENOMEM; + + sl->dccpsl_nr = optlen / sizeof(u32) - 1; + if (copy_from_user(sl->dccpsl_list, + optval + sizeof(service), + optlen - sizeof(service)) || + dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { + kfree(sl); + return -EFAULT; + } + } + + lock_sock(sk); + dp->dccps_service = service; + + if (dp->dccps_service_list != NULL) + kfree(dp->dccps_service_list); + + dp->dccps_service_list = sl; + release_sock(sk); + return 0; +} + int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; - lock_sock(sk); + if (optname == DCCP_SOCKOPT_SERVICE) + return dccp_setsockopt_service(sk, val, optval, optlen); + lock_sock(sk); dp = dccp_sk(sk); err = 0; @@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, return err; } +static int dccp_getsockopt_service(struct sock *sk, int len, + u32 __user *optval, + int __user *optlen) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const struct dccp_service_list *sl; + int err = -ENOENT, slen = 0, total_len = sizeof(u32); + + lock_sock(sk); + if (dccp_service_not_initialized(sk)) + goto out; + + if ((sl = dp->dccps_service_list) != NULL) { + slen = sl->dccpsl_nr * 
sizeof(u32); + total_len += slen; + } + + err = -EINVAL; + if (total_len > len) + goto out; + + err = 0; + if (put_user(total_len, optlen) || + put_user(dp->dccps_service, optval) || + (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) + err = -EFAULT; +out: + release_sock(sk); + return err; +} + int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -248,6 +325,10 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; + if (optname == DCCP_SOCKOPT_SERVICE) + return dccp_getsockopt_service(sk, len, + (u32 __user *)optval, optlen); + len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; -- cgit v1.2.3 From 1cbf07478bbf3e350a2025bc5ea23fedaa95855a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 16 Sep 2005 16:59:20 -0700 Subject: [TG3]: Add AMD K8 to list of write-reorder chipsets. Thanks to Andy Stewart for the report and testing debug patches from Michael Chan. Signed-off-by: David S. Miller --- drivers/net/tg3.c | 8 +++++--- include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 7599f52e15b3..52eae9f096e2 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -67,8 +67,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.39" -#define DRV_MODULE_RELDATE "September 5, 2005" +#define DRV_MODULE_VERSION "3.40" +#define DRV_MODULE_RELDATE "September 15, 2005" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -9271,6 +9271,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) static struct pci_device_id write_reorder_chipsets[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_K8_NB) }, { }, }; u32 misc_ctrl_reg; @@ -9285,7 +9287,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->tg3_flags2 |= TG3_FLG2_SUN_570X; #endif - /* If we have an AMD 762 chipset, write + /* If we have an AMD 762 or K8 chipset, write * reordering to the mailbox registers done by the host * controller can cause major troubles. We read back from * every mailbox register write to force the writes to be diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 72fe3385743c..3dc161894c7b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -491,6 +491,7 @@ #define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 #define PCI_VENDOR_ID_AMD 0x1022 +#define PCI_DEVICE_ID_AMD_K8_NB 0x1100 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -- cgit v1.2.3 From 3eddddcf239c89bbd3c50d1440001a3d384ed40a Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Fri, 16 Sep 2005 19:27:46 -0700 Subject: [PATCH] uml: breakpoint an arbitrary thread This patch implements a stack trace for a thread, not unlike sysrq-t does. The advantage to this is that a break point can be placed on showreqs, so that upon showing the stack, you jump immediately into the debugger. While sysrq-t does the same thing, sysrq-t shows *all* threads stacks. It also doesn't work right now. In the future, I thought it might be acceptable to make this show all pids stacks, but perhaps leaving well enough alone and just using sysrq-t would be okay. For now, upon receiving the stack command, UML switches context to that thread, dumps its registers, and then switches context back to the original thread. 
Since UML compacts all threads into one of 4 host threads, this sort of mechanism could be expanded in the future to include other debugging helpers that sysrq does not cover. Note by jdike - The main benefit to this is that it brings an arbitrary thread back into context, where it can be examined by gdb. The fact that it dumps it stack is secondary. This provides the capability to examine a sleeping thread, which has existed in tt mode, but not in skas mode until now. Also, the other threads, that sysrq doesn't cover, can be gdb-ed directly anyway. Signed-off-by: Allan Graves Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/mconsole_kern.c | 52 ++++++++++++++++++++++++++++++++++++++ arch/um/drivers/mconsole_user.c | 1 + arch/um/include/mconsole.h | 1 + arch/um/kernel/process_kern.c | 9 ++++++- include/asm-um/processor-generic.h | 1 + 5 files changed, 63 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index c190c2414197..12c95368124a 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -32,6 +32,7 @@ #include "os.h" #include "umid.h" #include "irq_kern.h" +#include "choose-mode.h" static int do_unlink_socket(struct notifier_block *notifier, unsigned long what, void *data) @@ -276,6 +277,7 @@ void mconsole_proc(struct mc_request *req) go - continue the UML after a 'stop' \n\ log - make UML enter into the kernel log\n\ proc <file> - returns the contents of the UML's /proc/<file>\n\ + stack <pid> - returns the stack of the specified pid\n\ " void mconsole_help(struct mc_request *req) @@ -479,6 +481,56 @@ void mconsole_sysrq(struct mc_request *req) } #endif +/* Mconsole stack trace + * Added by Allan Graves, Jeff Dike + * Dumps a stacks registers to the linux console. + * Usage stack <pid>. + */ +void do_stack(struct mc_request *req) +{ + char *ptr = req->request.data; + int pid_requested= -1; + struct task_struct *from = NULL; + struct task_struct *to = NULL; + + /* Would be nice: + * 1) Send showregs output to mconsole. + * 2) Add a way to stack dump all pids. + */ + + ptr += strlen("stack"); + while(isspace(*ptr)) ptr++; + + /* Should really check for multiple pids or reject bad args here */ + /* What do the arguments in mconsole_reply mean? */ + if(sscanf(ptr, "%d", &pid_requested) == 0){ + mconsole_reply(req, "Please specify a pid", 1, 0); + return; + } + + from = current; + to = find_task_by_pid(pid_requested); + + if((to == NULL) || (pid_requested == 0)) { + mconsole_reply(req, "Couldn't find that pid", 1, 0); + return; + } + to->thread.saved_task = current; + + switch_to(from, to, from); + mconsole_reply(req, "Stack Dumped to console and message log", 0, 0); +} + +void mconsole_stack(struct mc_request *req) +{ + /* This command doesn't work in TT mode, so let's check and then + * get out of here + */ + CHOOSE_MODE(mconsole_reply(req, "Sorry, this doesn't work in TT mode", + 1, 0), + do_stack(req)); +} + /* Changed by mconsole_setup, which is __setup, and called before SMP is * active. 
*/ diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index fe5afb13252c..a5b8aeade1c5 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -30,6 +30,7 @@ static struct mconsole_command commands[] = { { "go", mconsole_go, MCONSOLE_INTR }, { "log", mconsole_log, MCONSOLE_INTR }, { "proc", mconsole_proc, MCONSOLE_PROC }, + { "stack", mconsole_stack, MCONSOLE_INTR }, }; /* Initialized in mconsole_init, which is an initcall */ diff --git a/arch/um/include/mconsole.h b/arch/um/include/mconsole.h index cfa368e045a5..b1b512f47035 100644 --- a/arch/um/include/mconsole.h +++ b/arch/um/include/mconsole.h @@ -81,6 +81,7 @@ extern void mconsole_stop(struct mc_request *req); extern void mconsole_go(struct mc_request *req); extern void mconsole_log(struct mc_request *req); extern void mconsole_proc(struct mc_request *req); +extern void mconsole_stack(struct mc_request *req); extern int mconsole_get_request(int fd, struct mc_request *req); extern int mconsole_notify(char *sock_name, int type, const void *data, diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 1143f5e522b2..39cf568ccfaf 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -119,7 +119,14 @@ void *_switch_to(void *prev, void *next, void *last) to->thread.prev_sched = from; set_current(to); - CHOOSE_MODE_PROC(switch_to_tt, switch_to_skas, prev, next); + do { + current->thread.saved_task = NULL ; + CHOOSE_MODE_PROC(switch_to_tt, switch_to_skas, prev, next); + if(current->thread.saved_task) + show_regs(&(current->thread.regs)); + next= current->thread.saved_task; + prev= current; + } while(current->thread.saved_task); return(current->thread.prev_sched); diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h index b2fc94fbc2d9..2d242360c3d6 100644 --- a/include/asm-um/processor-generic.h +++ b/include/asm-um/processor-generic.h @@ -21,6 +21,7 @@ struct thread_struct { * copy_thread) to mark that we are begin called from userspace (fork / * vfork / clone), and reset to 0 after. It is left to 0 when called * from kernelspace (i.e. kernel_thread() or fork_idle(), as of 2.6.11). */ + struct task_struct *saved_task; int forking; int nsyscalls; struct pt_regs regs; -- cgit v1.2.3 From 73a0b538ee573a76cba59cdc9f177a71776d4678 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 16 Sep 2005 19:27:55 -0700 Subject: [PATCH] x86_64: desc.h-needs smp.h include/asm/desc.h: In function `load_LDT': include/asm/desc.h:209: warning: implicit declaration of function `get_cpu' include/asm/desc.h:211: warning: implicit declaration of function `put_cpu' Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-x86_64/desc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index 594e610f4a1e..68ac3c62fe3d 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -8,6 +8,8 @@ #ifndef __ASSEMBLY__ #include +#include + #include #include -- cgit v1.2.3 From 514ccd4e6c414d8064d53235f7fc09fc02ec2078 Mon Sep 17 00:00:00 2001 From: Vitaly Bordug Date: Fri, 16 Sep 2005 19:28:00 -0700 Subject: [PATCH] ppc32: Add ppc_sys descriptions for PowerQUICC I devices Added ppc_sys device and system definitions for PowerQUICC I devices. This will allow drivers for PQI to be proper platform device drivers. Currently sys section contains only MPC885 and MPC866. 
Identification should be done with identify_ppc_sys_by_name call, with board-specific "name" string passed, since PQI do not have any register that could identify the SOC. Signed-off-by: Vitaly Bordug Signed-off-by: Kumar Gala Signed-off-by: Marcelo Tosatti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/syslib/Makefile | 3 +- arch/ppc/syslib/mpc8xx_devices.c | 224 +++++++++++++++++++++++++++++++++++++++ arch/ppc/syslib/mpc8xx_sys.c | 61 +++++++++++ include/asm-ppc/irq.h | 10 ++ include/asm-ppc/mpc8xx.h | 16 +++ include/asm-ppc/ppc_sys.h | 2 + 6 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 arch/ppc/syslib/mpc8xx_devices.c create mode 100644 arch/ppc/syslib/mpc8xx_sys.c (limited to 'include') diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile index 8b9b226005d1..b8d08f33f7ee 100644 --- a/arch/ppc/syslib/Makefile +++ b/arch/ppc/syslib/Makefile @@ -34,7 +34,8 @@ ifeq ($(CONFIG_40x),y) obj-$(CONFIG_PCI) += indirect_pci.o pci_auto.o ppc405_pci.o endif endif -obj-$(CONFIG_8xx) += m8xx_setup.o ppc8xx_pic.o $(wdt-mpc8xx-y) +obj-$(CONFIG_8xx) += m8xx_setup.o ppc8xx_pic.o $(wdt-mpc8xx-y) \ + ppc_sys.o mpc8xx_devices.o mpc8xx_sys.o ifeq ($(CONFIG_8xx),y) obj-$(CONFIG_PCI) += qspan_pci.o i8259.o endif diff --git a/arch/ppc/syslib/mpc8xx_devices.c b/arch/ppc/syslib/mpc8xx_devices.c new file mode 100644 index 000000000000..2b5f0e701687 --- /dev/null +++ b/arch/ppc/syslib/mpc8xx_devices.c @@ -0,0 +1,224 @@ +/* + * arch/ppc/syslib/mpc8xx_devices.c + * + * MPC8xx Device descriptions + * + * Maintainer: Kumar Gala + * + * Copyright 2005 MontaVista Software, Inc. by Vitaly Bordug + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* We use offsets for IORESOURCE_MEM to do not set dependences at compile time. 
+ * They will get fixed up by mach_mpc8xx_fixup + */ + +struct platform_device ppc_sys_platform_devices[] = { + [MPC8xx_CPM_FEC1] = { + .name = "fsl-cpm-fec", + .id = 1, + .num_resources = 2, + .resource = (struct resource[]) { + { + .name = "regs", + .start = 0xe00, + .end = 0xe88, + .flags = IORESOURCE_MEM, + }, + { + .name = "interrupt", + .start = MPC8xx_INT_FEC1, + .end = MPC8xx_INT_FEC1, + .flags = IORESOURCE_IRQ, + }, + }, + }, + [MPC8xx_CPM_FEC2] = { + .name = "fsl-cpm-fec", + .id = 2, + .num_resources = 2, + .resource = (struct resource[]) { + { + .name = "regs", + .start = 0x1e00, + .end = 0x1e88, + .flags = IORESOURCE_MEM, + }, + { + .name = "interrupt", + .start = MPC8xx_INT_FEC2, + .end = MPC8xx_INT_FEC2, + .flags = IORESOURCE_IRQ, + }, + }, + }, + [MPC8xx_CPM_SCC1] = { + .name = "fsl-cpm-scc", + .id = 1, + .num_resources = 3, + .resource = (struct resource[]) { + { + .name = "regs", + .start = 0xa00, + .end = 0xa18, + .flags = IORESOURCE_MEM, + }, + { + .name = "pram", + .start = 0x3c00, + .end = 0x3c80, + .flags = IORESOURCE_MEM, + }, + { + .name = "interrupt", + .start = MPC8xx_INT_SCC1, + .end = MPC8xx_INT_SCC1, + .flags = IORESOURCE_IRQ, + }, + }, + }, + [MPC8xx_CPM_SCC2] = { + .name = "fsl-cpm-scc", + .id = 2, + .num_resources = 3, + .resource = (struct resource[]) { + { + .name = "regs", + .start = 0xa20, + .end = 0xa38, + .flags = IORESOURCE_MEM, + }, + { + .name = "pram", + .start = 0x3d00, + .end = 0x3d80, + .flags = IORESOURCE_MEM, + }, + + { + .name = "interrupt", + .start = MPC8xx_INT_SCC2, + .end = MPC8xx_INT_SCC2, + .flags = IORESOURCE_IRQ, + }, + }, + }, + [MPC8xx_CPM_SCC3] = { + .name = "fsl-cpm-scc", + .id = 3, + .num_resources = 3, + .resource = (struct resource[]) { + { + .name = "regs", + .start = 0xa40, + .end = 0xa58, + .flags = IORESOURCE_MEM, + }, + { + .name = "pram", + .start = 0x3e00, + .end = 0x3e80, + .flags = IORESOURCE_MEM, + }, + + { + .name = "interrupt", + .start = MPC8xx_INT_SCC3, + .end = MPC8xx_INT_SCC3, + .flags = IORESOURCE_IRQ, + }, + }, + }, + [MPC8xx_CPM_SCC4] = { + .name = "fsl-cpm-scc", + .id = 4, + .num_resources = 3, + .resource = (struct resource[]) { + { + .name = "regs", + .start = 0xa60, + .end = 0xa78, + .flags = IORESOURCE_MEM, + }, + { + .name = "pram", + .start = 0x3f00, + .end = 0x3f80, + .flags = IORESOURCE_MEM, + }, + + { + .name = "interrupt", + .start = MPC8xx_INT_SCC4, + .end = MPC8xx_INT_SCC4, + .flags = IORESOURCE_IRQ, + }, + }, + }, + [MPC8xx_CPM_SMC1] = { + .name = "fsl-cpm-smc", + .id = 1, + .num_resources = 2, + .resource = (struct resource[]) { + { + .name = "regs", + .start = 0xa82, + .end = 0xa91, + .flags = IORESOURCE_MEM, + }, + { + .name = "interrupt", + .start = MPC8xx_INT_SMC1, + .end = MPC8xx_INT_SMC1, + .flags = IORESOURCE_IRQ, + }, + }, + }, + [MPC8xx_CPM_SMC2] = { + .name = "fsl-cpm-smc", + .id = 2, + .num_resources = 2, + .resource = (struct resource[]) { + { + .name = "regs", + .start = 0xa92, + .end = 0xaa1, + .flags = IORESOURCE_MEM, + }, + { + .name = "interrupt", + .start = MPC8xx_INT_SMC2, + .end = MPC8xx_INT_SMC2, + .flags = IORESOURCE_IRQ, + }, + }, + }, +}; + +static int __init mach_mpc8xx_fixup(struct platform_device *pdev) +{ + ppc_sys_fixup_mem_resource (pdev, IMAP_ADDR); + return 0; +} + +static int __init mach_mpc8xx_init(void) +{ + ppc_sys_device_fixup = mach_mpc8xx_fixup; + return 0; +} + +postcore_initcall(mach_mpc8xx_init); diff --git a/arch/ppc/syslib/mpc8xx_sys.c b/arch/ppc/syslib/mpc8xx_sys.c new file mode 100644 index 000000000000..a532ccc861c0 --- /dev/null +++ 
b/arch/ppc/syslib/mpc8xx_sys.c @@ -0,0 +1,61 @@ +/* + * arch/ppc/platforms/mpc8xx_sys.c + * + * MPC8xx System descriptions + * + * Maintainer: Kumar Gala + * + * Copyright 2005 MontaVista Software, Inc. by Vitaly Bordug + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include + +struct ppc_sys_spec *cur_ppc_sys_spec; +struct ppc_sys_spec ppc_sys_specs[] = { + { + .ppc_sys_name = "MPC86X", + .mask = 0xFFFFFFFF, + .value = 0x00000000, + .num_devices = 2, + .device_list = (enum ppc_sys_devices[]) + { + MPC8xx_CPM_FEC1, + MPC8xx_CPM_SCC1, + MPC8xx_CPM_SCC2, + MPC8xx_CPM_SCC3, + MPC8xx_CPM_SCC4, + MPC8xx_CPM_SMC1, + MPC8xx_CPM_SMC2, + }, + }, + { + .ppc_sys_name = "MPC885", + .mask = 0xFFFFFFFF, + .value = 0x00000000, + .num_devices = 3, + .device_list = (enum ppc_sys_devices[]) + { + MPC8xx_CPM_FEC1, + MPC8xx_CPM_FEC2, + MPC8xx_CPM_SCC1, + MPC8xx_CPM_SCC2, + MPC8xx_CPM_SCC3, + MPC8xx_CPM_SCC4, + MPC8xx_CPM_SMC1, + MPC8xx_CPM_SMC2, + }, + }, + { /* default match */ + .ppc_sys_name = "", + .mask = 0x00000000, + .value = 0x00000000, + }, +}; diff --git a/include/asm-ppc/irq.h b/include/asm-ppc/irq.h index 55752474d0d9..bd9674807f05 100644 --- a/include/asm-ppc/irq.h +++ b/include/asm-ppc/irq.h @@ -138,6 +138,16 @@ irq_canonicalize(int irq) #define SIU_IRQ7 (14) #define SIU_LEVEL7 (15) +#define MPC8xx_INT_FEC1 SIU_LEVEL1 +#define MPC8xx_INT_FEC2 SIU_LEVEL3 + +#define MPC8xx_INT_SCC1 (CPM_IRQ_OFFSET + CPMVEC_SCC1) +#define MPC8xx_INT_SCC2 (CPM_IRQ_OFFSET + CPMVEC_SCC2) +#define MPC8xx_INT_SCC3 (CPM_IRQ_OFFSET + CPMVEC_SCC3) +#define MPC8xx_INT_SCC4 (CPM_IRQ_OFFSET + CPMVEC_SCC4) +#define MPC8xx_INT_SMC1 (CPM_IRQ_OFFSET + CPMVEC_SMC1) +#define MPC8xx_INT_SMC2 (CPM_IRQ_OFFSET + CPMVEC_SMC2) + /* The internal interrupts we can configure as we see fit. * My personal preference is CPM at level 2, which puts it above the * MBX PCI/ISA/IDE interrupts. diff --git a/include/asm-ppc/mpc8xx.h b/include/asm-ppc/mpc8xx.h index dc8e59896050..208a2e11daee 100644 --- a/include/asm-ppc/mpc8xx.h +++ b/include/asm-ppc/mpc8xx.h @@ -97,6 +97,22 @@ extern unsigned char __res[]; struct pt_regs; +enum ppc_sys_devices { + MPC8xx_CPM_FEC1, + MPC8xx_CPM_FEC2, + MPC8xx_CPM_I2C, + MPC8xx_CPM_SCC1, + MPC8xx_CPM_SCC2, + MPC8xx_CPM_SCC3, + MPC8xx_CPM_SCC4, + MPC8xx_CPM_SPI, + MPC8xx_CPM_MCC1, + MPC8xx_CPM_MCC2, + MPC8xx_CPM_SMC1, + MPC8xx_CPM_SMC2, + MPC8xx_CPM_USB, +}; + #endif /* !__ASSEMBLY__ */ #endif /* CONFIG_8xx */ #endif /* __CONFIG_8xx_DEFS */ diff --git a/include/asm-ppc/ppc_sys.h b/include/asm-ppc/ppc_sys.h index 048f7c8596ee..549f44843c5e 100644 --- a/include/asm-ppc/ppc_sys.h +++ b/include/asm-ppc/ppc_sys.h @@ -27,6 +27,8 @@ #include #elif defined(CONFIG_85xx) #include +#elif defined(CONFIG_8xx) +#include #elif defined(CONFIG_PPC_MPC52xx) #include #elif defined(CONFIG_MPC10X_BRIDGE) -- cgit v1.2.3 From 3013449243adc3421b507696e5d247a3d292ee0c Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Fri, 16 Sep 2005 19:28:03 -0700 Subject: [PATCH] uml: UML/i386 cmpxchg fix Using native cmpxchg offers a slight performance improvement in uml/i386. 
Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig.i386 | 4 ++++ include/asm-um/system-i386.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386 index 8ad156a00499..5d92cacd56c6 100644 --- a/arch/um/Kconfig.i386 +++ b/arch/um/Kconfig.i386 @@ -42,3 +42,7 @@ config ARCH_HAS_SC_SIGNALS config ARCH_REUSE_HOST_VSYSCALL_AREA bool default y + +config X86_CMPXCHG + bool + default y diff --git a/include/asm-um/system-i386.h b/include/asm-um/system-i386.h index ea8381de3cc9..c436263e67ba 100644 --- a/include/asm-um/system-i386.h +++ b/include/asm-um/system-i386.h @@ -3,6 +3,4 @@ #include "asm/system-generic.h" -#define __HAVE_ARCH_CMPXCHG 1 - #endif -- cgit v1.2.3 From 8ac2120d90273c590cf7662f03d103519101685b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 16 Sep 2005 19:28:08 -0700 Subject: [PATCH] i2c: kill an unused i2c_adapter struct member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kill an unused member of the i2c_adapter structure. This additionally fixes a potential bug, because doesn't include , so different files including could see a different definition of the i2c_adapter structure, depending on them including (or other header files themselves including ) before , or not. Credits go to Jörn Engel for pointing me to the problem. Signed-off-by: Jean Delvare Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index be35332b67e6..3d49a305bf88 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -230,11 +230,6 @@ struct i2c_adapter { struct device dev; /* the adapter device */ struct class_device class_dev; /* the class device */ -#ifdef CONFIG_PROC_FS - /* No need to set this when you initialize the adapter */ - int inode; -#endif /* def CONFIG_PROC_FS */ - int nr; struct list_head clients; struct list_head list; -- cgit v1.2.3 From f647e08a55d2c88c4e7ab17a0a8e3fcf568fbc65 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 16 Sep 2005 19:28:10 -0700 Subject: [PATCH] joystick-vs-x.org fix Fix http://bugzilla.kernel.org/show_bug.cgi?id=5241 2.6.13 broke compilation of the xorg tree, which apprarently insists on including that file. 
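The header change that follows swaps the kernel-only __s32/__s64 types for int32_t/int64_t and hides the BITS_PER_LONG check behind __KERNEL__, so a plain userspace build such as the X.org tree can include <linux/joystick.h> again. A minimal userspace sketch of such a consumer, assuming a /dev/input/js0 device node exists (the path is an assumption for illustration, not part of this patch):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/joystick.h>

int main(void)
{
        struct js_event e;
        int fd = open("/dev/input/js0", O_RDONLY);

        if (fd < 0)
                return 1;
        /* each read returns one queued button/axis event */
        while (read(fd, &e, sizeof(e)) == sizeof(e))
                printf("type=%d number=%d value=%d\n", e.type, e.number, e.value);
        close(fd);
        return 0;
}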
Cc: Vojtech Pavlik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/joystick.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/joystick.h b/include/linux/joystick.h index 06b9af77eb7f..5fd20ddd7ae3 100644 --- a/include/linux/joystick.h +++ b/include/linux/joystick.h @@ -111,29 +111,30 @@ struct js_corr { #define JS_SET_ALL 8 struct JS_DATA_TYPE { - __s32 buttons; - __s32 x; - __s32 y; + int32_t buttons; + int32_t x; + int32_t y; }; struct JS_DATA_SAVE_TYPE_32 { - __s32 JS_TIMEOUT; - __s32 BUSY; - __s32 JS_EXPIRETIME; - __s32 JS_TIMELIMIT; + int32_t JS_TIMEOUT; + int32_t BUSY; + int32_t JS_EXPIRETIME; + int32_t JS_TIMELIMIT; struct JS_DATA_TYPE JS_SAVE; struct JS_DATA_TYPE JS_CORR; }; struct JS_DATA_SAVE_TYPE_64 { - __s32 JS_TIMEOUT; - __s32 BUSY; - __s64 JS_EXPIRETIME; - __s64 JS_TIMELIMIT; + int32_t JS_TIMEOUT; + int32_t BUSY; + int64_t JS_EXPIRETIME; + int64_t JS_TIMELIMIT; struct JS_DATA_TYPE JS_SAVE; struct JS_DATA_TYPE JS_CORR; }; +#ifdef __KERNEL__ #if BITS_PER_LONG == 64 #define JS_DATA_SAVE_TYPE JS_DATA_SAVE_TYPE_64 #elif BITS_PER_LONG == 32 @@ -141,5 +142,6 @@ struct JS_DATA_SAVE_TYPE_64 { #else #error Unexpected BITS_PER_LONG #endif +#endif #endif /* _LINUX_JOYSTICK_H */ -- cgit v1.2.3 From ae31c3399d17b1f7bc1742724f70476b5417744f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 18 Sep 2005 00:17:51 -0700 Subject: [DCCP]: Move the ack vector code to net/dccp/ackvec.[ch] Isolating it, that will be used when we introduce a CCID2 (TCP-Like) implementation. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 14 +- net/dccp/Makefile | 2 + net/dccp/ackvec.c | 419 ++++++++++++++++++++++++++++++++++++++++++++++++ net/dccp/ackvec.h | 133 ++++++++++++++++ net/dccp/dccp.h | 82 ++-------- net/dccp/input.c | 69 ++------ net/dccp/ipv4.c | 59 ++----- net/dccp/minisocks.c | 13 +- net/dccp/options.c | 440 +-------------------------------------------------- net/dccp/output.c | 12 +- 10 files changed, 612 insertions(+), 631 deletions(-) create mode 100644 net/dccp/ackvec.c create mode 100644 net/dccp/ackvec.h (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 0e72708677e4..8c8e029095a5 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -353,14 +353,8 @@ static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) extern struct inet_timewait_death_row dccp_death_row; -/* Read about the ECN nonce to see why it is 253 */ -#define DCCP_MAX_ACK_VECTOR_LEN 253 - struct dccp_options_received { - u32 dccpor_ndp:24, - dccpor_ack_vector_len:8; - u32 dccpor_ack_vector_idx:10; - /* 22 bits hole, try to pack */ + u32 dccpor_ndp; /* only 24 bits */ u32 dccpor_timestamp; u32 dccpor_timestamp_echo; u32 dccpor_elapsed_time; @@ -394,6 +388,8 @@ static inline int dccp_list_has_service(const struct dccp_service_list *sl, return 0; } +struct dccp_ackvec; + /** * struct dccp_sock - DCCP socket state * @@ -414,7 +410,7 @@ static inline int dccp_list_has_service(const struct dccp_service_list *sl, * @dccps_packet_size - Set thru setsockopt * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet - * @dccps_hc_rx_ackpkts - receiver half connection acked packets + * @dccps_hc_rx_ackvec - rx half connection ack vector */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ @@ -439,7 +435,7 
@@ struct dccp_sock { __u32 dccps_pmtu_cookie; __u32 dccps_mss_cache; struct dccp_options dccps_options; - struct dccp_ackpkts *dccps_hc_rx_ackpkts; + struct dccp_ackvec *dccps_hc_rx_ackvec; void *dccps_hc_rx_ccid_private; void *dccps_hc_tx_ccid_private; struct ccid *dccps_hc_rx_ccid; diff --git a/net/dccp/Makefile b/net/dccp/Makefile index fb97bb042455..344a8da153fc 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o +dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o + obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o dccp_diag-y := diag.o diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c new file mode 100644 index 000000000000..6530283eafca --- /dev/null +++ b/net/dccp/ackvec.c @@ -0,0 +1,419 @@ +/* + * net/dccp/ackvec.c + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License; + */ + +#include "ackvec.h" +#include "dccp.h" + +#include +#include + +#include + +int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + int len = av->dccpav_vec_len + 2; + struct timeval now; + u32 elapsed_time; + unsigned char *to, *from; + + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; + + if (elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return -1; + + /* + * XXX: now we have just one ack vector sent record, so + * we have to wait for it to be cleared. + * + * Of course this is not acceptable, but this is just for + * basic testing now. + */ + if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1) + return -1; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = len; + + len = av->dccpav_vec_len; + from = av->dccpav_buf + av->dccpav_buf_head; + + /* Check if buf_head wraps */ + if (av->dccpav_buf_head + len > av->dccpav_vec_len) { + const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + from = av->dccpav_buf; + } + + memcpy(to, from, len); + /* + * From draft-ietf-dccp-spec-11.txt: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will + * equal buf_nonce. + * + * This implemention uses just one ack record for now. 
+ */ + av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + av->dccpav_ack_ptr = av->dccpav_buf_head; + av->dccpav_ack_ackno = av->dccpav_buf_ackno; + av->dccpav_ack_nonce = av->dccpav_buf_nonce; + av->dccpav_sent_len = av->dccpav_vec_len; + + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu\n", + debug_prefix, av->dccpav_sent_len, + (unsigned long long)av->dccpav_ack_seqno, + (unsigned long long)av->dccpav_ack_ackno); + return -1; +} + +struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, + const unsigned int __nocast priority) +{ + struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); + + if (av != NULL) { + av->dccpav_buf_len = len; + av->dccpav_buf_head = + av->dccpav_buf_tail = av->dccpav_buf_len - 1; + av->dccpav_buf_ackno = + av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU; + av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; + av->dccpav_ack_ptr = 0; + av->dccpav_time.tv_sec = 0; + av->dccpav_time.tv_usec = 0; + av->dccpav_sent_len = av->dccpav_vec_len = 0; + } + + return av; +} + +void dccp_ackvec_free(struct dccp_ackvec *av) +{ + kfree(av); +} + +static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, + const unsigned int index) +{ + return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; +} + +static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, + const unsigned int index) +{ + return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; +} + +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives. + */ +static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, + const unsigned int packets, + const unsigned char state) +{ + unsigned int gap; + signed long new_head; + + if (av->dccpav_vec_len + packets > av->dccpav_buf_len) + return -ENOBUFS; + + gap = packets - 1; + new_head = av->dccpav_buf_head - packets; + + if (new_head < 0) { + if (gap > 0) { + memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, + gap + new_head + 1); + gap = -new_head; + } + new_head += av->dccpav_buf_len; + } + + av->dccpav_buf_head = new_head; + + if (gap > 0) + memset(av->dccpav_buf + av->dccpav_buf_head + 1, + DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); + + av->dccpav_buf[av->dccpav_buf_head] = state; + av->dccpav_vec_len += packets; + return 0; +} + +/* + * Implements the draft-ietf-dccp-spec-11.txt Appendix A + */ +int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in dccpav_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A: + * + * Of course, the circular buffer may overflow, either when the + * HC-Sender is sending data at a very high rate, when the + * HC-Receiver's acknowledgements are not reaching the HC-Sender, + * or when the HC-Sender is forgetting to acknowledge those acks + * (so the HC-Receiver is unable to clean up old state). In this + * case, the HC-Receiver should either compress the buffer (by + * increasing run lengths when possible), transfer its state to + * a larger buffer, or, as a last resort, drop all received + * packets, without processing them whatsoever, until its buffer + * shrinks again. 
+ */ + + /* See if this is the first ackno being inserted */ + if (av->dccpav_vec_len == 0) { + av->dccpav_buf[av->dccpav_buf_head] = state; + av->dccpav_vec_len = 1; + } else if (after48(ackno, av->dccpav_buf_ackno)) { + const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, + ackno); + + /* + * Look if the state of this packet is the same as the + * previous ackno and if so if we can bump the head len. + */ + if (delta == 1 && + dccp_ackvec_state(av, av->dccpav_buf_head) == state && + (dccp_ackvec_len(av, av->dccpav_buf_head) < + DCCP_ACKVEC_LEN_MASK)) + av->dccpav_buf[av->dccpav_buf_head]++; + else if (dccp_ackvec_set_buf_head_state(av, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S arrives, and + * S <= buf_ackno, the HC-Receiver will scan the table + * for the byte corresponding to S. (Indexing structures + * could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); + unsigned int index = av->dccpav_buf_head; + + while (1) { + const u8 len = dccp_ackvec_len(av, index); + const u8 state = dccp_ackvec_state(av, index); + /* + * valid packets not yet in dccpav_buf have a reserved + * entry, with a len equal to 0. + */ + if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our + reserved seat! */ + dccp_pr_debug("Found %llu reserved seat!\n", + (unsigned long long)ackno); + av->dccpav_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == av->dccpav_buf_len) + index = 0; + } + } + + av->dccpav_buf_ackno = ackno; + dccp_timestamp(sk, &av->dccpav_time); +out: + dccp_pr_debug(""); + return 0; + +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost " + "packet: %llu\n", (unsigned long long)ackno); + return -EILSEQ; +} + +#ifdef CONFIG_IP_DCCP_DEBUG +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +{ + if (!dccp_debug) + return; + + printk("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long)ackno); + + while (len--) { + const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + + printk("%d,%d|", state, rl); + ++vector; + } + + printk("\n"); +} + +void dccp_ackvec_print(const struct dccp_ackvec *av) +{ + dccp_ackvector_print(av->dccpav_buf_ackno, + av->dccpav_buf + av->dccpav_buf_head, + av->dccpav_vec_len); +} +#endif + +static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) +{ + /* + * As we're keeping track of the ack vector size (dccpav_vec_len) and + * the sent ack vector size (dccpav_sent_len) we don't need + * dccpav_buf_tail at all, but keep this code here as in the future + * we'll implement a vector of ack records, as suggested in + * draft-ietf-dccp-spec-11.txt Appendix A. -acme + */ +#if 0 + av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; + if (av->dccpav_buf_tail >= av->dccpav_vec_len) + av->dccpav_buf_tail -= av->dccpav_vec_len; +#endif + av->dccpav_vec_len -= av->dccpav_sent_len; +} + +void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, + const u64 ackno) +{ + /* Check if we actually sent an ACK vector */ + if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + + if (ackno == av->dccpav_ack_seqno) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
+ "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu, ACKED!\n", + debug_prefix, 1, + (unsigned long long)av->dccpav_ack_seqno, + (unsigned long long)av->dccpav_ack_ackno); + dccp_ackvec_trow_away_ack_record(av); + av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; + } +} + +static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, + struct sock *sk, u64 ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + + /* Check if we actually sent an ACK vector */ + if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + /* + * We're in the receiver half connection, so if the received an ACK + * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're + * not interested. + * + * Extra explanation with example: + * + * if we received an ACK vector with ackno 50, it can only be acking + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). + */ + /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */ + if (before48(ackno, av->dccpav_ack_seqno)) { + /* dccp_pr_debug_cat("yes\n"); */ + return; + } + /* dccp_pr_debug_cat("no\n"); */ + + i = len; + while (i--) { + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + + /* + * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, + * av->dccpav_ack_seqno, ackno); + */ + if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) { + const u8 state = (*vector & + DCCP_ACKVEC_STATE_MASK) >> 6; + /* dccp_pr_debug_cat("yes\n"); */ + + if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = + dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK vector 0, len=%d, " + "ack_seqno=%llu, ack_ackno=%llu, " + "ACKED!\n", + debug_prefix, len, + (unsigned long long) + av->dccpav_ack_seqno, + (unsigned long long) + av->dccpav_ack_ackno); + dccp_ackvec_trow_away_ack_record(av); + } + /* + * If dccpav_ack_seqno was not received, no problem + * we'll send another ACK vector. + */ + av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; + break; + } + /* dccp_pr_debug_cat("no\n"); */ + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + ++vector; + } +} + +int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len) +{ + if (len > DCCP_MAX_ACKVEC_LEN) + return -1; + + /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ + dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + len, value); + return 0; +} diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h new file mode 100644 index 000000000000..8ca51c9191f7 --- /dev/null +++ b/net/dccp/ackvec.h @@ -0,0 +1,133 @@ +#ifndef _ACKVEC_H +#define _ACKVEC_H +/* + * net/dccp/ackvec.h + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include + +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACKVEC_LEN 253 + +#define DCCP_ACKVEC_STATE_RECEIVED 0 +#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) + +#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ + +/** struct dccp_ackvec - ack vector + * + * This data structure is the one defined in the DCCP draft + * Appendix A. + * + * @dccpav_buf_head - circular buffer head + * @dccpav_buf_tail - circular buffer tail + * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the + * buffer (i.e. %dccpav_buf_head) + * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked + * by the buffer with State 0 + * + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @dccpav_ack_seqno - the Sequence Number used for the packet + * (HC-Receiver seqno) + * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement. + * @dccpav_ack_ackno - the Acknowledgement Number used for the packet + * (HC-Sender seqno) + * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. + * + * @dccpav_buf_len - circular buffer length + * @dccpav_time - the time in usecs + * @dccpav_buf - circular buffer of acknowledgeable packets + */ +struct dccp_ackvec { + unsigned int dccpav_buf_head; + unsigned int dccpav_buf_tail; + u64 dccpav_buf_ackno; + u64 dccpav_ack_seqno; + u64 dccpav_ack_ackno; + unsigned int dccpav_ack_ptr; + unsigned int dccpav_sent_len; + unsigned int dccpav_vec_len; + unsigned int dccpav_buf_len; + struct timeval dccpav_time; + u8 dccpav_buf_nonce; + u8 dccpav_ack_nonce; + u8 dccpav_buf[0]; +}; + +struct sock; +struct sk_buff; + +#ifdef CONFIG_IP_DCCP_ACKVEC +extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, + const unsigned int __nocast priority); +extern void dccp_ackvec_free(struct dccp_ackvec *av); + +extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state); + +extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno); +extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len); + +extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return av->dccpav_sent_len != av->dccpav_vec_len; +} +#else /* CONFIG_IP_DCCP_ACKVEC */ +static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, + const unsigned int __nocast priority) +{ + return NULL; +} + +static inline void dccp_ackvec_free(struct dccp_ackvec *av) +{ +} + +static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + return -1; +} + +static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno) +{ +} + +static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len) +{ + return -1; +} + +static inline int dccp_insert_option_ackvec(const struct sock *sk, + const struct sk_buff *skb) +{ + return -1; +} + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return 0; +} +#endif /* CONFIG_IP_DCCP_ACKVEC */ +#endif /* _ACKVEC_H */ 
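Each byte of dccpav_buf packs a 2-bit state in its top bits and a 6-bit run length in its low bits, as the DCCP_ACKVEC_STATE_MASK/DCCP_ACKVEC_LEN_MASK definitions above show; a run length of 0 still stands for one packet. A minimal decoding sketch in the spirit of dccp_ackvector_print(), written as standalone userspace C with the masks copied from ackvec.h (48-bit sequence-number wraparound is ignored here):

#include <stdio.h>

#define DCCP_ACKVEC_STATE_MASK  0xC0    /* 11000000 */
#define DCCP_ACKVEC_LEN_MASK    0x3F    /* 00111111 */

static void ackvec_decode(const unsigned char *vector, int len,
                          unsigned long long ackno)
{
        while (len--) {
                const unsigned int state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
                const unsigned int rl = *vector & DCCP_ACKVEC_LEN_MASK;

                /* this byte covers rl + 1 packets, newest sequence number first */
                printf("seqnos %llu..%llu: state %u\n",
                       ackno - rl, ackno, state);
                ackno -= rl + 1;
                ++vector;
        }
}

int main(void)
{
        /* hypothetical vector: 3 packets received, then 2 not received */
        const unsigned char vec[] = { 0x02, 0xC1 };

        ackvec_decode(vec, sizeof(vec), 100);
        return 0;
}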
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index be7a660b6b24..5871c027f9dc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -17,6 +17,7 @@ #include #include #include +#include "ackvec.h" #ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; @@ -358,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) (dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1)); } + +static inline int dccp_ack_pending(const struct sock *sk) +{ + const struct dccp_sock *dp = dccp_sk(sk); + return dp->dccps_timestamp_echo != 0 || +#ifdef CONFIG_IP_DCCP_ACKVEC + (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || +#endif + inet_csk_ack_scheduled(sk); +} extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern void dccp_insert_option_elapsed_time(struct sock *sk, @@ -371,65 +383,6 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, extern struct socket *dccp_ctl_socket; -#define DCCP_ACKPKTS_STATE_RECEIVED 0 -#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) -#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) - -#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ -#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ - -/** struct dccp_ackpkts - acknowledgeable packets - * - * This data structure is the one defined in the DCCP draft - * Appendix A. - * - * @dccpap_buf_head - circular buffer head - * @dccpap_buf_tail - circular buffer tail - * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the - * buffer (i.e. %dccpap_buf_head) - * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked - * by the buffer with State 0 - * - * Additionally, the HC-Receiver must keep some information about the - * Ack Vectors it has recently sent. For each packet sent carrying an - * Ack Vector, it remembers four variables: - * - * @dccpap_ack_seqno - the Sequence Number used for the packet - * (HC-Receiver seqno) - * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. - * @dccpap_ack_ackno - the Acknowledgement Number used for the packet - * (HC-Sender seqno) - * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 
- * - * @dccpap_buf_len - circular buffer length - * @dccpap_time - the time in usecs - * @dccpap_buf - circular buffer of acknowledgeable packets - */ -struct dccp_ackpkts { - unsigned int dccpap_buf_head; - unsigned int dccpap_buf_tail; - u64 dccpap_buf_ackno; - u64 dccpap_ack_seqno; - u64 dccpap_ack_ackno; - unsigned int dccpap_ack_ptr; - unsigned int dccpap_buf_vector_len; - unsigned int dccpap_ack_vector_len; - unsigned int dccpap_buf_len; - struct timeval dccpap_time; - u8 dccpap_buf_nonce; - u8 dccpap_ack_nonce; - u8 dccpap_buf[0]; -}; - -extern struct dccp_ackpkts * - dccp_ackpkts_alloc(unsigned int len, - const unsigned int __nocast priority); -extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); -extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, - u64 ackno, u8 state); -extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, - struct sock *sk, u64 ackno); - extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); static inline suseconds_t timeval_usecs(const struct timeval *tv) @@ -470,15 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } -#ifdef CONFIG_IP_DCCP_DEBUG -extern void dccp_ackvector_print(const u64 ackno, - const unsigned char *vector, int len); -extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); -#else -static inline void dccp_ackvector_print(const u64 ackno, - const unsigned char *vector, - int len) { } -static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } -#endif - #endif /* _DCCP_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 062e9f8359d0..1b6b2cb12376 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -16,6 +16,7 @@ #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -60,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); if (dp->dccps_options.dccpo_send_ack_vector) - dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); } static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) @@ -164,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - /* - * FIXME: check ECN to see if we should use - * DCCP_ACKPKTS_STATE_ECN_MARKED - */ - if (dp->dccps_options.dccpo_send_ack_vector) { - struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " - "packets buffer full!\n"); - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MIN, - DCCP_RTO_MAX); - goto discard; - } - - /* - * FIXME: this activation is probably wrong, have to study more - * TCP delack machinery and how it fits into DCCP draft, but - * for now it kinda "works" 8) - */ - if (!inet_csk_ack_scheduled(sk)) { - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, - DCCP_RTO_MAX); - } - } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); @@ -495,29 
+470,11 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); - /* - * FIXME: check ECN to see if we should use - * DCCP_ACKPKTS_STATE_ECN_MARKED - */ - if (dp->dccps_options.dccpo_send_ack_vector) { - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, - dcb->dccpd_seq, - DCCP_ACKPKTS_STATE_RECEIVED)) - goto discard; - /* - * FIXME: this activation is probably wrong, have to - * study more TCP delack machinery and how it fits into - * DCCP draft, but for now it kinda "works" 8) - */ - if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == - DCCP_MAX_SEQNO + 1) && - !inet_csk_ack_scheduled(sk)) { - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MIN, - DCCP_RTO_MAX); - } - } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; } /* diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 94a440b2685b..82434e4a42df 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -23,6 +23,7 @@ #include #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -1112,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb) goto discard_it; dh = dccp_hdr(skb); -#if 0 - /* - * Use something like this to simulate some DATA/DATAACK loss to test - * dccp_ackpkts_add, you'll get something like this on a session that - * sends 10 DATA/DATAACK packets: - * - * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| - * - * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet - * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets - * with the same state - * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet - * - * So... 
- * - * 281473596467422 was received - * 281473596467421 was not received - * 281473596467420 was received - * 281473596467419 was not received - * 281473596467418 was received - * 281473596467417 was not received - * 281473596467416 was received - * 281473596467415 was not received - * 281473596467414 was received - * 281473596467413 was received (this one was the 3way handshake - * RESPONSE) - * - */ - if (dh->dccph_type == DCCP_PKT_DATA || - dh->dccph_type == DCCP_PKT_DATAACK) { - static int discard = 0; - if (discard) { - discard = 0; - goto discard_it; - } - discard = 1; - } -#endif DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; @@ -1264,11 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk) do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackpkts = - dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_KERNEL); - - if (dp->dccps_hc_rx_ackpkts == NULL) + dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, + GFP_KERNEL); + if (dp->dccps_hc_rx_ackvec == NULL) return -ENOMEM; } @@ -1288,8 +1249,10 @@ static int dccp_v4_init_sock(struct sock *sk) dp->dccps_hc_tx_ccid == NULL) { ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); - dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); - dp->dccps_hc_rx_ackpkts = NULL; + if (dp->dccps_options.dccpo_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; return -ENOMEM; } @@ -1331,8 +1294,10 @@ static int dccp_v4_destroy_sock(struct sock *sk) ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); - dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); - dp->dccps_hc_rx_ackpkts = NULL; + if (dp->dccps_options.dccpo_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 933e10db1789..1393461898bb 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -19,6 +19,7 @@ #include #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -94,23 +95,23 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct dccp_sock *newdp = dccp_sk(newsk); newdp->dccps_role = DCCP_ROLE_SERVER; - newdp->dccps_hc_rx_ackpkts = NULL; + newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; newdp->dccps_service = dreq->dreq_service; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { - newdp->dccps_hc_rx_ackpkts = - dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_ATOMIC); + newdp->dccps_hc_rx_ackvec = + dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, + GFP_ATOMIC); /* * XXX: We're using the same CCIDs set on the parent, * i.e. sk_clone copied the master sock and left the * CCID pointers for this child, that is why we do the * __ccid_get calls. 
*/ - if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) + if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) goto out_free; } @@ -118,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newsk) != 0 || ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { - dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); + dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); out_free: diff --git a/net/dccp/options.c b/net/dccp/options.c index d4c4242d8dd7..c480c506a4a4 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -18,15 +18,10 @@ #include #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" -static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, - struct sock *sk, - const u64 ackno, - const unsigned char len, - const unsigned char *vector); - /* stores the default values for new connection. may be changed with sysctl */ static const struct dccp_options dccpo_default_values = { .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, @@ -113,25 +108,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_ndp); break; case DCCPO_ACK_VECTOR_0: - if (len > DCCP_MAX_ACK_VECTOR_LEN) - goto out_invalid_option; - + case DCCPO_ACK_VECTOR_1: if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_ack_vector_len = len; - opt_recv->dccpor_ack_vector_idx = value - options; - - dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", - debug_prefix, len, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq); - dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, - value, len); - dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, - sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - len, value); + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_parse(sk, skb, opt, value, len)) + goto out_invalid_option; break; case DCCPO_TIMESTAMP: if (len != 4) @@ -352,86 +335,6 @@ void dccp_insert_option_elapsed_time(struct sock *sk, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); -#ifdef CONFIG_IP_DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT TX opt: " : "server TX opt: "; -#endif - struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - int len = ap->dccpap_buf_vector_len + 2; - struct timeval now; - u32 elapsed_time; - unsigned char *to, *from; - - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; - - if (elapsed_time != 0) - dccp_insert_option_elapsed_time(sk, skb, elapsed_time); - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " - "insert ACK Vector!\n"); - return; - } - - /* - * XXX: now we have just one ack vector sent record, so - * we have to wait for it to be cleared. - * - * Of course this is not acceptable, but this is just for - * basic testing now. 
- */ - if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) - return; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ACK_VECTOR_0; - *to++ = len; - - len = ap->dccpap_buf_vector_len; - from = ap->dccpap_buf + ap->dccpap_buf_head; - - /* Check if buf_head wraps */ - if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { - const unsigned int tailsize = (ap->dccpap_buf_len - - ap->dccpap_buf_head); - - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - from = ap->dccpap_buf; - } - - memcpy(to, from, len); - /* - * From draft-ietf-dccp-spec-11.txt: - * - * For each acknowledgement it sends, the HC-Receiver will add an - * acknowledgement record. ack_seqno will equal the HC-Receiver - * sequence number it used for the ack packet; ack_ptr will equal - * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will - * equal buf_nonce. - * - * This implemention uses just one ack record for now. - */ - ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - ap->dccpap_ack_ptr = ap->dccpap_buf_head; - ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; - ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; - ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; - - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu\n", - debug_prefix, ap->dccpap_ack_vector_len, - (unsigned long long) ap->dccpap_ack_seqno, - (unsigned long long) ap->dccpap_ack_ackno); -} - void dccp_timestamp(const struct sock *sk, struct timeval *tv) { const struct dccp_sock *dp = dccp_sk(sk); @@ -528,9 +431,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (!dccp_packet_without_ack(skb)) { if (dp->dccps_options.dccpo_send_ack_vector && - (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != - DCCP_MAX_SEQNO + 1)) - dccp_insert_option_ack_vector(sk, skb); + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) + dccp_insert_option_ackvec(sk, skb); if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } @@ -557,331 +459,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) } } } - -struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, - const unsigned int __nocast priority) -{ - struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); - - if (ap != NULL) { -#ifdef CONFIG_IP_DCCP_DEBUG - memset(ap->dccpap_buf, 0xFF, len); -#endif - ap->dccpap_buf_len = len; - ap->dccpap_buf_head = - ap->dccpap_buf_tail = - ap->dccpap_buf_len - 1; - ap->dccpap_buf_ackno = - ap->dccpap_ack_ackno = - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; - ap->dccpap_ack_ptr = 0; - ap->dccpap_time.tv_sec = 0; - ap->dccpap_time.tv_usec = 0; - ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; - } - - return ap; -} - -void dccp_ackpkts_free(struct dccp_ackpkts *ap) -{ - if (ap != NULL) { -#ifdef CONFIG_IP_DCCP_DEBUG - memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); -#endif - kfree(ap); - } -} - -static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, - const unsigned int index) -{ - return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; -} - -static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, - const unsigned int index) -{ - return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; -} - -/* - * If several packets are missing, the HC-Receiver may prefer to enter multiple - * bytes with run length 0, rather than a single byte with a larger run length; - * this simplifies table updates if one of the missing packets arrives. 
- */ -static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, - const unsigned int packets, - const unsigned char state) -{ - unsigned int gap; - signed long new_head; - - if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) - return -ENOBUFS; - - gap = packets - 1; - new_head = ap->dccpap_buf_head - packets; - - if (new_head < 0) { - if (gap > 0) { - memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, - gap + new_head + 1); - gap = -new_head; - } - new_head += ap->dccpap_buf_len; - } - - ap->dccpap_buf_head = new_head; - - if (gap > 0) - memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, - DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); - - ap->dccpap_buf[ap->dccpap_buf_head] = state; - ap->dccpap_buf_vector_len += packets; - return 0; -} - -/* - * Implements the draft-ietf-dccp-spec-11.txt Appendix A - */ -int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, - u64 ackno, u8 state) -{ - /* - * Check at the right places if the buffer is full, if it is, tell the - * caller to start dropping packets till the HC-Sender acks our ACK - * vectors, when we will free up space in dccpap_buf. - * - * We may well decide to do buffer compression, etc, but for now lets - * just drop. - * - * From Appendix A: - * - * Of course, the circular buffer may overflow, either when the - * HC-Sender is sending data at a very high rate, when the - * HC-Receiver's acknowledgements are not reaching the HC-Sender, - * or when the HC-Sender is forgetting to acknowledge those acks - * (so the HC-Receiver is unable to clean up old state). In this - * case, the HC-Receiver should either compress the buffer (by - * increasing run lengths when possible), transfer its state to - * a larger buffer, or, as a last resort, drop all received - * packets, without processing them whatsoever, until its buffer - * shrinks again. - */ - - /* See if this is the first ackno being inserted */ - if (ap->dccpap_buf_vector_len == 0) { - ap->dccpap_buf[ap->dccpap_buf_head] = state; - ap->dccpap_buf_vector_len = 1; - } else if (after48(ackno, ap->dccpap_buf_ackno)) { - const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, - ackno); - - /* - * Look if the state of this packet is the same as the - * previous ackno and if so if we can bump the head len. - */ - if (delta == 1 && - dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && - (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < - DCCP_ACKPKTS_LEN_MASK)) - ap->dccpap_buf[ap->dccpap_buf_head]++; - else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) - return -ENOBUFS; - } else { - /* - * A.1.2. Old Packets - * - * When a packet with Sequence Number S arrives, and - * S <= buf_ackno, the HC-Receiver will scan the table - * for the byte corresponding to S. (Indexing structures - * could reduce the complexity of this scan.) - */ - u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); - unsigned int index = ap->dccpap_buf_head; - - while (1) { - const u8 len = dccp_ackpkts_len(ap, index); - const u8 state = dccp_ackpkts_state(ap, index); - /* - * valid packets not yet in dccpap_buf have a reserved - * entry, with a len equal to 0. - */ - if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && - len == 0 && delta == 0) { /* Found our - reserved seat! 
*/ - dccp_pr_debug("Found %llu reserved seat!\n", - (unsigned long long) ackno); - ap->dccpap_buf[index] = state; - goto out; - } - /* len == 0 means one packet */ - if (delta < len + 1) - goto out_duplicate; - - delta -= len + 1; - if (++index == ap->dccpap_buf_len) - index = 0; - } - } - - ap->dccpap_buf_ackno = ackno; - dccp_timestamp(sk, &ap->dccpap_time); -out: - dccp_pr_debug(""); - dccp_ackpkts_print(ap); - return 0; - -out_duplicate: - /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost " - "packet: %llu\n", (unsigned long long) ackno); - return -EILSEQ; -} - -#ifdef CONFIG_IP_DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, - int len) -{ - if (!dccp_debug) - return; - - printk("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long) ackno); - - while (len--) { - const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; - const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); - - printk("%d,%d|", state, rl); - ++vector; - } - - printk("\n"); -} - -void dccp_ackpkts_print(const struct dccp_ackpkts *ap) -{ - dccp_ackvector_print(ap->dccpap_buf_ackno, - ap->dccpap_buf + ap->dccpap_buf_head, - ap->dccpap_buf_vector_len); -} -#endif - -static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) -{ - /* - * As we're keeping track of the ack vector size - * (dccpap_buf_vector_len) and the sent ack vector size - * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but - * keep this code here as in the future we'll implement a vector of - * ack records, as suggested in draft-ietf-dccp-spec-11.txt - * Appendix A. -acme - */ -#if 0 - ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; - if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) - ap->dccpap_buf_tail -= ap->dccpap_buf_len; -#endif - ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; -} - -void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, - u64 ackno) -{ - /* Check if we actually sent an ACK vector */ - if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - - if (ackno == ap->dccpap_ack_seqno) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu, ACKED!\n", - debug_prefix, 1, - (unsigned long long) ap->dccpap_ack_seqno, - (unsigned long long) ap->dccpap_ack_ackno); - dccp_ackpkts_trow_away_ack_record(ap); - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - } -} - -static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, - struct sock *sk, u64 ackno, - const unsigned char len, - const unsigned char *vector) -{ - unsigned char i; - - /* Check if we actually sent an ACK vector */ - if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - /* - * We're in the receiver half connection, so if the received an ACK - * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're - * not interested. - * - * Extra explanation with example: - * - * if we received an ACK vector with ackno 50, it can only be acking - * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). - */ - /* dccp_pr_debug("is %llu < %llu? 
", ackno, ap->dccpap_ack_seqno); */ - if (before48(ackno, ap->dccpap_ack_seqno)) { - /* dccp_pr_debug_cat("yes\n"); */ - return; - } - /* dccp_pr_debug_cat("no\n"); */ - - i = len; - while (i--) { - const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); - u64 ackno_end_rl; - - dccp_set_seqno(&ackno_end_rl, ackno - rl); - - /* - * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, - * ap->dccpap_ack_seqno, ackno); - */ - if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & - DCCP_ACKPKTS_STATE_MASK) >> 6; - /* dccp_pr_debug_cat("yes\n"); */ - - if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = - dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK vector 0, len=%d, " - "ack_seqno=%llu, ack_ackno=%llu, " - "ACKED!\n", - debug_prefix, len, - (unsigned long long) - ap->dccpap_ack_seqno, - (unsigned long long) - ap->dccpap_ack_ackno); - dccp_ackpkts_trow_away_ack_record(ap); - } - /* - * If dccpap_ack_seqno was not received, no problem - * we'll send another ACK vector. - */ - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - break; - } - /* dccp_pr_debug_cat("no\n"); */ - - dccp_set_seqno(&ackno, ackno_end_rl - 1); - ++vector; - } -} diff --git a/net/dccp/output.c b/net/dccp/output.c index 156b1d29a156..4786bdcddcc9 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -16,6 +16,7 @@ #include +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) err = dccp_wait_for_ccid(sk, skb, timeo); if (err == 0) { - const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int len = skb->len; @@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); dcb->dccpd_type = DCCP_PKT_DATAACK; - /* - * FIXME: we really should have a - * dccps_ack_pending or use icsk. - */ - } else if (inet_csk_ack_scheduled(sk) || - dp->dccps_timestamp_echo != 0 || - (dp->dccps_options.dccpo_send_ack_vector && - ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && - ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + } else if (dccp_ack_pending(sk)) dcb->dccpd_type = DCCP_PKT_DATAACK; else dcb->dccpd_type = DCCP_PKT_DATA; -- cgit v1.2.3 From 65299d6c3cfb49cc3eee4fc483e7edd23ea7b2ed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 18 Sep 2005 00:18:32 -0700 Subject: [CCID3]: Introduce include/linux/tfrc.h Moving the TFRC sender and receiver variables to separate structs, so that we can copy these structs to userspace thru getsockopt, dccp_diag, etc. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tfrc.h | 35 +++++++++++++++++++++++++++++++++++ net/dccp/ccids/ccid3.h | 23 +++++++++++++---------- 2 files changed, 48 insertions(+), 10 deletions(-) create mode 100644 include/linux/tfrc.h (limited to 'include') diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h new file mode 100644 index 000000000000..7dab7831c3cb --- /dev/null +++ b/include/linux/tfrc.h @@ -0,0 +1,35 @@ +#ifndef _LINUX_TFRC_H_ +#define _LINUX_TFRC_H_ +/* + * include/linux/tfrc.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. 
+ * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + +struct tfrc_rx_info { + __u32 tfrcrx_x_recv; + __u32 tfrcrx_rtt; + __u32 tfrcrx_p; +}; + +struct tfrc_tx_info { + __u32 tfrctx_x; + __u32 tfrctx_x_recv; + __u32 tfrctx_x_calc; + __u32 tfrctx_rtt; + __u32 tfrctx_p; + __u32 tfrctx_rto; + __u32 tfrctx_ipi; +}; + +#endif /* _LINUX_TFRC_H_ */ diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index eb248778eea3..0bde4583d091 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -40,6 +40,7 @@ #include #include #include +#include #define TFRC_MIN_PACKET_SIZE 16 #define TFRC_STD_PACKET_SIZE 256 @@ -93,12 +94,15 @@ struct ccid3_options_received { * @ccid3hctx_hist - Packet history */ struct ccid3_hc_tx_sock { - u32 ccid3hctx_x; - u32 ccid3hctx_x_recv; - u32 ccid3hctx_x_calc; + struct tfrc_tx_info ccid3hctx_tfrc; +#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x +#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv +#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc +#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt +#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p +#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto +#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi u16 ccid3hctx_s; - u32 ccid3hctx_rtt; - u32 ccid3hctx_p; u8 ccid3hctx_state; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; @@ -106,19 +110,19 @@ struct ccid3_hc_tx_sock { struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; struct timeval ccid3hctx_t_nom; - u32 ccid3hctx_t_rto; - u32 ccid3hctx_t_ipi; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; }; struct ccid3_hc_rx_sock { + struct tfrc_rx_info ccid3hcrx_tfrc; +#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv +#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt +#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; - u32 ccid3hcrx_rtt; - u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; @@ -127,7 +131,6 @@ struct ccid3_hc_rx_sock { u16 ccid3hcrx_s; u32 ccid3hcrx_pinv; u32 ccid3hcrx_elapsed_time; - u32 ccid3hcrx_x_recv; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) -- cgit v1.2.3 From 561713cf475de1f671cc89c437927ec008a20209 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 18 Sep 2005 00:18:52 -0700 Subject: [DCCP]: Don't use necessarily the same CCID for tx and rx Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 3 ++- net/dccp/ipv4.c | 4 ++-- net/dccp/options.c | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 8c8e029095a5..13f9b78483fc 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -330,7 +330,8 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_options { __u64 dccpo_sequence_window; - __u8 dccpo_ccid; + __u8 dccpo_rx_ccid; + __u8 dccpo_tx_ccid; __u8 dccpo_send_ack_vector; __u8 dccpo_send_ndp_count; }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 82434e4a42df..40fe6afacde6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1241,9 +1241,9 @@ static int dccp_v4_init_sock(struct sock *sk) * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, sk); if (dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL) { diff --git a/net/dccp/options.c b/net/dccp/options.c index c480c506a4a4..0a76426c9aea 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -25,7 +25,8 @@ /* stores the default values for new connection. may be changed with sysctl */ static const struct dccp_options dccpo_default_values = { .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, - .dccpo_ccid = DCCPF_INITIAL_CCID, + .dccpo_rx_ccid = DCCPF_INITIAL_CCID, + .dccpo_tx_ccid = DCCPF_INITIAL_CCID, .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, }; -- cgit v1.2.3 From 88f964db6ef728982734356bf4c406270ea29c1d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 18 Sep 2005 00:19:32 -0700 Subject: [DCCP]: Introduce CCID getsockopt for the CCIDs Allocation for the optnames is similar to the DCCP options, with a range for rx and tx half connection CCIDs. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 2 ++ net/dccp/ccid.h | 31 ++++++++++++++++++++++++++++ net/dccp/ccids/ccid3.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/dccp/proto.c | 17 +++++++++------ 4 files changed, 100 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 13f9b78483fc..71fab4311e92 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -179,6 +179,8 @@ enum { /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 #define DCCP_SOCKOPT_SERVICE 2 +#define DCCP_SOCKOPT_CCID_RX_INFO 128 +#define DCCP_SOCKOPT_CCID_TX_INFO 192 #define DCCP_SERVICE_LIST_MAX_LEN 32 diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 962f1e9e2f7e..21e55142dcd3 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -14,6 +14,7 @@ */ #include +#include #include #include #include @@ -54,6 +55,14 @@ struct ccid { struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, struct tcp_info *info); + int (*ccid_hc_rx_getsockopt)(struct sock *sk, + const int optname, int len, + u32 __user *optval, + int __user *optlen); + int (*ccid_hc_tx_getsockopt)(struct sock *sk, + const int optname, int len, + u32 __user *optval, + int __user *optlen); }; extern int ccid_register(struct ccid *ccid); @@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, if (ccid->ccid_hc_tx_get_info != NULL) ccid->ccid_hc_tx_get_info(sk, info); } + +static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, + const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + int rc = -ENOPROTOOPT; + if (ccid->ccid_hc_rx_getsockopt != NULL) + rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +} + +static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, + const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + int rc = -ENOPROTOOPT; + if (ccid->ccid_hc_tx_getsockopt != NULL) + rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +} #endif /* _CCID_H */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 38aa84986118..aa68e0ab274d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1120,6 +1120,60 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = hctx->ccid3hctx_rtt; } +static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_RX_INFO: + if (len < sizeof(hcrx->ccid3hcrx_tfrc)) + return -EINVAL; + len = sizeof(hcrx->ccid3hcrx_tfrc); + val = &hcrx->ccid3hcrx_tfrc; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + +static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_TX_INFO: + if (len < sizeof(hctx->ccid3hctx_tfrc)) + return -EINVAL; + len = sizeof(hctx->ccid3hctx_tfrc); + val = &hctx->ccid3hctx_tfrc; + break; + 
default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + static struct ccid ccid3 = { .ccid_id = 3, .ccid_name = "ccid3", @@ -1139,6 +1193,8 @@ static struct ccid ccid3 = { .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, + .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, + .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; module_param(ccid3_debug, int, 0444); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9bda2868eba6..a1cfd0e9e3bc 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -325,12 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - if (optname == DCCP_SOCKOPT_SERVICE) - return dccp_getsockopt_service(sk, len, - (u32 __user *)optval, optlen); - - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) + if (len < sizeof(int)) return -EINVAL; dp = dccp_sk(sk); @@ -338,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: val = dp->dccps_packet_size; + len = sizeof(dp->dccps_packet_size); break; + case DCCP_SOCKOPT_SERVICE: + return dccp_getsockopt_service(sk, len, + (u32 __user *)optval, optlen); + case 128 ... 191: + return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, + len, (u32 __user *)optval, optlen); + case 192 ... 255: + return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, + len, (u32 __user *)optval, optlen); default: return -ENOPROTOOPT; } -- cgit v1.2.3 From 7f8c0fd78dccaf30e60cb4303bd7a21c7d9e6da3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 18 Sep 2005 21:11:08 +0100 Subject: [ARM] Fix warning in asm/futex.h The recently added futex.h contains an unused variable, which gcc naturally warns about. Remove this unused variable. Signed-off-by: Russell King --- include/asm-arm/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/futex.h b/include/asm-arm/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-arm/futex.h +++ b/include/asm-arm/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; -- cgit v1.2.3 From 939647ee308e0ad924e776657704c7bedd498664 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 18 Sep 2005 15:05:20 -0500 Subject: [SCSI] fix oops on usb storage device disconnect We fix the oops by enforcing the host state model. There have also been two extra states added: SHOST_CANCEL_RECOVERY and SHOST_DEL_RECOVERY so we can take the model through host removal while the recovery thread is active. 
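For illustration, a minimal sketch of the pattern this fix relies on (not part of the patch; the function name example_mark_host_cancelled is made up): scsi_host_set_state() returns non-zero when the requested transition is illegal, so the removal path tries the plain state first and, with the host lock held, falls back to the combined recovery state when the error handler thread is still running.

/* Illustrative sketch only, not from the patch; example_mark_host_cancelled() is a made-up name. */
static void example_mark_host_cancelled(struct Scsi_Host *shost)
{
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	if (scsi_host_set_state(shost, SHOST_CANCEL))
		/* recovery still active: record both facts in one state */
		scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY);
	spin_unlock_irqrestore(shost->host_lock, flags);
}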
Signed-off-by: James Bottomley --- drivers/scsi/hosts.c | 35 +++++++++++++++++++++++++++++++++-- drivers/scsi/scsi.c | 5 ++--- drivers/scsi/scsi_error.c | 23 ++++++++++++++++------- drivers/scsi/scsi_ioctl.c | 2 +- drivers/scsi/scsi_lib.c | 4 ++-- drivers/scsi/scsi_sysfs.c | 2 ++ drivers/scsi/sg.c | 2 +- include/scsi/scsi_host.h | 9 +++++++++ 8 files changed, 66 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 85503fad789a..f2a72d33132c 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -98,6 +98,7 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state) switch (oldstate) { case SHOST_CREATED: case SHOST_RUNNING: + case SHOST_CANCEL_RECOVERY: break; default: goto illegal; @@ -107,12 +108,31 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state) case SHOST_DEL: switch (oldstate) { case SHOST_CANCEL: + case SHOST_DEL_RECOVERY: break; default: goto illegal; } break; + case SHOST_CANCEL_RECOVERY: + switch (oldstate) { + case SHOST_CANCEL: + case SHOST_RECOVERY: + break; + default: + goto illegal; + } + break; + + case SHOST_DEL_RECOVERY: + switch (oldstate) { + case SHOST_CANCEL_RECOVERY: + break; + default: + goto illegal; + } + break; } shost->shost_state = state; return 0; @@ -134,13 +154,24 @@ EXPORT_SYMBOL(scsi_host_set_state); **/ void scsi_remove_host(struct Scsi_Host *shost) { + unsigned long flags; down(&shost->scan_mutex); - scsi_host_set_state(shost, SHOST_CANCEL); + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_CANCEL)) + if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) { + spin_unlock_irqrestore(shost->host_lock, flags); + up(&shost->scan_mutex); + return; + } + spin_unlock_irqrestore(shost->host_lock, flags); up(&shost->scan_mutex); scsi_forget_host(shost); scsi_proc_host_rm(shost); - scsi_host_set_state(shost, SHOST_DEL); + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_DEL)) + BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY)); + spin_unlock_irqrestore(shost->host_lock, flags); transport_unregister_device(&shost->shost_gendev); class_device_unregister(&shost->shost_classdev); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index a780546eda9c..1f0ebabf6d47 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1265,9 +1265,8 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery) list_for_each_safe(lh, lh_sf, &active_list) { scmd = list_entry(lh, struct scsi_cmnd, eh_entry); list_del_init(lh); - if (recovery) { - scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD); - } else { + if (recovery && + !scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD)) { scmd->result = (DID_ABORT << 16); scsi_finish_command(scmd); } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 895c9452be4c..af589fac814d 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -68,19 +68,24 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) { struct Scsi_Host *shost = scmd->device->host; unsigned long flags; + int ret = 0; if (shost->eh_wait == NULL) return 0; spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_RECOVERY)) + if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) + goto out_unlock; + ret = 1; scmd->eh_eflags |= eh_flag; list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); - scsi_host_set_state(shost, SHOST_RECOVERY); shost->host_failed++; scsi_eh_wakeup(shost); + out_unlock: 
spin_unlock_irqrestore(shost->host_lock, flags); - return 1; + return ret; } /** @@ -176,8 +181,8 @@ void scsi_times_out(struct scsi_cmnd *scmd) } if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { - panic("Error handler thread not present at %p %p %s %d", - scmd, scmd->device->host, __FILE__, __LINE__); + scmd->result |= DID_TIME_OUT << 16; + __scsi_done(scmd); } } @@ -196,8 +201,7 @@ int scsi_block_when_processing_errors(struct scsi_device *sdev) { int online; - wait_event(sdev->host->host_wait, (sdev->host->shost_state != - SHOST_RECOVERY)); + wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host)); online = scsi_device_online(sdev); @@ -1441,6 +1445,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) static void scsi_restart_operations(struct Scsi_Host *shost) { struct scsi_device *sdev; + unsigned long flags; /* * If the door was locked, we need to insert a door lock request @@ -1460,7 +1465,11 @@ static void scsi_restart_operations(struct Scsi_Host *shost) SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", __FUNCTION__)); - scsi_host_set_state(shost, SHOST_RUNNING); + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_RUNNING)) + if (scsi_host_set_state(shost, SHOST_CANCEL)) + BUG_ON(scsi_host_set_state(shost, SHOST_DEL)); + spin_unlock_irqrestore(shost->host_lock, flags); wake_up(&shost->host_wait); diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index b7fddac81347..de7f98cc38fe 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -458,7 +458,7 @@ int scsi_nonblockable_ioctl(struct scsi_device *sdev, int cmd, * error processing, as long as the device was opened * non-blocking */ if (filp && filp->f_flags & O_NONBLOCK) { - if (sdev->host->shost_state == SHOST_RECOVERY) + if (scsi_host_in_recovery(sdev->host)) return -ENODEV; } else if (!scsi_block_when_processing_errors(sdev)) return -ENODEV; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f065cbc1a6e5..dc9c772bc874 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -447,7 +447,7 @@ void scsi_device_unbusy(struct scsi_device *sdev) spin_lock_irqsave(shost->host_lock, flags); shost->host_busy--; - if (unlikely((shost->shost_state == SHOST_RECOVERY) && + if (unlikely(scsi_host_in_recovery(shost) && shost->host_failed)) scsi_eh_wakeup(shost); spin_unlock(shost->host_lock); @@ -1339,7 +1339,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q, struct Scsi_Host *shost, struct scsi_device *sdev) { - if (shost->shost_state == SHOST_RECOVERY) + if (scsi_host_in_recovery(shost)) return 0; if (shost->host_busy == 0 && shost->host_blocked) { /* diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 1e47b7eddef4..72a6550a056c 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -57,6 +57,8 @@ static struct { { SHOST_CANCEL, "cancel" }, { SHOST_DEL, "deleted" }, { SHOST_RECOVERY, "recovery" }, + { SHOST_CANCEL_RECOVERY, "cancel/recovery" }, + { SHOST_DEL_RECOVERY, "deleted/recovery", }, }; const char *scsi_host_state_name(enum scsi_host_state state) { diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9ea4765d1d12..4d09a6e4dd2e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1027,7 +1027,7 @@ sg_ioctl(struct inode *inode, struct file *filp, if (sdp->detached) return -ENODEV; if (filp->f_flags & O_NONBLOCK) { - if (sdp->device->host->shost_state == SHOST_RECOVERY) + if (scsi_host_in_recovery(sdp->device->host)) 
return -EBUSY; } else if (!scsi_block_when_processing_errors(sdp->device)) return -EBUSY; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 916144be208b..540369ff815c 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -439,6 +439,8 @@ enum scsi_host_state { SHOST_CANCEL, SHOST_DEL, SHOST_RECOVERY, + SHOST_CANCEL_RECOVERY, + SHOST_DEL_RECOVERY, }; struct Scsi_Host { @@ -621,6 +623,13 @@ static inline struct Scsi_Host *dev_to_shost(struct device *dev) return container_of(dev, struct Scsi_Host, shost_gendev); } +static inline int scsi_host_in_recovery(struct Scsi_Host *shost) +{ + return shost->shost_state == SHOST_RECOVERY || + shost->shost_state == SHOST_CANCEL_RECOVERY || + shost->shost_state == SHOST_DEL_RECOVERY; +} + extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); extern void scsi_flush_work(struct Scsi_Host *); -- cgit v1.2.3 From 3ed7a4704beb66a155acd67b78b7e9a5674d55fb Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 19 Sep 2005 09:50:04 -0500 Subject: [SCSI] Fix thread termination for the SCSI error handler From: Alan Stern This patch (as561) fixes the error handler's thread-exit code. The kthread_stop call won't wake the thread from a down_interruptible, so the patch gets rid of the semaphore and simply does set_current_state(TASK_INTERRUPTIBLE); Signed-off-by: Alan Stern Modified to simplify the termination loop and correct the sleep condition. Signed-off-by: James Bottomley --- drivers/scsi/scsi_error.c | 55 ++++++++++++++++++++--------------------------- include/scsi/scsi_host.h | 2 -- 2 files changed, 23 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index af589fac814d..ad5342165079 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -50,7 +50,7 @@ void scsi_eh_wakeup(struct Scsi_Host *shost) { if (shost->host_busy == shost->host_failed) { - up(shost->eh_wait); + wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread\n")); } @@ -70,7 +70,7 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) unsigned long flags; int ret = 0; - if (shost->eh_wait == NULL) + if (!shost->ehandler) return 0; spin_lock_irqsave(shost->host_lock, flags); @@ -1591,40 +1591,31 @@ int scsi_error_handler(void *data) { struct Scsi_Host *shost = (struct Scsi_Host *) data; int rtn; - DECLARE_MUTEX_LOCKED(sem); current->flags |= PF_NOFREEZE; - shost->eh_wait = &sem; + /* - * Wake up the thread that created us. + * Note - we always use TASK_INTERRUPTIBLE even if the module + * was loaded as part of the kernel. The reason is that + * UNINTERRUPTIBLE would cause this thread to be counted in + * the load average as a running process, and an interruptible + * wait doesn't. */ - SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of" - " scsi_eh_%d\n",shost->host_no)); - - while (1) { - /* - * If we get a signal, it means we are supposed to go - * away and die. This typically happens if the user is - * trying to unload a module. - */ - SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" - " scsi_eh_%d" - " sleeping\n",shost->host_no)); - - /* - * Note - we always use down_interruptible with the semaphore - * even if the module was loaded as part of the kernel. The - * reason is that down() will cause this thread to be counted - * in the load average as a running process, and down - * interruptible doesn't.
Given that we need to allow this - * thread to die if the driver was loaded as a module, using - * semaphores isn't unreasonable. - */ - down_interruptible(&sem); - if (kthread_should_stop()) - break; + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + if (shost->host_failed == 0 || + shost->host_failed != shost->host_busy) { + SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" + " scsi_eh_%d" + " sleeping\n", + shost->host_no)); + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + continue; + } + __set_current_state(TASK_RUNNING); SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" " scsi_eh_%d waking" " up\n",shost->host_no)); @@ -1651,7 +1642,7 @@ int scsi_error_handler(void *data) * which are still online. */ scsi_restart_operations(shost); - + set_current_state(TASK_INTERRUPTIBLE); } SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d" @@ -1660,7 +1651,7 @@ int scsi_error_handler(void *data) /* * Make sure that nobody tries to wake us up again. */ - shost->eh_wait = NULL; + shost->ehandler = NULL; return 0; } diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 540369ff815c..69313ba7505b 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -467,8 +467,6 @@ struct Scsi_Host { struct list_head eh_cmd_q; struct task_struct * ehandler; /* Error recovery thread. */ - struct semaphore * eh_wait; /* The error recovery thread waits - on this. */ struct semaphore * eh_action; /* Wait for specific actions on the host. */ unsigned int eh_active:1; /* Indicates the eh thread is awake and active if -- cgit v1.2.3 From 926b50f92a30090da2c1a8675de954c2d9b09732 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 19 Sep 2005 15:33:08 -0700 Subject: [NETFILTER]: Add new PPTP conntrack and NAT helper This new "version 3" PPTP conntrack/nat helper is finally ready for mainline inclusion. Special thanks to lots of last-minute bugfixing by Patrick McHardy. Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 12 + include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 332 +++++++++ .../linux/netfilter_ipv4/ip_conntrack_proto_gre.h | 114 +++ include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 7 + include/linux/netfilter_ipv4/ip_nat_pptp.h | 11 + net/ipv4/netfilter/Kconfig | 22 + net/ipv4/netfilter/Makefile | 5 + net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 805 +++++++++++++++++++++ net/ipv4/netfilter/ip_conntrack_proto_gre.c | 327 +++++++++ net/ipv4/netfilter/ip_nat_helper_pptp.c | 401 ++++++++++ net/ipv4/netfilter/ip_nat_proto_gre.c | 214 ++++++ 11 files changed, 2250 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_pptp.h create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h create mode 100644 include/linux/netfilter_ipv4/ip_nat_pptp.h create mode 100644 net/ipv4/netfilter/ip_conntrack_helper_pptp.c create mode 100644 net/ipv4/netfilter/ip_conntrack_proto_gre.c create mode 100644 net/ipv4/netfilter/ip_nat_helper_pptp.c create mode 100644 net/ipv4/netfilter/ip_nat_proto_gre.c (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 7e033e9271a8..2df446c952ef 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -133,11 +133,13 @@ enum ip_conntrack_expect_events { #include #include +#include #include /* per conntrack: protocol private data */ union ip_conntrack_proto { /* insert conntrack proto private data here */ + struct ip_ct_gre gre; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct ip_ct_icmp icmp; @@ -148,6 +150,7 @@ union ip_conntrack_expect_proto { }; /* Add protocol helper include file here */ +#include #include #include #include @@ -155,12 +158,20 @@ union ip_conntrack_expect_proto { /* per conntrack: application helper private data */ union ip_conntrack_help { /* insert conntrack helper private data (master) here */ + struct ip_ct_pptp_master ct_pptp_info; struct ip_ct_ftp_master ct_ftp_info; struct ip_ct_irc_master ct_irc_info; }; #ifdef CONFIG_IP_NF_NAT_NEEDED #include +#include + +/* per conntrack: nat application helper private data */ +union ip_conntrack_nat_help { + /* insert nat helper private data here */ + struct ip_nat_pptp nat_pptp_info; +}; #endif #include @@ -223,6 +234,7 @@ struct ip_conntrack #ifdef CONFIG_IP_NF_NAT_NEEDED struct { struct ip_nat_info info; + union ip_conntrack_nat_help help; #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) int masq_index; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h new file mode 100644 index 000000000000..389e3851d52f --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -0,0 +1,332 @@ +/* PPTP constants and structs */ +#ifndef _CONNTRACK_PPTP_H +#define _CONNTRACK_PPTP_H + +/* state of the control session */ +enum pptp_ctrlsess_state { + PPTP_SESSION_NONE, /* no session present */ + PPTP_SESSION_ERROR, /* some session error */ + PPTP_SESSION_STOPREQ, /* stop_sess request seen */ + PPTP_SESSION_REQUESTED, /* start_sess request seen */ + PPTP_SESSION_CONFIRMED, /* session established */ +}; + +/* state of the call inside the control session */ +enum pptp_ctrlcall_state { + PPTP_CALL_NONE, + PPTP_CALL_ERROR, + PPTP_CALL_OUT_REQ, + PPTP_CALL_OUT_CONF, + PPTP_CALL_IN_REQ, + PPTP_CALL_IN_REP, + PPTP_CALL_IN_CONF, + PPTP_CALL_CLEAR_REQ, +}; + + +/* conntrack private data */ 
+struct ip_ct_pptp_master { + enum pptp_ctrlsess_state sstate; /* session state */ + + /* everything below is going to be per-expectation in newnat, + * since there could be more than one call within one session */ + enum pptp_ctrlcall_state cstate; /* call state */ + u_int16_t pac_call_id; /* call id of PAC, host byte order */ + u_int16_t pns_call_id; /* call id of PNS, host byte order */ + + /* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack + * and therefore imposes a fixed limit on the number of maps */ + struct ip_ct_gre_keymap *keymap_orig, *keymap_reply; +}; + +/* conntrack_expect private member */ +struct ip_ct_pptp_expect { + enum pptp_ctrlcall_state cstate; /* call state */ + u_int16_t pac_call_id; /* call id of PAC */ + u_int16_t pns_call_id; /* call id of PNS */ +}; + + +#ifdef __KERNEL__ + +#define IP_CONNTR_PPTP PPTP_CONTROL_PORT + +#define PPTP_CONTROL_PORT 1723 + +#define PPTP_PACKET_CONTROL 1 +#define PPTP_PACKET_MGMT 2 + +#define PPTP_MAGIC_COOKIE 0x1a2b3c4d + +struct pptp_pkt_hdr { + __u16 packetLength; + __u16 packetType; + __u32 magicCookie; +}; + +/* PptpControlMessageType values */ +#define PPTP_START_SESSION_REQUEST 1 +#define PPTP_START_SESSION_REPLY 2 +#define PPTP_STOP_SESSION_REQUEST 3 +#define PPTP_STOP_SESSION_REPLY 4 +#define PPTP_ECHO_REQUEST 5 +#define PPTP_ECHO_REPLY 6 +#define PPTP_OUT_CALL_REQUEST 7 +#define PPTP_OUT_CALL_REPLY 8 +#define PPTP_IN_CALL_REQUEST 9 +#define PPTP_IN_CALL_REPLY 10 +#define PPTP_IN_CALL_CONNECT 11 +#define PPTP_CALL_CLEAR_REQUEST 12 +#define PPTP_CALL_DISCONNECT_NOTIFY 13 +#define PPTP_WAN_ERROR_NOTIFY 14 +#define PPTP_SET_LINK_INFO 15 + +#define PPTP_MSG_MAX 15 + +/* PptpGeneralError values */ +#define PPTP_ERROR_CODE_NONE 0 +#define PPTP_NOT_CONNECTED 1 +#define PPTP_BAD_FORMAT 2 +#define PPTP_BAD_VALUE 3 +#define PPTP_NO_RESOURCE 4 +#define PPTP_BAD_CALLID 5 +#define PPTP_REMOVE_DEVICE_ERROR 6 + +struct PptpControlHeader { + __u16 messageType; + __u16 reserved; +}; + +/* FramingCapability Bitmap Values */ +#define PPTP_FRAME_CAP_ASYNC 0x1 +#define PPTP_FRAME_CAP_SYNC 0x2 + +/* BearerCapability Bitmap Values */ +#define PPTP_BEARER_CAP_ANALOG 0x1 +#define PPTP_BEARER_CAP_DIGITAL 0x2 + +struct PptpStartSessionRequest { + __u16 protocolVersion; + __u8 reserved1; + __u8 reserved2; + __u32 framingCapability; + __u32 bearerCapability; + __u16 maxChannels; + __u16 firmwareRevision; + __u8 hostName[64]; + __u8 vendorString[64]; +}; + +/* PptpStartSessionResultCode Values */ +#define PPTP_START_OK 1 +#define PPTP_START_GENERAL_ERROR 2 +#define PPTP_START_ALREADY_CONNECTED 3 +#define PPTP_START_NOT_AUTHORIZED 4 +#define PPTP_START_UNKNOWN_PROTOCOL 5 + +struct PptpStartSessionReply { + __u16 protocolVersion; + __u8 resultCode; + __u8 generalErrorCode; + __u32 framingCapability; + __u32 bearerCapability; + __u16 maxChannels; + __u16 firmwareRevision; + __u8 hostName[64]; + __u8 vendorString[64]; +}; + +/* PptpStopReasons */ +#define PPTP_STOP_NONE 1 +#define PPTP_STOP_PROTOCOL 2 +#define PPTP_STOP_LOCAL_SHUTDOWN 3 + +struct PptpStopSessionRequest { + __u8 reason; +}; + +/* PptpStopSessionResultCode */ +#define PPTP_STOP_OK 1 +#define PPTP_STOP_GENERAL_ERROR 2 + +struct PptpStopSessionReply { + __u8 resultCode; + __u8 generalErrorCode; +}; + +struct PptpEchoRequest { + __u32 identNumber; +}; + +/* PptpEchoReplyResultCode */ +#define PPTP_ECHO_OK 1 +#define PPTP_ECHO_GENERAL_ERROR 2 + +struct PptpEchoReply { + __u32 identNumber; + __u8 resultCode; + __u8 generalErrorCode; + __u16 reserved; +}; + +/* PptpFramingType */ 
+#define PPTP_ASYNC_FRAMING 1 +#define PPTP_SYNC_FRAMING 2 +#define PPTP_DONT_CARE_FRAMING 3 + +/* PptpCallBearerType */ +#define PPTP_ANALOG_TYPE 1 +#define PPTP_DIGITAL_TYPE 2 +#define PPTP_DONT_CARE_BEARER_TYPE 3 + +struct PptpOutCallRequest { + __u16 callID; + __u16 callSerialNumber; + __u32 minBPS; + __u32 maxBPS; + __u32 bearerType; + __u32 framingType; + __u16 packetWindow; + __u16 packetProcDelay; + __u16 reserved1; + __u16 phoneNumberLength; + __u16 reserved2; + __u8 phoneNumber[64]; + __u8 subAddress[64]; +}; + +/* PptpCallResultCode */ +#define PPTP_OUTCALL_CONNECT 1 +#define PPTP_OUTCALL_GENERAL_ERROR 2 +#define PPTP_OUTCALL_NO_CARRIER 3 +#define PPTP_OUTCALL_BUSY 4 +#define PPTP_OUTCALL_NO_DIAL_TONE 5 +#define PPTP_OUTCALL_TIMEOUT 6 +#define PPTP_OUTCALL_DONT_ACCEPT 7 + +struct PptpOutCallReply { + __u16 callID; + __u16 peersCallID; + __u8 resultCode; + __u8 generalErrorCode; + __u16 causeCode; + __u32 connectSpeed; + __u16 packetWindow; + __u16 packetProcDelay; + __u32 physChannelID; +}; + +struct PptpInCallRequest { + __u16 callID; + __u16 callSerialNumber; + __u32 callBearerType; + __u32 physChannelID; + __u16 dialedNumberLength; + __u16 dialingNumberLength; + __u8 dialedNumber[64]; + __u8 dialingNumber[64]; + __u8 subAddress[64]; +}; + +/* PptpInCallResultCode */ +#define PPTP_INCALL_ACCEPT 1 +#define PPTP_INCALL_GENERAL_ERROR 2 +#define PPTP_INCALL_DONT_ACCEPT 3 + +struct PptpInCallReply { + __u16 callID; + __u16 peersCallID; + __u8 resultCode; + __u8 generalErrorCode; + __u16 packetWindow; + __u16 packetProcDelay; + __u16 reserved; +}; + +struct PptpInCallConnected { + __u16 peersCallID; + __u16 reserved; + __u32 connectSpeed; + __u16 packetWindow; + __u16 packetProcDelay; + __u32 callFramingType; +}; + +struct PptpClearCallRequest { + __u16 callID; + __u16 reserved; +}; + +struct PptpCallDisconnectNotify { + __u16 callID; + __u8 resultCode; + __u8 generalErrorCode; + __u16 causeCode; + __u16 reserved; + __u8 callStatistics[128]; +}; + +struct PptpWanErrorNotify { + __u16 peersCallID; + __u16 reserved; + __u32 crcErrors; + __u32 framingErrors; + __u32 hardwareOverRuns; + __u32 bufferOverRuns; + __u32 timeoutErrors; + __u32 alignmentErrors; +}; + +struct PptpSetLinkInfo { + __u16 peersCallID; + __u16 reserved; + __u32 sendAccm; + __u32 recvAccm; +}; + + +struct pptp_priv_data { + __u16 call_id; + __u16 mcall_id; + __u16 pcall_id; +}; + +union pptp_ctrl_union { + struct PptpStartSessionRequest sreq; + struct PptpStartSessionReply srep; + struct PptpStopSessionRequest streq; + struct PptpStopSessionReply strep; + struct PptpOutCallRequest ocreq; + struct PptpOutCallReply ocack; + struct PptpInCallRequest icreq; + struct PptpInCallReply icack; + struct PptpInCallConnected iccon; + struct PptpClearCallRequest clrreq; + struct PptpCallDisconnectNotify disc; + struct PptpWanErrorNotify wanerr; + struct PptpSetLinkInfo setlink; +}; + +extern int +(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +extern int +(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +extern int +(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig, + struct ip_conntrack_expect *exp_reply); + +extern void +(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp); +#endif /* __KERNEL__ */ +#endif /* 
_CONNTRACK_PPTP_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h new file mode 100644 index 000000000000..8d090ef82f5f --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h @@ -0,0 +1,114 @@ +#ifndef _CONNTRACK_PROTO_GRE_H +#define _CONNTRACK_PROTO_GRE_H +#include + +/* GRE PROTOCOL HEADER */ + +/* GRE Version field */ +#define GRE_VERSION_1701 0x0 +#define GRE_VERSION_PPTP 0x1 + +/* GRE Protocol field */ +#define GRE_PROTOCOL_PPTP 0x880B + +/* GRE Flags */ +#define GRE_FLAG_C 0x80 +#define GRE_FLAG_R 0x40 +#define GRE_FLAG_K 0x20 +#define GRE_FLAG_S 0x10 +#define GRE_FLAG_A 0x80 + +#define GRE_IS_C(f) ((f)&GRE_FLAG_C) +#define GRE_IS_R(f) ((f)&GRE_FLAG_R) +#define GRE_IS_K(f) ((f)&GRE_FLAG_K) +#define GRE_IS_S(f) ((f)&GRE_FLAG_S) +#define GRE_IS_A(f) ((f)&GRE_FLAG_A) + +/* GRE is a mess: Four different standards */ +struct gre_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 rec:3, + srr:1, + seq:1, + key:1, + routing:1, + csum:1, + version:3, + reserved:4, + ack:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 csum:1, + routing:1, + key:1, + seq:1, + srr:1, + rec:3, + ack:1, + reserved:4, + version:3; +#else +#error "Adjust your defines" +#endif + __u16 protocol; +}; + +/* modified GRE header for PPTP */ +struct gre_hdr_pptp { + __u8 flags; /* bitfield */ + __u8 version; /* should be GRE_VERSION_PPTP */ + __u16 protocol; /* should be GRE_PROTOCOL_PPTP */ + __u16 payload_len; /* size of ppp payload, not inc. gre header */ + __u16 call_id; /* peer's call_id for this session */ + __u32 seq; /* sequence number. Present if S==1 */ + __u32 ack; /* seq number of highest packet recieved by */ + /* sender in this session */ +}; + + +/* this is part of ip_conntrack */ +struct ip_ct_gre { + unsigned int stream_timeout; + unsigned int timeout; +}; + +#ifdef __KERNEL__ +struct ip_conntrack_expect; +struct ip_conntrack; + +/* structure for original <-> reply keymap */ +struct ip_ct_gre_keymap { + struct list_head list; + + struct ip_conntrack_tuple tuple; +}; + +/* add new tuple->key_reply pair to keymap */ +int ip_ct_gre_keymap_add(struct ip_conntrack *ct, + struct ip_conntrack_tuple *t, + int reply); + +/* delete keymap entries */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct); + + +/* get pointer to gre key, if present */ +static inline u_int32_t *gre_key(struct gre_hdr *greh) +{ + if (!greh->key) + return NULL; + if (greh->csum || greh->routing) + return (u_int32_t *) (greh+sizeof(*greh)+4); + return (u_int32_t *) (greh+sizeof(*greh)); +} + +/* get pointer ot gre csum, if present */ +static inline u_int16_t *gre_csum(struct gre_hdr *greh) +{ + if (!greh->csum) + return NULL; + return (u_int16_t *) (greh+sizeof(*greh)); +} + +#endif /* __KERNEL__ */ + +#endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index c33f0b5e0d0a..14dc0f7b6556 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -28,6 +28,9 @@ union ip_conntrack_manip_proto struct { u_int16_t port; } sctp; + struct { + u_int16_t key; /* key is 32bit, pptp only uses 16 */ + } gre; }; /* The manipulable part of the tuple. */ @@ -61,6 +64,10 @@ struct ip_conntrack_tuple struct { u_int16_t port; } sctp; + struct { + u_int16_t key; /* key is 32bit, + * pptp only uses 16 */ + } gre; } u; /* The protocol. 
*/ diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h new file mode 100644 index 000000000000..eaf66c2e8f93 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h @@ -0,0 +1,11 @@ +/* PPTP constants and structs */ +#ifndef _NAT_PPTP_H +#define _NAT_PPTP_H + +/* conntrack private data */ +struct ip_nat_pptp { + u_int16_t pns_call_id; /* NAT'ed PNS call id */ + u_int16_t pac_call_id; /* NAT'ed PAC call id */ +}; + +#endif /* _NAT_PPTP_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e2162d270073..3cf9b451675c 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -137,6 +137,22 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. +config IP_NF_PPTP + tristate 'PPTP protocol support' + help + This module adds support for PPTP (Point to Point Tunnelling + Protocol, RFC2637) conncection tracking and NAT. + + If you are running PPTP sessions over a stateful firewall or NAT + box, you may want to enable this feature. + + Please note that not all PPTP modes of operation are supported yet. + For more info, read top of the file + net/ipv4/netfilter/ip_conntrack_pptp.c + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + config IP_NF_QUEUE tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help @@ -621,6 +637,12 @@ config IP_NF_NAT_AMANDA default IP_NF_NAT if IP_NF_AMANDA=y default m if IP_NF_AMANDA=m +config IP_NF_NAT_PPTP + tristate + depends on IP_NF_NAT!=n && IP_NF_PPTP!=n + default IP_NF_NAT if IP_NF_PPTP=y + default m if IP_NF_PPTP=m + # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 1ba0db746817..3d45d3c0283c 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -6,6 +6,9 @@ ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o +ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o + # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o @@ -17,6 +20,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o # connection tracking helpers +obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o @@ -24,6 +28,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o # NAT helpers +obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c new file mode 100644 index 000000000000..79db5b70d5f6 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -0,0 +1,805 @@ +/* + * ip_conntrack_pptp.c - Version 3.0 + * + * Connection tracking support for PPTP (Point to Point Tunneling Protocol). 
+ * PPTP is a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * Limitations: + * - We blindly assume that control connections are always + * established in PNS->PAC direction. This is a violation + * of RFC 2637 + * - We can only support one single call within each session + * + * TODO: + * - testing of incoming PPTP calls + * + * Changes: + * 2002-02-05 - Version 1.3 + * - Call ip_conntrack_unexpect_related() from + * pptp_destroy_siblings() to destroy expectations in case + * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen + * (Philip Craig ) + * - Add Version information at module loadtime + * 2002-02-10 - Version 1.6 + * - move to C99 style initializers + * - remove second expectation if first arrives + * 2004-10-22 - Version 2.0 + * - merge Mandrake's 2.6.x port with recent 2.6.x API changes + * - fix lots of linear skb assumptions from Mandrake's port + * 2005-06-10 - Version 2.1 + * - use ip_conntrack_expect_free() instead of kfree() on the + * expect's (which are from the slab for quite some time) + * 2005-06-10 - Version 3.0 + * - port helper to post-2.6.11 API changes, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * 2005-07-30 - Version 3.1 + * - port helper to 2.6.13 API changes + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define IP_CT_PPTP_VERSION "3.1" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); + +static DEFINE_SPINLOCK(ip_pptp_lock); + +int +(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply); + +void +(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp); + +#if 0 +/* PptpControlMessageType names */ +const char *pptp_msg_name[] = { + "UNKNOWN_MESSAGE", + "START_SESSION_REQUEST", + "START_SESSION_REPLY", + "STOP_SESSION_REQUEST", + "STOP_SESSION_REPLY", + "ECHO_REQUEST", + "ECHO_REPLY", + "OUT_CALL_REQUEST", + "OUT_CALL_REPLY", + "IN_CALL_REQUEST", + "IN_CALL_REPLY", + "IN_CALL_CONNECT", + "CALL_CLEAR_REQUEST", + "CALL_DISCONNECT_NOTIFY", + "WAN_ERROR_NOTIFY", + "SET_LINK_INFO" +}; +EXPORT_SYMBOL(pptp_msg_name); +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...)
+#endif + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS + +#define PPTP_GRE_TIMEOUT (10 MINS) +#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS) + +static void pptp_expectfn(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + DEBUGP("increasing timeouts\n"); + + /* increase timeout of GRE data channel conntrack entry */ + ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; + ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; + + /* Can you see how rusty this code is, compared with the pre-2.6.11 + * one? That's what happened to my shiny newnat of 2002 ;( -HW */ + + if (!ip_nat_pptp_hook_expectfn) { + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_expect *exp_other; + + /* obviously this tuple inversion only works until you do NAT */ + invert_tuplepr(&inv_t, &exp->tuple); + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&inv_t); + + exp_other = ip_conntrack_expect_find(&inv_t); + if (exp_other) { + /* delete other expectation. */ + DEBUGP("found\n"); + ip_conntrack_unexpect_related(exp_other); + ip_conntrack_expect_put(exp_other); + } else { + DEBUGP("not found\n"); + } + } else { + /* we need more than simple inversion */ + ip_nat_pptp_hook_expectfn(ct, exp); + } +} + +static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_expect *exp; + + DEBUGP("trying to timeout ct or exp for tuple "); + DUMP_TUPLE(t); + + h = ip_conntrack_find_get(t, NULL); + if (h) { + struct ip_conntrack *sibling = tuplehash_to_ctrack(h); + DEBUGP("setting timeout of conntrack %p to 0\n", sibling); + sibling->proto.gre.timeout = 0; + sibling->proto.gre.stream_timeout = 0; + /* refresh_acct will not modify counters if skb == NULL */ + if (del_timer(&sibling->timeout)) + sibling->timeout.function((unsigned long)sibling); + ip_conntrack_put(sibling); + return 1; + } else { + exp = ip_conntrack_expect_find(t); + if (exp) { + DEBUGP("unexpect_related of expect %p\n", exp); + ip_conntrack_unexpect_related(exp); + ip_conntrack_expect_put(exp); + return 1; + } + } + + return 0; +} + + +/* timeout GRE data connections */ +static void pptp_destroy_siblings(struct ip_conntrack *ct) +{ + struct ip_conntrack_tuple t; + + /* Since ct->sibling_list has literally rusted away in 2.6.11, + * we now need another way to find out about our sibling + * contrack and expects... 
-HW */ + + /* try original (pns->pac) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout original pns->pac ct/exp\n"); + + /* try reply (pac->pns) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout reply pac->pns ct/exp\n"); +} + +/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ +static inline int +exp_gre(struct ip_conntrack *master, + u_int32_t seq, + u_int16_t callid, + u_int16_t peer_callid) +{ + struct ip_conntrack_tuple inv_tuple; + struct ip_conntrack_tuple exp_tuples[] = { + /* tuple in original direction, PNS->PAC */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, + .u = { .gre = { .key = peer_callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, + .u = { .gre = { .key = callid } }, + .protonum = IPPROTO_GRE + }, + }, + /* tuple in reply direction, PAC->PNS */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, + .u = { .gre = { .key = callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, + .u = { .gre = { .key = peer_callid } }, + .protonum = IPPROTO_GRE + }, + } + }; + struct ip_conntrack_expect *exp_orig, *exp_reply; + int ret = 1; + + exp_orig = ip_conntrack_expect_alloc(master); + if (exp_orig == NULL) + goto out; + + exp_reply = ip_conntrack_expect_alloc(master); + if (exp_reply == NULL) + goto out_put_orig; + + memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + + exp_orig->mask.src.ip = 0xffffffff; + exp_orig->mask.src.u.all = 0; + exp_orig->mask.dst.u.all = 0; + exp_orig->mask.dst.u.gre.key = 0xffff; + exp_orig->mask.dst.ip = 0xffffffff; + exp_orig->mask.dst.protonum = 0xff; + + exp_orig->master = master; + exp_orig->expectfn = pptp_expectfn; + exp_orig->flags = 0; + + exp_orig->dir = IP_CT_DIR_ORIGINAL; + + /* both expectations are identical apart from tuple */ + memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); + memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); + + exp_reply->dir = !exp_orig->dir; + + if (ip_nat_pptp_hook_exp_gre) + ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + else { + + DEBUGP("calling expect_related PNS->PAC"); + DUMP_TUPLE(&exp_orig->tuple); + + if (ip_conntrack_expect_related(exp_orig) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_put_both; + } + + DEBUGP("calling expect_related PAC->PNS"); + DUMP_TUPLE(&exp_reply->tuple); + + if (ip_conntrack_expect_related(exp_reply) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_unexpect_orig; + } + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { + DEBUGP("cannot keymap_add() exp\n"); + goto out_unexpect_both; + } + + invert_tuplepr(&inv_tuple, &exp_reply->tuple); + if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(master); + DEBUGP("cannot keymap_add() exp_inv\n"); + goto out_unexpect_both; + } + ret = 0; + } + +out_put_both: + ip_conntrack_expect_put(exp_reply); +out_put_orig: + ip_conntrack_expect_put(exp_orig); +out: + return ret; + +out_unexpect_both: + 
ip_conntrack_unexpect_related(exp_reply); +out_unexpect_orig: + ip_conntrack_unexpect_related(exp_orig); + goto out_put_both; +} + +static inline int +pptp_inbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg, *cid, *pcid; + u_int32_t seq; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + + msg = ntohs(ctlh->messageType); + DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.srep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms new control session */ + if (info->sstate < PPTP_SESSION_REQUESTED) { + DEBUGP("%s without START_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->srep.resultCode == PPTP_START_OK) + info->sstate = PPTP_SESSION_CONFIRMED; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_STOP_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.strep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms end of control session */ + if (info->sstate > PPTP_SESSION_STOPREQ) { + DEBUGP("%s without STOP_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->strep.resultCode == PPTP_STOP_OK) + info->sstate = PPTP_SESSION_NONE; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_OUT_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.ocack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server accepted call, we now expect GRE frames */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->cstate != PPTP_CALL_OUT_REQ && + info->cstate != PPTP_CALL_OUT_CONF) { + DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); + break; + } + if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_NONE; + break; + } + + cid = &pptpReq->ocack.callID; + pcid = &pptpReq->ocack.peersCallID; + + info->pac_call_id = ntohs(*cid); + + if (htons(info->pns_call_id) != *pcid) { + DEBUGP("%s for unknown callid %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(*cid), ntohs(*pcid)); + + info->cstate = PPTP_CALL_OUT_CONF; + + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + break; + + case PPTP_IN_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call request */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + pcid = &pptpReq->icack.peersCallID; + 
DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_REQ; + info->pac_call_id = ntohs(*pcid); + break; + + case PPTP_IN_CALL_CONNECT: + if (reqlen < sizeof(_pptpReq.iccon)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call established */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->sstate != PPTP_CALL_IN_REP + && info->sstate != PPTP_CALL_IN_CONF) { + DEBUGP("%s but never sent IN_CALL_REPLY\n", + pptp_msg_name[msg]); + break; + } + + pcid = &pptpReq->iccon.peersCallID; + cid = &info->pac_call_id; + + if (info->pns_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown CallID %u\n", + pptp_msg_name[msg], ntohs(*cid)); + break; + } + + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_CONF; + + /* we expect a GRE connection from PAC to PNS */ + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + + break; + + case PPTP_CALL_DISCONNECT_NOTIFY: + if (reqlen < sizeof(_pptpReq.disc)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms disconnect */ + cid = &pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + break; + + case PPTP_WAN_ERROR_NOTIFY: + break; + + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) + ? 
pptp_msg_name[msg]:pptp_msg_name[0], msg); + break; + } + + + if (ip_nat_pptp_hook_inbound) + return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; + +} + +static inline int +pptp_outbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg, *cid, *pcid; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + + msg = ntohs(ctlh->messageType); + DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REQUEST: + /* client requests for new control session */ + if (info->sstate != PPTP_SESSION_NONE) { + DEBUGP("%s but we already have one", + pptp_msg_name[msg]); + } + info->sstate = PPTP_SESSION_REQUESTED; + break; + case PPTP_STOP_SESSION_REQUEST: + /* client requests end of control session */ + info->sstate = PPTP_SESSION_STOPREQ; + break; + + case PPTP_OUT_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.ocreq)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + /* FIXME: break; */ + } + + /* client initiating connection to server */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", + pptp_msg_name[msg]); + break; + } + info->cstate = PPTP_CALL_OUT_REQ; + /* track PNS call id */ + cid = &pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->pns_call_id = ntohs(*cid); + break; + case PPTP_IN_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* client answers incoming call */ + if (info->cstate != PPTP_CALL_IN_REQ + && info->cstate != PPTP_CALL_IN_REP) { + DEBUGP("%s without incall_req\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { + info->cstate = PPTP_CALL_NONE; + break; + } + pcid = &pptpReq->icack.peersCallID; + if (info->pac_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown call %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = ntohs(pptpReq->icack.callID); + break; + + case PPTP_CALL_CLEAR_REQUEST: + /* client requests hangup of call */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("CLEAR_CALL but no session\n"); + break; + } + /* FUTURE: iterate over all calls and check if + * call ID is valid. We don't do this without newnat, + * because we only know about last call */ + info->cstate = PPTP_CALL_CLEAR_REQ; + break; + case PPTP_SET_LINK_INFO: + break; + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? 
+ pptp_msg_name[msg]:pptp_msg_name[0], msg); + /* unknown: no need to create GRE masq table entry */ + break; + } + + if (ip_nat_pptp_hook_outbound) + return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; +} + + +/* track caller id inside control connection, call expect_related */ +static int +conntrack_pptp_help(struct sk_buff **pskb, + struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) + +{ + struct pptp_pkt_hdr _pptph, *pptph; + struct tcphdr _tcph, *tcph; + u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + u_int32_t datalen; + int dir = CTINFO2DIR(ctinfo); + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + unsigned int nexthdr_off; + + int oldsstate, oldcstate; + int ret; + + /* don't do any tracking before tcp handshake complete */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ctinfo = %u, skipping\n", ctinfo); + return NF_ACCEPT; + } + + nexthdr_off = (*pskb)->nh.iph->ihl*4; + tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + BUG_ON(!tcph); + nexthdr_off += tcph->doff * 4; + datalen = tcplen - tcph->doff * 4; + + if (tcph->fin || tcph->rst) { + DEBUGP("RST/FIN received, timeouting GRE\n"); + /* can't do this after real newnat */ + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + } + + pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + if (!pptph) { + DEBUGP("no full PPTP header, can't track\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_pptph); + datalen -= sizeof(_pptph); + + /* if it's not a control message we can't do anything with it */ + if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { + DEBUGP("not a control packet\n"); + return NF_ACCEPT; + } + + oldsstate = info->sstate; + oldcstate = info->cstate; + + spin_lock_bh(&ip_pptp_lock); + + /* FIXME: We just blindly assume that the control connection is always + * established from PNS->PAC. 
However, RFC makes no guarantee */ + if (dir == IP_CT_DIR_ORIGINAL) + /* client -> server (PNS -> PAC) */ + ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + else + /* server -> client (PAC -> PNS) */ + ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + DEBUGP("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); + spin_unlock_bh(&ip_pptp_lock); + + return ret; +} + +/* control protocol helper */ +static struct ip_conntrack_helper pptp = { + .list = { NULL, NULL }, + .name = "pptp", + .me = THIS_MODULE, + .max_expected = 2, + .timeout = 5 * 60, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = 0xffff } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xff + } + }, + .help = conntrack_pptp_help +}; + +extern void __exit ip_ct_proto_gre_fini(void); +extern int __init ip_ct_proto_gre_init(void); + +/* ip_conntrack_pptp initialization */ +static int __init init(void) +{ + int retcode; + + retcode = ip_ct_proto_gre_init(); + if (retcode < 0) + return retcode; + + DEBUGP(" registering helper\n"); + if ((retcode = ip_conntrack_helper_register(&pptp))) { + printk(KERN_ERR "Unable to register conntrack application " + "helper for pptp: %d\n", retcode); + ip_ct_proto_gre_fini(); + return retcode; + } + + printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&pptp); + ip_ct_proto_gre_fini(); + printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_nat_pptp_hook_outbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_inbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre); +EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c new file mode 100644 index 000000000000..de3cb9db6f85 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -0,0 +1,327 @@ +/* + * ip_conntrack_proto_gre.c - Version 3.0 + * + * Connection tracking protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. 
+ * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_RWLOCK(ip_ct_gre_lock); +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); + +/* shamelessly stolen from ip_conntrack_proto_udp.c */ +#define GRE_TIMEOUT (30*HZ) +#define GRE_STREAM_TIMEOUT (180*HZ) + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ + NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \ + NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key)) +#else +#define DEBUGP(x, args...) +#define DUMP_TUPLE_GRE(x) +#endif + +/* GRE KEYMAP HANDLING FUNCTIONS */ +static LIST_HEAD(gre_keymap_list); + +static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, + const struct ip_conntrack_tuple *t) +{ + return ((km->tuple.src.ip == t->src.ip) && + (km->tuple.dst.ip == t->dst.ip) && + (km->tuple.dst.protonum == t->dst.protonum) && + (km->tuple.dst.u.all == t->dst.u.all)); +} + +/* look up the source key for a given tuple */ +static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) +{ + struct ip_ct_gre_keymap *km; + u_int32_t key = 0; + + read_lock_bh(&ip_ct_gre_lock); + km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (km) + key = km->tuple.src.u.gre.key; + read_unlock_bh(&ip_ct_gre_lock); + + DEBUGP("lookup src key 0x%x up key for ", key); + DUMP_TUPLE_GRE(t); + + return key; +} + +/* add a single keymap entry, associate with specified master ct */ +int +ip_ct_gre_keymap_add(struct ip_conntrack *ct, + struct ip_conntrack_tuple *t, int reply) +{ + struct ip_ct_gre_keymap **exist_km, *km, *old; + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to add GRE keymap to non-pptp session\n"); + return -1; + } + + if (!reply) + exist_km = &ct->help.ct_pptp_info.keymap_orig; + else + exist_km = &ct->help.ct_pptp_info.keymap_reply; + + if (*exist_km) { + /* check whether it's a retransmission */ + old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (old == *exist_km) { + DEBUGP("retransmission\n"); + return 0; + } + + DEBUGP("trying to override keymap_%s for ct %p\n", + reply? 
"reply":"orig", ct); + return -EEXIST; + } + + km = kmalloc(sizeof(*km), GFP_ATOMIC); + if (!km) + return -ENOMEM; + + memcpy(&km->tuple, t, sizeof(*t)); + *exist_km = km; + + DEBUGP("adding new entry %p: ", km); + DUMP_TUPLE_GRE(&km->tuple); + + write_lock_bh(&ip_ct_gre_lock); + list_append(&gre_keymap_list, km); + write_unlock_bh(&ip_ct_gre_lock); + + return 0; +} + +/* destroy the keymap entries associated with specified master ct */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) +{ + DEBUGP("entering for ct %p\n", ct); + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to destroy GRE keymap to non-pptp session\n"); + return; + } + + write_lock_bh(&ip_ct_gre_lock); + if (ct->help.ct_pptp_info.keymap_orig) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_orig); + list_del(&ct->help.ct_pptp_info.keymap_orig->list); + kfree(ct->help.ct_pptp_info.keymap_orig); + ct->help.ct_pptp_info.keymap_orig = NULL; + } + if (ct->help.ct_pptp_info.keymap_reply) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_reply); + list_del(&ct->help.ct_pptp_info.keymap_reply->list); + kfree(ct->help.ct_pptp_info.keymap_reply); + ct->help.ct_pptp_info.keymap_reply = NULL; + } + write_unlock_bh(&ip_ct_gre_lock); +} + + +/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ + +/* invert gre part of tuple */ +static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->dst.u.gre.key = orig->src.u.gre.key; + tuple->src.u.gre.key = orig->dst.u.gre.key; + + return 1; +} + +/* gre hdr info to tuple */ +static int gre_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple) +{ + struct gre_hdr_pptp _pgrehdr, *pgrehdr; + u_int32_t srckey; + struct gre_hdr _grehdr, *grehdr; + + /* first only delinearize old RFC1701 GRE header */ + grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); + if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + /* try to behave like "ip_conntrack_proto_generic" */ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + return 1; + } + + /* PPTP header is variable length, only need up to the call_id field */ + pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); + if (!pgrehdr) + return 1; + + if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + return 0; + } + + tuple->dst.u.gre.key = pgrehdr->call_id; + srckey = gre_keymap_lookup(tuple); + tuple->src.u.gre.key = srckey; + + return 1; +} + +/* print gre part of tuple */ +static int gre_print_tuple(struct seq_file *s, + const struct ip_conntrack_tuple *tuple) +{ + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); +} + +/* print private data for conntrack */ +static int gre_print_conntrack(struct seq_file *s, + const struct ip_conntrack *ct) +{ + return seq_printf(s, "timeout=%u, stream_timeout=%u ", + (ct->proto.gre.timeout / HZ), + (ct->proto.gre.stream_timeout / HZ)); +} + +/* Returns verdict for packet, and may modify conntrack */ +static int gre_packet(struct ip_conntrack *ct, + const struct sk_buff *skb, + enum ip_conntrack_info conntrackinfo) +{ + /* If we've seen traffic both ways, this is a GRE connection. + * Extend timeout. */ + if (ct->status & IPS_SEEN_REPLY) { + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.stream_timeout); + /* Also, more likely to be important, and not a probe. 
*/ + set_bit(IPS_ASSURED_BIT, &ct->status); + } else + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int gre_new(struct ip_conntrack *ct, + const struct sk_buff *skb) +{ + DEBUGP(": "); + DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + /* initialize to sane value. Ideally a conntrack helper + * (e.g. in case of pptp) is increasing them */ + ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; + ct->proto.gre.timeout = GRE_TIMEOUT; + + return 1; +} + +/* Called when a conntrack entry has already been removed from the hashes + * and is about to be deleted from memory */ +static void gre_destroy(struct ip_conntrack *ct) +{ + struct ip_conntrack *master = ct->master; + DEBUGP(" entering\n"); + + if (!master) + DEBUGP("no master !?!\n"); + else + ip_ct_gre_keymap_destroy(master); +} + +/* protocol helper struct */ +static struct ip_conntrack_protocol gre = { + .proto = IPPROTO_GRE, + .name = "gre", + .pkt_to_tuple = gre_pkt_to_tuple, + .invert_tuple = gre_invert_tuple, + .print_tuple = gre_print_tuple, + .print_conntrack = gre_print_conntrack, + .packet = gre_packet, + .new = gre_new, + .destroy = gre_destroy, + .me = THIS_MODULE, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif +}; + +/* ip_conntrack_proto_gre initialization */ +int __init ip_ct_proto_gre_init(void) +{ + return ip_conntrack_protocol_register(&gre); +} + +void __exit ip_ct_proto_gre_fini(void) +{ + struct list_head *pos, *n; + + /* delete all keymap entries */ + write_lock_bh(&ip_ct_gre_lock); + list_for_each_safe(pos, n, &gre_keymap_list) { + DEBUGP("deleting keymap %p at module unload time\n", pos); + list_del(pos); + kfree(pos); + } + write_unlock_bh(&ip_ct_gre_lock); + + ip_conntrack_protocol_unregister(&gre); +} + +EXPORT_SYMBOL(ip_ct_gre_keymap_add); +EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c new file mode 100644 index 000000000000..3cdd0684d30d --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -0,0 +1,401 @@ +/* + * ip_nat_pptp.c - Version 3.0 + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. 
Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + * + * Changes: + * 2002-02-10 - Version 1.3 + * - Use ip_nat_mangle_tcp_packet() because of cloned skb's + * in local connections (Philip Craig ) + * - add checks for magicCookie and pptp version + * - make argument list of pptp_{out,in}bound_packet() shorter + * - move to C99 style initializers + * - print version number at module loadtime + * 2003-09-22 - Version 1.5 + * - use SNATed tcp sourceport as callid, since we get called before + * TCP header is mangled (Philip Craig ) + * 2004-10-22 - Version 2.0 + * - kernel 2.6.x version + * 2005-06-10 - Version 3.0 + * - kernel >= 2.6.11 version, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IP_NAT_PPTP_VERSION "3.0" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); + + +#if 0 +extern const char *pptp_msg_name[]; +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +static void pptp_nat_expected(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + struct ip_conntrack *master = ct->master; + struct ip_conntrack_expect *other_exp; + struct ip_conntrack_tuple t; + struct ip_ct_pptp_master *ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info; + + ct_pptp_info = &master->help.ct_pptp_info; + nat_pptp_info = &master->nat.help.nat_pptp_info; + + /* And here goes the grand finale of corrosion... 
*/ + + if (exp->dir == IP_CT_DIR_ORIGINAL) { + DEBUGP("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.protonum = IPPROTO_GRE; + } else { + DEBUGP("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + t.src.u.gre.key = + htons(master->nat.help.nat_pptp_info.pns_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + t.dst.u.gre.key = + htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.protonum = IPPROTO_GRE; + } + + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&t); + other_exp = ip_conntrack_expect_find(&t); + if (other_exp) { + ip_conntrack_unexpect_related(other_exp); + ip_conntrack_expect_put(other_exp); + DEBUGP("success\n"); + } else { + DEBUGP("not found!\n"); + } + + ip_nat_follow_master(ct, exp); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + + u_int16_t msg, *cid = NULL, new_callid; + + new_callid = htons(ct_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid = &pptpReq->ocreq.callID; + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid = &pptpReq->icreq.callID; + break; + case PPTP_CALL_CLEAR_REQUEST: + cid = &pptpReq->clrreq.callID; + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? 
+ pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + + IP_NF_ASSERT(cid); + + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_callid)); + + /* mangle packet */ + if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_callid), + (char *)&new_callid, + sizeof(new_callid)) == 0) + return NF_DROP; + + return NF_ACCEPT; +} + +static int +pptp_exp_gre(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply) +{ + struct ip_ct_pptp_master *ct_pptp_info = + &expect_orig->master->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = + &expect_orig->master->nat.help.nat_pptp_info; + + struct ip_conntrack *ct = expect_orig->master; + + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_tuple *orig_t, *reply_t; + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation */ + orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + /* alter expectation for PNS->PAC direction */ + invert_tuplepr(&inv_t, &expect_orig->tuple); + expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id); + expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + inv_t.src.ip = reply_t->src.ip; + inv_t.dst.ip = reply_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + + if (!ip_conntrack_expect_related(expect_orig)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_orig)\n"); + return 1; + } + + /* alter expectation for PAC->PNS direction */ + invert_tuplepr(&inv_t, &expect_reply->tuple); + expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); + expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + inv_t.src.ip = orig_t->src.ip; + inv_t.dst.ip = orig_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + + if (!ip_conntrack_expect_related(expect_reply)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_reply)\n"); + ip_conntrack_unexpect_related(expect_orig); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { + DEBUGP("can't register original keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { + DEBUGP("can't register reply keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + ip_ct_gre_keymap_destroy(ct); + return 1; + } + + return 0; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader 
*ctlh, + union pptp_ctrl_union *pptpReq) +{ + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; + + int ret = NF_ACCEPT, rv; + + new_pcid = htons(nat_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid = &pptpReq->ocack.peersCallID; + cid = &pptpReq->ocack.callID; + break; + case PPTP_IN_CALL_CONNECT: + pcid = &pptpReq->iccon.peersCallID; + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + break; + case PPTP_WAN_ERROR_NOTIFY: + pcid = &pptpReq->wanerr.peersCallID; + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid = &pptpReq->disc.callID; + break; + case PPTP_SET_LINK_INFO: + pcid = &pptpReq->setlink.peersCallID; + break; + + default: + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + IP_NF_ASSERT(pcid); + DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(*pcid), ntohs(new_pcid)); + + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)); + if (rv != NF_ACCEPT) + return rv; + + if (new_cid) { + IP_NF_ASSERT(cid); + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_cid)); + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_cid), + (char *)&new_cid, + sizeof(new_cid)); + if (rv != NF_ACCEPT) + return rv; + } + + /* check for earlier return value of 'switch' above */ + if (ret != NF_ACCEPT) + return ret; + + /* great, at least we don't need to resize packets */ + return NF_ACCEPT; +} + + +extern int __init ip_nat_proto_gre_init(void); +extern void __exit ip_nat_proto_gre_fini(void); + +static int __init init(void) +{ + int ret; + + DEBUGP("%s: registering NAT helper\n", __FILE__); + + ret = ip_nat_proto_gre_init(); + if (ret < 0) + return ret; + + BUG_ON(ip_nat_pptp_hook_outbound); + ip_nat_pptp_hook_outbound = &pptp_outbound_pkt; + + BUG_ON(ip_nat_pptp_hook_inbound); + ip_nat_pptp_hook_inbound = &pptp_inbound_pkt; + + BUG_ON(ip_nat_pptp_hook_exp_gre); + ip_nat_pptp_hook_exp_gre = &pptp_exp_gre; + + BUG_ON(ip_nat_pptp_hook_expectfn); + ip_nat_pptp_hook_expectfn = &pptp_nat_expected; + + printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + DEBUGP("cleanup_module\n" ); + + ip_nat_pptp_hook_expectfn = NULL; + ip_nat_pptp_hook_exp_gre = NULL; + ip_nat_pptp_hook_inbound = NULL; + ip_nat_pptp_hook_outbound = NULL; + + ip_nat_proto_gre_fini(); + /* Make sure noone calls it, meanwhile */ + synchronize_net(); + + printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c new file mode 100644 index 000000000000..7c1285401672 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -0,0 +1,214 @@ +/* + * ip_nat_proto_gre.c - Version 2.0 + 
* + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* is key in given range between min and max */ +static int +gre_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int32_t key; + + if (maniptype == IP_NAT_MANIP_SRC) + key = tuple->src.u.gre.key; + else + key = tuple->dst.u.gre.key; + + return ntohl(key) >= ntohl(min->gre.key) + && ntohl(key) <= ntohl(max->gre.key); +} + +/* generate unique tuple ... */ +static int +gre_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t key; + u_int16_t *keyptr; + unsigned int min, i, range_size; + + if (maniptype == IP_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + DEBUGP("%p: NATing GRE PPTP\n", conntrack); + min = 1; + range_size = 0xffff; + } else { + min = ntohl(range->min.gre.key); + range_size = ntohl(range->max.gre.key) - min + 1; + } + + DEBUGP("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; i < range_size; i++, key++) { + *keyptr = htonl(min + key % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + + DEBUGP("%p: no NAT mapping\n", conntrack); + + return 0; +} + +/* manipulate a GRE packet according to maniptype */ +static int +gre_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype) +{ + struct gre_hdr *greh; + struct gre_hdr_pptp *pgreh; + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + unsigned int hdroff = iphdroff + iph->ihl*4; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8)) + return 0; + + greh = (void *)(*pskb)->data + hdroff; + pgreh = (struct gre_hdr_pptp *) greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype == IP_NAT_MANIP_DST) { + /* key manipulation is always dest */ + switch (greh->version) { + case 0: + if (!greh->key) { + DEBUGP("can't nat GRE w/o key\n"); + break; + } + if (greh->csum) { + /* FIXME: Never tested this code... 
*/ + *(gre_csum(greh)) = + ip_nat_cheat_check(~*(gre_key(greh)), + tuple->dst.u.gre.key, + *(gre_csum(greh))); + } + *(gre_key(greh)) = tuple->dst.u.gre.key; + break; + case GRE_VERSION_PPTP: + DEBUGP("call_id -> 0x%04x\n", + ntohl(tuple->dst.u.gre.key)); + pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key)); + break; + default: + DEBUGP("can't nat unknown GRE version\n"); + return 0; + break; + } + } + return 1; +} + +/* print out a nat tuple */ +static unsigned int +gre_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.gre.key) + len += sprintf(buffer + len, "srckey=0x%x ", + ntohl(match->src.u.gre.key)); + + if (mask->dst.u.gre.key) + len += sprintf(buffer + len, "dstkey=0x%x ", + ntohl(match->src.u.gre.key)); + + return len; +} + +/* print a range of keys */ +static unsigned int +gre_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.gre.key != 0 + || range->max.gre.key != 0xFFFF) { + if (range->min.gre.key == range->max.gre.key) + return sprintf(buffer, "key 0x%x ", + ntohl(range->min.gre.key)); + else + return sprintf(buffer, "keys 0x%u-0x%u ", + ntohl(range->min.gre.key), + ntohl(range->max.gre.key)); + } else + return 0; +} + +/* nat helper struct */ +static struct ip_nat_protocol gre = { + .name = "GRE", + .protonum = IPPROTO_GRE, + .manip_pkt = gre_manip_pkt, + .in_range = gre_in_range, + .unique_tuple = gre_unique_tuple, + .print = gre_print, + .print_range = gre_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, +#endif +}; + +int __init ip_nat_proto_gre_init(void) +{ + return ip_nat_protocol_register(&gre); +} + +void __exit ip_nat_proto_gre_fini(void) +{ + ip_nat_protocol_unregister(&gre); +} -- cgit v1.2.3 From e674d0f38de6109b59dbe30fba8b296a03229b8e Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Mon, 19 Sep 2005 15:34:40 -0700 Subject: [NETFILTER] ip6tables: remove duplicate code Some IPv6 matches have very similar loops to find IPv6 extension header and we can unify them. This patch introduces ipv6_find_hdr() to do it. I just checked that it can find the target headers in the packet which has dst,hbh,rt,frag,ah,esp headers. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 3 ++ net/ipv6/netfilter/ip6_tables.c | 52 ++++++++++++++++++ net/ipv6/netfilter/ip6t_ah.c | 81 ++-------------------------- net/ipv6/netfilter/ip6t_dst.c | 88 +++--------------------------- net/ipv6/netfilter/ip6t_esp.c | 73 ++----------------------- net/ipv6/netfilter/ip6t_frag.c | 90 ++++--------------------------- net/ipv6/netfilter/ip6t_hbh.c | 88 +++--------------------------- net/ipv6/netfilter/ip6t_rt.c | 83 +++------------------------- 8 files changed, 94 insertions(+), 464 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 58c72a52dc65..59f70b34e029 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -455,6 +455,9 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb, /* Check for an extension */ extern int ip6t_ext_hdr(u8 nexthdr); +/* find specified header and get offset to it */ +extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + u8 target); #define IP6T_ALIGN(s) (((s) + (__alignof__(struct ip6t_entry)-1)) & ~(__alignof__(struct ip6t_entry)-1)) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1cb8adb2787f..2da514b16d95 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1955,6 +1955,57 @@ static void __exit fini(void) #endif } +/* + * find specified header up to transport protocol header. + * If found target header, the offset to the header is set to *offset + * and return 0. otherwise, return -1. + * + * Notes: - non-1st Fragment Header isn't skipped. + * - ESP header isn't skipped. + * - The target header may be trancated. + */ +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +{ + unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; + u8 nexthdr = skb->nh.ipv6h->nexthdr; + unsigned int len = skb->len - start; + + while (nexthdr != target) { + struct ipv6_opt_hdr _hdr, *hp; + unsigned int hdrlen; + + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + return -1; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -1; + if (nexthdr == NEXTHDR_FRAGMENT) { + unsigned short _frag_off, *fp; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -1; + + if (ntohs(*fp) & ~0x7) + return -1; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hp->hdrlen + 2) << 2; + else + hdrlen = ipv6_optlen(hp); + + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *offset = start; + return 0; +} + EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); @@ -1963,6 +2014,7 @@ EXPORT_SYMBOL(ip6t_unregister_match); EXPORT_SYMBOL(ip6t_register_target); EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); +EXPORT_SYMBOL(ipv6_find_hdr); module_init(init); module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index d5b94f142bba..dde37793d20b 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -48,92 +48,21 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_auth_hdr *ah = NULL, _ah; + struct ip_auth_hdr *ah, _ah; const struct ip6t_ah *ahinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; - /*DEBUGP("IPv6 AH 
entered\n");*/ - /* if (opt->auth == 0) return 0; - * It does not filled on output */ - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_ah header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) - break; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* AH -> evaluate */ - if (nexthdr == NEXTHDR_AUTH) { - temp |= MASK_AH; - break; - } - - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_ah: new pointer too large! \n"); - break; - } - } - - /* AH header not found */ - if (temp != MASK_AH) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) return 0; - if (len < sizeof(struct ip_auth_hdr)){ + ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); + if (ah == NULL) { *hotdrop = 1; return 0; } - ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); - BUG_ON(ah == NULL); + hdrlen = (ah->hdrlen + 2) << 2; DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); DEBUGP("RES %04X ", ah->reserved); diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 540925e4a7a8..c450a635e54b 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? 
*/ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index e39dd236fd8e..24bc0cde43a1 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -48,87 +48,22 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_esp_hdr _esp, *eh = NULL; + struct ip_esp_hdr _esp, *eh; const struct ip6t_esp *espinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; /* Make sure this isn't an evil packet */ /*DEBUGP("ipv6_esp entered \n");*/ - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - int hdrlen; - - DEBUGP("ipv6_esp header iteration \n"); - - /* Is there enough space for the next ext header? 
*/ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - temp |= MASK_ESP; - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_esp: new pointer too large! \n"); - break; - } - } - - /* ESP header not found */ - if (temp != MASK_ESP) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) return 0; - if (len < sizeof(struct ip_esp_hdr)) { + eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); + if (eh == NULL) { *hotdrop = 1; return 0; } - eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); - BUG_ON(eh == NULL); - DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi)); return (eh != NULL) diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 4bfa30a9bc80..085d5f8eea29 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -48,90 +48,18 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct frag_hdr _frag, *fh = NULL; + struct frag_hdr _frag, *fh; const struct ip6t_frag *fraginfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; - unsigned int hdrlen = 0; - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_frag header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* FRAG -> evaluate */ - if (nexthdr == NEXTHDR_FRAGMENT) { - temp |= MASK_FRAGMENT; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_frag: new pointer too large! 
\n"); - break; - } - } - - /* FRAG header not found */ - if ( temp != MASK_FRAGMENT ) return 0; - - if (len < sizeof(struct frag_hdr)){ - *hotdrop = 1; - return 0; - } - fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); - BUG_ON(fh == NULL); + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) + return 0; + + fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); + if (fh == NULL){ + *hotdrop = 1; + return 0; + } DEBUGP("INFO %04X ", fh->frag_off); DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 27f3650d127e..1d09485111d0 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! 
\n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 2bb670037df3..beb2fd5cebbb 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -50,98 +50,29 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipv6_rt_hdr _route, *rh = NULL; + struct ipv6_rt_hdr _route, *rh; const struct ip6t_rt *rtinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; struct in6_addr *ap, _addr; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) + return 0; - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_rt header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* ROUTING -> evaluate */ - if (nexthdr == NEXTHDR_ROUTING) { - temp |= MASK_ROUTING; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_rt: new pointer is too large! 
\n"); - break; - } - } - - /* ROUTING header not found */ - if ( temp != MASK_ROUTING ) return 0; - - if (len < (int)sizeof(struct ipv6_rt_hdr)){ + rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); + if (rh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(rh); + if (skb->len - ptr < hdrlen){ /* Pcket smaller than its length field */ return 0; } - rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); - BUG_ON(rh == NULL); - DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); DEBUGP("TYPE %04X ", rh->type); DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); -- cgit v1.2.3 From a41bc00234a0a2ccaa99a194341ae108ae17ddc8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 19 Sep 2005 15:35:31 -0700 Subject: [NETFILTER]: Rename misnamed function Both __ip_conntrack_expect_find and ip_conntrack_expect_find_get take a reference to the expectation, the difference is that callers of __ip_conntrack_expect_find must hold ip_conntrack_lock. Signed-off-by: Patrick McHardy Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 2 +- net/ipv4/netfilter/ip_conntrack_netlink.c | 4 ++-- net/ipv4/netfilter/ip_conntrack_standalone.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 2df446c952ef..bace72a76cc4 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -384,7 +384,7 @@ extern struct ip_conntrack_expect * __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); extern struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); +ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); extern struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index f8cd8e42961e..c1f82e0c81cf 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) /* Just find a expectation corresponding to a tuple. 
*/ struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 15aef3564742..b08a432efcf8 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; @@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ae3e3e655db5..d3c7808010ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -993,11 +993,11 @@ EXPORT_SYMBOL(ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); -EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); EXPORT_SYMBOL(ip_conntrack_tuple_taken); -- cgit v1.2.3 From 3c3f8f25c177e4f9e4e00bcc1b90b28b1be37937 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 19 Sep 2005 15:41:28 -0700 Subject: [8021Q]: Add endian annotations. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 8 ++++---- net/8021q/vlan_dev.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 17d0c0d40b0e..eef0876d8307 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -42,8 +42,8 @@ struct hlist_node; struct vlan_ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ - unsigned short h_vlan_proto; /* Should always be 0x8100 */ - unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */ + __be16 h_vlan_proto; /* Should always be 0x8100 */ + __be16 h_vlan_TCI; /* Encapsulates priority and VLAN ID */ unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */ }; @@ -55,8 +55,8 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) } struct vlan_hdr { - unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */ - unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */ + __be16 h_vlan_TCI; /* Encapsulates priority and VLAN ID */ + __be16 h_vlan_encapsulated_proto; /* packet type ID field (or len) */ }; #define VLAN_VID_MASK 0xfff diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 145f5cde96cf..b74864889670 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, unsigned short vid; struct net_device_stats *stats; unsigned short vlan_TCI; - unsigned short proto; + __be16 proto; /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); -- cgit v1.2.3 From e0487992ce1dd7ae7da9c6aabdb19570bb95432b Mon Sep 17 00:00:00 2001 From: "Ed L. Cashin" Date: Mon, 19 Sep 2005 19:57:36 -0700 Subject: [BYTEORDER]: Document alignment and byteorder macros This patch comments the fact that although passing le64_to_cpup et al. is within the intended use of the byteorder macros, using get_unaligned is the recommended way to go. Signed-off-by: Ed L. Cashin Signed-off-by: David S. Miller --- include/linux/byteorder/generic.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 5fde6f4d6c1e..04bd756efc67 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -5,6 +5,10 @@ * linux/byteorder_generic.h * Generic Byte-reordering support * + * The "... p" macros, like le64_to_cpup, can be used with pointers + * to unaligned data, but there will be a performance penalty on + * some architectures. Use get_unaligned for unaligned data. + * * Francois-Rene Rideau 19970707 * gathered all the good ideas from all asm-foo/byteorder.h into one file, * cleaned them up. -- cgit v1.2.3 From 6a9b490d5fd7f23c5bcd75f970e01633ad3136e3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 19 Sep 2005 20:11:57 -0700 Subject: [SPARC64]: Move DCACHE_ALIASING_POSSIBLE define to asm/page.h This showed that arch/sparc64/kernel/ptrace.c was not getting the define properly, and thus the code protected by this ifdef was never actually compiled before. So fix that too. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/ptrace.c | 7 ++++--- include/asm-sparc64/cacheflush.h | 7 ------- include/asm-sparc64/ide.h | 1 + include/asm-sparc64/page.h | 7 +++++++ include/asm-sparc64/pgalloc.h | 1 + 5 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 23ad839d113f..5efbff90d668 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -30,6 +30,7 @@ #include #include #include +#include /* Returning from ptrace is a bit tricky because the syscall return * low level code assumes any value returned which is negative and @@ -128,20 +129,20 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, * is mapped to in the user's address space, we can skip the * D-cache flush. */ - if ((uaddr ^ kaddr) & (1UL << 13)) { + if ((uaddr ^ (unsigned long) kaddr) & (1UL << 13)) { unsigned long start = __pa(kaddr); unsigned long end = start + len; if (tlb_type == spitfire) { for (; start < end; start += 32) - spitfire_put_dcache_tag(va & 0x3fe0, 0x0); + spitfire_put_dcache_tag(start & 0x3fe0, 0x0); } else { for (; start < end; start += 32) __asm__ __volatile__( "stxa %%g0, [%0] %1\n\t" "membar #Sync" : /* no outputs */ - : "r" (va), + : "r" (start), "i" (ASI_DCACHE_INVALIDATE)); } } diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h index 51b26e81d828..ededd2659eab 100644 --- a/include/asm-sparc64/cacheflush.h +++ b/include/asm-sparc64/cacheflush.h @@ -4,13 +4,6 @@ #include #include -/* Flushing for D-cache alias handling is only needed if - * the page size is smaller than 16K. - */ -#if PAGE_SHIFT < 14 -#define DCACHE_ALIASING_POSSIBLE -#endif - #ifndef __ASSEMBLY__ #include diff --git a/include/asm-sparc64/ide.h b/include/asm-sparc64/ide.h index 4c1098474c73..c393f815b0be 100644 --- a/include/asm-sparc64/ide.h +++ b/include/asm-sparc64/ide.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifndef MAX_HWIFS # ifdef CONFIG_BLK_DEV_IDEPCI diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index c9f8ef208ea5..7f8d764abc47 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -21,6 +21,13 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +/* Flushing for D-cache alias handling is only needed if + * the page size is smaller than 16K. + */ +#if PAGE_SHIFT < 14 +#define DCACHE_ALIASING_POSSIBLE +#endif + #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index b9b1914aae63..a96067cca963 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -10,6 +10,7 @@ #include #include #include +#include /* Page table allocation/freeing. 
*/ #ifdef CONFIG_SMP -- cgit v1.2.3 From 9506057fca54464f3291b62156e6cd907c4cbc95 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 20 Sep 2005 16:20:49 +0100 Subject: [ARM] 2924/3: taglist - postfix section with .init for `make buildcheck` Patch from Ben Dooks The `make buildcheck` is erroneously reporting that the taglist is referencing items in the .init section as it is not itself postfixed with .init Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 2 +- include/asm-arm/setup.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index ad2d66c93a5c..350b53b41e5b 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -29,7 +29,7 @@ SECTIONS *(.arch.info) __arch_info_end = .; __tagtable_begin = .; - *(.taglist) + *(.taglist.init) __tagtable_end = .; . = ALIGN(16); __setup_start = .; diff --git a/include/asm-arm/setup.h b/include/asm-arm/setup.h index adcbd79762bf..a4b3ebf9bdd1 100644 --- a/include/asm-arm/setup.h +++ b/include/asm-arm/setup.h @@ -171,7 +171,7 @@ struct tagtable { int (*parse)(const struct tag *); }; -#define __tag __attribute_used__ __attribute__((__section__(".taglist"))) +#define __tag __attribute_used__ __attribute__((__section__(".taglist.init"))) #define __tagtable(tag, fn) \ static struct tagtable __tagtable_##fn __tag = { tag, fn } -- cgit v1.2.3 From bfe6815e0465035d013b2b676444376fe2b3716e Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 20 Sep 2005 16:25:12 +0100 Subject: [ARM] 2925/3: earlyparam - postfix section with .init for `make buildcheck` Patch from Ben Dooks The `make buildcheck` is erroneously reporting that the earlyparam list is referencing items in the .init section as it is not itself postfixed with .init Also, as per rmk's suggestion, rename the __early_param to .early_param to bring it into line with everything else Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 2 +- include/asm-arm/setup.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 350b53b41e5b..47423aaf9d77 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -36,7 +36,7 @@ SECTIONS *(.init.setup) __setup_end = .; __early_begin = .; - *(__early_param) + *(.early_param.init) __early_end = .; __initcall_start = .; *(.initcall1.init) diff --git a/include/asm-arm/setup.h b/include/asm-arm/setup.h index a4b3ebf9bdd1..ea3ed2465233 100644 --- a/include/asm-arm/setup.h +++ b/include/asm-arm/setup.h @@ -213,6 +213,6 @@ struct early_params { #define __early_param(name,fn) \ static struct early_params __early_##fn __attribute_used__ \ -__attribute__((__section__("__early_param"))) = { name, fn } +__attribute__((__section__(".early_param.init"))) = { name, fn } #endif -- cgit v1.2.3 From 9d0fd1eb8a3c19f3ede5418540b3c9f64fac4b86 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 20 Sep 2005 16:45:20 +0100 Subject: [ARM] 2927/1: .arch.info - postfix section with .init for `make buildcheck` Patch from Ben Dooks The `make buildcheck` is erroneously reporting that the .arch.info list is referencing items in the .init section as it is not itself postfixed with .init Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 2 +- include/asm-arm/mach/arch.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 
'include') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 420b8d09a383..08e58ecd44be 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -26,7 +26,7 @@ SECTIONS *(.proc.info.init) __proc_info_end = .; __arch_info_begin = .; - *(.arch.info) + *(.arch.info.init) __arch_info_end = .; __tagtable_begin = .; *(.taglist.init) diff --git a/include/asm-arm/mach/arch.h b/include/asm-arm/mach/arch.h index 56c6bf4ab0c3..4fa95084a8c0 100644 --- a/include/asm-arm/mach/arch.h +++ b/include/asm-arm/mach/arch.h @@ -50,7 +50,7 @@ struct machine_desc { */ #define MACHINE_START(_type,_name) \ const struct machine_desc __mach_desc_##_type \ - __attribute__((__section__(".arch.info"))) = { \ + __attribute__((__section__(".arch.info.init"))) = { \ .nr = MACH_TYPE_##_type, \ .name = _name, -- cgit v1.2.3 From 729b4f7de68191478b20fab19a6d0c6b8c4380c9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 20 Sep 2005 12:18:38 -0700 Subject: [SPARC64]: Verify vmalloc TLB misses more strictly. Arrange the modules, OBP, and vmalloc areas such that a range verification can be done quite minimally. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 39 +++++++++++++++++++-------------------- include/asm-sparc64/pgtable.h | 20 +++++++++++--------- 2 files changed, 30 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 3e0badb820c5..b48349527853 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -42,19 +42,15 @@ * executing (see inherit_locked_prom_mappings() rant). */ sparc64_vpte_nucleus: - /* Load 0xf0000000, which is LOW_OBP_ADDRESS. */ - mov 0xf, %g5 - sllx %g5, 28, %g5 - - /* Is addr >= LOW_OBP_ADDRESS? */ + /* Note that kvmap below has verified that the address is + * in the range MODULES_VADDR --> VMALLOC_END already. So + * here we need only check if it is an OBP address or not. + */ + sethi %hi(LOW_OBP_ADDRESS), %g5 cmp %g4, %g5 blu,pn %xcc, sparc64_vpte_patchme1 mov 0x1, %g5 - - /* Load 0x100000000, which is HI_OBP_ADDRESS. */ sllx %g5, 32, %g5 - - /* Is addr < HI_OBP_ADDRESS? */ cmp %g4, %g5 blu,pn %xcc, obp_iaddr_patch nop @@ -156,26 +152,29 @@ obp_daddr_patch: * rather, use information saved during inherit_prom_mappings() using 8k * pagesize. */ + .align 32 kvmap: - /* Load 0xf0000000, which is LOW_OBP_ADDRESS. */ - mov 0xf, %g5 - sllx %g5, 28, %g5 + sethi %hi(MODULES_VADDR), %g5 + cmp %g4, %g5 + blu,pn %xcc, longpath + mov (VMALLOC_END >> 24), %g5 + sllx %g5, 24, %g5 + cmp %g4, %g5 + bgeu,pn %xcc, longpath + nop - /* Is addr >= LOW_OBP_ADDRESS? */ +kvmap_check_obp: + sethi %hi(LOW_OBP_ADDRESS), %g5 cmp %g4, %g5 - blu,pn %xcc, vmalloc_addr + blu,pn %xcc, kvmap_vmalloc_addr mov 0x1, %g5 - - /* Load 0x100000000, which is HI_OBP_ADDRESS. */ sllx %g5, 32, %g5 - - /* Is addr < HI_OBP_ADDRESS? */ cmp %g4, %g5 blu,pn %xcc, obp_daddr_patch nop -vmalloc_addr: - /* If we get here, a vmalloc addr accessed, load kernel VPTE. */ +kvmap_vmalloc_addr: + /* If we get here, a vmalloc addr was accessed, load kernel VPTE. */ ldxa [%g3 + %g6] ASI_N, %g5 brgez,pn %g5, longpath nop diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index a2b4f5ed4625..a297f6144f0f 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -24,21 +24,23 @@ #include #include -/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 16MB). - * The page copy blockops use 0x1000000 to 0x18000000 (16MB --> 24MB). 
+/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB). + * The page copy blockops can use 0x2000000 to 0x10000000. * The PROM resides in an area spanning 0xf0000000 to 0x100000000. - * The vmalloc area spans 0x140000000 to 0x200000000. + * The vmalloc area spans 0x100000000 to 0x200000000. + * Since modules need to be in the lowest 32-bits of the address space, + * we place them right before the OBP area from 0x10000000 to 0xf0000000. * There is a single static kernel PMD which maps from 0x0 to address * 0x400000000. */ -#define TLBTEMP_BASE _AC(0x0000000001000000,UL) -#define MODULES_VADDR _AC(0x0000000002000000,UL) -#define MODULES_LEN _AC(0x000000007e000000,UL) -#define MODULES_END _AC(0x0000000080000000,UL) -#define VMALLOC_START _AC(0x0000000140000000,UL) -#define VMALLOC_END _AC(0x0000000200000000,UL) +#define TLBTEMP_BASE _AC(0x0000000002000000,UL) +#define MODULES_VADDR _AC(0x0000000010000000,UL) +#define MODULES_LEN _AC(0x00000000e0000000,UL) +#define MODULES_END _AC(0x00000000f0000000,UL) #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) +#define VMALLOC_START _AC(0x0000000100000000,UL) +#define VMALLOC_END _AC(0x0000000200000000,UL) /* XXX All of this needs to be rethought so we can take advantage * XXX cheetah's full 64-bit virtual address space, ie. no more hole -- cgit v1.2.3 From 7e871b6c8f1f4fda41e51ef86147facecac3be9f Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Wed, 21 Sep 2005 09:55:38 -0700 Subject: [PATCH] mm: update stale comment for removal of page->list Update comment for the 2.6.6-rc1 conversion from page->list and address_space->{clean,dirty,locked}_pages to radix tree tagging and ->lru. I've mostly avoided to mention page lists (at least I've shortened the comment). Signed-off-by: Paolo 'Blaisorblade' Giarrusso Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 82d7024f0765..0d94c94d9d81 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -350,7 +350,8 @@ static inline void put_page(struct page *page) * only one copy in memory, at most, normally. * * For the non-reserved pages, page_count(page) denotes a reference count. - * page_count() == 0 means the page is free. + * page_count() == 0 means the page is free. page->lru is then used for + * freelist management in the buddy allocator. * page_count() == 1 means the page is used for exactly one purpose * (e.g. a private data page of one process). * @@ -376,10 +377,8 @@ static inline void put_page(struct page *page) * attaches, plus 1 if `private' contains something, plus one for * the page cache itself. * - * All pages belonging to an inode are in these doubly linked lists: - * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages; - * using the page->list list_head. These fields are also used for - * freelist managemet (when page_count()==0). + * Instead of keeping dirty/clean pages in per address-space lists, we instead + * now tag pages as dirty/under writeback in the radix tree. * * There is also a per-mapping radix tree mapping index to the page * in memory if present. The tree is rooted at mapping->root. 
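As context for the new wording above — and only as an illustrative sketch, not part of this patch — dirty and writeback state is queried through the radix-tree tags via the 2.6-era pagevec helpers. The hypothetical function below assumes pagevec_lookup_tag() and PAGECACHE_TAG_DIRTY as declared in <linux/pagevec.h> and <linux/fs.h> of the same period:

#include <linux/fs.h>
#include <linux/pagevec.h>

/* Hypothetical helper: count the pages of @mapping tagged dirty in its
 * radix tree, batch by batch, instead of walking the removed
 * mapping->dirty_pages list. */
static unsigned long count_dirty_pages(struct address_space *mapping)
{
	struct pagevec pvec;
	pgoff_t index = 0;
	unsigned long nr = 0;

	pagevec_init(&pvec, 0);
	while (pagevec_lookup_tag(&pvec, mapping, &index,
				  PAGECACHE_TAG_DIRTY, PAGEVEC_SIZE)) {
		/* each returned page holds a reference until released */
		nr += pagevec_count(&pvec);
		pagevec_release(&pvec);
	}
	return nr;
}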
-- cgit v1.2.3 From 7e2cff42cfac27c25202648c5c89f9171e5bc085 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Wed, 21 Sep 2005 09:55:39 -0700 Subject: [PATCH] mm: add a note about partially hardcoded VM_* flags Hugh made me note this line for permission checking in mprotect(): if ((newflags & ~(newflags >> 4)) & 0xf) { after figuring out what's that about, I decided it's nasty enough. Btw Hugh itself didn't like the 0xf. We can safely change it to VM_READ|VM_WRITE|VM_EXEC because we never change VM_SHARED, so no need to check that. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + mm/mprotect.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d94c94d9d81..097b3a3c693d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -136,6 +136,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_EXEC 0x00000004 #define VM_SHARED 0x00000008 +/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ #define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ #define VM_MAYWRITE 0x00000020 #define VM_MAYEXEC 0x00000040 diff --git a/mm/mprotect.c b/mm/mprotect.c index e9fbd013ad9a..57577f63b305 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -248,7 +248,8 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot) newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); - if ((newflags & ~(newflags >> 4)) & 0xf) { + /* newflags >> 4 shift VM_MAY% in place of VM_% */ + if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { error = -EACCES; goto out; } -- cgit v1.2.3 From 7980cbbb30bf044e6f40912a3f6456204ddfc27e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Sep 2005 09:55:43 -0700 Subject: [PATCH] Adds sys_set_mempolicy() in include/linux/syscalls.h Signed-off-by: Eric Dumazet Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 425f58c8ea4a..a6f03e473737 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -508,5 +508,7 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, asmlinkage long sys_ioprio_set(int which, int who, int ioprio); asmlinkage long sys_ioprio_get(int which, int who); +asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, + unsigned long maxnode); #endif -- cgit v1.2.3 From 972d512a17c1bb7c4b784a9da2ca75745fcc6989 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 21 Sep 2005 12:31:26 -0700 Subject: [IB] Add MAD data field size definitions Clean up code by using enums instead of hard-coded magic numbers. 
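The enum values can be read off the structure layouts in the diff below; as a derivation added for context (assuming the standard 256-byte MAD, the 24-byte common MAD header, the 12-byte RMPP header, the 20-byte SA header, and the 1+3 bytes of reserved+OUI in the vendor format):

	IB_MGMT_MAD_DATA    = 256 - 24      = 232
	IB_MGMT_RMPP_DATA   = 232 - 12      = 220
	IB_MGMT_VENDOR_DATA = 220 - (1 + 3) = 216
	IB_MGMT_SA_DATA     = 220 - 20      = 200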
Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad_rmpp.c | 15 ++++++--------- include/rdma/ib_mad.h | 15 +++++++++++---- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 2bd8b1cc57c4..8f7cef0812f6 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -583,6 +583,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; int timeout; + u32 paylen; rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); @@ -590,11 +591,9 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) if (mad_send_wr->seg_num == 1) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; - rmpp_mad->rmpp_hdr.paylen_newwin = - cpu_to_be32(mad_send_wr->total_seg * - (sizeof(struct ib_rmpp_mad) - - offsetof(struct ib_rmpp_mad, data)) - - mad_send_wr->pad); + paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA - + mad_send_wr->pad; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad); } else { mad_send_wr->send_wr.num_sge = 2; @@ -608,10 +607,8 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) if (mad_send_wr->seg_num == mad_send_wr->total_seg) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; - rmpp_mad->rmpp_hdr.paylen_newwin = - cpu_to_be32(sizeof(struct ib_rmpp_mad) - - offsetof(struct ib_rmpp_mad, data) - - mad_send_wr->pad); + paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); } /* 2 seconds for an ACK until we can find the packet lifetime */ diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 53184a38fdf6..0e293fe733b0 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -108,6 +108,13 @@ #define IB_QP1_QKEY 0x80010000 #define IB_QP_SET_QKEY 0x80000000 +enum { + IB_MGMT_MAD_DATA = 232, + IB_MGMT_RMPP_DATA = 220, + IB_MGMT_VENDOR_DATA = 216, + IB_MGMT_SA_DATA = 200 +}; + struct ib_mad_hdr { u8 base_version; u8 mgmt_class; @@ -149,20 +156,20 @@ struct ib_sa_hdr { struct ib_mad { struct ib_mad_hdr mad_hdr; - u8 data[232]; + u8 data[IB_MGMT_MAD_DATA]; }; struct ib_rmpp_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; - u8 data[220]; + u8 data[IB_MGMT_RMPP_DATA]; }; struct ib_sa_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; struct ib_sa_hdr sa_hdr; - u8 data[200]; + u8 data[IB_MGMT_SA_DATA]; } __attribute__ ((packed)); struct ib_vendor_mad { @@ -170,7 +177,7 @@ struct ib_vendor_mad { struct ib_rmpp_hdr rmpp_hdr; u8 reserved; u8 oui[3]; - u8 data[216]; + u8 data[IB_MGMT_VENDOR_DATA]; }; struct ib_class_port_info -- cgit v1.2.3 From c51179fb0c77ad91df5825f8f7eb670da97e137e Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Wed, 21 Sep 2005 18:37:14 +0200 Subject: [PATCH] uml: adapt asm/futex.h to our arch Follow up to 4732efbeb997189d9f9b04708dc26bf8613ed721 - uml must just reuse as-is the backing architecture support. There is a micro-fixup is needed for the included file, which won't affect i386 behaviour at all. I've not tested compilation on x86_64, only on x86, but the code is almost the same except the culprit test, so everything should be ok on x86_64 too. 
Cc: Jakub Jelinek Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Linus Torvalds --- include/asm-i386/futex.h | 2 +- include/asm-um/futex.h | 51 +++++------------------------------------------- 2 files changed, 6 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h index 44b9db806474..e7a271d39309 100644 --- a/include/asm-i386/futex.h +++ b/include/asm-i386/futex.h @@ -61,7 +61,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (op == FUTEX_OP_SET) __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); else { -#ifndef CONFIG_X86_BSWAP +#if !defined(CONFIG_X86_BSWAP) && !defined(CONFIG_UML) if (boot_cpu_data.x86 == 3) ret = -ENOSYS; else diff --git a/include/asm-um/futex.h b/include/asm-um/futex.h index 2cac5ecd9d00..142ee2d8e0fd 100644 --- a/include/asm-um/futex.h +++ b/include/asm-um/futex.h @@ -1,53 +1,12 @@ -#ifndef _ASM_FUTEX_H -#define _ASM_FUTEX_H - -#ifdef __KERNEL__ +#ifndef __UM_FUTEX_H +#define __UM_FUTEX_H #include #include +#include +#include #include -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } +#include "asm/arch/futex.h" - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif -- cgit v1.2.3 From 676067cfeaa16f6f338e067e83ce4733b41c0b24 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Wed, 21 Sep 2005 18:38:09 +0200 Subject: [PATCH] Remove unused var from asm/futex.h As recently done by Russell King for ARM, commit 4732efbeb997189d9f9b04708dc26bf8613ed721 introduces a generic asm/futex.h copied along most arches, which includes a "-ENOSYS support" to be changed if needed. However, it includes an unused var (taken from the "real" version) which GCC warns about. Remove it from all arches having that file version (i.e. same GIT id). $ git-diff-tree -r HEAD and $ git-ls-tree -r HEAD include/|grep 9feff4ce1424bc390608326240be369eb13aa648 may be more interesting than looking at the patch itself, to make sure I've just copied the arm header to all other archs having the original dummy version of this file. 
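For readers following the futex_atomic_op_inuser() bodies repeated in the per-arch diffs below, the sketch here — an illustration only, not part of these patches — shows how the encoded_op word is laid out. The field positions are read off the decode in that code; the matching FUTEX_OP() packing macro is assumed to live in <linux/futex.h> of this era.

/*
 * encoded_op layout:
 *
 *   31    28 27    24 23        12 11         0
 *  +--------+--------+------------+------------+
 *  |   op   |  cmp   |   oparg    |   cmparg   |
 *  +--------+--------+------------+------------+
 *
 * Bit 31 doubles as the FUTEX_OP_OPARG_SHIFT flag tested in the kernel code.
 */
#include <stdio.h>

static void decode(int encoded_op)
{
	int op     = (encoded_op >> 28) & 7;
	int cmp    = (encoded_op >> 24) & 15;
	int oparg  = (encoded_op << 8) >> 20;	/* sign-extend 12-bit field */
	int cmparg = (encoded_op << 20) >> 20;	/* sign-extend 12-bit field */

	printf("op=%d cmp=%d oparg=%d cmparg=%d\n", op, cmp, oparg, cmparg);
}

int main(void)
{
	/* e.g. op=1 (ADD), cmp=1 (NE): add 1 to *uaddr, report old value != 0 */
	decode((1 << 28) | (1 << 24) | (1 << 12) | 0);
	return 0;
}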
Cc: Jakub Jelinek Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Linus Torvalds --- include/asm-alpha/futex.h | 2 +- include/asm-arm26/futex.h | 2 +- include/asm-cris/futex.h | 2 +- include/asm-frv/futex.h | 2 +- include/asm-h8300/futex.h | 2 +- include/asm-ia64/futex.h | 2 +- include/asm-m32r/futex.h | 2 +- include/asm-m68k/futex.h | 2 +- include/asm-m68knommu/futex.h | 2 +- include/asm-parisc/futex.h | 2 +- include/asm-ppc/futex.h | 2 +- include/asm-s390/futex.h | 2 +- include/asm-sh/futex.h | 2 +- include/asm-sh64/futex.h | 2 +- include/asm-sparc/futex.h | 2 +- include/asm-sparc64/futex.h | 2 +- include/asm-v850/futex.h | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/asm-alpha/futex.h b/include/asm-alpha/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-alpha/futex.h +++ b/include/asm-alpha/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-arm26/futex.h b/include/asm-arm26/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-arm26/futex.h +++ b/include/asm-arm26/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-cris/futex.h b/include/asm-cris/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-cris/futex.h +++ b/include/asm-cris/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-frv/futex.h b/include/asm-frv/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-frv/futex.h +++ b/include/asm-frv/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-h8300/futex.h b/include/asm-h8300/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-h8300/futex.h +++ b/include/asm-h8300/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-ia64/futex.h +++ b/include/asm-ia64/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 
oparg = 1 << oparg; diff --git a/include/asm-m32r/futex.h b/include/asm-m32r/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-m32r/futex.h +++ b/include/asm-m32r/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-m68k/futex.h b/include/asm-m68k/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-m68k/futex.h +++ b/include/asm-m68k/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-m68knommu/futex.h b/include/asm-m68knommu/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-m68knommu/futex.h +++ b/include/asm-m68knommu/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-parisc/futex.h +++ b/include/asm-parisc/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-ppc/futex.h b/include/asm-ppc/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-ppc/futex.h +++ b/include/asm-ppc/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-s390/futex.h +++ b/include/asm-s390/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sh/futex.h b/include/asm-sh/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-sh/futex.h +++ b/include/asm-sh/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sh64/futex.h b/include/asm-sh64/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-sh64/futex.h +++ b/include/asm-sh64/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser 
(int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sparc/futex.h b/include/asm-sparc/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-sparc/futex.h +++ b/include/asm-sparc/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-v850/futex.h b/include/asm-v850/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/include/asm-v850/futex.h +++ b/include/asm-v850/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; -- cgit v1.2.3 From e86ee6682b649183c11013a98be02f25e9ae399d Mon Sep 17 00:00:00 2001 From: Andy Currid Date: Mon, 19 Sep 2005 06:17:52 -0700 Subject: [PATCH] Add NVIDIA device ID in sata_nv Signed-off-by: Andy Currid Signed-off-by: Jeff Garzik --- drivers/scsi/sata_nv.c | 2 ++ include/linux/pci_ids.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index a1d62dee3be6..c05653c7779d 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -158,6 +158,8 @@ static struct pci_device_id nv_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6c1a142286a..cb414ea42f02 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1267,7 +1267,8 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E -#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x037E +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2 0x037F #define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 #define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B -- cgit v1.2.3 From 1d67e6501b8dba54ef8dcabebe2ad049b8ad0d67 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 03:27:56 -0300 Subject: [LLC]: Make llc_frame_alloc take a net_device as an argument So as to set 
the newly created sk_buff ->dev member with it, that way we stop using dev_base->next, that is the wrong thing to do, as there may well be several interfaces being used with LLC. This was not such a big problem after all as most of the users of llc_alloc_frame were setting the correct dev, but this way code is reduced. This also fixes another bug in llc_station_ac_send_null_dsap_xid_c, that was not setting the skb->dev field. Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_sap.h | 3 +- net/llc/llc_c_ac.c | 105 ++++++++++++++++++++------------------------------ net/llc/llc_s_ac.c | 6 +-- net/llc/llc_sap.c | 5 ++- net/llc/llc_station.c | 8 ++-- 5 files changed, 52 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index 353baaa627f3..6dd4cdccca42 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -13,10 +13,11 @@ */ struct llc_sap; struct sk_buff; +struct net_device; extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); extern void llc_save_primitive(struct sk_buff* skb, unsigned char prim); -extern struct sk_buff *llc_alloc_frame(void); +extern struct sk_buff *llc_alloc_frame(struct net_device *dev); extern void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index b218be4c10ec..db98ea2113a5 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -217,13 +217,12 @@ int llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2(struct sock *sk, int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_disc_cmd(nskb, 1); @@ -243,14 +242,13 @@ free: int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit; - nskb->dev = llc->dev; llc_pdu_decode_pf_bit(skb, &f_bit); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); @@ -270,14 +268,13 @@ free: int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_dm_rsp(nskb, f_bit); @@ -306,11 +303,10 @@ int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) llc_pdu_decode_pf_bit(skb, &f_bit); else f_bit = 0; - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, @@ -330,15 +326,14 @@ free: int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) 
{ int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { u8 f_bit = 0; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = (struct llc_pdu_sn *)&llc->rx_pdu_hdr; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, @@ -360,15 +355,14 @@ int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) u8 f_bit; int rc = -ENOBUFS; struct sk_buff *nskb; + struct llc_sock *llc = llc_sk(sk); llc_pdu_decode_pf_bit(skb, &f_bit); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, @@ -451,13 +445,12 @@ int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk, u8 nr; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); @@ -487,13 +480,12 @@ int llc_conn_ac_resend_i_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rej_cmd(nskb, 1, llc->vR); @@ -512,14 +504,13 @@ free: int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { u8 f_bit = 1; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR); @@ -538,14 +529,13 @@ free: int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 0; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR); @@ -564,13 +554,12 @@ free: int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; 
llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rnr_cmd(nskb, 1, llc->vR); @@ -589,14 +578,13 @@ free: int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR); @@ -615,14 +603,13 @@ free: int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { u8 f_bit = 0; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR); @@ -653,13 +640,12 @@ int llc_conn_ac_set_remote_busy(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, 0, llc->vR); @@ -678,13 +664,12 @@ free: int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rr_cmd(nskb, 1, llc->vR); @@ -703,14 +688,13 @@ free: int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR); @@ -729,14 +713,13 @@ free: int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR); @@ -755,13 +738,12 @@ free: int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct 
llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); @@ -780,13 +762,12 @@ free: int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); @@ -815,8 +796,8 @@ void llc_conn_set_p_flag(struct sock *sk, u8 value) int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -824,7 +805,6 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) if (llc->dev->flags & IFF_LOOPBACK) dmac = llc->dev->dev_addr; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_sabme_cmd(nskb, 1); @@ -845,11 +825,11 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { u8 f_bit; int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); llc_pdu_decode_pf_bit(skb, &f_bit); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; nskb->dev = llc->dev; @@ -1001,13 +981,12 @@ static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, llc->ack_pf, llc->vR); diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index ed8ba7de6122..270b2f2030cd 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -103,10 +103,9 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; - nskb->dev = skb->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 0); @@ -149,10 +148,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; - nskb->dev = skb->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 34228ef14985..0adaa289bf0a 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -26,11 +26,12 @@ /** * llc_alloc_frame - allocates sk_buff for frame + * @dev: network device this skb will be sent over * * Allocates an sk_buff for frame and initializes sk_buff 
fields. * Returns allocated skb or %NULL when out of memory. */ -struct sk_buff *llc_alloc_frame(void) +struct sk_buff *llc_alloc_frame(struct net_device *dev) { struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); @@ -38,7 +39,7 @@ struct sk_buff *llc_alloc_frame(void) skb_reserve(skb, 50); skb->nh.raw = skb->h.raw = skb->data; skb->protocol = htons(ETH_P_802_2); - skb->dev = dev_base->next; + skb->dev = dev; skb->mac.raw = skb->head; } return skb; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 8fe48a24bad5..85a7ac276141 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -249,7 +249,7 @@ static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) { int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(); + struct sk_buff *nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; @@ -270,12 +270,11 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff* nskb = llc_alloc_frame(); + struct sk_buff* nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; rc = 0; - nskb->dev = skb->dev; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); @@ -295,12 +294,11 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(); + struct sk_buff *nskb = llc_alloc_frame(skb->dev); if (!nskb) goto out; rc = 0; - nskb->dev = skb->dev; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); -- cgit v1.2.3 From 590232a7150674b2036291eaefce085f3f9659c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:30:44 -0300 Subject: [LLC]: Add sysctl support for the LLC timeouts Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/sysctl.h | 26 ++++++++- include/net/llc.h | 7 +++ include/net/llc_conn.h | 10 ++-- net/llc/Makefile | 1 + net/llc/af_llc.c | 47 +++++++++++----- net/llc/llc_c_ac.c | 12 ++--- net/llc/llc_conn.c | 13 +++-- net/llc/llc_station.c | 11 ++-- net/llc/sysctl_net_llc.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 232 insertions(+), 31 deletions(-) create mode 100644 net/llc/sysctl_net_llc.c (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3a29a9f9b451..fc8e367f671e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -202,7 +202,8 @@ enum NET_TR=14, NET_DECNET=15, NET_ECONET=16, - NET_SCTP=17, + NET_SCTP=17, + NET_LLC=18, }; /* /proc/sys/kernel/random */ @@ -522,6 +523,29 @@ enum { NET_IPX_FORWARDING=2 }; +/* /proc/sys/net/llc */ +enum { + NET_LLC2=1, + NET_LLC_STATION=2, +}; + +/* /proc/sys/net/llc/llc2 */ +enum { + NET_LLC2_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/station */ +enum { + NET_LLC_STATION_ACK_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/llc2/timeout */ +enum { + NET_LLC2_ACK_TIMEOUT=1, + NET_LLC2_P_TIMEOUT=2, + NET_LLC2_REJ_TIMEOUT=3, + NET_LLC2_BUSY_TIMEOUT=4, +}; /* /proc/sys/net/appletalk */ enum { diff --git a/include/net/llc.h b/include/net/llc.h index 71769a5aeef3..8b8e2be289b1 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -98,4 +98,11 @@ extern void llc_proc_exit(void); #define llc_proc_init() (0) #define llc_proc_exit() do { } while(0) #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_SYSCTL +extern int llc_sysctl_init(void); +extern void 
llc_sysctl_exit(void); +#else +#define llc_sysctl_init() (0) +#define llc_sysctl_exit() do { } while(0) +#endif /* CONFIG_SYSCTL */ #endif /* LLC_H */ diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 8ad3bc2c23d7..8a8ff4810135 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -19,14 +19,14 @@ #define LLC_EVENT 1 #define LLC_PACKET 2 -#define LLC_P_TIME 2 -#define LLC_ACK_TIME 1 -#define LLC_REJ_TIME 3 -#define LLC_BUSY_TIME 3 +#define LLC2_P_TIME 2 +#define LLC2_ACK_TIME 1 +#define LLC2_REJ_TIME 3 +#define LLC2_BUSY_TIME 3 struct llc_timer { struct timer_list timer; - u16 expire; /* timer expire time */ + unsigned long expire; /* timer expire time */ }; struct llc_sock { diff --git a/net/llc/Makefile b/net/llc/Makefile index 5ebd4ed2bd42..4e260cff3c5d 100644 --- a/net/llc/Makefile +++ b/net/llc/Makefile @@ -22,3 +22,4 @@ llc2-y := llc_if.o llc_c_ev.o llc_c_ac.o llc_conn.o llc_c_st.o llc_pdu.o \ llc_sap.o llc_s_ac.o llc_s_ev.o llc_s_st.o af_llc.o llc_station.o llc2-$(CONFIG_PROC_FS) += llc_proc.o +llc2-$(CONFIG_SYSCTL) += sysctl_net_llc.o diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 95444f227510..ef125345a2db 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -877,22 +877,22 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, case LLC_OPT_ACK_TMR_EXP: if (opt > LLC_OPT_MAX_ACK_TMR_EXP) goto out; - llc->ack_timer.expire = opt; + llc->ack_timer.expire = opt * HZ; break; case LLC_OPT_P_TMR_EXP: if (opt > LLC_OPT_MAX_P_TMR_EXP) goto out; - llc->pf_cycle_timer.expire = opt; + llc->pf_cycle_timer.expire = opt * HZ; break; case LLC_OPT_REJ_TMR_EXP: if (opt > LLC_OPT_MAX_REJ_TMR_EXP) goto out; - llc->rej_sent_timer.expire = opt; + llc->rej_sent_timer.expire = opt * HZ; break; case LLC_OPT_BUSY_TMR_EXP: if (opt > LLC_OPT_MAX_BUSY_TMR_EXP) goto out; - llc->busy_state_timer.expire = opt; + llc->busy_state_timer.expire = opt * HZ; break; case LLC_OPT_TX_WIN: if (opt > LLC_OPT_MAX_WIN) @@ -942,17 +942,17 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, goto out; switch (optname) { case LLC_OPT_RETRY: - val = llc->n2; break; + val = llc->n2; break; case LLC_OPT_SIZE: - val = llc->n1; break; + val = llc->n1; break; case LLC_OPT_ACK_TMR_EXP: - val = llc->ack_timer.expire; break; + val = llc->ack_timer.expire / HZ; break; case LLC_OPT_P_TMR_EXP: - val = llc->pf_cycle_timer.expire; break; + val = llc->pf_cycle_timer.expire / HZ; break; case LLC_OPT_REJ_TMR_EXP: - val = llc->rej_sent_timer.expire; break; + val = llc->rej_sent_timer.expire / HZ; break; case LLC_OPT_BUSY_TMR_EXP: - val = llc->busy_state_timer.expire; break; + val = llc->busy_state_timer.expire / HZ; break; case LLC_OPT_TX_WIN: val = llc->k; break; case LLC_OPT_RX_WIN: @@ -999,6 +999,13 @@ static struct proto_ops llc_ui_ops = { extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); +static char llc_proc_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; +static char llc_sysctl_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the sysctl entries\n"; +static char llc_sock_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the network family\n"; + static int __init llc2_init(void) { int rc = proto_register(&llc_proto, 0); @@ -1010,13 +1017,28 @@ static int __init llc2_init(void) llc_station_init(); llc_ui_sap_last_autoport = LLC_SAP_DYN_START; rc = llc_proc_init(); - if (rc != 0) + if (rc != 0) { + 
printk(llc_proc_err_msg); goto out_unregister_llc_proto; - sock_register(&llc_ui_family_ops); + } + rc = llc_sysctl_init(); + if (rc) { + printk(llc_sysctl_err_msg); + goto out_proc; + } + rc = sock_register(&llc_ui_family_ops); + if (rc) { + printk(llc_sock_err_msg); + goto out_sysctl; + } llc_add_pack(LLC_DEST_SAP, llc_sap_handler); llc_add_pack(LLC_DEST_CONN, llc_conn_handler); out: return rc; +out_sysctl: + llc_sysctl_exit(); +out_proc: + llc_proc_exit(); out_unregister_llc_proto: proto_unregister(&llc_proto); goto out; @@ -1029,6 +1051,7 @@ static void __exit llc2_exit(void) llc_remove_pack(LLC_DEST_CONN); sock_unregister(PF_LLC); llc_proc_exit(); + llc_sysctl_exit(); proto_unregister(&llc_proto); } diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 50c827e13a9f..a4daa91003dd 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -620,7 +620,7 @@ int llc_conn_ac_set_remote_busy(struct sock *sk, struct sk_buff *skb) if (!llc->remote_busy_flag) { llc->remote_busy_flag = 1; mod_timer(&llc->busy_state_timer.timer, - jiffies + llc->busy_state_timer.expire * HZ); + jiffies + llc->busy_state_timer.expire); } return 0; } @@ -853,7 +853,7 @@ int llc_conn_ac_start_p_timer(struct sock *sk, struct sk_buff *skb) llc_conn_set_p_flag(sk, 1); mod_timer(&llc->pf_cycle_timer.timer, - jiffies + llc->pf_cycle_timer.expire * HZ); + jiffies + llc->pf_cycle_timer.expire); return 0; } @@ -1131,7 +1131,7 @@ int llc_conn_ac_start_ack_timer(struct sock *sk, struct sk_buff *skb) { struct llc_sock *llc = llc_sk(sk); - mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire * HZ); + mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire); return 0; } @@ -1140,7 +1140,7 @@ int llc_conn_ac_start_rej_timer(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); mod_timer(&llc->rej_sent_timer.timer, - jiffies + llc->rej_sent_timer.expire * HZ); + jiffies + llc->rej_sent_timer.expire); return 0; } @@ -1151,7 +1151,7 @@ int llc_conn_ac_start_ack_tmr_if_not_running(struct sock *sk, if (!timer_pending(&llc->ack_timer.timer)) mod_timer(&llc->ack_timer.timer, - jiffies + llc->ack_timer.expire * HZ); + jiffies + llc->ack_timer.expire); return 0; } @@ -1199,7 +1199,7 @@ int llc_conn_ac_upd_nr_received(struct sock *sk, struct sk_buff *skb) } if (unacked) mod_timer(&llc->ack_timer.timer, - jiffies + llc->ack_timer.expire * HZ); + jiffies + llc->ack_timer.expire); } else if (llc->failed_data_req) { u8 f_bit; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index ce7b893ed1ab..d3783f8ee481 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -40,6 +40,11 @@ static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, /* Offset table on connection states transition diagram */ static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV]; +int sysctl_llc2_ack_timeout = LLC2_ACK_TIME * HZ; +int sysctl_llc2_p_timeout = LLC2_P_TIME * HZ; +int sysctl_llc2_rej_timeout = LLC2_REJ_TIME * HZ; +int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; + /** * llc_conn_state_process - sends event to connection state machine * @sk: connection @@ -799,22 +804,22 @@ static void llc_sk_init(struct sock* sk) llc->dec_step = llc->connect_step = 1; init_timer(&llc->ack_timer.timer); - llc->ack_timer.expire = LLC_ACK_TIME; + llc->ack_timer.expire = sysctl_llc2_ack_timeout; llc->ack_timer.timer.data = (unsigned long)sk; llc->ack_timer.timer.function = llc_conn_ack_tmr_cb; init_timer(&llc->pf_cycle_timer.timer); - llc->pf_cycle_timer.expire = LLC_P_TIME; + llc->pf_cycle_timer.expire = 
sysctl_llc2_p_timeout; llc->pf_cycle_timer.timer.data = (unsigned long)sk; llc->pf_cycle_timer.timer.function = llc_conn_pf_cycle_tmr_cb; init_timer(&llc->rej_sent_timer.timer); - llc->rej_sent_timer.expire = LLC_REJ_TIME; + llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; llc->rej_sent_timer.timer.data = (unsigned long)sk; llc->rej_sent_timer.timer.function = llc_conn_rej_tmr_cb; init_timer(&llc->busy_state_timer.timer); - llc->busy_state_timer.expire = LLC_BUSY_TIME; + llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; llc->busy_state_timer.timer.data = (unsigned long)sk; llc->busy_state_timer.timer.function = llc_conn_busy_tmr_cb; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 85a7ac276141..2d764b0382ce 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -50,6 +50,10 @@ struct llc_station { struct sk_buff_head mac_pdu_q; }; +#define LLC_STATION_ACK_TIME (3 * HZ) + +int sysctl_llc_station_ack_timeout = LLC_STATION_ACK_TIME; + /* Types of events (possible values in 'ev->type') */ #define LLC_STATION_EV_TYPE_SIMPLE 1 #define LLC_STATION_EV_TYPE_CONDITION 2 @@ -218,7 +222,8 @@ static void llc_station_send_pdu(struct sk_buff *skb) static int llc_station_ac_start_ack_timer(struct sk_buff *skb) { - mod_timer(&llc_main_station.ack_timer, jiffies + LLC_ACK_TIME * HZ); + mod_timer(&llc_main_station.ack_timer, + jiffies + sysctl_llc_station_ack_timeout); return 0; } @@ -687,7 +692,8 @@ int __init llc_station_init(void) init_timer(&llc_main_station.ack_timer); llc_main_station.ack_timer.data = (unsigned long)&llc_main_station; llc_main_station.ack_timer.function = llc_station_ack_tmr_cb; - + llc_main_station.ack_timer.expires = jiffies + + sysctl_llc_station_ack_timeout; skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; @@ -695,7 +701,6 @@ int __init llc_station_init(void) llc_set_station_handler(llc_station_rcv); ev = llc_station_ev(skb); memset(ev, 0, sizeof(*ev)); - llc_main_station.ack_timer.expires = jiffies + 3 * HZ; llc_main_station.maximum_retry = 1; llc_main_station.state = LLC_STATION_STATE_DOWN; ev->type = LLC_STATION_EV_TYPE_SIMPLE; diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c new file mode 100644 index 000000000000..4d99d2f27a21 --- /dev/null +++ b/net/llc/sysctl_net_llc.c @@ -0,0 +1,136 @@ +/* + * sysctl_net_llc.c: sysctl interface to LLC net subsystem. 
+ * + * Arnaldo Carvalho de Melo + */ + +#include +#include +#include +#include + +#ifndef CONFIG_SYSCTL +#error This file should not be compiled without CONFIG_SYSCTL defined +#endif + +extern int sysctl_llc2_ack_timeout; +extern int sysctl_llc2_busy_timeout; +extern int sysctl_llc2_p_timeout; +extern int sysctl_llc2_rej_timeout; +extern int sysctl_llc_station_ack_timeout; + +static struct ctl_table llc2_timeout_table[] = { + { + .ctl_name = NET_LLC2_ACK_TIMEOUT, + .procname = "ack", + .data = &sysctl_llc2_ack_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_BUSY_TIMEOUT, + .procname = "busy", + .data = &sysctl_llc2_busy_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_P_TIMEOUT, + .procname = "p", + .data = &sysctl_llc2_p_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_REJ_TIMEOUT, + .procname = "rej", + .data = &sysctl_llc2_rej_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { 0 }, +}; + +static struct ctl_table llc_station_table[] = { + { + .ctl_name = NET_LLC_STATION_ACK_TIMEOUT, + .procname = "ack_timeout", + .data = &sysctl_llc_station_ack_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { 0 }, +}; + +static struct ctl_table llc2_dir_timeout_table[] = { + { + .ctl_name = NET_LLC2, + .procname = "timeout", + .mode = 0555, + .child = llc2_timeout_table, + }, + { 0 }, +}; + +static struct ctl_table llc_table[] = { + { + .ctl_name = NET_LLC2, + .procname = "llc2", + .mode = 0555, + .child = llc2_dir_timeout_table, + }, + { + .ctl_name = NET_LLC_STATION, + .procname = "station", + .mode = 0555, + .child = llc_station_table, + }, + { 0 }, +}; + +static struct ctl_table llc_dir_table[] = { + { + .ctl_name = NET_LLC, + .procname = "llc", + .mode = 0555, + .child = llc_table, + }, + { 0 }, +}; + +static struct ctl_table llc_root_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = llc_dir_table, + }, + { 0 }, +}; + +static struct ctl_table_header *llc_table_header; + +int __init llc_sysctl_init(void) +{ + llc_table_header = register_sysctl_table(llc_root_table, 1); + + return llc_table_header ? 0 : -ENOMEM; +} + +void llc_sysctl_exit(void) +{ + if (llc_table_header) { + unregister_sysctl_table(llc_table_header); + llc_table_header = NULL; + } +} -- cgit v1.2.3 From 04e4223f44b89e50f275cb6b95a58ebe2c4909be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:40:59 -0300 Subject: [LLC]: Do better struct sock accounting on skbs Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_sap.h | 6 ++++-- net/llc/af_llc.c | 2 -- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 14 +++++++++----- net/llc/llc_sap.c | 8 ++++---- 5 files changed, 18 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index 6dd4cdccca42..9dcfcf4a4abb 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -12,11 +12,13 @@ * See the GNU General Public License for more details. 
*/ struct llc_sap; -struct sk_buff; struct net_device; +struct sk_buff; +struct sock; extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); -extern void llc_save_primitive(struct sk_buff* skb, unsigned char prim); +extern void llc_save_primitive(struct sock *sk, struct sk_buff* skb, + unsigned char prim); extern struct sk_buff *llc_alloc_frame(struct net_device *dev); extern void llc_build_and_send_test_pkt(struct llc_sap *sap, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f536369cdb52..ad9aad807aa8 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -628,7 +628,6 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) /* put original socket back into a clean listen state. */ sk->sk_state = TCP_LISTEN; sk->sk_ack_backlog--; - skb->sk = NULL; dprintk("%s: ok success on %02X, client on %02X\n", __FUNCTION__, llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap); frees: @@ -740,7 +739,6 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, lock_sock(sk); if (!skb) goto release; - skb->sk = sk; skb->dev = llc->dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hdrlen); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 9d9b6c8aeb44..c1e75103189a 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1335,7 +1335,7 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); - skb->sk = sk; + skb_set_owner_r(skb, sk); ev->type = type; llc_process_tmr_ev(sk, skb); } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index d3783f8ee481..9f0e10e0978d 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -90,8 +90,8 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) switch (ev->ind_prim) { case LLC_DATA_PRIM: - llc_save_primitive(skb, LLC_DATA_PRIM); - if (sock_queue_rcv_skb(sk, skb)) { + llc_save_primitive(sk, skb, LLC_DATA_PRIM); + if (unlikely(sock_queue_rcv_skb(sk, skb))) { /* * shouldn't happen */ @@ -103,6 +103,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) case LLC_CONN_PRIM: { struct sock *parent = skb->sk; + skb_orphan(skb); + /* + * Set the skb->sk to the new struct sock, so that at accept + * type the upper layer can get the newly created struct sock. + */ skb->sk = sk; skb_queue_tail(&parent->sk_receive_queue, skb); sk->sk_state_change(parent); @@ -702,10 +707,9 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) memcpy(&llc->daddr, &saddr, sizeof(llc->daddr)); llc_sap_add_socket(sap, sk); sock_hold(sk); + skb_set_owner_r(skb, parent); sock_put(parent); - skb->sk = parent; - } else - skb->sk = sk; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 0adaa289bf0a..9f064b3a4335 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -45,12 +45,12 @@ struct sk_buff *llc_alloc_frame(struct net_device *dev) return skb; } -void llc_save_primitive(struct sk_buff* skb, u8 prim) +void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) { struct sockaddr_llc *addr = llc_ui_skb_cb(skb); /* save primitive for use by the user. 
*/ - addr->sllc_family = skb->sk->sk_family; + addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; addr->sllc_xid = prim == LLC_XID_PRIM; @@ -190,7 +190,7 @@ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) if (skb->sk->sk_state == TCP_LISTEN) kfree_skb(skb); else { - llc_save_primitive(skb, ev->prim); + llc_save_primitive(skb->sk, skb, ev->prim); /* queue skb to the user. */ if (sock_queue_rcv_skb(skb->sk, skb)) @@ -309,7 +309,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) sk = llc_lookup_dgram(sap, &laddr); if (sk) { - skb->sk = sk; + skb_set_owner_r(skb, sk); llc_sap_rcv(sap, skb); sock_put(sk); } else -- cgit v1.2.3 From 6e2144b76840be09924de1626e2dcd7b315f75b3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:43:05 -0300 Subject: [LLC]: Use refcounting with struct llc_sap Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc.h | 16 +++++++++++++++- include/net/llc_conn.h | 1 - net/802/p8022.c | 2 +- net/802/psnap.c | 2 +- net/llc/af_llc.c | 11 ++++++----- net/llc/llc_c_ac.c | 1 + net/llc/llc_conn.c | 28 ++-------------------------- net/llc/llc_core.c | 34 +++++++++++++++++++++++----------- net/llc/llc_input.c | 9 +++++++-- 9 files changed, 56 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/net/llc.h b/include/net/llc.h index 8b8e2be289b1..93e5b443a9a7 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -17,6 +17,8 @@ #include #include +#include + struct net_device; struct packet_type; struct sk_buff; @@ -44,6 +46,7 @@ struct llc_sap { unsigned char state; unsigned char p_bit; unsigned char f_bit; + atomic_t refcnt; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, @@ -81,7 +84,18 @@ extern struct llc_sap *llc_sap_open(unsigned char lsap, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)); -extern void llc_sap_close(struct llc_sap *sap); +static inline void llc_sap_hold(struct llc_sap *sap) +{ + atomic_inc(&sap->refcnt); +} + +static inline void llc_sap_put(struct llc_sap *sap) +{ + extern void llc_sap_close(struct llc_sap *sap); + + if (atomic_dec_and_test(&sap->refcnt)) + llc_sap_close(sap); +} extern struct llc_sap *llc_sap_find(unsigned char sap_value); diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 8a8ff4810135..b2889218c76a 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -115,5 +115,4 @@ extern void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk); extern u8 llc_data_accept_state(u8 state); extern void llc_build_offset_table(void); -extern int llc_release_sockets(struct llc_sap *sap); #endif /* LLC_CONN_H */ diff --git a/net/802/p8022.c b/net/802/p8022.c index b24817c63ca8..2530f35241cd 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -56,7 +56,7 @@ struct datalink_proto *register_8022_client(unsigned char type, void unregister_8022_client(struct datalink_proto *proto) { - llc_sap_close(proto->sap); + llc_sap_put(proto->sap); kfree(proto); } diff --git a/net/802/psnap.c b/net/802/psnap.c index ab80b1fab53c..4d638944d933 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -106,7 +106,7 @@ module_init(snap_init); static void __exit snap_exit(void) { - llc_sap_close(snap_sap); + llc_sap_put(snap_sap); } module_exit(snap_exit); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index ad9aad807aa8..a75b8f2aab19 100644 --- a/net/llc/af_llc.c +++ 
b/net/llc/af_llc.c @@ -21,6 +21,7 @@ * See the GNU General Public License for more details. */ #include +#include #include #include #include @@ -188,10 +189,6 @@ static int llc_ui_release(struct socket *sock) if (!sock_flag(sk, SOCK_ZAPPED)) llc_sap_remove_socket(llc->sap, sk); release_sock(sk); - if (llc->sap && hlist_empty(&llc->sap->sk_list.list)) { - llc_release_sockets(llc->sap); - llc_sap_close(llc->sap); - } if (llc->dev) dev_put(llc->dev); sock_put(sk); @@ -220,6 +217,7 @@ static int llc_ui_autoport(void) llc_ui_sap_last_autoport = i + 2; goto out; } + llc_sap_put(sap); } llc_ui_sap_last_autoport = LLC_SAP_DYN_START; tries++; @@ -310,6 +308,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -EBUSY; /* some other network layer is using the sap */ if (!sap) goto out; + llc_sap_hold(sap); } else { struct llc_addr laddr, daddr; struct sock *ask; @@ -326,7 +325,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) ask = llc_lookup_established(sap, &daddr, &laddr); if (ask) { sock_put(ask); - goto out; + goto out_put; } } llc->laddr.lsap = addr->sllc_sap; @@ -336,6 +335,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) llc_sap_add_socket(sap, sk); sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; +out_put: + llc_sap_put(sap); out: return rc; } diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index c1e75103189a..05236c2cbb9e 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -75,6 +75,7 @@ int llc_conn_ac_conn_ind(struct sock *sk, struct sk_buff *skb) llc->dev = skb->dev; ev->ind_prim = LLC_CONN_PRIM; rc = 0; + llc_sap_put(sap); } return rc; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 9f0e10e0978d..ab9e6d7e2875 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -633,6 +633,7 @@ static int llc_find_offset(int state, int ev_type) */ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { + llc_sap_hold(sap); write_lock_bh(&sap->sk_list.lock); llc_sk(sk)->sap = sap; sk_add_node(sk, &sap->sk_list.list); @@ -652,6 +653,7 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) write_lock_bh(&sap->sk_list.lock); sk_del_node_init(sk); write_unlock_bh(&sap->sk_list.lock); + llc_sap_put(sap); } /** @@ -730,32 +732,6 @@ drop: static atomic_t llc_sock_nr; #endif -/** - * llc_release_sockets - releases all sockets in a sap - * @sap: sap to release its sockets - * - * Releases all connections of a sap. Returns 0 if all actions complete - * successfully, nonzero otherwise - */ -int llc_release_sockets(struct llc_sap *sap) -{ - int rc = 0; - struct sock *sk; - struct hlist_node *node; - - write_lock_bh(&sap->sk_list.lock); - - sk_for_each(sk, node, &sap->sk_list.list) { - llc_sk(sk)->state = LLC_CONN_STATE_TEMP; - - if (llc_send_disc(sk)) - rc = 1; - } - - write_unlock_bh(&sap->sk_list.lock); - return rc; -} - /** * llc_backlog_rcv - Processes rx frames and expired timers. 
* @sk: LLC sock (p8022 connection) diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 9727455bf0e7..9ccff1266b26 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -40,6 +40,7 @@ static struct llc_sap *llc_sap_alloc(void) sap->state = LLC_SAP_STATE_ACTIVE; memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN); rwlock_init(&sap->sk_list.lock); + atomic_set(&sap->refcnt, 1); } return sap; } @@ -52,9 +53,7 @@ static struct llc_sap *llc_sap_alloc(void) */ static void llc_add_sap(struct llc_sap *sap) { - write_lock_bh(&llc_sap_list_lock); list_add_tail(&sap->node, &llc_sap_list); - write_unlock_bh(&llc_sap_list_lock); } /** @@ -70,11 +69,25 @@ static void llc_del_sap(struct llc_sap *sap) write_unlock_bh(&llc_sap_list_lock); } +struct llc_sap *__llc_sap_find(unsigned char sap_value) +{ + struct llc_sap* sap; + + list_for_each_entry(sap, &llc_sap_list, node) + if (sap->laddr.lsap == sap_value) + goto out; + sap = NULL; +out: + return sap; +} + /** * llc_sap_find - searchs a SAP in station * @sap_value: sap to be found * * Searchs for a sap in the sap list of the LLC's station upon the sap ID. + * If the sap is found it will be refcounted and the user will have to do + * a llc_sap_put after use. * Returns the sap or %NULL if not found. */ struct llc_sap *llc_sap_find(unsigned char sap_value) @@ -82,11 +95,9 @@ struct llc_sap *llc_sap_find(unsigned char sap_value) struct llc_sap* sap; read_lock_bh(&llc_sap_list_lock); - list_for_each_entry(sap, &llc_sap_list, node) - if (sap->laddr.lsap == sap_value) - goto out; - sap = NULL; -out: + sap = __llc_sap_find(sap_value); + if (sap) + llc_sap_hold(sap); read_unlock_bh(&llc_sap_list_lock); return sap; } @@ -106,19 +117,20 @@ struct llc_sap *llc_sap_open(unsigned char lsap, struct packet_type *pt, struct net_device *orig_dev)) { - struct llc_sap *sap = llc_sap_find(lsap); + struct llc_sap *sap = NULL; - if (sap) { /* SAP already exists */ - sap = NULL; + write_lock_bh(&llc_sap_list_lock); + if (__llc_sap_find(lsap)) /* SAP already exists */ goto out; - } sap = llc_sap_alloc(); if (!sap) goto out; sap->laddr.lsap = lsap; sap->rcv_func = func; + llc_sap_hold(sap); llc_add_sap(sap); out: + write_unlock_bh(&llc_sap_list_lock); return sap; } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 60c1acac7c97..789eec426451 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -166,17 +166,22 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ if (sap->rcv_func) { sap->rcv_func(skb, dev, pt, orig_dev); - goto out; + goto out_put; } dest = llc_pdu_type(skb); if (unlikely(!dest || !llc_type_handlers[dest - 1])) - goto drop; + goto drop_put; llc_type_handlers[dest - 1](sap, skb); +out_put: + llc_sap_put(sap); out: return 0; drop: kfree_skb(skb); goto out; +drop_put: + kfree_skb(skb); + goto out_put; handle_station: if (!llc_station_handler) goto drop; -- cgit v1.2.3 From 2928c19e1086e2f1e90d05931437ab6f1e4cfdc8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 05:14:33 -0300 Subject: [LLC]: Fix sparse warnings Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc.h | 13 +++++++++++-- include/net/llc_conn.h | 3 ++- net/llc/af_llc.c | 5 +---- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 3 ++- net/llc/llc_core.c | 2 +- net/llc/sysctl_net_llc.c | 7 +------ 7 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/llc.h b/include/net/llc.h index 93e5b443a9a7..1adb2ef3f6f7 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -89,10 +89,10 @@ static 
inline void llc_sap_hold(struct llc_sap *sap) atomic_inc(&sap->refcnt); } +extern void llc_sap_close(struct llc_sap *sap); + static inline void llc_sap_put(struct llc_sap *sap) { - extern void llc_sap_close(struct llc_sap *sap); - if (atomic_dec_and_test(&sap->refcnt)) llc_sap_close(sap); } @@ -102,6 +102,9 @@ extern struct llc_sap *llc_sap_find(unsigned char sap_value); extern int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, unsigned char *dmac, unsigned char dsap); +extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); +extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); + extern int llc_station_init(void); extern void llc_station_exit(void); @@ -115,6 +118,12 @@ extern void llc_proc_exit(void); #ifdef CONFIG_SYSCTL extern int llc_sysctl_init(void); extern void llc_sysctl_exit(void); + +extern int sysctl_llc2_ack_timeout; +extern int sysctl_llc2_busy_timeout; +extern int sysctl_llc2_p_timeout; +extern int sysctl_llc2_rej_timeout; +extern int sysctl_llc_station_ack_timeout; #else #define llc_sysctl_init() (0) #define llc_sysctl_exit() do { } while(0) diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index b2889218c76a..e44f494bbef1 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -92,7 +92,8 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) return skb->cb[sizeof(skb->cb) - 1]; } -extern struct sock *llc_sk_alloc(int family, int priority, struct proto *prot); +extern struct sock *llc_sk_alloc(int family, unsigned int __nocast priority, + struct proto *prot); extern void llc_sk_free(struct sock *sk); extern void llc_sk_reset(struct sock *sk); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 3361ae900e2b..7aa51eb79b13 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -555,7 +555,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) return rc; } -int llc_wait_data(struct sock *sk, long timeo) +static int llc_wait_data(struct sock *sk, long timeo) { int rc; @@ -1003,9 +1003,6 @@ static struct proto_ops llc_ui_ops = { .sendpage = sock_no_sendpage, }; -extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); -extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); - static char llc_proc_err_msg[] __initdata = KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; static char llc_sysctl_err_msg[] __initdata = diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 05236c2cbb9e..8f7b46d20638 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1321,7 +1321,7 @@ int llc_conn_ac_set_vs_nr(struct sock *sk, struct sk_buff *skb) return 0; } -int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) +static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) { llc_sk(sk)->vS = (llc_sk(sk)->vS + 1) % 128; return 0; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index ab9e6d7e2875..76f94e0d840d 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -819,7 +819,8 @@ static void llc_sk_init(struct sock* sk) * Allocates a LLC sock and initializes it. 
Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, int priority, struct proto *prot) +struct sock *llc_sk_alloc(int family, unsigned int __nocast priority, + struct proto *prot) { struct sock *sk = sk_alloc(family, priority, prot, 1); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 9ccff1266b26..ab0fcd32fd84 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -69,7 +69,7 @@ static void llc_del_sap(struct llc_sap *sap) write_unlock_bh(&llc_sap_list_lock); } -struct llc_sap *__llc_sap_find(unsigned char sap_value) +static struct llc_sap *__llc_sap_find(unsigned char sap_value) { struct llc_sap* sap; diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 4d99d2f27a21..d1eaddb13633 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -8,17 +8,12 @@ #include #include #include +#include #ifndef CONFIG_SYSCTL #error This file should not be compiled without CONFIG_SYSCTL defined #endif -extern int sysctl_llc2_ack_timeout; -extern int sysctl_llc2_busy_timeout; -extern int sysctl_llc2_p_timeout; -extern int sysctl_llc2_rej_timeout; -extern int sysctl_llc_station_ack_timeout; - static struct ctl_table llc2_timeout_table[] = { { .ctl_name = NET_LLC2_ACK_TIMEOUT, -- cgit v1.2.3 From d389424e00f9097cd24b3df4ca0ab7221f140eeb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 07:57:21 -0300 Subject: [LLC]: Fix the accept path Borrowing the structure of TCP/IP for this. On the receive of new connections I was bh_lock_socking the _new_ sock, not the listening one, duh, now it survives the ssh connections storm I've been using to test this specific bug. Also fixes send side skb sock accounting. Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_sap.h | 3 +- net/llc/llc_c_ac.c | 64 +++++++++-------------- net/llc/llc_conn.c | 142 +++++++++++++++++++++++++++++++++----------------- net/llc/llc_if.c | 2 + net/llc/llc_s_ac.c | 4 +- net/llc/llc_sap.c | 4 +- net/llc/llc_station.c | 6 +-- 7 files changed, 132 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index 9dcfcf4a4abb..2c56dbece729 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -19,7 +19,8 @@ struct sock; extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); extern void llc_save_primitive(struct sock *sk, struct sk_buff* skb, unsigned char prim); -extern struct sk_buff *llc_alloc_frame(struct net_device *dev); +extern struct sk_buff *llc_alloc_frame(struct sock *sk, + struct net_device *dev); extern void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 8f7b46d20638..b0bcfb1f12dd 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -60,24 +60,10 @@ int llc_conn_ac_clear_remote_busy(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_conn_ind(struct sock *sk, struct sk_buff *skb) { - int rc = -ENOTCONN; - u8 dsap; - struct llc_sap *sap; - - llc_pdu_decode_dsap(skb, &dsap); - sap = llc_sap_find(dsap); - if (sap) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - struct llc_sock *llc = llc_sk(sk); + struct llc_conn_state_ev *ev = llc_conn_ev(skb); - llc_pdu_decode_sa(skb, llc->daddr.mac); - llc_pdu_decode_da(skb, llc->laddr.mac); - llc->dev = skb->dev; - ev->ind_prim = LLC_CONN_PRIM; - rc = 0; - llc_sap_put(sap); - } - return rc; + ev->ind_prim = LLC_CONN_PRIM; + return 0; } int llc_conn_ac_conn_confirm(struct sock 
*sk, struct sk_buff *skb) @@ -213,7 +199,7 @@ int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -238,7 +224,7 @@ int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -264,7 +250,7 @@ int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -297,7 +283,7 @@ int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) llc_pdu_decode_pf_bit(skb, &f_bit); else f_bit = 0; - nskb = llc_alloc_frame(llc->dev); + nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -321,7 +307,7 @@ int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -351,7 +337,7 @@ int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); llc_pdu_decode_pf_bit(skb, &f_bit); - nskb = llc_alloc_frame(llc->dev); + nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); @@ -439,7 +425,7 @@ int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk, struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -474,7 +460,7 @@ int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -498,7 +484,7 @@ int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -522,7 +508,7 @@ int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -546,7 +532,7 @@ int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -570,7 +556,7 @@ int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct 
sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -594,7 +580,7 @@ int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -630,7 +616,7 @@ int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -654,7 +640,7 @@ int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -678,7 +664,7 @@ int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -703,7 +689,7 @@ int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -727,7 +713,7 @@ int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -751,7 +737,7 @@ int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -785,7 +771,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -814,7 +800,7 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) u8 f_bit; int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); llc_pdu_decode_pf_bit(skb, &f_bit); if (nskb) { @@ -970,7 +956,7 @@ static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 76f94e0d840d..e10ce5adb104 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -58,7 +58,7 @@ int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) { int rc; - struct llc_sock *llc = llc_sk(sk); + struct llc_sock *llc = llc_sk(skb->sk); struct 
llc_conn_state_ev *ev = llc_conn_ev(skb); /* @@ -68,7 +68,10 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) */ skb_get(skb); ev->ind_prim = ev->cfm_prim = 0; - rc = llc_conn_service(sk, skb); /* sending event to state machine */ + /* + * Send event to state machine + */ + rc = llc_conn_service(skb->sk, skb); if (unlikely(rc != 0)) { printk(KERN_ERR "%s: llc_conn_service failed\n", __FUNCTION__); goto out_kfree_skb; @@ -100,18 +103,14 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } break; - case LLC_CONN_PRIM: { - struct sock *parent = skb->sk; - - skb_orphan(skb); + case LLC_CONN_PRIM: /* - * Set the skb->sk to the new struct sock, so that at accept - * type the upper layer can get the newly created struct sock. + * Can't be sock_queue_rcv_skb, because we have to leave the + * skb->sk pointing to the newly created struct sock in + * llc_conn_handler. -acme */ - skb->sk = sk; - skb_queue_tail(&parent->sk_receive_queue, skb); - sk->sk_state_change(parent); - } + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_state_change(sk); break; case LLC_DISC_PRIM: sock_hold(sk); @@ -475,7 +474,7 @@ static int llc_exec_conn_trans_actions(struct sock *sk, } /** - * llc_lookup_established - Finds connection for the remote/local sap/mac + * __llc_lookup_established - Finds connection for the remote/local sap/mac * @sap: SAP * @daddr: address of remote LLC (MAC + SAP) * @laddr: address of local LLC (MAC + SAP) @@ -483,14 +482,16 @@ static int llc_exec_conn_trans_actions(struct sock *sk, * Search connection list of the SAP and finds connection using the remote * mac, remote sap, local mac, and local sap. Returns pointer for * connection found, %NULL otherwise. + * Caller has to make sure local_bh is disabled. */ -struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, - struct llc_addr *laddr) +static struct sock *__llc_lookup_established(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) { struct sock *rc; struct hlist_node *node; - read_lock_bh(&sap->sk_list.lock); + read_lock(&sap->sk_list.lock); sk_for_each(rc, node, &sap->sk_list.list) { struct llc_sock *llc = llc_sk(rc); @@ -504,10 +505,22 @@ struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, } rc = NULL; found: - read_unlock_bh(&sap->sk_list.lock); + read_unlock(&sap->sk_list.lock); return rc; } +struct sock *llc_lookup_established(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) +{ + struct sock *sk; + + local_bh_disable(); + sk = __llc_lookup_established(sap, daddr, laddr); + local_bh_enable(); + return sk; +} + /** * llc_lookup_listener - Finds listener for local MAC + SAP * @sap: SAP @@ -516,6 +529,7 @@ found: * Search connection list of the SAP and finds connection listening on * local mac, and local sap. Returns pointer for parent socket found, * %NULL otherwise. + * Caller has to make sure local_bh is disabled. 
*/ static struct sock *llc_lookup_listener(struct llc_sap *sap, struct llc_addr *laddr) @@ -523,7 +537,7 @@ static struct sock *llc_lookup_listener(struct llc_sap *sap, struct sock *rc; struct hlist_node *node; - read_lock_bh(&sap->sk_list.lock); + read_lock(&sap->sk_list.lock); sk_for_each(rc, node, &sap->sk_list.list) { struct llc_sock *llc = llc_sk(rc); @@ -537,10 +551,19 @@ static struct sock *llc_lookup_listener(struct llc_sap *sap, } rc = NULL; found: - read_unlock_bh(&sap->sk_list.lock); + read_unlock(&sap->sk_list.lock); return rc; } +static struct sock *__llc_lookup(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) +{ + struct sock *sk = __llc_lookup_established(sap, daddr, laddr); + + return sk ? : llc_lookup_listener(sap, laddr); +} + /** * llc_data_accept_state - designates if in this state data can be sent. * @state: state of connection. @@ -666,15 +689,34 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) static int llc_conn_rcv(struct sock* sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); - struct llc_sock *llc = llc_sk(sk); - if (!llc->dev) - llc->dev = skb->dev; ev->type = LLC_CONN_EV_TYPE_PDU; ev->reason = 0; return llc_conn_state_process(sk, skb); } +static struct sock *llc_create_incoming_sock(struct sock *sk, + struct net_device *dev, + struct llc_addr *saddr, + struct llc_addr *daddr) +{ + struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, + sk->sk_prot); + struct llc_sock *newllc, *llc = llc_sk(sk); + + if (!newsk) + goto out; + newllc = llc_sk(newsk); + memcpy(&newllc->laddr, daddr, sizeof(newllc->laddr)); + memcpy(&newllc->daddr, saddr, sizeof(newllc->daddr)); + newllc->dev = dev; + dev_hold(dev); + llc_sap_add_socket(llc->sap, newsk); + llc_sap_hold(llc->sap); +out: + return newsk; +} + void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) { struct llc_addr saddr, daddr; @@ -685,34 +727,35 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_da(skb, daddr.mac); llc_pdu_decode_dsap(skb, &daddr.lsap); - sk = llc_lookup_established(sap, &saddr, &daddr); - if (!sk) { + sk = __llc_lookup(sap, &saddr, &daddr); + if (!sk) + goto drop; + + bh_lock_sock(sk); + /* + * This has to be done here and not at the upper layer ->accept + * method because of the way the PROCOM state machine works: + * it needs to set several state variables (see, for instance, + * llc_adm_actions_2 in net/llc/llc_c_st.c) and send a packet to + * the originator of the new connection, and this state has to be + * in the newly created struct sock private area. -acme + */ + if (unlikely(sk->sk_state == TCP_LISTEN)) { + struct sock *newsk = llc_create_incoming_sock(sk, skb->dev, + &saddr, &daddr); + if (!newsk) + goto drop_unlock; + skb_set_owner_r(skb, newsk); + } else { /* - * Didn't find an active connection; verify if there - * is a listening socket for this llc addr + * Can't be skb_set_owner_r, this will be done at the + * llc_conn_state_process function, later on, when we will use + * skb_queue_rcv_skb to send it to upper layers, this is + * another trick required to cope with how the PROCOM state + * machine works. 
-acme */ - struct llc_sock *llc; - struct sock *parent = llc_lookup_listener(sap, &daddr); - - if (!parent) { - dprintk("llc_lookup_listener failed!\n"); - goto drop; - } - - sk = llc_sk_alloc(parent->sk_family, GFP_ATOMIC, parent->sk_prot); - if (!sk) { - sock_put(parent); - goto drop; - } - llc = llc_sk(sk); - memcpy(&llc->laddr, &daddr, sizeof(llc->laddr)); - memcpy(&llc->daddr, &saddr, sizeof(llc->daddr)); - llc_sap_add_socket(sap, sk); - sock_hold(sk); - skb_set_owner_r(skb, parent); - sock_put(parent); + skb->sk = sk; } - bh_lock_sock(sk); if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); else { @@ -720,11 +763,16 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) llc_set_backlog_type(skb, LLC_PACKET); sk_add_backlog(sk, skb); } +out: bh_unlock_sock(sk); sock_put(sk); return; drop: kfree_skb(skb); + return; +drop_unlock: + kfree_skb(skb); + goto out; } #undef LLC_REFCNT_DEBUG diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 764dbd704051..ba90f7f0801a 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -107,6 +107,7 @@ int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap) ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_CONN_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; + skb_set_owner_w(skb, sk); rc = llc_conn_state_process(sk, skb); } out_put: @@ -141,6 +142,7 @@ int llc_send_disc(struct sock *sk) skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; + skb_set_owner_w(skb, sk); sk->sk_state = TCP_CLOSING; ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 6acdebf03b2b..bb3580fb8cfe 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -103,7 +103,7 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(skb->dev); + nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, @@ -148,7 +148,7 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(skb->dev); + nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 9f064b3a4335..e6d538937f93 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -31,7 +31,7 @@ * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. 
*/ -struct sk_buff *llc_alloc_frame(struct net_device *dev) +struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev) { struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); @@ -41,6 +41,8 @@ struct sk_buff *llc_alloc_frame(struct net_device *dev) skb->protocol = htons(ETH_P_802_2); skb->dev = dev; skb->mac.raw = skb->head; + if (sk != NULL) + skb_set_owner_w(skb, sk); } return skb; } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index ec4693fe312f..f37dbf8ef126 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -254,7 +254,7 @@ static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) { int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(skb->dev); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; @@ -275,7 +275,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff* nskb = llc_alloc_frame(skb->dev); + struct sk_buff* nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; @@ -299,7 +299,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(skb->dev); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; -- cgit v1.2.3 From 8420e1b541fe92aee1d8d4d25d9e33eaca756a7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 08:29:08 -0300 Subject: [LLC]: fix llc_ui_recvmsg, making it behave like tcp_recvmsg In fact it is an exact copy of the parts that makes sense to LLC :-) Signed-off-by: Arnaldo Carvalho de Melo --- include/net/llc_conn.h | 1 + net/llc/af_llc.c | 180 ++++++++++++++++++++++++++++++++++++++++--------- net/llc/llc_conn.c | 2 +- net/llc/llc_proc.c | 2 +- net/llc/llc_sap.c | 5 +- 5 files changed, 154 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index e44f494bbef1..54852ff6033b 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -38,6 +38,7 @@ struct llc_sock { struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ + u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; u8 first_pdu_Ns; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7aa51eb79b13..59d02cbbeb9e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -648,53 +648,167 @@ out: * llc_ui_recvmsg - copy received data to the socket user. * @sock: Socket to copy data from. * @msg: Various user space related information. - * @size: Size of user buffer. + * @len: Size of user buffer. * @flags: User specified flags. * * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. 
*/ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) + struct msghdr *msg, size_t len, int flags) { - struct sock *sk = sock->sk; struct sockaddr_llc *uaddr = (struct sockaddr_llc *)msg->msg_name; - struct sk_buff *skb; + const int nonblock = flags & MSG_DONTWAIT; + struct sk_buff *skb = NULL; + struct sock *sk = sock->sk; + struct llc_sock *llc = llc_sk(sk); size_t copied = 0; - int rc = -ENOMEM; - int noblock = flags & MSG_DONTWAIT; + u32 peek_seq = 0; + u32 *seq; + unsigned long used; + int target; /* Read at least this many bytes */ + long timeo; - dprintk("%s: receiving in %02X from %02X\n", __FUNCTION__, - llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); lock_sock(sk); - if (skb_queue_empty(&sk->sk_receive_queue)) { - rc = llc_wait_data(sk, sock_rcvtimeo(sk, noblock)); - if (rc) - goto out; - } - skb = skb_dequeue(&sk->sk_receive_queue); - if (!skb) /* shutdown */ + copied = -ENOTCONN; + if (sk->sk_state == TCP_LISTEN) goto out; - copied = skb->len; - if (copied > size) - copied = size; - rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (rc) - goto dgram_free; - if (skb->len > copied) { - skb_pull(skb, copied); - skb_queue_head(&sk->sk_receive_queue, skb); - } - if (uaddr) - memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); - msg->msg_namelen = sizeof(*uaddr); - if (!skb->next) { -dgram_free: - kfree_skb(skb); - } + + timeo = sock_rcvtimeo(sk, nonblock); + + seq = &llc->copied_seq; + if (flags & MSG_PEEK) { + peek_seq = llc->copied_seq; + seq = &peek_seq; + } + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + copied = 0; + + do { + u32 offset; + + /* + * We need to check signals first, to get correct SIGURG + * handling. FIXME: Need to check this doesn't impact 1003.1g + * and move it down to the bottom of the loop + */ + if (signal_pending(current)) { + if (copied) + break; + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + break; + } + + /* Next get a buffer. */ + + skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + offset = *seq; + goto found_ok_skb; + } + /* Well, if we have backlog, try to process it now yet. */ + + if (copied >= target && !sk->sk_backlog.tail) + break; + + if (copied) { + if (sk->sk_err || + sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + !timeo || + (flags & MSG_PEEK)) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + if (sk->sk_state == TCP_CLOSE) { + if (!sock_flag(sk, SOCK_DONE)) { + /* + * This occurs when user tries to read + * from never connected socket. + */ + copied = -ENOTCONN; + break; + } + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + } + + if (copied >= target) { /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else + sk_wait_data(sk, &timeo); + + if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { + if (net_ratelimit()) + printk(KERN_DEBUG "LLC(%s:%d): Application " + "bug, race in MSG_PEEK.\n", + current->comm, current->pid); + peek_seq = llc->copied_seq; + } + continue; + found_ok_skb: + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) + used = len; + + if (!(flags & MSG_TRUNC)) { + int rc = skb_copy_datagram_iovec(skb, offset, + msg->msg_iov, used); + if (rc) { + /* Exception. Bailout! 
*/ + if (!copied) + copied = -EFAULT; + break; + } + } + + *seq += used; + copied += used; + len -= used; + + if (used + offset < skb->len) + continue; + + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb); + *seq = 0; + } + } while (len > 0); + + /* + * According to UNIX98, msg_name/msg_namelen are ignored + * on connected socket. -ANK + * But... af_llc still doesn't have separate sets of methods for + * SOCK_DGRAM and SOCK_STREAM :-( So we have to do this test, will + * eventually fix this tho :-) -acme + */ + if (sk->sk_type == SOCK_DGRAM) + goto copy_uaddr; out: release_sock(sk); - return rc ? : copied; + return copied; +copy_uaddr: + if (uaddr != NULL && skb != NULL) { + memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); + msg->msg_namelen = sizeof(*uaddr); + } + goto out; } /** diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index e10ce5adb104..042b24a8ca4c 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -120,8 +120,8 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) sk->sk_socket->state = SS_UNCONNECTED; sk->sk_state = TCP_CLOSE; if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); + sk->sk_state_change(sk); } } kfree_skb(skb); diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 36e8db3fa1a2..bd531cb235a7 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -134,7 +134,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) llc_ui_format_mac(seq, llc->daddr.mac); seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, atomic_read(&sk->sk_wmem_alloc), - atomic_read(&sk->sk_rmem_alloc), + atomic_read(&sk->sk_rmem_alloc) - llc->copied_seq, sk->sk_state, sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, llc->link); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index e6d538937f93..4029ceee9b91 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -49,9 +49,12 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev) void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) { - struct sockaddr_llc *addr = llc_ui_skb_cb(skb); + struct sockaddr_llc *addr; + if (skb->sk->sk_type == SOCK_STREAM) /* See UNIX98 */ + return; /* save primitive for use by the user. */ + addr = llc_ui_skb_cb(skb); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; -- cgit v1.2.3 From d305ef5d2a4e77bfa66160513f4a7494126a506b Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Thu, 22 Sep 2005 00:47:24 -0700 Subject: [PATCH] driver core: add helper device_is_registered() add the helper and use it instead of open coding the klist_node_attached() check (which is a layering violation IMHO) idea by Alan Stern. 
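For illustration only (not part of the patch): a caller that used to peek at dev->knode_bus directly would now be written along the lines of the sketch below. The wrapper function and the dev_dbg() message are invented for the example; only device_is_registered() itself comes from this change.

#include <linux/device.h>

/* hypothetical caller, sketching the intended usage */
static void example_release_if_added(struct device *dev)
{
	if (!device_is_registered(dev)) {
		/* device_add() has not run yet, so there is nothing to undo */
		return;
	}
	dev_dbg(dev, "releasing driver from registered device\n");
	device_release_driver(dev);
}

This mirrors the usb_driver_release_interface() hunk below, which swaps the open-coded klist_node_attached() test for the helper in exactly the same way.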
Signed-off-by: Daniel Ritz Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/s390/cio/ccwgroup.c | 2 +- drivers/usb/core/message.c | 2 +- drivers/usb/core/usb.c | 6 +++--- include/linux/device.h | 5 +++++ 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 91ea8e4777f3..dbb3eb0e330b 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -437,7 +437,7 @@ __ccwgroup_get_gdev_by_cdev(struct ccw_device *cdev) if (cdev->dev.driver_data) { gdev = (struct ccwgroup_device *)cdev->dev.driver_data; if (get_device(&gdev->dev)) { - if (klist_node_attached(&gdev->dev.knode_bus)) + if (device_is_registered(&gdev->dev)) return gdev; put_device(&gdev->dev); } diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index c47c8052b486..f1fb67fe22a8 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -987,7 +987,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) /* remove this interface if it has been registered */ interface = dev->actconfig->interface[i]; - if (!klist_node_attached(&interface->dev.knode_bus)) + if (!device_is_registered(&interface->dev)) continue; dev_dbg (&dev->dev, "unregistering interface %s\n", interface->dev.bus_id); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 087af73a59dd..7d131509e419 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -303,7 +303,7 @@ int usb_driver_claim_interface(struct usb_driver *driver, /* if interface was already added, bind now; else let * the future device_add() bind it, bypassing probe() */ - if (klist_node_attached(&dev->knode_bus)) + if (device_is_registered(dev)) device_bind_driver(dev); return 0; @@ -336,8 +336,8 @@ void usb_driver_release_interface(struct usb_driver *driver, if (iface->condition != USB_INTERFACE_BOUND) return; - /* release only after device_add() */ - if (klist_node_attached(&dev->knode_bus)) { + /* don't release if the interface hasn't been added yet */ + if (device_is_registered(dev)) { iface->condition = USB_INTERFACE_UNBINDING; device_release_driver(dev); } diff --git a/include/linux/device.h b/include/linux/device.h index 06e5d42f2c7b..95d607a48f06 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -317,6 +317,11 @@ dev_set_drvdata (struct device *dev, void *data) dev->driver_data = data; } +static inline int device_is_registered(struct device *dev) +{ + return klist_node_attached(&dev->knode_bus); +} + /* * High level routines for use by the bus drivers */ -- cgit v1.2.3 From 20bb86852a6b7d9ca8c48ff921ff3904038959cf Mon Sep 17 00:00:00 2001 From: Keith Owens Date: Thu, 22 Sep 2005 18:49:15 +1000 Subject: [IA64] Wire in the MCA/INIT handler stacks Wire the MCA/INIT handler stacks into DTR[2] and track them in IA64_KR(CURRENT_STACK). This gives the MCA/INIT handler stacks the same TLB status as normal kernel stacks. Reload the old CURRENT_STACK data on return from OS to SAL. 
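A hedged aside, not part of the patch: the new prev_IA64_KR_CURRENT_STACK field (see the include/asm-ia64/mca.h hunk below) is what recovery code would have to update, together with prev_IA64_KR_CURRENT, if it wanted to resume to a different kernel stack. A minimal C sketch of such an update, with an invented helper name, might look like this:

#include <linux/sched.h>
#include <asm/mca.h>

/* hypothetical helper, for illustration only */
static void mca_retarget_resume(struct ia64_sal_os_state *sos,
				struct task_struct *new_task,
				u64 new_stack_granule)
{
	/* ia64_state_restore below rewires IA64_TR_CURRENT_STACK from these */
	sos->prev_IA64_KR_CURRENT       = (u64)new_task;
	sos->prev_IA64_KR_CURRENT_STACK = new_stack_granule;
	sos->prev_task                  = new_task;
}

The patch itself only saves and restores these values; how a recovery path would choose a new stack is out of scope here.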
Signed-off-by: Keith Owens Signed-off-by: Tony Luck --- arch/ia64/kernel/mca_asm.S | 96 ++++++++++++++++++++++++++++++++++++++++------ include/asm-ia64/mca.h | 5 +++ 2 files changed, 90 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 499a065f4e60..db32fc1d3935 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -489,24 +489,27 @@ ia64_state_save: ;; st8 [temp1]=r17,16 // pal_min_state st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT + mov r6=IA64_KR(CURRENT_STACK) + ;; + st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK + st8 [temp2]=r0,16 // prev_task, starts off as NULL mov r6=cr.ifa ;; - st8 [temp1]=r0,16 // prev_task, starts off as NULL - st8 [temp2]=r12,16 // cr.isr + st8 [temp1]=r12,16 // cr.isr + st8 [temp2]=r6,16 // cr.ifa mov r12=cr.itir ;; - st8 [temp1]=r6,16 // cr.ifa - st8 [temp2]=r12,16 // cr.itir + st8 [temp1]=r12,16 // cr.itir + st8 [temp2]=r11,16 // cr.iipa mov r12=cr.iim ;; - st8 [temp1]=r11,16 // cr.iipa - st8 [temp2]=r12,16 // cr.iim - mov r6=cr.iha + st8 [temp1]=r12,16 // cr.iim (p1) mov r12=IA64_MCA_COLD_BOOT (p2) mov r12=IA64_INIT_WARM_BOOT + mov r6=cr.iha ;; - st8 [temp1]=r6,16 // cr.iha - st8 [temp2]=r12 // os_status, default is cold boot + st8 [temp2]=r6,16 // cr.iha + st8 [temp1]=r12 // os_status, default is cold boot mov r6=IA64_MCA_SAME_CONTEXT ;; st8 [temp1]=r6 // context, default is same context @@ -823,9 +826,12 @@ ia64_state_restore: ld8 r12=[temp1],16 // sal_ra ld8 r9=[temp2],16 // sal_gp ;; - ld8 r22=[temp1],24 // pal_min_state, virtual. skip prev_task + ld8 r22=[temp1],16 // pal_min_state, virtual ld8 r21=[temp2],16 // prev_IA64_KR_CURRENT ;; + ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK + ld8 r20=[temp2],16 // prev_task + ;; ld8 temp3=[temp1],16 // cr.isr ld8 temp4=[temp2],16 // cr.ifa ;; @@ -846,6 +852,45 @@ ia64_state_restore: ld8 r8=[temp1] // os_status ld8 r10=[temp2] // context + /* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To + * avoid any dependencies on the algorithm in ia64_switch_to(), just + * purge any existing CURRENT_STACK mapping and insert the new one. + * + * r16 contains prev_IA64_KR_CURRENT_STACK, r21 contains + * prev_IA64_KR_CURRENT, these values may have been changed by the C + * code. Do not use r8, r9, r10, r22, they contain values ready for + * the return to SAL. + */ + + mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r15=r15,IA64_GRANULE_SHIFT + ;; + dep r15=-1,r15,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r15,r18 + ;; + srlz.d + + extr.u r19=r21,61,3 // r21 = prev_IA64_KR_CURRENT + shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region? 
+ or r21=r20,r21 // construct PA | page properties +(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:( + ;; + mov cr.itir=r18 + mov cr.ifa=r21 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d +1: + br.sptk b0 //EndStub////////////////////////////////////////////////////////////////////// @@ -982,6 +1027,7 @@ ia64_set_kernel_registers: add temp4=temp4, temp1 // &struct ia64_sal_os_state.os_gp add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack add r13=temp1, r3 // set current to start of MCA/INIT stack + add r20=temp1, r3 // physical start of MCA/INIT stack ;; ld8 r1=[temp4] // OS GP from SAL OS state ;; @@ -991,7 +1037,35 @@ ia64_set_kernel_registers: ;; mov IA64_KR(CURRENT)=r13 - // FIXME: do I need to wire IA64_KR_CURRENT_STACK and IA64_TR_CURRENT_STACK? + /* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid + * any dependencies on the algorithm in ia64_switch_to(), just purge + * any existing CURRENT_STACK mapping and insert the new one. + */ + + mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r16=r16,IA64_GRANULE_SHIFT + ;; + dep r16=-1,r16,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r16,r18 + ;; + srlz.d + + shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + or r21=r20,r21 // construct PA | page properties + ;; + mov cr.itir=r18 + mov cr.ifa=r13 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d br.sptk b0 diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h index 97a28b8b2ddd..c7d9c9ed38ba 100644 --- a/include/asm-ia64/mca.h +++ b/include/asm-ia64/mca.h @@ -80,7 +80,12 @@ struct ia64_sal_os_state { u64 sal_ra; /* Return address in SAL, physical */ u64 sal_gp; /* GP of the SAL - physical */ pal_min_state_area_t *pal_min_state; /* from R17. physical in asm, virtual in C */ + /* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK). + * Note: if the MCA/INIT recovery code wants to resume to a new context + * then it must change these values to reflect the new kernel stack. + */ u64 prev_IA64_KR_CURRENT; /* previous value of IA64_KR(CURRENT) */ + u64 prev_IA64_KR_CURRENT_STACK; struct task_struct *prev_task; /* previous task, NULL if it is not useful */ /* Some interrupt registers are not saved in minstate, pt_regs or * switch_stack. Because MCA/INIT can occur when interrupts are -- cgit v1.2.3 From bff06d552240ba7f5b49482a4865871d7bc03dc2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 22 Sep 2005 20:11:33 -0700 Subject: [SPARC64]: Rewrite bootup sequence. Instead of all of this cpu-specific code to remap the kernel to the correct location, use portable firmware calls to do this instead. What we do now is the following in position independant assembler: chosen_node = prom_finddevice("/chosen"); prom_mmu_ihandle_cache = prom_getint(chosen_node, "mmu"); vaddr = 4MB_ALIGN(current_text_addr()); prom_translate(vaddr, &paddr_high, &paddr_low, &mode); prom_boot_mapping_mode = mode; prom_boot_mapping_phys_high = paddr_high; prom_boot_mapping_phys_low = paddr_low; prom_map(-1, 8 * 1024 * 1024, KERNBASE, paddr_low); and that replaces the massive amount of by-hand TLB probing and programming we used to do here. The new code should also handle properly the case where the kernel is mapped at the correct address already (think: future kexec support). 
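Read as plain C, the sequence above amounts to the sketch below. This is illustrative only: the helper name, the C-level variables and the prototypes are assumptions for the example rather than the real oplib.h declarations, since the actual work is done with hand-built call-method argument arrays in head.S where no C environment exists yet.

/* illustrative sketch; prototypes are assumed, not the real oplib.h ones */
extern int prom_finddevice(const char *path);
extern int prom_getint(int node, const char *prop);
extern void prom_translate(unsigned long vaddr, unsigned long *paddr_high,
			   unsigned long *paddr_low, unsigned long *mode);
extern void prom_map(int mode, unsigned long size,
		     unsigned long vaddr, unsigned long paddr);

static int prom_mmu_ihandle_cache;	/* ihandle used by the translate/map wrappers */
static unsigned long prom_boot_mapping_mode;
static unsigned long prom_boot_mapping_phys_high, prom_boot_mapping_phys_low;

void remap_kernel_sketch(unsigned long text_pc, unsigned long kernbase)
{
	unsigned long vaddr = text_pc & ~((4UL << 20) - 1);	/* 4MB_ALIGN(current_text_addr()) */
	int chosen_node = prom_finddevice("/chosen");

	prom_mmu_ihandle_cache = prom_getint(chosen_node, "mmu");
	prom_translate(vaddr, &prom_boot_mapping_phys_high,
		       &prom_boot_mapping_phys_low, &prom_boot_mapping_mode);
	prom_map(-1 /* default mode */, 8 * 1024 * 1024, kernbase,
		 prom_boot_mapping_phys_low);
}

The sketch only shows the call order; it is the firmware, not the kernel, that ends up programming the MMU for the initial 8MB mapping at KERNBASE.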
Consequently, the bulk of remap_kernel() dies as does the entirety of arch/sparc64/prom/map.S We try to share some strings in the PROM library with the ones used at bootup, and while we're here mark input strings to oplib.h routines with "const" when appropriate. There are many more simplifications now possible. For one thing, we can consolidate the two copies we now have of a lot of cpu setup code sitting in head.S and trampoline.S. This is a significant step towards CONFIG_DEBUG_PAGEALLOC support. Signed-off-by: David S. Miller --- arch/sparc64/kernel/head.S | 549 ++++++++++++--------------------------- arch/sparc64/kernel/setup.c | 15 +- arch/sparc64/kernel/trampoline.S | 16 +- arch/sparc64/mm/init.c | 98 +------ arch/sparc64/prom/Makefile | 2 +- arch/sparc64/prom/console.c | 2 +- arch/sparc64/prom/devops.c | 2 +- arch/sparc64/prom/init.c | 2 +- arch/sparc64/prom/map.S | 72 ----- arch/sparc64/prom/misc.c | 34 ++- arch/sparc64/prom/p1275.c | 2 +- arch/sparc64/prom/printf.c | 2 +- arch/sparc64/prom/tree.c | 50 ++-- include/asm-sparc64/oplib.h | 52 ++-- 14 files changed, 254 insertions(+), 644 deletions(-) delete mode 100644 arch/sparc64/prom/map.S (limited to 'include') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index dc3551f46b76..8b83520e4272 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -80,15 +80,165 @@ sparc_ramdisk_image64: .xword 0 .word _end - /* We must be careful, 32-bit OpenBOOT will get confused if it - * tries to save away a register window to a 64-bit kernel - * stack address. Flush all windows, disable interrupts, - * remap if necessary, jump onto kernel trap table, then kernel - * stack, or else we die. + /* PROM cif handler code address is in %o4. */ +sparc64_boot: +1: rd %pc, %g7 + set 1b, %g1 + cmp %g1, %g7 + be,pn %xcc, sparc64_boot_after_remap + mov %o4, %l7 + + /* We need to remap the kernel. Use position independant + * code to remap us to KERNBASE. * - * PROM entry point is on %o4 + * SILO can invoke us with 32-bit address masking enabled, + * so make sure that's clear. */ -sparc64_boot: + rdpr %pstate, %g1 + andn %g1, PSTATE_AM, %g1 + wrpr %g1, 0x0, %pstate + ba,a,pt %xcc, 1f + + .globl prom_finddev_name, prom_chosen_path + .globl prom_getprop_name, prom_mmu_name + .globl prom_callmethod_name, prom_translate_name + .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache + .globl prom_boot_mapped_pc, prom_boot_mapping_mode + .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low +prom_finddev_name: + .asciz "finddevice" +prom_chosen_path: + .asciz "/chosen" +prom_getprop_name: + .asciz "getprop" +prom_mmu_name: + .asciz "mmu" +prom_callmethod_name: + .asciz "call-method" +prom_translate_name: + .asciz "translate" +prom_map_name: + .asciz "map" +prom_unmap_name: + .asciz "unmap" + .align 4 +prom_mmu_ihandle_cache: + .word 0 +prom_boot_mapped_pc: + .word 0 +prom_boot_mapping_mode: + .word 0 + .align 8 +prom_boot_mapping_phys_high: + .xword 0 +prom_boot_mapping_phys_low: + .xword 0 +1: + rd %pc, %l0 + mov (1b - prom_finddev_name), %l1 + mov (1b - prom_chosen_path), %l2 + mov (1b - prom_boot_mapped_pc), %l3 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l3, %l3 + stw %l0, [%l3] + sub %sp, (192 + 128), %sp + + /* chosen_node = prom_finddevice("/chosen") */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice" + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l2, [%sp + 2047 + 128 + 0x18] ! 
arg1, "/chosen" + stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x20], %l4 ! chosen device node + + mov (1b - prom_getprop_name), %l1 + mov (1b - prom_mmu_name), %l2 + mov (1b - prom_mmu_ihandle_cache), %l5 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l5, %l5 + + /* prom_mmu_ihandle_cache = prom_getint(chosen_node, "mmu") */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop" + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, chosen_node + stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "mmu" + stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_mmu_ihandle_cache + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, sizeof(arg3) + stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + mov (1b - prom_callmethod_name), %l1 + mov (1b - prom_translate_name), %l2 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + lduw [%l5], %l5 ! prom_mmu_ihandle_cache + + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "call-method" + mov 3, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 3 + mov 5, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5 + stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate" + stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache + srlx %l0, 22, %l3 + sllx %l3, 22, %l3 + stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC + stx %g0, [%sp + 2047 + 128 + 0x30] ! res1 + stx %g0, [%sp + 2047 + 128 + 0x38] ! res2 + stx %g0, [%sp + 2047 + 128 + 0x40] ! res3 + stx %g0, [%sp + 2047 + 128 + 0x48] ! res4 + stx %g0, [%sp + 2047 + 128 + 0x50] ! res5 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x40], %l1 ! translation mode + mov (1b - prom_boot_mapping_mode), %l4 + sub %l0, %l4, %l4 + stw %l1, [%l4] + mov (1b - prom_boot_mapping_phys_high), %l4 + sub %l0, %l4, %l4 + ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high + stx %l2, [%l4 + 0x0] + ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low + stx %l3, [%l4 + 0x8] + + /* Leave service as-is, "call-method" */ + mov 7, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 7 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 7 + mov (1b - prom_map_name), %l3 + sub %l0, %l3, %l3 + stx %l3, [%sp + 2047 + 128 + 0x18] ! arg1: "map" + /* Leave arg2 as-is, prom_mmu_ihandle_cache */ + mov -1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: mode (-1 default) + sethi %hi(8 * 1024 * 1024), %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4: size (8MB) + sethi %hi(KERNBASE), %l3 + stx %l3, [%sp + 2047 + 128 + 0x38] ! arg5: vaddr (KERNBASE) + stx %g0, [%sp + 2047 + 128 + 0x40] ! arg6: empty + mov (1b - prom_boot_mapping_phys_low), %l3 + sub %l0, %l3, %l3 + ldx [%l3], %l3 + stx %l3, [%sp + 2047 + 128 + 0x48] ! arg7: phys addr + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + add %sp, (192 + 128), %sp + +sparc64_boot_after_remap: BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot) BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot) ba,pt %xcc, spitfire_boot @@ -125,185 +275,7 @@ cheetah_generic_boot: stxa %g0, [%g3] ASI_IMMU membar #Sync - wrpr %g0, (PSTATE_PRIV|PSTATE_PEF|PSTATE_IE), %pstate - wr %g0, 0, %fprs - - /* Just like for Spitfire, we probe itlb-2 for a mapping which - * matches our current %pc. We take the physical address in - * that mapping and use it to make our own. 
- */ - - /* %g5 holds the tlb data */ - sethi %uhi(_PAGE_VALID | _PAGE_SZ4MB), %g5 - sllx %g5, 32, %g5 - or %g5, (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W | _PAGE_G), %g5 - - /* Put PADDR tlb data mask into %g3. */ - sethi %uhi(_PAGE_PADDR), %g3 - or %g3, %ulo(_PAGE_PADDR), %g3 - sllx %g3, 32, %g3 - sethi %hi(_PAGE_PADDR), %g7 - or %g7, %lo(_PAGE_PADDR), %g7 - or %g3, %g7, %g3 - - set 2 << 16, %l0 /* TLB entry walker. */ - set 0x1fff, %l2 /* Page mask. */ - rd %pc, %l3 - andn %l3, %l2, %g2 /* vaddr comparator */ - -1: ldxa [%l0] ASI_ITLB_TAG_READ, %g1 - membar #Sync - andn %g1, %l2, %g1 - cmp %g1, %g2 - be,pn %xcc, cheetah_got_tlbentry - nop - and %l0, (127 << 3), %g1 - cmp %g1, (127 << 3) - blu,pt %xcc, 1b - add %l0, (1 << 3), %l0 - - /* Search the small TLB. OBP never maps us like that but - * newer SILO can. - */ - clr %l0 - -1: ldxa [%l0] ASI_ITLB_TAG_READ, %g1 - membar #Sync - andn %g1, %l2, %g1 - cmp %g1, %g2 - be,pn %xcc, cheetah_got_tlbentry - nop - cmp %l0, (15 << 3) - blu,pt %xcc, 1b - add %l0, (1 << 3), %l0 - - /* BUG() if we get here... */ - ta 0x5 - -cheetah_got_tlbentry: - ldxa [%l0] ASI_ITLB_DATA_ACCESS, %g0 - ldxa [%l0] ASI_ITLB_DATA_ACCESS, %g1 - membar #Sync - and %g1, %g3, %g1 - set 0x5fff, %l0 - andn %g1, %l0, %g1 - or %g5, %g1, %g5 - - /* Clear out any KERNBASE area entries. */ - set 2 << 16, %l0 - sethi %hi(KERNBASE), %g3 - sethi %hi(KERNBASE<<1), %g7 - mov TLB_TAG_ACCESS, %l7 - - /* First, check ITLB */ -1: ldxa [%l0] ASI_ITLB_TAG_READ, %g1 - membar #Sync - andn %g1, %l2, %g1 - cmp %g1, %g3 - blu,pn %xcc, 2f - cmp %g1, %g7 - bgeu,pn %xcc, 2f - nop - stxa %g0, [%l7] ASI_IMMU - membar #Sync - stxa %g0, [%l0] ASI_ITLB_DATA_ACCESS - membar #Sync - -2: and %l0, (127 << 3), %g1 - cmp %g1, (127 << 3) - blu,pt %xcc, 1b - add %l0, (1 << 3), %l0 - - /* Next, check DTLB */ - set 2 << 16, %l0 -1: ldxa [%l0] ASI_DTLB_TAG_READ, %g1 - membar #Sync - andn %g1, %l2, %g1 - cmp %g1, %g3 - blu,pn %xcc, 2f - cmp %g1, %g7 - bgeu,pn %xcc, 2f - nop - stxa %g0, [%l7] ASI_DMMU - membar #Sync - stxa %g0, [%l0] ASI_DTLB_DATA_ACCESS - membar #Sync - -2: and %l0, (511 << 3), %g1 - cmp %g1, (511 << 3) - blu,pt %xcc, 1b - add %l0, (1 << 3), %l0 - - /* On Cheetah+, have to check second DTLB. */ - BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,l0,2f) - ba,pt %xcc, 9f - nop - -2: set 3 << 16, %l0 -1: ldxa [%l0] ASI_DTLB_TAG_READ, %g1 - membar #Sync - andn %g1, %l2, %g1 - cmp %g1, %g3 - blu,pn %xcc, 2f - cmp %g1, %g7 - bgeu,pn %xcc, 2f - nop - stxa %g0, [%l7] ASI_DMMU - membar #Sync - stxa %g0, [%l0] ASI_DTLB_DATA_ACCESS - membar #Sync - -2: and %l0, (511 << 3), %g1 - cmp %g1, (511 << 3) - blu,pt %xcc, 1b - add %l0, (1 << 3), %l0 - -9: - - /* Now lock the TTE we created into ITLB-0 and DTLB-0, - * entry 15 (and maybe 14 too). 
- */ - sethi %hi(KERNBASE), %g3 - set (0 << 16) | (15 << 3), %g7 - stxa %g3, [%l7] ASI_DMMU - membar #Sync - stxa %g5, [%g7] ASI_DTLB_DATA_ACCESS - membar #Sync - stxa %g3, [%l7] ASI_IMMU - membar #Sync - stxa %g5, [%g7] ASI_ITLB_DATA_ACCESS - membar #Sync - flush %g3 - membar #Sync - sethi %hi(_end), %g3 /* Check for bigkernel case */ - or %g3, %lo(_end), %g3 - srl %g3, 23, %g3 /* Check if _end > 8M */ - brz,pt %g3, 1f - sethi %hi(KERNBASE), %g3 /* Restore for fixup code below */ - sethi %hi(0x400000), %g3 - or %g3, %lo(0x400000), %g3 - add %g5, %g3, %g5 /* New tte data */ - andn %g5, (_PAGE_G), %g5 - sethi %hi(KERNBASE+0x400000), %g3 - or %g3, %lo(KERNBASE+0x400000), %g3 - set (0 << 16) | (14 << 3), %g7 - stxa %g3, [%l7] ASI_DMMU - membar #Sync - stxa %g5, [%g7] ASI_DTLB_DATA_ACCESS - membar #Sync - stxa %g3, [%l7] ASI_IMMU - membar #Sync - stxa %g5, [%g7] ASI_ITLB_DATA_ACCESS - membar #Sync - flush %g3 - membar #Sync - sethi %hi(KERNBASE), %g3 /* Restore for fixup code below */ - ba,pt %xcc, 1f - nop - -1: set sun4u_init, %g2 - jmpl %g2 + %g0, %g0 - nop + ba,a,pt %xcc, jump_to_sun4u_init spitfire_boot: /* Typically PROM has already enabled both MMU's and both on-chip @@ -313,6 +285,7 @@ spitfire_boot: stxa %g1, [%g0] ASI_LSU_CONTROL membar #Sync +jump_to_sun4u_init: /* * Make sure we are in privileged mode, have address masking, * using the ordinary globals and have enabled floating @@ -324,151 +297,6 @@ spitfire_boot: wrpr %g0, (PSTATE_PRIV|PSTATE_PEF|PSTATE_IE), %pstate wr %g0, 0, %fprs -spitfire_create_mappings: - /* %g5 holds the tlb data */ - sethi %uhi(_PAGE_VALID | _PAGE_SZ4MB), %g5 - sllx %g5, 32, %g5 - or %g5, (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W | _PAGE_G), %g5 - - /* Base of physical memory cannot reliably be assumed to be - * at 0x0! Figure out where it happens to be. -DaveM - */ - - /* Put PADDR tlb data mask into %g3. */ - sethi %uhi(_PAGE_PADDR_SF), %g3 - or %g3, %ulo(_PAGE_PADDR_SF), %g3 - sllx %g3, 32, %g3 - sethi %hi(_PAGE_PADDR_SF), %g7 - or %g7, %lo(_PAGE_PADDR_SF), %g7 - or %g3, %g7, %g3 - - /* Walk through entire ITLB, looking for entry which maps - * our %pc currently, stick PADDR from there into %g5 tlb data. - */ - clr %l0 /* TLB entry walker. */ - set 0x1fff, %l2 /* Page mask. */ - rd %pc, %l3 - andn %l3, %l2, %g2 /* vaddr comparator */ -1: - /* Yes, the nops seem to be necessary for now, don't ask me why. -DaveM */ - ldxa [%l0] ASI_ITLB_TAG_READ, %g1 - nop - nop - nop - andn %g1, %l2, %g1 /* Get vaddr */ - cmp %g1, %g2 - be,a,pn %xcc, spitfire_got_tlbentry - ldxa [%l0] ASI_ITLB_DATA_ACCESS, %g1 - cmp %l0, (63 << 3) - blu,pt %xcc, 1b - add %l0, (1 << 3), %l0 - - /* BUG() if we get here... */ - ta 0x5 - -spitfire_got_tlbentry: - /* Nops here again, perhaps Cheetah/Blackbird are better behaved... */ - nop - nop - nop - and %g1, %g3, %g1 /* Mask to just get paddr bits. */ - set 0x5fff, %l3 /* Mask offset to get phys base. */ - andn %g1, %l3, %g1 - - /* NOTE: We hold on to %g1 paddr base as we need it below to lock - * NOTE: the PROM cif code into the TLB. - */ - - or %g5, %g1, %g5 /* Or it into TAG being built. */ - - clr %l0 /* TLB entry walker. */ - sethi %hi(KERNBASE), %g3 /* 4M lower limit */ - sethi %hi(KERNBASE<<1), %g7 /* 8M upper limit */ - mov TLB_TAG_ACCESS, %l7 -1: - /* Yes, the nops seem to be necessary for now, don't ask me why. 
-DaveM */ - ldxa [%l0] ASI_ITLB_TAG_READ, %g1 - nop - nop - nop - andn %g1, %l2, %g1 /* Get vaddr */ - cmp %g1, %g3 - blu,pn %xcc, 2f - cmp %g1, %g7 - bgeu,pn %xcc, 2f - nop - stxa %g0, [%l7] ASI_IMMU - stxa %g0, [%l0] ASI_ITLB_DATA_ACCESS - membar #Sync -2: - cmp %l0, (63 << 3) - blu,pt %xcc, 1b - add %l0, (1 << 3), %l0 - - nop; nop; nop - - clr %l0 /* TLB entry walker. */ -1: - /* Yes, the nops seem to be necessary for now, don't ask me why. -DaveM */ - ldxa [%l0] ASI_DTLB_TAG_READ, %g1 - nop - nop - nop - andn %g1, %l2, %g1 /* Get vaddr */ - cmp %g1, %g3 - blu,pn %xcc, 2f - cmp %g1, %g7 - bgeu,pn %xcc, 2f - nop - stxa %g0, [%l7] ASI_DMMU - stxa %g0, [%l0] ASI_DTLB_DATA_ACCESS - membar #Sync -2: - cmp %l0, (63 << 3) - blu,pt %xcc, 1b - add %l0, (1 << 3), %l0 - - nop; nop; nop - - - /* PROM never puts any TLB entries into the MMU with the lock bit - * set. So we gladly use tlb entry 63 for KERNBASE. And maybe 62 too. - */ - - sethi %hi(KERNBASE), %g3 - mov (63 << 3), %g7 - stxa %g3, [%l7] ASI_DMMU /* KERNBASE into TLB TAG */ - stxa %g5, [%g7] ASI_DTLB_DATA_ACCESS /* TTE into TLB DATA */ - membar #Sync - stxa %g3, [%l7] ASI_IMMU /* KERNBASE into TLB TAG */ - stxa %g5, [%g7] ASI_ITLB_DATA_ACCESS /* TTE into TLB DATA */ - membar #Sync - flush %g3 - membar #Sync - sethi %hi(_end), %g3 /* Check for bigkernel case */ - or %g3, %lo(_end), %g3 - srl %g3, 23, %g3 /* Check if _end > 8M */ - brz,pt %g3, 2f - sethi %hi(KERNBASE), %g3 /* Restore for fixup code below */ - sethi %hi(0x400000), %g3 - or %g3, %lo(0x400000), %g3 - add %g5, %g3, %g5 /* New tte data */ - andn %g5, (_PAGE_G), %g5 - sethi %hi(KERNBASE+0x400000), %g3 - or %g3, %lo(KERNBASE+0x400000), %g3 - mov (62 << 3), %g7 - stxa %g3, [%l7] ASI_DMMU - stxa %g5, [%g7] ASI_DTLB_DATA_ACCESS - membar #Sync - stxa %g3, [%l7] ASI_IMMU - stxa %g5, [%g7] ASI_ITLB_DATA_ACCESS - membar #Sync - flush %g3 - membar #Sync - sethi %hi(KERNBASE), %g3 /* Restore for fixup code below */ -2: ba,pt %xcc, 1f - nop -1: set sun4u_init, %g2 jmpl %g2 + %g0, %g0 nop @@ -483,38 +311,12 @@ sun4u_init: stxa %g0, [%g7] ASI_DMMU membar #Sync - /* We are now safely (we hope) in Nucleus context (0), rewrite - * the KERNBASE TTE's so they no longer have the global bit set. - * Don't forget to setup TAG_ACCESS first 8-) - */ - mov TLB_TAG_ACCESS, %g2 - stxa %g3, [%g2] ASI_IMMU - stxa %g3, [%g2] ASI_DMMU - membar #Sync - BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup) ba,pt %xcc, spitfire_tlb_fixup nop cheetah_tlb_fixup: - set (0 << 16) | (15 << 3), %g7 - ldxa [%g7] ASI_ITLB_DATA_ACCESS, %g0 - ldxa [%g7] ASI_ITLB_DATA_ACCESS, %g1 - andn %g1, (_PAGE_G), %g1 - stxa %g1, [%g7] ASI_ITLB_DATA_ACCESS - membar #Sync - - ldxa [%g7] ASI_DTLB_DATA_ACCESS, %g0 - ldxa [%g7] ASI_DTLB_DATA_ACCESS, %g1 - andn %g1, (_PAGE_G), %g1 - stxa %g1, [%g7] ASI_DTLB_DATA_ACCESS - membar #Sync - - /* Kill instruction prefetch queues. */ - flush %g3 - membar #Sync - mov 2, %g2 /* Set TLB type to cheetah+. */ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f) @@ -551,21 +353,6 @@ cheetah_tlb_fixup: nop spitfire_tlb_fixup: - mov (63 << 3), %g7 - ldxa [%g7] ASI_ITLB_DATA_ACCESS, %g1 - andn %g1, (_PAGE_G), %g1 - stxa %g1, [%g7] ASI_ITLB_DATA_ACCESS - membar #Sync - - ldxa [%g7] ASI_DTLB_DATA_ACCESS, %g1 - andn %g1, (_PAGE_G), %g1 - stxa %g1, [%g7] ASI_DTLB_DATA_ACCESS - membar #Sync - - /* Kill instruction prefetch queues. */ - flush %g3 - membar #Sync - /* Set TLB type to spitfire. 
*/ mov 0, %g2 sethi %hi(tlb_type), %g1 @@ -578,24 +365,6 @@ tlb_fixup_done: mov %sp, %l6 mov %o4, %l7 -#if 0 /* We don't do it like this anymore, but for historical hack value - * I leave this snippet here to show how crazy we can be sometimes. 8-) - */ - - /* Setup "Linux Current Register", thanks Sun 8-) */ - wr %g0, 0x1, %pcr - - /* Blackbird errata workaround. See commentary in - * smp.c:smp_percpu_timer_interrupt() for more - * information. - */ - ba,pt %xcc, 99f - nop - .align 64 -99: wr %g6, %g0, %pic - rd %pic, %g0 -#endif - wr %g0, ASI_P, %asi mov 1, %g1 sllx %g1, THREAD_SHIFT, %g1 diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index ddbed3341a23..0296cb00fc99 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -536,20 +536,7 @@ void __init setup_arch(char **cmdline_p) } pfn_base = phys_base >> PAGE_SHIFT; - switch (tlb_type) { - default: - case spitfire: - kern_base = spitfire_get_itlb_data(sparc64_highest_locked_tlbent()); - kern_base &= _PAGE_PADDR_SF; - break; - - case cheetah: - case cheetah_plus: - kern_base = cheetah_get_litlb_data(sparc64_highest_locked_tlbent()); - kern_base &= _PAGE_PADDR; - break; - }; - + kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; if (!root_flags) diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 3a145fc39cf2..89f2fcfcd662 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -119,8 +119,8 @@ startup_continue: sethi %hi(itlb_load), %g2 or %g2, %lo(itlb_load), %g2 stx %g2, [%sp + 2047 + 128 + 0x18] - sethi %hi(mmu_ihandle_cache), %g2 - lduw [%g2 + %lo(mmu_ihandle_cache)], %g2 + sethi %hi(prom_mmu_ihandle_cache), %g2 + lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 stx %g2, [%sp + 2047 + 128 + 0x20] sethi %hi(KERNBASE), %g2 stx %g2, [%sp + 2047 + 128 + 0x28] @@ -156,8 +156,8 @@ startup_continue: sethi %hi(itlb_load), %g2 or %g2, %lo(itlb_load), %g2 stx %g2, [%sp + 2047 + 128 + 0x18] - sethi %hi(mmu_ihandle_cache), %g2 - lduw [%g2 + %lo(mmu_ihandle_cache)], %g2 + sethi %hi(prom_mmu_ihandle_cache), %g2 + lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 stx %g2, [%sp + 2047 + 128 + 0x20] sethi %hi(KERNBASE + 0x400000), %g2 stx %g2, [%sp + 2047 + 128 + 0x28] @@ -190,8 +190,8 @@ do_dtlb: sethi %hi(dtlb_load), %g2 or %g2, %lo(dtlb_load), %g2 stx %g2, [%sp + 2047 + 128 + 0x18] - sethi %hi(mmu_ihandle_cache), %g2 - lduw [%g2 + %lo(mmu_ihandle_cache)], %g2 + sethi %hi(prom_mmu_ihandle_cache), %g2 + lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 stx %g2, [%sp + 2047 + 128 + 0x20] sethi %hi(KERNBASE), %g2 stx %g2, [%sp + 2047 + 128 + 0x28] @@ -228,8 +228,8 @@ do_dtlb: sethi %hi(dtlb_load), %g2 or %g2, %lo(dtlb_load), %g2 stx %g2, [%sp + 2047 + 128 + 0x18] - sethi %hi(mmu_ihandle_cache), %g2 - lduw [%g2 + %lo(mmu_ihandle_cache)], %g2 + sethi %hi(prom_mmu_ihandle_cache), %g2 + lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 stx %g2, [%sp + 2047 + 128 + 0x20] sethi %hi(KERNBASE + 0x400000), %g2 stx %g2, [%sp + 2047 + 128 + 0x28] diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index ec47de494c1f..e0b9eebf21ce 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -505,108 +505,20 @@ static int read_obp_translations(void) return n; } -static inline void early_spitfire_errata32(void) -{ - /* Spitfire Errata #32 workaround */ - /* NOTE: Using plain zero for the context value is - * correct here, we are not using the Linux trap - * tables yet so we should not 
use the special - * UltraSPARC-III+ page size encodings yet. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), "r" (PRIMARY_CONTEXT), - "i" (ASI_DMMU)); -} - -static void lock_remap_func_page(unsigned long phys_page) -{ - unsigned long tte_data = (phys_page | pgprot_val(PAGE_KERNEL)); - - if (tlb_type == spitfire) { - /* Lock this into i/d tlb entry 59 */ - __asm__ __volatile__( - "stxa %%g0, [%2] %3\n\t" - "stxa %0, [%1] %4\n\t" - "membar #Sync\n\t" - "flush %%g6\n\t" - "stxa %%g0, [%2] %5\n\t" - "stxa %0, [%1] %6\n\t" - "membar #Sync\n\t" - "flush %%g6" - : /* no outputs */ - : "r" (tte_data), "r" (59 << 3), "r" (TLB_TAG_ACCESS), - "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS), - "i" (ASI_IMMU), "i" (ASI_ITLB_DATA_ACCESS) - : "memory"); - } else { - /* Lock this into i/d tlb-0 entry 11 */ - __asm__ __volatile__( - "stxa %%g0, [%2] %3\n\t" - "stxa %0, [%1] %4\n\t" - "membar #Sync\n\t" - "flush %%g6\n\t" - "stxa %%g0, [%2] %5\n\t" - "stxa %0, [%1] %6\n\t" - "membar #Sync\n\t" - "flush %%g6" - : /* no outputs */ - : "r" (tte_data), "r" ((0 << 16) | (11 << 3)), - "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU), - "i" (ASI_DTLB_DATA_ACCESS), "i" (ASI_IMMU), - "i" (ASI_ITLB_DATA_ACCESS) - : "memory"); - } -} - static void remap_kernel(void) { unsigned long phys_page, tte_vaddr, tte_data; - void (*remap_func)(unsigned long, unsigned long, int); int tlb_ent = sparc64_highest_locked_tlbent(); - early_spitfire_errata32(); - - if (tlb_type == spitfire) - phys_page = spitfire_get_dtlb_data(tlb_ent); - else - phys_page = cheetah_get_ldtlb_data(tlb_ent); - - phys_page &= _PAGE_PADDR; - phys_page += ((unsigned long)&prom_boot_page - - (unsigned long)KERNBASE); - - lock_remap_func_page(phys_page); - tte_vaddr = (unsigned long) KERNBASE; - - early_spitfire_errata32(); - - if (tlb_type == spitfire) - tte_data = spitfire_get_dtlb_data(tlb_ent); - else - tte_data = cheetah_get_ldtlb_data(tlb_ent); + phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + tte_data = (phys_page | (_PAGE_VALID | _PAGE_SZ4MB | + _PAGE_CP | _PAGE_CV | _PAGE_P | + _PAGE_L | _PAGE_W)); kern_locked_tte_data = tte_data; - remap_func = (void *) ((unsigned long) &prom_remap - - (unsigned long) &prom_boot_page); - - early_spitfire_errata32(); - - phys_page = tte_data & _PAGE_PADDR; - remap_func(phys_page, KERNBASE, prom_get_mmu_ihandle()); - if (bigkernel) - remap_func(phys_page + 0x400000, - KERNBASE + 0x400000, - prom_get_mmu_ihandle()); - - /* Flush out that temporary mapping. */ - spitfire_flush_dtlb_nucleus_page(0x0); - spitfire_flush_itlb_nucleus_page(0x0); - - /* Now lock us back into the TLBs via OBP. */ + /* Now lock us into the TLBs via OBP. 
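The payoff of saving the "translate" results in head.S shows up here and in the setup.c hunk earlier: both now derive the kernel's physical base directly from prom_boot_mapping_phys_low instead of probing TLB entries per CPU type. A minimal sketch, assuming only the externs this patch adds to oplib.h; the helper name is hypothetical:

    #include <asm/oplib.h>  /* prom_boot_mapping_phys_low, added by this patch */

    /* Round the PROM-reported physical address of the boot mapping down to
     * a 4MB boundary: the expression used by both setup_arch() and the new
     * remap_kernel(). */
    static unsigned long kern_phys_base(void)
    {
        return (prom_boot_mapping_phys_low >> 22UL) << 22UL;
    }

The locked TTE is then simply this base OR'ed with the fixed _PAGE_VALID | _PAGE_SZ4MB | _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W attribute bits, as in the remap_kernel() hunk above.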
*/ prom_dtlb_load(tlb_ent, tte_data, tte_vaddr); prom_itlb_load(tlb_ent, tte_data, tte_vaddr); if (bigkernel) { diff --git a/arch/sparc64/prom/Makefile b/arch/sparc64/prom/Makefile index 8f2420d9e9e6..c7898a5ee456 100644 --- a/arch/sparc64/prom/Makefile +++ b/arch/sparc64/prom/Makefile @@ -7,4 +7,4 @@ EXTRA_AFLAGS := -ansi EXTRA_CFLAGS := -Werror lib-y := bootstr.o devops.o init.o memory.o misc.o \ - tree.o console.o printf.o p1275.o map.o cif.o + tree.o console.o printf.o p1275.o cif.o diff --git a/arch/sparc64/prom/console.c b/arch/sparc64/prom/console.c index 028a53fcb1ec..eae5db8dda56 100644 --- a/arch/sparc64/prom/console.c +++ b/arch/sparc64/prom/console.c @@ -67,7 +67,7 @@ prom_putchar(char c) } void -prom_puts(char *s, int len) +prom_puts(const char *s, int len) { p1275_cmd("write", P1275_ARG(1,P1275_ARG_IN_BUF)| P1275_INOUT(3,1), diff --git a/arch/sparc64/prom/devops.c b/arch/sparc64/prom/devops.c index 2c99b21b6981..4641839eb39a 100644 --- a/arch/sparc64/prom/devops.c +++ b/arch/sparc64/prom/devops.c @@ -16,7 +16,7 @@ * Returns 0 on failure. */ int -prom_devopen(char *dstr) +prom_devopen(const char *dstr) { return p1275_cmd ("open", P1275_ARG(0,P1275_ARG_IN_STRING)| P1275_INOUT(1,1), diff --git a/arch/sparc64/prom/init.c b/arch/sparc64/prom/init.c index 817faae058cd..8b4b622d0909 100644 --- a/arch/sparc64/prom/init.c +++ b/arch/sparc64/prom/init.c @@ -46,7 +46,7 @@ void __init prom_init(void *cif_handler, void *cif_stack) if((prom_root_node == 0) || (prom_root_node == -1)) prom_halt(); - prom_chosen_node = prom_finddevice("/chosen"); + prom_chosen_node = prom_finddevice(prom_chosen_path); if (!prom_chosen_node || prom_chosen_node == -1) prom_halt(); diff --git a/arch/sparc64/prom/map.S b/arch/sparc64/prom/map.S deleted file mode 100644 index 21b3f9c99ea7..000000000000 --- a/arch/sparc64/prom/map.S +++ /dev/null @@ -1,72 +0,0 @@ -/* $Id: map.S,v 1.2 1999/11/19 05:53:02 davem Exp $ - * map.S: Tricky coding required to fixup the kernel OBP maps - * properly. - * - * Copyright (C) 1999 David S. Miller (davem@redhat.com) - */ - - .text - .align 8192 - .globl prom_boot_page -prom_boot_page: -call_method: - .asciz "call-method" - .align 8 -map: - .asciz "map" - .align 8 - - /* When we are invoked, our caller has remapped us to - * page zero, therefore we must use PC relative addressing - * for everything after we begin performing the unmap/map - * calls. - */ - .globl prom_remap -prom_remap: /* %o0 = physpage, %o1 = virtpage, %o2 = mmu_ihandle */ - rd %pc, %g1 - srl %o2, 0, %o2 ! kill sign extension - sethi %hi(p1275buf), %g2 - or %g2, %lo(p1275buf), %g2 - ldx [%g2 + 0x10], %g3 ! prom_cif_stack - save %g3, -(192 + 128), %sp - ldx [%g2 + 0x08], %l0 ! prom_cif_handler - mov %g6, %i3 - mov %g4, %i4 - mov %g5, %i5 - flushw - - sethi %hi(prom_remap - call_method), %g7 - or %g7, %lo(prom_remap - call_method), %g7 - sub %g1, %g7, %l2 ! call-method string - sethi %hi(prom_remap - map), %g7 - or %g7, %lo(prom_remap - map), %g7 - sub %g1, %g7, %l4 ! map string - - /* OK, map the 4MB region we really live at. */ - stx %l2, [%sp + 2047 + 128 + 0x00] ! call-method - mov 7, %l5 - stx %l5, [%sp + 2047 + 128 + 0x08] ! num_args - mov 1, %l5 - stx %l5, [%sp + 2047 + 128 + 0x10] ! num_rets - stx %l4, [%sp + 2047 + 128 + 0x18] ! map - stx %i2, [%sp + 2047 + 128 + 0x20] ! mmu_ihandle - mov -1, %l5 - stx %l5, [%sp + 2047 + 128 + 0x28] ! mode == default - sethi %hi(4 * 1024 * 1024), %l5 - stx %l5, [%sp + 2047 + 128 + 0x30] ! size - stx %i1, [%sp + 2047 + 128 + 0x38] ! 
vaddr - stx %g0, [%sp + 2047 + 128 + 0x40] ! filler - stx %i0, [%sp + 2047 + 128 + 0x48] ! paddr - call %l0 - add %sp, (2047 + 128), %o0 ! argument array - - /* Restore hard-coded globals. */ - mov %i3, %g6 - mov %i4, %g4 - mov %i5, %g5 - - /* Wheee.... we are done. */ - ret - restore - - .align 8192 diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c index 19c44e97e9ee..9b895faf077b 100644 --- a/arch/sparc64/prom/misc.c +++ b/arch/sparc64/prom/misc.c @@ -17,14 +17,14 @@ #include /* Reset and reboot the machine with the command 'bcommand'. */ -void prom_reboot(char *bcommand) +void prom_reboot(const char *bcommand) { p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) | P1275_INOUT(1, 0), bcommand); } /* Forth evaluate the expression contained in 'fstring'. */ -void prom_feval(char *fstring) +void prom_feval(const char *fstring) { if (!fstring || fstring[0] == 0) return; @@ -148,21 +148,19 @@ void prom_set_trap_table(unsigned long tba) p1275_cmd("SUNW,set-trap-table", P1275_INOUT(1, 0), tba); } -int mmu_ihandle_cache = 0; - int prom_get_mmu_ihandle(void) { int node, ret; - if (mmu_ihandle_cache != 0) - return mmu_ihandle_cache; + if (prom_mmu_ihandle_cache != 0) + return prom_mmu_ihandle_cache; - node = prom_finddevice("/chosen"); - ret = prom_getint(node, "mmu"); + node = prom_finddevice(prom_chosen_path); + ret = prom_getint(node, prom_mmu_name); if (ret == -1 || ret == 0) - mmu_ihandle_cache = -1; + prom_mmu_ihandle_cache = -1; else - mmu_ihandle_cache = ret; + prom_mmu_ihandle_cache = ret; return ret; } @@ -190,7 +188,7 @@ long prom_itlb_load(unsigned long index, unsigned long tte_data, unsigned long vaddr) { - return p1275_cmd("call-method", + return p1275_cmd(prom_callmethod_name, (P1275_ARG(0, P1275_ARG_IN_STRING) | P1275_ARG(2, P1275_ARG_IN_64B) | P1275_ARG(3, P1275_ARG_IN_64B) | @@ -207,7 +205,7 @@ long prom_dtlb_load(unsigned long index, unsigned long tte_data, unsigned long vaddr) { - return p1275_cmd("call-method", + return p1275_cmd(prom_callmethod_name, (P1275_ARG(0, P1275_ARG_IN_STRING) | P1275_ARG(2, P1275_ARG_IN_64B) | P1275_ARG(3, P1275_ARG_IN_64B) | @@ -223,13 +221,13 @@ long prom_dtlb_load(unsigned long index, int prom_map(int mode, unsigned long size, unsigned long vaddr, unsigned long paddr) { - int ret = p1275_cmd("call-method", + int ret = p1275_cmd(prom_callmethod_name, (P1275_ARG(0, P1275_ARG_IN_STRING) | P1275_ARG(3, P1275_ARG_IN_64B) | P1275_ARG(4, P1275_ARG_IN_64B) | P1275_ARG(6, P1275_ARG_IN_64B) | P1275_INOUT(7, 1)), - "map", + prom_map_name, prom_get_mmu_ihandle(), mode, size, @@ -244,12 +242,12 @@ int prom_map(int mode, unsigned long size, void prom_unmap(unsigned long size, unsigned long vaddr) { - p1275_cmd("call-method", + p1275_cmd(prom_callmethod_name, (P1275_ARG(0, P1275_ARG_IN_STRING) | P1275_ARG(2, P1275_ARG_IN_64B) | P1275_ARG(3, P1275_ARG_IN_64B) | P1275_INOUT(4, 0)), - "unmap", + prom_unmap_name, prom_get_mmu_ihandle(), size, vaddr); @@ -258,7 +256,7 @@ void prom_unmap(unsigned long size, unsigned long vaddr) /* Set aside physical memory which is not touched or modified * across soft resets. 
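A side effect of exporting the head.S string labels is that the C PROM library and the boot code now share one copy of each service name. A small usage sketch, assuming only the oplib.h declarations added by this patch (the wrapper function itself is hypothetical):

    #include <asm/oplib.h>

    /* Uses the same bytes the boot code handed to "finddevice" in head.S. */
    static int example_find_chosen(void)
    {
        return prom_finddevice(prom_chosen_path);
    }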
*/ -unsigned long prom_retain(char *name, +unsigned long prom_retain(const char *name, unsigned long pa_low, unsigned long pa_high, long size, long align) { @@ -290,7 +288,7 @@ int prom_getunumber(int syndrome_code, unsigned long phys_addr, char *buf, int buflen) { - return p1275_cmd("call-method", + return p1275_cmd(prom_callmethod_name, (P1275_ARG(0, P1275_ARG_IN_STRING) | P1275_ARG(3, P1275_ARG_OUT_BUF) | P1275_ARG(6, P1275_ARG_IN_64B) | diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c index 59fe38bba39e..a5a7c5712028 100644 --- a/arch/sparc64/prom/p1275.c +++ b/arch/sparc64/prom/p1275.c @@ -46,7 +46,7 @@ static inline unsigned long spitfire_get_primary_context(void) */ DEFINE_SPINLOCK(prom_entry_lock); -long p1275_cmd (char *service, long fmt, ...) +long p1275_cmd(const char *service, long fmt, ...) { char *p, *q; unsigned long flags; diff --git a/arch/sparc64/prom/printf.c b/arch/sparc64/prom/printf.c index a6df82cafa0d..660943ee4c2a 100644 --- a/arch/sparc64/prom/printf.c +++ b/arch/sparc64/prom/printf.c @@ -34,7 +34,7 @@ prom_write(const char *buf, unsigned int n) } void -prom_printf(char *fmt, ...) +prom_printf(const char *fmt, ...) { va_list args; int i; diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c index ccf73258ebf7..b1ff9e87dcc6 100644 --- a/arch/sparc64/prom/tree.c +++ b/arch/sparc64/prom/tree.c @@ -69,7 +69,7 @@ prom_getsibling(int node) * Return -1 on error. */ __inline__ int -prom_getproplen(int node, char *prop) +prom_getproplen(int node, const char *prop) { if((!node) || (!prop)) return -1; return p1275_cmd ("getproplen", @@ -83,20 +83,20 @@ prom_getproplen(int node, char *prop) * was successful the length will be returned, else -1 is returned. */ __inline__ int -prom_getproperty(int node, char *prop, char *buffer, int bufsize) +prom_getproperty(int node, const char *prop, char *buffer, int bufsize) { int plen; plen = prom_getproplen(node, prop); - if((plen > bufsize) || (plen == 0) || (plen == -1)) + if ((plen > bufsize) || (plen == 0) || (plen == -1)) { return -1; - else { + } else { /* Ok, things seem all right. */ - return p1275_cmd ("getprop", - P1275_ARG(1,P1275_ARG_IN_STRING)| - P1275_ARG(2,P1275_ARG_OUT_BUF)| - P1275_INOUT(4, 1), - node, prop, buffer, P1275_SIZE(plen)); + return p1275_cmd(prom_getprop_name, + P1275_ARG(1,P1275_ARG_IN_STRING)| + P1275_ARG(2,P1275_ARG_OUT_BUF)| + P1275_INOUT(4, 1), + node, prop, buffer, P1275_SIZE(plen)); } } @@ -104,7 +104,7 @@ prom_getproperty(int node, char *prop, char *buffer, int bufsize) * on failure. */ __inline__ int -prom_getint(int node, char *prop) +prom_getint(int node, const char *prop) { int intprop; @@ -119,7 +119,7 @@ prom_getint(int node, char *prop) */ int -prom_getintdefault(int node, char *property, int deflt) +prom_getintdefault(int node, const char *property, int deflt) { int retval; @@ -131,7 +131,7 @@ prom_getintdefault(int node, char *property, int deflt) /* Acquire a boolean property, 1=TRUE 0=FALSE. */ int -prom_getbool(int node, char *prop) +prom_getbool(int node, const char *prop) { int retval; @@ -145,7 +145,7 @@ prom_getbool(int node, char *prop) * buffer. 
*/ void -prom_getstring(int node, char *prop, char *user_buf, int ubuf_size) +prom_getstring(int node, const char *prop, char *user_buf, int ubuf_size) { int len; @@ -160,7 +160,7 @@ prom_getstring(int node, char *prop, char *user_buf, int ubuf_size) * YES = 1 NO = 0 */ int -prom_nodematch(int node, char *name) +prom_nodematch(int node, const char *name) { char namebuf[128]; prom_getproperty(node, "name", namebuf, sizeof(namebuf)); @@ -172,7 +172,7 @@ prom_nodematch(int node, char *name) * 'nodename'. Return node if successful, zero if not. */ int -prom_searchsiblings(int node_start, char *nodename) +prom_searchsiblings(int node_start, const char *nodename) { int thisnode, error; @@ -294,7 +294,7 @@ prom_firstprop(int node, char *buffer) * property types for this node. */ __inline__ char * -prom_nextprop(int node, char *oprop, char *buffer) +prom_nextprop(int node, const char *oprop, char *buffer) { char buf[32]; @@ -314,15 +314,17 @@ prom_nextprop(int node, char *oprop, char *buffer) } int -prom_finddevice(char *name) +prom_finddevice(const char *name) { - if(!name) return 0; - return p1275_cmd ("finddevice", P1275_ARG(0,P1275_ARG_IN_STRING)| - P1275_INOUT(1, 1), - name); + if (!name) + return 0; + return p1275_cmd(prom_finddev_name, + P1275_ARG(0,P1275_ARG_IN_STRING)| + P1275_INOUT(1, 1), + name); } -int prom_node_has_property(int node, char *prop) +int prom_node_has_property(int node, const char *prop) { char buf [32]; @@ -339,7 +341,7 @@ int prom_node_has_property(int node, char *prop) * of 'size' bytes. Return the number of bytes the prom accepted. */ int -prom_setprop(int node, char *pname, char *value, int size) +prom_setprop(int node, const char *pname, char *value, int size) { if(size == 0) return 0; if((pname == 0) || (value == 0)) return 0; @@ -364,7 +366,7 @@ prom_inst2pkg(int inst) * FIXME: Should work for v0 as well */ int -prom_pathtoinode(char *path) +prom_pathtoinode(const char *path) { int node, inst; diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h index a432d9e7daaa..c628189b6c89 100644 --- a/include/asm-sparc64/oplib.h +++ b/include/asm-sparc64/oplib.h @@ -38,6 +38,20 @@ extern int prom_stdin, prom_stdout; */ extern int prom_chosen_node; +/* Helper values and strings in arch/sparc64/kernel/head.S */ +extern const char prom_finddev_name[]; +extern const char prom_chosen_path[]; +extern const char prom_getprop_name[]; +extern const char prom_mmu_name[]; +extern const char prom_callmethod_name[]; +extern const char prom_translate_name[]; +extern const char prom_map_name[]; +extern const char prom_unmap_name[]; +extern int prom_mmu_ihandle_cache; +extern unsigned int prom_boot_mapped_pc; +extern unsigned int prom_boot_mapping_mode; +extern unsigned long prom_boot_mapping_phys_high, prom_boot_mapping_phys_low; + struct linux_mlist_p1275 { struct linux_mlist_p1275 *theres_more; unsigned long start_adr; @@ -68,7 +82,7 @@ extern char *prom_getbootargs(void); * of the string is different on V0 vs. V2->higher proms. The caller must * know what he/she is doing! Returns the device descriptor, an int. */ -extern int prom_devopen(char *device_string); +extern int prom_devopen(const char *device_string); /* Close a previously opened device described by the passed integer * descriptor. @@ -98,10 +112,10 @@ extern struct linux_mem_p1275 *prom_meminfo(void); /* Miscellaneous routines, don't really fit in any category per se. */ /* Reboot the machine with the command line passed. 
*/ -extern void prom_reboot(char *boot_command); +extern void prom_reboot(const char *boot_command); /* Evaluate the forth string passed. */ -extern void prom_feval(char *forth_string); +extern void prom_feval(const char *forth_string); /* Enter the prom, with possibility of continuation with the 'go' * command in newer proms. @@ -154,7 +168,7 @@ extern char prom_getchar(void); extern void prom_putchar(char character); /* Prom's internal routines, don't use in kernel/boot code. */ -extern void prom_printf(char *fmt, ...); +extern void prom_printf(const char *fmt, ...); extern void prom_write(const char *buf, unsigned int len); /* Query for input device type */ @@ -215,7 +229,7 @@ extern int prom_getunumber(int syndrome_code, char *buf, int buflen); /* Retain physical memory to the caller across soft resets. */ -extern unsigned long prom_retain(char *name, +extern unsigned long prom_retain(const char *name, unsigned long pa_low, unsigned long pa_high, long size, long align); @@ -269,28 +283,28 @@ extern int prom_getsibling(int node); /* Get the length, at the passed node, of the given property type. * Returns -1 on error (ie. no such property at this node). */ -extern int prom_getproplen(int thisnode, char *property); +extern int prom_getproplen(int thisnode, const char *property); /* Fetch the requested property using the given buffer. Returns * the number of bytes the prom put into your buffer or -1 on error. */ -extern int prom_getproperty(int thisnode, char *property, +extern int prom_getproperty(int thisnode, const char *property, char *prop_buffer, int propbuf_size); /* Acquire an integer property. */ -extern int prom_getint(int node, char *property); +extern int prom_getint(int node, const char *property); /* Acquire an integer property, with a default value. */ -extern int prom_getintdefault(int node, char *property, int defval); +extern int prom_getintdefault(int node, const char *property, int defval); /* Acquire a boolean property, 0=FALSE 1=TRUE. */ -extern int prom_getbool(int node, char *prop); +extern int prom_getbool(int node, const char *prop); /* Acquire a string property, null string on error. */ -extern void prom_getstring(int node, char *prop, char *buf, int bufsize); +extern void prom_getstring(int node, const char *prop, char *buf, int bufsize); /* Does the passed node have the given "name"? YES=1 NO=0 */ -extern int prom_nodematch(int thisnode, char *name); +extern int prom_nodematch(int thisnode, const char *name); /* Puts in buffer a prom name in the form name@x,y or name (x for which_io * and y for first regs phys address @@ -300,7 +314,7 @@ extern int prom_getname(int node, char *buf, int buflen); /* Search all siblings starting at the passed node for "name" matching * the given string. Returns the node on success, zero on failure. */ -extern int prom_searchsiblings(int node_start, char *name); +extern int prom_searchsiblings(int node_start, const char *name); /* Return the first property type, as a string, for the given node. * Returns a null string on error. Buffer should be at least 32B long. @@ -310,21 +324,21 @@ extern char *prom_firstprop(int node, char *buffer); /* Returns the next property after the passed property for the given * node. Returns null string on failure. Buffer should be at least 32B long. */ -extern char *prom_nextprop(int node, char *prev_property, char *buffer); +extern char *prom_nextprop(int node, const char *prev_property, char *buffer); /* Returns 1 if the specified node has given property. 
*/ -extern int prom_node_has_property(int node, char *property); +extern int prom_node_has_property(int node, const char *property); /* Returns phandle of the path specified */ -extern int prom_finddevice(char *name); +extern int prom_finddevice(const char *name); /* Set the indicated property at the given node with the passed value. * Returns the number of bytes of your value that the prom took. */ -extern int prom_setprop(int node, char *prop_name, char *prop_value, +extern int prom_setprop(int node, const char *prop_name, char *prop_value, int value_size); -extern int prom_pathtoinode(char *path); +extern int prom_pathtoinode(const char *path); extern int prom_inst2pkg(int); /* CPU probing helpers. */ @@ -334,7 +348,7 @@ int cpu_find_by_mid(int mid, int *prom_node); /* Client interface level routines. */ extern void prom_set_trap_table(unsigned long tba); -extern long p1275_cmd (char *, long, ...); +extern long p1275_cmd(const char *, long, ...); #if 0 -- cgit v1.2.3 From e4c94330e3395ae87451bded2840a25d04f27902 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Sep 2005 21:43:45 -0700 Subject: [PATCH] reboot: comment and factor the main reboot functions In the lead up to 2.6.13 I fixed a large number of reboot problems by making the calling conventions consistent. Despite checking and double checking my work it appears I missed an obvious one. This first patch simply refactors the reboot routines so all of the preparation for various kinds of reboots are in their own functions. Making it very hard to get the various kinds of reboot out of sync. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reboot.h | 4 ++++ kernel/sys.c | 52 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 3b3266ff1a95..7ab2cdb83ef0 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -59,6 +59,10 @@ extern void machine_crash_shutdown(struct pt_regs *); * Architecture independent implemenations of sys_reboot commands. */ +extern void kernel_restart_prepare(char *cmd); +extern void kernel_halt_prepare(void); +extern void kernel_power_off_prepare(void); + extern void kernel_restart(char *cmd); extern void kernel_halt(void); extern void kernel_power_off(void); diff --git a/kernel/sys.c b/kernel/sys.c index f723522e6986..2fa1ed18123c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -361,17 +361,35 @@ out_unlock: return retval; } +/** + * emergency_restart - reboot the system + * + * Without shutting down any hardware or taking any locks + * reboot the system. This is called when we know we are in + * trouble so this is our best effort to reboot. This is + * safe to call in interrupt context. + */ void emergency_restart(void) { machine_emergency_restart(); } EXPORT_SYMBOL_GPL(emergency_restart); -void kernel_restart(char *cmd) +/** + * kernel_restart - reboot the system + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. 
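The three *_prepare() helpers declared above are filled in by the kernel/sys.c hunk that follows: every reboot-like path runs the shared notifier / system_state / device_shutdown() sequence through its *_prepare() helper before the final machine_*() call, and kernel_kexec() reuses kernel_restart_prepare(NULL) instead of open-coding it. A hedged sketch of the kind of caller the split enables; the function below is hypothetical and not part of the patch:

    #include <linux/reboot.h>

    /* A platform power-off path that must quiesce devices first can now
     * reuse the common preparation step rather than duplicating it. */
    static void example_firmware_poweroff(void)
    {
        kernel_power_off_prepare();   /* notifiers, SYSTEM_POWER_OFF, device_shutdown() */
        /* ...hand control to platform firmware here... */
    }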
+ */ +void kernel_restart_prepare(char *cmd) { notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); +} +void kernel_restart(char *cmd) +{ + kernel_restart_prepare(cmd); if (!cmd) { printk(KERN_EMERG "Restarting system.\n"); } else { @@ -382,6 +400,12 @@ void kernel_restart(char *cmd) } EXPORT_SYMBOL_GPL(kernel_restart); +/** + * kernel_kexec - reboot the system + * + * Move into place and start executing a preloaded standalone + * executable. If nothing was preloaded return an error. + */ void kernel_kexec(void) { #ifdef CONFIG_KEXEC @@ -390,9 +414,7 @@ void kernel_kexec(void) if (!image) { return; } - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); - system_state = SYSTEM_RESTART; - device_shutdown(); + kernel_restart_prepare(NULL); printk(KERN_EMERG "Starting new kernel\n"); machine_shutdown(); machine_kexec(image); @@ -400,21 +422,39 @@ void kernel_kexec(void) } EXPORT_SYMBOL_GPL(kernel_kexec); -void kernel_halt(void) +/** + * kernel_halt - halt the system + * + * Shutdown everything and perform a clean system halt. + */ +void kernel_halt_prepare(void) { notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); system_state = SYSTEM_HALT; device_shutdown(); +} +void kernel_halt(void) +{ + kernel_halt_prepare(); printk(KERN_EMERG "System halted.\n"); machine_halt(); } EXPORT_SYMBOL_GPL(kernel_halt); -void kernel_power_off(void) +/** + * kernel_power_off - power_off the system + * + * Shutdown everything and perform a clean system power_off. + */ +void kernel_power_off_prepare(void) { notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); system_state = SYSTEM_POWER_OFF; device_shutdown(); +} +void kernel_power_off(void) +{ + kernel_power_off_prepare(); printk(KERN_EMERG "Power down.\n"); machine_power_off(); } -- cgit v1.2.3 From 7243cc05bafdda4c4de77cba00cf87666bd237f7 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 22 Sep 2005 21:43:58 -0700 Subject: [PATCH] slab: alpha inlining fix It is essential that index_of() be inlined. But alpha undoes the gcc inlining hackery and index_of() ends up out-of-line. So fiddle with things to make that function inline again. Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/compiler.h | 5 ++++- mm/slab.c | 7 ++++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-alpha/compiler.h b/include/asm-alpha/compiler.h index 399c33b7be51..0a4a8b40dfcd 100644 --- a/include/asm-alpha/compiler.h +++ b/include/asm-alpha/compiler.h @@ -98,6 +98,9 @@ #undef inline #undef __inline__ #undef __inline - +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 +#undef __always_inline +#define __always_inline inline __attribute__((always_inline)) +#endif #endif /* __ALPHA_COMPILER_H */ diff --git a/mm/slab.c b/mm/slab.c index 437d3388054b..cf19ff2ab5e2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -308,12 +308,12 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; #define SIZE_L3 (1 + MAX_NUMNODES) /* - * This function may be completely optimized away if + * This function must be completely optimized away if * a constant is passed to it. Mostly the same as * what is in linux/slab.h except it returns an * index. 
*/ -static inline int index_of(const size_t size) +static __always_inline int index_of(const size_t size) { if (__builtin_constant_p(size)) { int i = 0; @@ -329,7 +329,8 @@ static inline int index_of(const size_t size) extern void __bad_size(void); __bad_size(); } - } + } else + BUG(); return 0; } -- cgit v1.2.3 From 0365ba7fb1fa94a41289d6a3d36b4d95960e56cc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 22 Sep 2005 21:44:06 -0700 Subject: [PATCH] ppc64: SMU driver update & i2c support The SMU is the "system controller" chip used by Apple recent G5 machines including the iMac G5. It drives things like fans, i2c busses, real time clock, etc... The current kernel contains a very crude driver that doesn't do much more than reading the real time clock synchronously. This is a completely rewritten driver that provides interrupt based command queuing, a userland interface, and an i2c/smbus driver for accessing the devices hanging off the SMU i2c busses like temperature sensors. This driver is a basic block for upcoming work on thermal control for those machines, among others. Signed-off-by: Benjamin Herrenschmidt Cc: Jean Delvare Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/platforms/pmac_setup.c | 10 +- arch/ppc/syslib/of_device.c | 6 +- arch/ppc64/kernel/of_device.c | 7 +- arch/ppc64/kernel/pmac_setup.c | 18 +- arch/ppc64/kernel/pmac_time.c | 4 +- drivers/i2c/busses/Kconfig | 12 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-pmac-smu.c | 316 +++++++++++ drivers/macintosh/smu.c | 1030 +++++++++++++++++++++++++++++----- drivers/macintosh/therm_adt746x.c | 2 +- drivers/macintosh/therm_pm72.c | 2 +- drivers/macintosh/therm_windtunnel.c | 2 +- include/asm-ppc/macio.h | 1 - include/asm-ppc/of_device.h | 5 +- include/asm-ppc64/smu.h | 365 +++++++++++- 15 files changed, 1618 insertions(+), 163 deletions(-) create mode 100644 drivers/i2c/busses/i2c-pmac-smu.c (limited to 'include') diff --git a/arch/ppc/platforms/pmac_setup.c b/arch/ppc/platforms/pmac_setup.c index b392b9a15987..4c56a4734aec 100644 --- a/arch/ppc/platforms/pmac_setup.c +++ b/arch/ppc/platforms/pmac_setup.c @@ -719,7 +719,8 @@ pmac_declare_of_platform_devices(void) if (np) { for (np = np->child; np != NULL; np = np->sibling) if (strncmp(np->name, "i2c", 3) == 0) { - of_platform_device_create(np, "uni-n-i2c"); + of_platform_device_create(np, "uni-n-i2c", + NULL); break; } } @@ -727,17 +728,18 @@ pmac_declare_of_platform_devices(void) if (np) { for (np = np->child; np != NULL; np = np->sibling) if (strncmp(np->name, "i2c", 3) == 0) { - of_platform_device_create(np, "u3-i2c"); + of_platform_device_create(np, "u3-i2c", + NULL); break; } } np = find_devices("valkyrie"); if (np) - of_platform_device_create(np, "valkyrie"); + of_platform_device_create(np, "valkyrie", NULL); np = find_devices("platinum"); if (np) - of_platform_device_create(np, "platinum"); + of_platform_device_create(np, "platinum", NULL); return 0; } diff --git a/arch/ppc/syslib/of_device.c b/arch/ppc/syslib/of_device.c index da8a0f2128dc..93c7231ea709 100644 --- a/arch/ppc/syslib/of_device.c +++ b/arch/ppc/syslib/of_device.c @@ -234,7 +234,9 @@ void of_device_unregister(struct of_device *ofdev) device_unregister(&ofdev->dev); } -struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) +struct of_device* of_platform_device_create(struct device_node *np, + const char *bus_id, + struct device *parent) { struct of_device *dev; u32 *reg; @@ -247,7 +249,7 @@ struct of_device* 
of_platform_device_create(struct device_node *np, const char * dev->node = of_node_get(np); dev->dma_mask = 0xffffffffUL; dev->dev.dma_mask = &dev->dma_mask; - dev->dev.parent = NULL; + dev->dev.parent = parent; dev->dev.bus = &of_platform_bus_type; dev->dev.release = of_release_dev; diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c index da580812ddfe..9f200f0f2ad5 100644 --- a/arch/ppc64/kernel/of_device.c +++ b/arch/ppc64/kernel/of_device.c @@ -233,7 +233,9 @@ void of_device_unregister(struct of_device *ofdev) device_unregister(&ofdev->dev); } -struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) +struct of_device* of_platform_device_create(struct device_node *np, + const char *bus_id, + struct device *parent) { struct of_device *dev; @@ -245,7 +247,7 @@ struct of_device* of_platform_device_create(struct device_node *np, const char * dev->node = np; dev->dma_mask = 0xffffffffUL; dev->dev.dma_mask = &dev->dma_mask; - dev->dev.parent = NULL; + dev->dev.parent = parent; dev->dev.bus = &of_platform_bus_type; dev->dev.release = of_release_dev; @@ -259,6 +261,7 @@ struct of_device* of_platform_device_create(struct device_node *np, const char * return dev; } + EXPORT_SYMBOL(of_match_device); EXPORT_SYMBOL(of_platform_bus_type); EXPORT_SYMBOL(of_register_driver); diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index 325426c7bed0..25755252067a 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -434,15 +434,23 @@ static int pmac_check_legacy_ioport(unsigned int baseport) static int __init pmac_declare_of_platform_devices(void) { - struct device_node *np; + struct device_node *np, *npp; - np = find_devices("u3"); - if (np) { - for (np = np->child; np != NULL; np = np->sibling) + npp = of_find_node_by_name(NULL, "u3"); + if (npp) { + for (np = NULL; (np = of_get_next_child(npp, np)) != NULL;) { if (strncmp(np->name, "i2c", 3) == 0) { - of_platform_device_create(np, "u3-i2c"); + of_platform_device_create(np, "u3-i2c", NULL); + of_node_put(np); break; } + } + of_node_put(npp); + } + npp = of_find_node_by_type(NULL, "smu"); + if (npp) { + of_platform_device_create(npp, "smu", NULL); + of_node_put(npp); } return 0; diff --git a/arch/ppc64/kernel/pmac_time.c b/arch/ppc64/kernel/pmac_time.c index 3059edb09cc8..41bbb8c59697 100644 --- a/arch/ppc64/kernel/pmac_time.c +++ b/arch/ppc64/kernel/pmac_time.c @@ -84,7 +84,7 @@ void __pmac pmac_get_rtc_time(struct rtc_time *tm) #ifdef CONFIG_PMAC_SMU case SYS_CTRLER_SMU: - smu_get_rtc_time(tm); + smu_get_rtc_time(tm, 1); break; #endif /* CONFIG_PMAC_SMU */ default: @@ -128,7 +128,7 @@ int __pmac pmac_set_rtc_time(struct rtc_time *tm) #ifdef CONFIG_PMAC_SMU case SYS_CTRLER_SMU: - return smu_set_rtc_time(tm); + return smu_set_rtc_time(tm, 1); #endif /* CONFIG_PMAC_SMU */ default: return -ENODEV; diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 8334496a7e0a..3badfec75b1c 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -245,6 +245,18 @@ config I2C_KEYWEST This support is also available as a module. If so, the module will be called i2c-keywest. +config I2C_PMAC_SMU + tristate "Powermac SMU I2C interface" + depends on I2C && PMAC_SMU + help + This supports the use of the I2C interface in the SMU + chip on recent Apple machines like the iMac G5. It is used + among others by the thermal control driver for those machines. + Say Y if you have such a machine. 
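For reference, of_platform_device_create() now takes a third argument, the parent struct device, as the ppc and ppc64 of_device.c hunks above show; callers pass NULL for top-level devices or a real parent so children (such as the SMU i2c busses registered later in this patch) land in the right place in the device hierarchy. A short sketch mirroring the new pmac_setup.c registration; the wrapper name is illustrative:

    static int __init example_register_smu(void)
    {
        struct device_node *np = of_find_node_by_type(NULL, "smu");

        if (np) {
            /* third argument is the new parent pointer; NULL = top level */
            of_platform_device_create(np, "smu", NULL);
            of_node_put(np);
        }
        return 0;
    }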
+ + This support is also available as a module. If so, the module + will be called i2c-pmac-smu. + config I2C_MPC tristate "MPC107/824x/85xx/52xx" depends on I2C && PPC32 diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 980b3e983670..f1df00f66c6c 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_I2C_ITE) += i2c-ite.o obj-$(CONFIG_I2C_IXP2000) += i2c-ixp2000.o obj-$(CONFIG_I2C_IXP4XX) += i2c-ixp4xx.o obj-$(CONFIG_I2C_KEYWEST) += i2c-keywest.o +obj-$(CONFIG_I2C_PMAC_SMU) += i2c-pmac-smu.o obj-$(CONFIG_I2C_MPC) += i2c-mpc.o obj-$(CONFIG_I2C_MV64XXX) += i2c-mv64xxx.o obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o diff --git a/drivers/i2c/busses/i2c-pmac-smu.c b/drivers/i2c/busses/i2c-pmac-smu.c new file mode 100644 index 000000000000..8a9f5648a23d --- /dev/null +++ b/drivers/i2c/busses/i2c-pmac-smu.c @@ -0,0 +1,316 @@ +/* + i2c Support for Apple SMU Controller + + Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp. + + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int probe; + +MODULE_AUTHOR("Benjamin Herrenschmidt "); +MODULE_DESCRIPTION("I2C driver for Apple's SMU"); +MODULE_LICENSE("GPL"); +module_param(probe, bool, 0); + + +/* Physical interface */ +struct smu_iface +{ + struct i2c_adapter adapter; + struct completion complete; + u32 busid; +}; + +static void smu_i2c_done(struct smu_i2c_cmd *cmd, void *misc) +{ + struct smu_iface *iface = misc; + complete(&iface->complete); +} + +/* + * SMBUS-type transfer entrypoint + */ +static s32 smu_smbus_xfer( struct i2c_adapter* adap, + u16 addr, + unsigned short flags, + char read_write, + u8 command, + int size, + union i2c_smbus_data* data) +{ + struct smu_iface *iface = i2c_get_adapdata(adap); + struct smu_i2c_cmd cmd; + int rc = 0; + int read = (read_write == I2C_SMBUS_READ); + + cmd.info.bus = iface->busid; + cmd.info.devaddr = (addr << 1) | (read ? 
0x01 : 0x00); + + /* Prepare datas & select mode */ + switch (size) { + case I2C_SMBUS_QUICK: + cmd.info.type = SMU_I2C_TRANSFER_SIMPLE; + cmd.info.datalen = 0; + break; + case I2C_SMBUS_BYTE: + cmd.info.type = SMU_I2C_TRANSFER_SIMPLE; + cmd.info.datalen = 1; + if (!read) + cmd.info.data[0] = data->byte; + break; + case I2C_SMBUS_BYTE_DATA: + cmd.info.type = SMU_I2C_TRANSFER_STDSUB; + cmd.info.datalen = 1; + cmd.info.sublen = 1; + cmd.info.subaddr[0] = command; + cmd.info.subaddr[1] = 0; + cmd.info.subaddr[2] = 0; + if (!read) + cmd.info.data[0] = data->byte; + break; + case I2C_SMBUS_WORD_DATA: + cmd.info.type = SMU_I2C_TRANSFER_STDSUB; + cmd.info.datalen = 2; + cmd.info.sublen = 1; + cmd.info.subaddr[0] = command; + cmd.info.subaddr[1] = 0; + cmd.info.subaddr[2] = 0; + if (!read) { + cmd.info.data[0] = data->byte & 0xff; + cmd.info.data[1] = (data->byte >> 8) & 0xff; + } + break; + /* Note that these are broken vs. the expected smbus API where + * on reads, the lenght is actually returned from the function, + * but I think the current API makes no sense and I don't want + * any driver that I haven't verified for correctness to go + * anywhere near a pmac i2c bus anyway ... + */ + case I2C_SMBUS_BLOCK_DATA: + cmd.info.type = SMU_I2C_TRANSFER_STDSUB; + cmd.info.datalen = data->block[0] + 1; + if (cmd.info.datalen > 6) + return -EINVAL; + if (!read) + memcpy(cmd.info.data, data->block, cmd.info.datalen); + cmd.info.sublen = 1; + cmd.info.subaddr[0] = command; + cmd.info.subaddr[1] = 0; + cmd.info.subaddr[2] = 0; + break; + case I2C_SMBUS_I2C_BLOCK_DATA: + cmd.info.type = SMU_I2C_TRANSFER_STDSUB; + cmd.info.datalen = data->block[0]; + if (cmd.info.datalen > 7) + return -EINVAL; + if (!read) + memcpy(cmd.info.data, &data->block[1], + cmd.info.datalen); + cmd.info.sublen = 1; + cmd.info.subaddr[0] = command; + cmd.info.subaddr[1] = 0; + cmd.info.subaddr[2] = 0; + break; + + default: + return -EINVAL; + } + + /* Turn a standardsub read into a combined mode access */ + if (read_write == I2C_SMBUS_READ && + cmd.info.type == SMU_I2C_TRANSFER_STDSUB) + cmd.info.type = SMU_I2C_TRANSFER_COMBINED; + + /* Finish filling command and submit it */ + cmd.done = smu_i2c_done; + cmd.misc = iface; + rc = smu_queue_i2c(&cmd); + if (rc < 0) + return rc; + wait_for_completion(&iface->complete); + rc = cmd.status; + + if (!read || rc < 0) + return rc; + + switch (size) { + case I2C_SMBUS_BYTE: + case I2C_SMBUS_BYTE_DATA: + data->byte = cmd.info.data[0]; + break; + case I2C_SMBUS_WORD_DATA: + data->word = ((u16)cmd.info.data[1]) << 8; + data->word |= cmd.info.data[0]; + break; + /* Note that these are broken vs. the expected smbus API where + * on reads, the lenght is actually returned from the function, + * but I think the current API makes no sense and I don't want + * any driver that I haven't verified for correctness to go + * anywhere near a pmac i2c bus anyway ... 
+ */ + case I2C_SMBUS_BLOCK_DATA: + case I2C_SMBUS_I2C_BLOCK_DATA: + memcpy(&data->block[0], cmd.info.data, cmd.info.datalen); + break; + } + + return rc; +} + +static u32 +smu_smbus_func(struct i2c_adapter * adapter) +{ + return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | + I2C_FUNC_SMBUS_BLOCK_DATA; +} + +/* For now, we only handle combined mode (smbus) */ +static struct i2c_algorithm smu_algorithm = { + .smbus_xfer = smu_smbus_xfer, + .functionality = smu_smbus_func, +}; + +static int create_iface(struct device_node *np, struct device *dev) +{ + struct smu_iface* iface; + u32 *reg, busid; + int rc; + + reg = (u32 *)get_property(np, "reg", NULL); + if (reg == NULL) { + printk(KERN_ERR "i2c-pmac-smu: can't find bus number !\n"); + return -ENXIO; + } + busid = *reg; + + iface = kmalloc(sizeof(struct smu_iface), GFP_KERNEL); + if (iface == NULL) { + printk(KERN_ERR "i2c-pmac-smu: can't allocate inteface !\n"); + return -ENOMEM; + } + memset(iface, 0, sizeof(struct smu_iface)); + init_completion(&iface->complete); + iface->busid = busid; + + dev_set_drvdata(dev, iface); + + sprintf(iface->adapter.name, "smu-i2c-%02x", busid); + iface->adapter.algo = &smu_algorithm; + iface->adapter.algo_data = NULL; + iface->adapter.client_register = NULL; + iface->adapter.client_unregister = NULL; + i2c_set_adapdata(&iface->adapter, iface); + iface->adapter.dev.parent = dev; + + rc = i2c_add_adapter(&iface->adapter); + if (rc) { + printk(KERN_ERR "i2c-pamc-smu.c: Adapter %s registration " + "failed\n", iface->adapter.name); + i2c_set_adapdata(&iface->adapter, NULL); + } + + if (probe) { + unsigned char addr; + printk("Probe: "); + for (addr = 0x00; addr <= 0x7f; addr++) { + if (i2c_smbus_xfer(&iface->adapter,addr, + 0,0,0,I2C_SMBUS_QUICK,NULL) >= 0) + printk("%02x ", addr); + } + printk("\n"); + } + + printk(KERN_INFO "SMU i2c bus %x registered\n", busid); + + return 0; +} + +static int dispose_iface(struct device *dev) +{ + struct smu_iface *iface = dev_get_drvdata(dev); + int rc; + + rc = i2c_del_adapter(&iface->adapter); + i2c_set_adapdata(&iface->adapter, NULL); + /* We aren't that prepared to deal with this... 
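The asynchronous pattern smu_smbus_xfer() is built on reduces to a few lines: fill a struct smu_i2c_cmd, point ->done at a callback that signals a completion, queue it with smu_queue_i2c(), and sleep until the SMU interrupt path calls back. A condensed, hypothetical example performing the zero-length "quick" write used by the probe loop above (the function names are illustrative; the smu_i2c_cmd fields are the ones the driver uses):

    #include <linux/completion.h>
    #include <asm/smu.h>        /* struct smu_i2c_cmd, smu_queue_i2c() */

    static void example_i2c_done(struct smu_i2c_cmd *cmd, void *misc)
    {
        complete((struct completion *)misc);
    }

    /* Probe address 'addr' on SMU bus 'busid'; returns cmd.status (<0 on error). */
    static int example_i2c_quick(u32 busid, u8 addr)
    {
        struct smu_i2c_cmd cmd;
        struct completion comp;
        int rc;

        init_completion(&comp);
        cmd.info.bus     = busid;
        cmd.info.devaddr = addr << 1;           /* write direction, no data */
        cmd.info.type    = SMU_I2C_TRANSFER_SIMPLE;
        cmd.info.datalen = 0;
        cmd.done         = example_i2c_done;
        cmd.misc         = &comp;

        rc = smu_queue_i2c(&cmd);
        if (rc < 0)
            return rc;
        wait_for_completion(&comp);
        return cmd.status;
    }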
*/ + if (rc) + printk("i2c-pmac-smu.c: Failed to remove bus %s !\n", + iface->adapter.name); + dev_set_drvdata(dev, NULL); + kfree(iface); + + return 0; +} + + +static int create_iface_of_platform(struct of_device* dev, + const struct of_device_id *match) +{ + return create_iface(dev->node, &dev->dev); +} + + +static int dispose_iface_of_platform(struct of_device* dev) +{ + return dispose_iface(&dev->dev); +} + + +static struct of_device_id i2c_smu_match[] = +{ + { + .compatible = "smu-i2c", + }, + {}, +}; +static struct of_platform_driver i2c_smu_of_platform_driver = +{ + .name = "i2c-smu", + .match_table = i2c_smu_match, + .probe = create_iface_of_platform, + .remove = dispose_iface_of_platform +}; + + +static int __init i2c_pmac_smu_init(void) +{ + of_register_driver(&i2c_smu_of_platform_driver); + return 0; +} + + +static void __exit i2c_pmac_smu_cleanup(void) +{ + of_unregister_driver(&i2c_smu_of_platform_driver); +} + +module_init(i2c_pmac_smu_init); +module_exit(i2c_pmac_smu_cleanup); diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index fb535737d17d..a85ac18dd21d 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -8,21 +8,15 @@ */ /* - * For now, this driver includes: - * - RTC get & set - * - reboot & shutdown commands - * all synchronous with IRQ disabled (ugh) - * * TODO: - * rework in a way the PMU driver works, that is asynchronous - * with a queue of commands. I'll do that as soon as I have an - * SMU based machine at hand. Some more cleanup is needed too, - * like maybe fitting it into a platform device, etc... - * Also check what's up with cache coherency, and if we really - * can't do better than flushing the cache, maybe build a table - * of command len/reply len like the PMU driver to only flush - * what is actually necessary. - * --BenH. + * - maybe add timeout to commands ? + * - blocking version of time functions + * - polling version of i2c commands (including timer that works with + * interrutps off) + * - maybe avoid some data copies with i2c by directly using the smu cmd + * buffer and a lower level internal interface + * - understand SMU -> CPU events and implement reception of them via + * the userland interface */ #include @@ -36,6 +30,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -45,8 +44,13 @@ #include #include #include +#include +#include + +#define VERSION "0.6" +#define AUTHOR "(c) 2005 Benjamin Herrenschmidt, IBM Corp." -#define DEBUG_SMU 1 +#undef DEBUG_SMU #ifdef DEBUG_SMU #define DPRINTK(fmt, args...) 
do { printk(KERN_DEBUG fmt , ##args); } while (0) @@ -57,20 +61,30 @@ /* * This is the command buffer passed to the SMU hardware */ +#define SMU_MAX_DATA 254 + struct smu_cmd_buf { u8 cmd; u8 length; - u8 data[0x0FFE]; + u8 data[SMU_MAX_DATA]; }; struct smu_device { spinlock_t lock; struct device_node *of_node; - int db_ack; /* doorbell ack GPIO */ - int db_req; /* doorbell req GPIO */ + struct of_device *of_dev; + int doorbell; /* doorbell gpio */ u32 __iomem *db_buf; /* doorbell buffer */ + int db_irq; + int msg; + int msg_irq; struct smu_cmd_buf *cmd_buf; /* command buffer virtual */ u32 cmd_buf_abs; /* command buffer absolute */ + struct list_head cmd_list; + struct smu_cmd *cmd_cur; /* pending command */ + struct list_head cmd_i2c_list; + struct smu_i2c_cmd *cmd_i2c_cur; /* pending i2c command */ + struct timer_list i2c_timer; }; /* @@ -79,113 +93,243 @@ struct smu_device { */ static struct smu_device *smu; + /* - * SMU low level communication stuff + * SMU driver low level stuff */ -static inline int smu_cmd_stat(struct smu_cmd_buf *cmd_buf, u8 cmd_ack) -{ - rmb(); - return cmd_buf->cmd == cmd_ack && cmd_buf->length != 0; -} -static inline u8 smu_save_ack_cmd(struct smu_cmd_buf *cmd_buf) +static void smu_start_cmd(void) { - return (~cmd_buf->cmd) & 0xff; -} + unsigned long faddr, fend; + struct smu_cmd *cmd; -static void smu_send_cmd(struct smu_device *dev) -{ - /* SMU command buf is currently cacheable, we need a physical - * address. This isn't exactly a DMA mapping here, I suspect + if (list_empty(&smu->cmd_list)) + return; + + /* Fetch first command in queue */ + cmd = list_entry(smu->cmd_list.next, struct smu_cmd, link); + smu->cmd_cur = cmd; + list_del(&cmd->link); + + DPRINTK("SMU: starting cmd %x, %d bytes data\n", cmd->cmd, + cmd->data_len); + DPRINTK("SMU: data buffer: %02x %02x %02x %02x ...\n", + ((u8 *)cmd->data_buf)[0], ((u8 *)cmd->data_buf)[1], + ((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3]); + + /* Fill the SMU command buffer */ + smu->cmd_buf->cmd = cmd->cmd; + smu->cmd_buf->length = cmd->data_len; + memcpy(smu->cmd_buf->data, cmd->data_buf, cmd->data_len); + + /* Flush command and data to RAM */ + faddr = (unsigned long)smu->cmd_buf; + fend = faddr + smu->cmd_buf->length + 2; + flush_inval_dcache_range(faddr, fend); + + /* This isn't exactly a DMA mapping here, I suspect * the SMU is actually communicating with us via i2c to the * northbridge or the CPU to access RAM. 
*/ - writel(dev->cmd_buf_abs, dev->db_buf); + writel(smu->cmd_buf_abs, smu->db_buf); /* Ring the SMU doorbell */ - pmac_do_feature_call(PMAC_FTR_WRITE_GPIO, NULL, dev->db_req, 4); - pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, dev->db_req, 4); + pmac_do_feature_call(PMAC_FTR_WRITE_GPIO, NULL, smu->doorbell, 4); } -static int smu_cmd_done(struct smu_device *dev) + +static irqreturn_t smu_db_intr(int irq, void *arg, struct pt_regs *regs) { - unsigned long wait = 0; - int gpio; + unsigned long flags; + struct smu_cmd *cmd; + void (*done)(struct smu_cmd *cmd, void *misc) = NULL; + void *misc = NULL; + u8 gpio; + int rc = 0; - /* Check the SMU doorbell */ - do { - gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, - NULL, dev->db_ack); - if ((gpio & 7) == 7) - return 0; - udelay(100); - } while(++wait < 10000); + /* SMU completed the command, well, we hope, let's make sure + * of it + */ + spin_lock_irqsave(&smu->lock, flags); - printk(KERN_ERR "SMU timeout !\n"); - return -ENXIO; + gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, smu->doorbell); + if ((gpio & 7) != 7) + return IRQ_HANDLED; + + cmd = smu->cmd_cur; + smu->cmd_cur = NULL; + if (cmd == NULL) + goto bail; + + if (rc == 0) { + unsigned long faddr; + int reply_len; + u8 ack; + + /* CPU might have brought back the cache line, so we need + * to flush again before peeking at the SMU response. We + * flush the entire buffer for now as we haven't read the + * reply lenght (it's only 2 cache lines anyway) + */ + faddr = (unsigned long)smu->cmd_buf; + flush_inval_dcache_range(faddr, faddr + 256); + + /* Now check ack */ + ack = (~cmd->cmd) & 0xff; + if (ack != smu->cmd_buf->cmd) { + DPRINTK("SMU: incorrect ack, want %x got %x\n", + ack, smu->cmd_buf->cmd); + rc = -EIO; + } + reply_len = rc == 0 ? smu->cmd_buf->length : 0; + DPRINTK("SMU: reply len: %d\n", reply_len); + if (reply_len > cmd->reply_len) { + printk(KERN_WARNING "SMU: reply buffer too small," + "got %d bytes for a %d bytes buffer\n", + reply_len, cmd->reply_len); + reply_len = cmd->reply_len; + } + cmd->reply_len = reply_len; + if (cmd->reply_buf && reply_len) + memcpy(cmd->reply_buf, smu->cmd_buf->data, reply_len); + } + + /* Now complete the command. Write status last in order as we lost + * ownership of the command structure as soon as it's no longer -1 + */ + done = cmd->done; + misc = cmd->misc; + mb(); + cmd->status = rc; + bail: + /* Start next command if any */ + smu_start_cmd(); + spin_unlock_irqrestore(&smu->lock, flags); + + /* Call command completion handler if any */ + if (done) + done(cmd, misc); + + /* It's an edge interrupt, nothing to do */ + return IRQ_HANDLED; } -static int smu_do_cmd(struct smu_device *dev) + +static irqreturn_t smu_msg_intr(int irq, void *arg, struct pt_regs *regs) { - int rc; - u8 cmd_ack; + /* I don't quite know what to do with this one, we seem to never + * receive it, so I suspect we have to arm it someway in the SMU + * to start getting events that way. + */ + + printk(KERN_INFO "SMU: message interrupt !\n"); - DPRINTK("SMU do_cmd %02x len=%d %02x\n", - dev->cmd_buf->cmd, dev->cmd_buf->length, - dev->cmd_buf->data[0]); + /* It's an edge interrupt, nothing to do */ + return IRQ_HANDLED; +} - cmd_ack = smu_save_ack_cmd(dev->cmd_buf); - /* Clear cmd_buf cache lines */ - flush_inval_dcache_range((unsigned long)dev->cmd_buf, - ((unsigned long)dev->cmd_buf) + - sizeof(struct smu_cmd_buf)); - smu_send_cmd(dev); - rc = smu_cmd_done(dev); - if (rc == 0) - rc = smu_cmd_stat(dev->cmd_buf, cmd_ack) ? 0 : -1; +/* + * Queued command management. 
+ * + */ - DPRINTK("SMU do_cmd %02x len=%d %02x => %d (%02x)\n", - dev->cmd_buf->cmd, dev->cmd_buf->length, - dev->cmd_buf->data[0], rc, cmd_ack); +int smu_queue_cmd(struct smu_cmd *cmd) +{ + unsigned long flags; - return rc; + if (smu == NULL) + return -ENODEV; + if (cmd->data_len > SMU_MAX_DATA || + cmd->reply_len > SMU_MAX_DATA) + return -EINVAL; + + cmd->status = 1; + spin_lock_irqsave(&smu->lock, flags); + list_add_tail(&cmd->link, &smu->cmd_list); + if (smu->cmd_cur == NULL) + smu_start_cmd(); + spin_unlock_irqrestore(&smu->lock, flags); + + return 0; } +EXPORT_SYMBOL(smu_queue_cmd); -/* RTC low level commands */ -static inline int bcd2hex (int n) + +int smu_queue_simple(struct smu_simple_cmd *scmd, u8 command, + unsigned int data_len, + void (*done)(struct smu_cmd *cmd, void *misc), + void *misc, ...) { - return (((n & 0xf0) >> 4) * 10) + (n & 0xf); + struct smu_cmd *cmd = &scmd->cmd; + va_list list; + int i; + + if (data_len > sizeof(scmd->buffer)) + return -EINVAL; + + memset(scmd, 0, sizeof(*scmd)); + cmd->cmd = command; + cmd->data_len = data_len; + cmd->data_buf = scmd->buffer; + cmd->reply_len = sizeof(scmd->buffer); + cmd->reply_buf = scmd->buffer; + cmd->done = done; + cmd->misc = misc; + + va_start(list, misc); + for (i = 0; i < data_len; ++i) + scmd->buffer[i] = (u8)va_arg(list, int); + va_end(list); + + return smu_queue_cmd(cmd); } +EXPORT_SYMBOL(smu_queue_simple); -static inline int hex2bcd (int n) + +void smu_poll(void) { - return ((n / 10) << 4) + (n % 10); + u8 gpio; + + if (smu == NULL) + return; + + gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, smu->doorbell); + if ((gpio & 7) == 7) + smu_db_intr(smu->db_irq, smu, NULL); } +EXPORT_SYMBOL(smu_poll); + -#if 0 -static inline void smu_fill_set_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf) +void smu_done_complete(struct smu_cmd *cmd, void *misc) { - cmd_buf->cmd = 0x8e; - cmd_buf->length = 8; - cmd_buf->data[0] = 0x00; - memset(cmd_buf->data + 1, 0, 7); + struct completion *comp = misc; + + complete(comp); } +EXPORT_SYMBOL(smu_done_complete); + -static inline void smu_fill_get_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf) +void smu_spinwait_cmd(struct smu_cmd *cmd) { - cmd_buf->cmd = 0x8e; - cmd_buf->length = 1; - cmd_buf->data[0] = 0x01; + while(cmd->status == 1) + smu_poll(); +} +EXPORT_SYMBOL(smu_spinwait_cmd); + + +/* RTC low level commands */ +static inline int bcd2hex (int n) +{ + return (((n & 0xf0) >> 4) * 10) + (n & 0xf); } -static inline void smu_fill_dis_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf) + +static inline int hex2bcd (int n) { - cmd_buf->cmd = 0x8e; - cmd_buf->length = 1; - cmd_buf->data[0] = 0x02; + return ((n / 10) << 4) + (n % 10); } -#endif + static inline void smu_fill_set_rtc_cmd(struct smu_cmd_buf *cmd_buf, struct rtc_time *time) @@ -202,100 +346,96 @@ static inline void smu_fill_set_rtc_cmd(struct smu_cmd_buf *cmd_buf, cmd_buf->data[7] = hex2bcd(time->tm_year - 100); } -static inline void smu_fill_get_rtc_cmd(struct smu_cmd_buf *cmd_buf) -{ - cmd_buf->cmd = 0x8e; - cmd_buf->length = 1; - cmd_buf->data[0] = 0x81; -} -static void smu_parse_get_rtc_reply(struct smu_cmd_buf *cmd_buf, - struct rtc_time *time) +int smu_get_rtc_time(struct rtc_time *time, int spinwait) { - time->tm_sec = bcd2hex(cmd_buf->data[0]); - time->tm_min = bcd2hex(cmd_buf->data[1]); - time->tm_hour = bcd2hex(cmd_buf->data[2]); - time->tm_wday = bcd2hex(cmd_buf->data[3]); - time->tm_mday = bcd2hex(cmd_buf->data[4]); - time->tm_mon = bcd2hex(cmd_buf->data[5]) - 1; - time->tm_year = bcd2hex(cmd_buf->data[6]) + 100; -} - 
-int smu_get_rtc_time(struct rtc_time *time) -{ - unsigned long flags; + struct smu_simple_cmd cmd; int rc; if (smu == NULL) return -ENODEV; memset(time, 0, sizeof(struct rtc_time)); - spin_lock_irqsave(&smu->lock, flags); - smu_fill_get_rtc_cmd(smu->cmd_buf); - rc = smu_do_cmd(smu); - if (rc == 0) - smu_parse_get_rtc_reply(smu->cmd_buf, time); - spin_unlock_irqrestore(&smu->lock, flags); + rc = smu_queue_simple(&cmd, SMU_CMD_RTC_COMMAND, 1, NULL, NULL, + SMU_CMD_RTC_GET_DATETIME); + if (rc) + return rc; + smu_spinwait_simple(&cmd); - return rc; + time->tm_sec = bcd2hex(cmd.buffer[0]); + time->tm_min = bcd2hex(cmd.buffer[1]); + time->tm_hour = bcd2hex(cmd.buffer[2]); + time->tm_wday = bcd2hex(cmd.buffer[3]); + time->tm_mday = bcd2hex(cmd.buffer[4]); + time->tm_mon = bcd2hex(cmd.buffer[5]) - 1; + time->tm_year = bcd2hex(cmd.buffer[6]) + 100; + + return 0; } -int smu_set_rtc_time(struct rtc_time *time) + +int smu_set_rtc_time(struct rtc_time *time, int spinwait) { - unsigned long flags; + struct smu_simple_cmd cmd; int rc; if (smu == NULL) return -ENODEV; - spin_lock_irqsave(&smu->lock, flags); - smu_fill_set_rtc_cmd(smu->cmd_buf, time); - rc = smu_do_cmd(smu); - spin_unlock_irqrestore(&smu->lock, flags); + rc = smu_queue_simple(&cmd, SMU_CMD_RTC_COMMAND, 8, NULL, NULL, + SMU_CMD_RTC_SET_DATETIME, + hex2bcd(time->tm_sec), + hex2bcd(time->tm_min), + hex2bcd(time->tm_hour), + time->tm_wday, + hex2bcd(time->tm_mday), + hex2bcd(time->tm_mon) + 1, + hex2bcd(time->tm_year - 100)); + if (rc) + return rc; + smu_spinwait_simple(&cmd); - return rc; + return 0; } + void smu_shutdown(void) { - const unsigned char *command = "SHUTDOWN"; - unsigned long flags; + struct smu_simple_cmd cmd; if (smu == NULL) return; - spin_lock_irqsave(&smu->lock, flags); - smu->cmd_buf->cmd = 0xaa; - smu->cmd_buf->length = strlen(command); - strcpy(smu->cmd_buf->data, command); - smu_do_cmd(smu); + if (smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 9, NULL, NULL, + 'S', 'H', 'U', 'T', 'D', 'O', 'W', 'N', 0)) + return; + smu_spinwait_simple(&cmd); for (;;) ; - spin_unlock_irqrestore(&smu->lock, flags); } + void smu_restart(void) { - const unsigned char *command = "RESTART"; - unsigned long flags; + struct smu_simple_cmd cmd; if (smu == NULL) return; - spin_lock_irqsave(&smu->lock, flags); - smu->cmd_buf->cmd = 0xaa; - smu->cmd_buf->length = strlen(command); - strcpy(smu->cmd_buf->data, command); - smu_do_cmd(smu); + if (smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, NULL, NULL, + 'R', 'E', 'S', 'T', 'A', 'R', 'T', 0)) + return; + smu_spinwait_simple(&cmd); for (;;) ; - spin_unlock_irqrestore(&smu->lock, flags); } + int smu_present(void) { return smu != NULL; } +EXPORT_SYMBOL(smu_present); int smu_init (void) @@ -307,6 +447,8 @@ int smu_init (void) if (np == NULL) return -ENODEV; + printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR); + if (smu_cmdbuf_abs == 0) { printk(KERN_ERR "SMU: Command buffer not allocated !\n"); return -EINVAL; @@ -318,7 +460,13 @@ int smu_init (void) memset(smu, 0, sizeof(*smu)); spin_lock_init(&smu->lock); + INIT_LIST_HEAD(&smu->cmd_list); + INIT_LIST_HEAD(&smu->cmd_i2c_list); smu->of_node = np; + smu->db_irq = NO_IRQ; + smu->msg_irq = NO_IRQ; + init_timer(&smu->i2c_timer); + /* smu_cmdbuf_abs is in the low 2G of RAM, can be converted to a * 32 bits value safely */ @@ -331,8 +479,8 @@ int smu_init (void) goto fail; } data = (u32 *)get_property(np, "reg", NULL); - of_node_put(np); if (data == NULL) { + of_node_put(np); printk(KERN_ERR "SMU: Can't find doorbell GPIO address !\n"); goto fail; } @@ 
-341,8 +489,31 @@ int smu_init (void) * and ack. GPIOs are at 0x50, best would be to find that out * in the device-tree though. */ - smu->db_req = 0x50 + *data; - smu->db_ack = 0x50 + *data; + smu->doorbell = *data; + if (smu->doorbell < 0x50) + smu->doorbell += 0x50; + if (np->n_intrs > 0) + smu->db_irq = np->intrs[0].line; + + of_node_put(np); + + /* Now look for the smu-interrupt GPIO */ + do { + np = of_find_node_by_name(NULL, "smu-interrupt"); + if (np == NULL) + break; + data = (u32 *)get_property(np, "reg", NULL); + if (data == NULL) { + of_node_put(np); + break; + } + smu->msg = *data; + if (smu->msg < 0x50) + smu->msg += 0x50; + if (np->n_intrs > 0) + smu->msg_irq = np->intrs[0].line; + of_node_put(np); + } while(0); /* Doorbell buffer is currently hard-coded, I didn't find a proper * device-tree entry giving the address. Best would probably to use @@ -362,3 +533,584 @@ int smu_init (void) return -ENXIO; } + + +static int smu_late_init(void) +{ + if (!smu) + return 0; + + /* + * Try to request the interrupts + */ + + if (smu->db_irq != NO_IRQ) { + if (request_irq(smu->db_irq, smu_db_intr, + SA_SHIRQ, "SMU doorbell", smu) < 0) { + printk(KERN_WARNING "SMU: can't " + "request interrupt %d\n", + smu->db_irq); + smu->db_irq = NO_IRQ; + } + } + + if (smu->msg_irq != NO_IRQ) { + if (request_irq(smu->msg_irq, smu_msg_intr, + SA_SHIRQ, "SMU message", smu) < 0) { + printk(KERN_WARNING "SMU: can't " + "request interrupt %d\n", + smu->msg_irq); + smu->msg_irq = NO_IRQ; + } + } + + return 0; +} +arch_initcall(smu_late_init); + +/* + * sysfs visibility + */ + +static void smu_expose_childs(void *unused) +{ + struct device_node *np; + + for (np = NULL; (np = of_get_next_child(smu->of_node, np)) != NULL;) { + if (device_is_compatible(np, "smu-i2c")) { + char name[32]; + u32 *reg = (u32 *)get_property(np, "reg", NULL); + + if (reg == NULL) + continue; + sprintf(name, "smu-i2c-%02x", *reg); + of_platform_device_create(np, name, &smu->of_dev->dev); + } + } + +} + +static DECLARE_WORK(smu_expose_childs_work, smu_expose_childs, NULL); + +static int smu_platform_probe(struct of_device* dev, + const struct of_device_id *match) +{ + if (!smu) + return -ENODEV; + smu->of_dev = dev; + + /* + * Ok, we are matched, now expose all i2c busses. We have to defer + * that unfortunately or it would deadlock inside the device model + */ + schedule_work(&smu_expose_childs_work); + + return 0; +} + +static struct of_device_id smu_platform_match[] = +{ + { + .type = "smu", + }, + {}, +}; + +static struct of_platform_driver smu_of_platform_driver = +{ + .name = "smu", + .match_table = smu_platform_match, + .probe = smu_platform_probe, +}; + +static int __init smu_init_sysfs(void) +{ + int rc; + + /* + * Due to sysfs bogosity, a sysdev is not a real device, so + * we should in fact create both if we want sysdev semantics + * for power management. 
+ * For now, we don't power manage machines with an SMU chip, + * I'm a bit too far from figuring out how that works with those + * new chipsets, but that will come back and bite us + */ + rc = of_register_driver(&smu_of_platform_driver); + return 0; +} + +device_initcall(smu_init_sysfs); + +struct of_device *smu_get_ofdev(void) +{ + if (!smu) + return NULL; + return smu->of_dev; +} + +EXPORT_SYMBOL_GPL(smu_get_ofdev); + +/* + * i2c interface + */ + +static void smu_i2c_complete_command(struct smu_i2c_cmd *cmd, int fail) +{ + void (*done)(struct smu_i2c_cmd *cmd, void *misc) = cmd->done; + void *misc = cmd->misc; + unsigned long flags; + + /* Check for read case */ + if (!fail && cmd->read) { + if (cmd->pdata[0] < 1) + fail = 1; + else + memcpy(cmd->info.data, &cmd->pdata[1], + cmd->info.datalen); + } + + DPRINTK("SMU: completing, success: %d\n", !fail); + + /* Update status and mark no pending i2c command with lock + * held so nobody comes in while we dequeue an eventual + * pending next i2c command + */ + spin_lock_irqsave(&smu->lock, flags); + smu->cmd_i2c_cur = NULL; + wmb(); + cmd->status = fail ? -EIO : 0; + + /* Is there another i2c command waiting ? */ + if (!list_empty(&smu->cmd_i2c_list)) { + struct smu_i2c_cmd *newcmd; + + /* Fetch it, new current, remove from list */ + newcmd = list_entry(smu->cmd_i2c_list.next, + struct smu_i2c_cmd, link); + smu->cmd_i2c_cur = newcmd; + list_del(&cmd->link); + + /* Queue with low level smu */ + list_add_tail(&cmd->scmd.link, &smu->cmd_list); + if (smu->cmd_cur == NULL) + smu_start_cmd(); + } + spin_unlock_irqrestore(&smu->lock, flags); + + /* Call command completion handler if any */ + if (done) + done(cmd, misc); + +} + + +static void smu_i2c_retry(unsigned long data) +{ + struct smu_i2c_cmd *cmd = (struct smu_i2c_cmd *)data; + + DPRINTK("SMU: i2c failure, requeuing...\n"); + + /* requeue command simply by resetting reply_len */ + cmd->pdata[0] = 0xff; + cmd->scmd.reply_len = 0x10; + smu_queue_cmd(&cmd->scmd); +} + + +static void smu_i2c_low_completion(struct smu_cmd *scmd, void *misc) +{ + struct smu_i2c_cmd *cmd = misc; + int fail = 0; + + DPRINTK("SMU: i2c compl. 
stage=%d status=%x pdata[0]=%x rlen: %x\n", + cmd->stage, scmd->status, cmd->pdata[0], scmd->reply_len); + + /* Check for possible status */ + if (scmd->status < 0) + fail = 1; + else if (cmd->read) { + if (cmd->stage == 0) + fail = cmd->pdata[0] != 0; + else + fail = cmd->pdata[0] >= 0x80; + } else { + fail = cmd->pdata[0] != 0; + } + + /* Handle failures by requeuing command, after 5ms interval + */ + if (fail && --cmd->retries > 0) { + DPRINTK("SMU: i2c failure, starting timer...\n"); + smu->i2c_timer.function = smu_i2c_retry; + smu->i2c_timer.data = (unsigned long)cmd; + smu->i2c_timer.expires = jiffies + msecs_to_jiffies(5); + add_timer(&smu->i2c_timer); + return; + } + + /* If failure or stage 1, command is complete */ + if (fail || cmd->stage != 0) { + smu_i2c_complete_command(cmd, fail); + return; + } + + DPRINTK("SMU: going to stage 1\n"); + + /* Ok, initial command complete, now poll status */ + scmd->reply_buf = cmd->pdata; + scmd->reply_len = 0x10; + scmd->data_buf = cmd->pdata; + scmd->data_len = 1; + cmd->pdata[0] = 0; + cmd->stage = 1; + cmd->retries = 20; + smu_queue_cmd(scmd); +} + + +int smu_queue_i2c(struct smu_i2c_cmd *cmd) +{ + unsigned long flags; + + if (smu == NULL) + return -ENODEV; + + /* Fill most fields of scmd */ + cmd->scmd.cmd = SMU_CMD_I2C_COMMAND; + cmd->scmd.done = smu_i2c_low_completion; + cmd->scmd.misc = cmd; + cmd->scmd.reply_buf = cmd->pdata; + cmd->scmd.reply_len = 0x10; + cmd->scmd.data_buf = (u8 *)(char *)&cmd->info; + cmd->scmd.status = 1; + cmd->stage = 0; + cmd->pdata[0] = 0xff; + cmd->retries = 20; + cmd->status = 1; + + /* Check transfer type, sanitize some "info" fields + * based on transfer type and do more checking + */ + cmd->info.caddr = cmd->info.devaddr; + cmd->read = cmd->info.devaddr & 0x01; + switch(cmd->info.type) { + case SMU_I2C_TRANSFER_SIMPLE: + memset(&cmd->info.sublen, 0, 4); + break; + case SMU_I2C_TRANSFER_COMBINED: + cmd->info.devaddr &= 0xfe; + case SMU_I2C_TRANSFER_STDSUB: + if (cmd->info.sublen > 3) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* Finish setting up command based on transfer direction + */ + if (cmd->read) { + if (cmd->info.datalen > SMU_I2C_READ_MAX) + return -EINVAL; + memset(cmd->info.data, 0xff, cmd->info.datalen); + cmd->scmd.data_len = 9; + } else { + if (cmd->info.datalen > SMU_I2C_WRITE_MAX) + return -EINVAL; + cmd->scmd.data_len = 9 + cmd->info.datalen; + } + + DPRINTK("SMU: i2c enqueuing command\n"); + DPRINTK("SMU: %s, len=%d bus=%x addr=%x sub0=%x type=%x\n", + cmd->read ? 
"read" : "write", cmd->info.datalen, + cmd->info.bus, cmd->info.caddr, + cmd->info.subaddr[0], cmd->info.type); + + + /* Enqueue command in i2c list, and if empty, enqueue also in + * main command list + */ + spin_lock_irqsave(&smu->lock, flags); + if (smu->cmd_i2c_cur == NULL) { + smu->cmd_i2c_cur = cmd; + list_add_tail(&cmd->scmd.link, &smu->cmd_list); + if (smu->cmd_cur == NULL) + smu_start_cmd(); + } else + list_add_tail(&cmd->link, &smu->cmd_i2c_list); + spin_unlock_irqrestore(&smu->lock, flags); + + return 0; +} + + + +/* + * Userland driver interface + */ + + +static LIST_HEAD(smu_clist); +static DEFINE_SPINLOCK(smu_clist_lock); + +enum smu_file_mode { + smu_file_commands, + smu_file_events, + smu_file_closing +}; + +struct smu_private +{ + struct list_head list; + enum smu_file_mode mode; + int busy; + struct smu_cmd cmd; + spinlock_t lock; + wait_queue_head_t wait; + u8 buffer[SMU_MAX_DATA]; +}; + + +static int smu_open(struct inode *inode, struct file *file) +{ + struct smu_private *pp; + unsigned long flags; + + pp = kmalloc(sizeof(struct smu_private), GFP_KERNEL); + if (pp == 0) + return -ENOMEM; + memset(pp, 0, sizeof(struct smu_private)); + spin_lock_init(&pp->lock); + pp->mode = smu_file_commands; + init_waitqueue_head(&pp->wait); + + spin_lock_irqsave(&smu_clist_lock, flags); + list_add(&pp->list, &smu_clist); + spin_unlock_irqrestore(&smu_clist_lock, flags); + file->private_data = pp; + + return 0; +} + + +static void smu_user_cmd_done(struct smu_cmd *cmd, void *misc) +{ + struct smu_private *pp = misc; + + wake_up_all(&pp->wait); +} + + +static ssize_t smu_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct smu_private *pp = file->private_data; + unsigned long flags; + struct smu_user_cmd_hdr hdr; + int rc = 0; + + if (pp->busy) + return -EBUSY; + else if (copy_from_user(&hdr, buf, sizeof(hdr))) + return -EFAULT; + else if (hdr.cmdtype == SMU_CMDTYPE_WANTS_EVENTS) { + pp->mode = smu_file_events; + return 0; + } else if (hdr.cmdtype != SMU_CMDTYPE_SMU) + return -EINVAL; + else if (pp->mode != smu_file_commands) + return -EBADFD; + else if (hdr.data_len > SMU_MAX_DATA) + return -EINVAL; + + spin_lock_irqsave(&pp->lock, flags); + if (pp->busy) { + spin_unlock_irqrestore(&pp->lock, flags); + return -EBUSY; + } + pp->busy = 1; + pp->cmd.status = 1; + spin_unlock_irqrestore(&pp->lock, flags); + + if (copy_from_user(pp->buffer, buf + sizeof(hdr), hdr.data_len)) { + pp->busy = 0; + return -EFAULT; + } + + pp->cmd.cmd = hdr.cmd; + pp->cmd.data_len = hdr.data_len; + pp->cmd.reply_len = SMU_MAX_DATA; + pp->cmd.data_buf = pp->buffer; + pp->cmd.reply_buf = pp->buffer; + pp->cmd.done = smu_user_cmd_done; + pp->cmd.misc = pp; + rc = smu_queue_cmd(&pp->cmd); + if (rc < 0) + return rc; + return count; +} + + +static ssize_t smu_read_command(struct file *file, struct smu_private *pp, + char __user *buf, size_t count) +{ + DECLARE_WAITQUEUE(wait, current); + struct smu_user_reply_hdr hdr; + unsigned long flags; + int size, rc = 0; + + if (!pp->busy) + return 0; + if (count < sizeof(struct smu_user_reply_hdr)) + return -EOVERFLOW; + spin_lock_irqsave(&pp->lock, flags); + if (pp->cmd.status == 1) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + add_wait_queue(&pp->wait, &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + rc = 0; + if (pp->cmd.status != 1) + break; + rc = -ERESTARTSYS; + if (signal_pending(current)) + break; + spin_unlock_irqrestore(&pp->lock, flags); + schedule(); + spin_lock_irqsave(&pp->lock, flags); + } + 
set_current_state(TASK_RUNNING); + remove_wait_queue(&pp->wait, &wait); + } + spin_unlock_irqrestore(&pp->lock, flags); + if (rc) + return rc; + if (pp->cmd.status != 0) + pp->cmd.reply_len = 0; + size = sizeof(hdr) + pp->cmd.reply_len; + if (count < size) + size = count; + rc = size; + hdr.status = pp->cmd.status; + hdr.reply_len = pp->cmd.reply_len; + if (copy_to_user(buf, &hdr, sizeof(hdr))) + return -EFAULT; + size -= sizeof(hdr); + if (size && copy_to_user(buf + sizeof(hdr), pp->buffer, size)) + return -EFAULT; + pp->busy = 0; + + return rc; +} + + +static ssize_t smu_read_events(struct file *file, struct smu_private *pp, + char __user *buf, size_t count) +{ + /* Not implemented */ + msleep_interruptible(1000); + return 0; +} + + +static ssize_t smu_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct smu_private *pp = file->private_data; + + if (pp->mode == smu_file_commands) + return smu_read_command(file, pp, buf, count); + if (pp->mode == smu_file_events) + return smu_read_events(file, pp, buf, count); + + return -EBADFD; +} + +static unsigned int smu_fpoll(struct file *file, poll_table *wait) +{ + struct smu_private *pp = file->private_data; + unsigned int mask = 0; + unsigned long flags; + + if (pp == 0) + return 0; + + if (pp->mode == smu_file_commands) { + poll_wait(file, &pp->wait, wait); + + spin_lock_irqsave(&pp->lock, flags); + if (pp->busy && pp->cmd.status != 1) + mask |= POLLIN; + spin_unlock_irqrestore(&pp->lock, flags); + } if (pp->mode == smu_file_events) { + /* Not yet implemented */ + } + return mask; +} + +static int smu_release(struct inode *inode, struct file *file) +{ + struct smu_private *pp = file->private_data; + unsigned long flags; + unsigned int busy; + + if (pp == 0) + return 0; + + file->private_data = NULL; + + /* Mark file as closing to avoid races with new request */ + spin_lock_irqsave(&pp->lock, flags); + pp->mode = smu_file_closing; + busy = pp->busy; + + /* Wait for any pending request to complete */ + if (busy && pp->cmd.status == 1) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&pp->wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (pp->cmd.status != 1) + break; + spin_lock_irqsave(&pp->lock, flags); + schedule(); + spin_unlock_irqrestore(&pp->lock, flags); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&pp->wait, &wait); + } + spin_unlock_irqrestore(&pp->lock, flags); + + spin_lock_irqsave(&smu_clist_lock, flags); + list_del(&pp->list); + spin_unlock_irqrestore(&smu_clist_lock, flags); + kfree(pp); + + return 0; +} + + +static struct file_operations smu_device_fops __pmacdata = { + .llseek = no_llseek, + .read = smu_read, + .write = smu_write, + .poll = smu_fpoll, + .open = smu_open, + .release = smu_release, +}; + +static struct miscdevice pmu_device __pmacdata = { + MISC_DYNAMIC_MINOR, "smu", &smu_device_fops +}; + +static int smu_device_init(void) +{ + if (!smu) + return -ENODEV; + if (misc_register(&pmu_device) < 0) + printk(KERN_ERR "via-pmu: cannot register misc device.\n"); + return 0; +} +device_initcall(smu_device_init); diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index c9ca1118e449..f38696622eb4 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -599,7 +599,7 @@ thermostat_init(void) sensor_location[2] = "?"; } - of_dev = of_platform_device_create(np, "temperatures"); + of_dev = of_platform_device_create(np, "temperatures", NULL); if (of_dev == NULL) { printk(KERN_ERR "Can't 
register temperatures device !\n"); diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index 703e31973314..cc507ceef153 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -2051,7 +2051,7 @@ static int __init therm_pm72_init(void) return -ENODEV; } } - of_dev = of_platform_device_create(np, "temperature"); + of_dev = of_platform_device_create(np, "temperature", NULL); if (of_dev == NULL) { printk(KERN_ERR "Can't register FCU platform device !\n"); return -ENODEV; diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index cbb72eb0426d..6aaa1df1a64e 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -504,7 +504,7 @@ g4fan_init( void ) } if( !(np=of_find_node_by_name(NULL, "fan")) ) return -ENODEV; - x.of_dev = of_platform_device_create( np, "temperature" ); + x.of_dev = of_platform_device_create(np, "temperature", NULL); of_node_put( np ); if( !x.of_dev ) { diff --git a/include/asm-ppc/macio.h b/include/asm-ppc/macio.h index a481b772d154..b553dd4b139e 100644 --- a/include/asm-ppc/macio.h +++ b/include/asm-ppc/macio.h @@ -1,7 +1,6 @@ #ifndef __MACIO_ASIC_H__ #define __MACIO_ASIC_H__ -#include #include extern struct bus_type macio_bus_type; diff --git a/include/asm-ppc/of_device.h b/include/asm-ppc/of_device.h index 4b264cfd3998..575bce418f80 100644 --- a/include/asm-ppc/of_device.h +++ b/include/asm-ppc/of_device.h @@ -2,6 +2,7 @@ #define __OF_DEVICE_H__ #include +#include #include /* @@ -55,7 +56,9 @@ extern int of_register_driver(struct of_platform_driver *drv); extern void of_unregister_driver(struct of_platform_driver *drv); extern int of_device_register(struct of_device *ofdev); extern void of_device_unregister(struct of_device *ofdev); -extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id); +extern struct of_device *of_platform_device_create(struct device_node *np, + const char *bus_id, + struct device *parent); extern void of_release_dev(struct device *dev); #endif /* __OF_DEVICE_H__ */ diff --git a/include/asm-ppc64/smu.h b/include/asm-ppc64/smu.h index 10b4397af9aa..dee8eefe47bc 100644 --- a/include/asm-ppc64/smu.h +++ b/include/asm-ppc64/smu.h @@ -1,22 +1,379 @@ +#ifndef _SMU_H +#define _SMU_H + /* * Definitions for talking to the SMU chip in newer G5 PowerMacs */ #include +#include + +/* + * Known SMU commands + * + * Most of what is below comes from looking at the Open Firmware driver, + * though this is still incomplete and could use better documentation here + * or there... + */ + + +/* + * Partition info commands + * + * I do not know what those are for at this point + */ +#define SMU_CMD_PARTITION_COMMAND 0x3e + + +/* + * Fan control + * + * This is a "mux" for fan control commands, first byte is the + * "sub" command. + */ +#define SMU_CMD_FAN_COMMAND 0x4a + + +/* + * Battery access + * + * Same command number as the PMU, could it be same syntax ? + */ +#define SMU_CMD_BATTERY_COMMAND 0x6f +#define SMU_CMD_GET_BATTERY_INFO 0x00 + +/* + * Real time clock control + * + * This is a "mux", first data byte contains the "sub" command. 
+ * The "RTC" part of the SMU controls the date, time, powerup + * timer, but also a PRAM + * + * Dates are in BCD format on 7 bytes: + * [sec] [min] [hour] [weekday] [month day] [month] [year] + * with month being 1 based and year minus 100 + */ +#define SMU_CMD_RTC_COMMAND 0x8e +#define SMU_CMD_RTC_SET_PWRUP_TIMER 0x00 /* i: 7 bytes date */ +#define SMU_CMD_RTC_GET_PWRUP_TIMER 0x01 /* o: 7 bytes date */ +#define SMU_CMD_RTC_STOP_PWRUP_TIMER 0x02 +#define SMU_CMD_RTC_SET_PRAM_BYTE_ACC 0x20 /* i: 1 byte (address?) */ +#define SMU_CMD_RTC_SET_PRAM_AUTOINC 0x21 /* i: 1 byte (data?) */ +#define SMU_CMD_RTC_SET_PRAM_LO_BYTES 0x22 /* i: 10 bytes */ +#define SMU_CMD_RTC_SET_PRAM_HI_BYTES 0x23 /* i: 10 bytes */ +#define SMU_CMD_RTC_GET_PRAM_BYTE 0x28 /* i: 1 bytes (address?) */ +#define SMU_CMD_RTC_GET_PRAM_LO_BYTES 0x29 /* o: 10 bytes */ +#define SMU_CMD_RTC_GET_PRAM_HI_BYTES 0x2a /* o: 10 bytes */ +#define SMU_CMD_RTC_SET_DATETIME 0x80 /* i: 7 bytes date */ +#define SMU_CMD_RTC_GET_DATETIME 0x81 /* o: 7 bytes date */ + + /* + * i2c commands + * + * To issue an i2c command, first is to send a parameter block to the + * the SMU. This is a command of type 0x9a with 9 bytes of header + * eventually followed by data for a write: + * + * 0: bus number (from device-tree usually, SMU has lots of busses !) + * 1: transfer type/format (see below) + * 2: device address. For combined and combined4 type transfers, this + * is the "write" version of the address (bit 0x01 cleared) + * 3: subaddress length (0..3) + * 4: subaddress byte 0 (or only byte for subaddress length 1) + * 5: subaddress byte 1 + * 6: subaddress byte 2 + * 7: combined address (device address for combined mode data phase) + * 8: data length + * + * The transfer types are the same good old Apple ones it seems, + * that is: + * - 0x00: Simple transfer + * - 0x01: Subaddress transfer (addr write + data tx, no restart) + * - 0x02: Combined transfer (addr write + restart + data tx) + * + * This is then followed by actual data for a write. + * + * At this point, the OF driver seems to have a limitation on transfer + * sizes of 0xd bytes on reads and 0x5 bytes on writes. I do not know + * wether this is just an OF limit due to some temporary buffer size + * or if this is an SMU imposed limit. This driver has the same limitation + * for now as I use a 0x10 bytes temporary buffer as well + * + * Once that is completed, a response is expected from the SMU. This is + * obtained via a command of type 0x9a with a length of 1 byte containing + * 0 as the data byte. OF also fills the rest of the data buffer with 0xff's + * though I can't tell yet if this is actually necessary. Once this command + * is complete, at this point, all I can tell is what OF does. OF tests + * byte 0 of the reply: + * - on read, 0xfe or 0xfc : bus is busy, wait (see below) or nak ? + * - on read, 0x00 or 0x01 : reply is in buffer (after the byte 0) + * - on write, < 0 -> failure (immediate exit) + * - else, OF just exists (without error, weird) + * + * So on read, there is this wait-for-busy thing when getting a 0xfc or + * 0xfe result. OF does a loop of up to 64 retries, waiting 20ms and + * doing the above again until either the retries expire or the result + * is no longer 0xfe or 0xfc + * + * The Darwin I2C driver is less subtle though. On any non-success status + * from the response command, it waits 5ms and tries again up to 20 times, + * it doesn't differenciate between fatal errors or "busy" status. 
+ * + * This driver provides an asynchronous paramblock based i2c command + * interface to be used either directly by low level code or by a higher + * level driver interfacing to the linux i2c layer. The current + * implementation of this relies on working timers & timer interrupts + * though, so be careful of calling context for now. This may be "fixed" + * in the future by adding a polling facility. + */ +#define SMU_CMD_I2C_COMMAND 0x9a + /* transfer types */ +#define SMU_I2C_TRANSFER_SIMPLE 0x00 +#define SMU_I2C_TRANSFER_STDSUB 0x01 +#define SMU_I2C_TRANSFER_COMBINED 0x02 + +/* + * Power supply control + * + * The "sub" command is an ASCII string in the data, the + * data lenght is that of the string. + * + * The VSLEW command can be used to get or set the voltage slewing. + * - lenght 5 (only "VSLEW") : it returns "DONE" and 3 bytes of + * reply at data offset 6, 7 and 8. + * - lenght 8 ("VSLEWxyz") has 3 additional bytes appended, and is + * used to set the voltage slewing point. The SMU replies with "DONE" + * I yet have to figure out their exact meaning of those 3 bytes in + * both cases. + * + */ +#define SMU_CMD_POWER_COMMAND 0xaa +#define SMU_CMD_POWER_RESTART "RESTART" +#define SMU_CMD_POWER_SHUTDOWN "SHUTDOWN" +#define SMU_CMD_POWER_VOLTAGE_SLEW "VSLEW" + +/* Misc commands + * + * This command seem to be a grab bag of various things + */ +#define SMU_CMD_MISC_df_COMMAND 0xdf +#define SMU_CMD_MISC_df_SET_DISPLAY_LIT 0x02 /* i: 1 byte */ +#define SMU_CMD_MISC_df_NMI_OPTION 0x04 + +/* + * Version info commands + * + * I haven't quite tried to figure out how these work + */ +#define SMU_CMD_VERSION_COMMAND 0xea + + +/* + * Misc commands + * + * This command seem to be a grab bag of various things + */ +#define SMU_CMD_MISC_ee_COMMAND 0xee +#define SMU_CMD_MISC_ee_GET_DATABLOCK_REC 0x02 +#define SMU_CMD_MISC_ee_LEDS_CTRL 0x04 /* i: 00 (00,01) [00] */ +#define SMU_CMD_MISC_ee_GET_DATA 0x05 /* i: 00 , o: ?? */ + + + +/* + * - Kernel side interface - + */ + +#ifdef __KERNEL__ + +/* + * Asynchronous SMU commands + * + * Fill up this structure and submit it via smu_queue_command(), + * and get notified by the optional done() callback, or because + * status becomes != 1 + */ + +struct smu_cmd; + +struct smu_cmd +{ + /* public */ + u8 cmd; /* command */ + int data_len; /* data len */ + int reply_len; /* reply len */ + void *data_buf; /* data buffer */ + void *reply_buf; /* reply buffer */ + int status; /* command status */ + void (*done)(struct smu_cmd *cmd, void *misc); + void *misc; + + /* private */ + struct list_head link; +}; + +/* + * Queues an SMU command, all fields have to be initialized + */ +extern int smu_queue_cmd(struct smu_cmd *cmd); + +/* + * Simple command wrapper. This structure embeds a small buffer + * to ease sending simple SMU commands from the stack + */ +struct smu_simple_cmd +{ + struct smu_cmd cmd; + u8 buffer[16]; +}; + +/* + * Queues a simple command. All fields will be initialized by that + * function + */ +extern int smu_queue_simple(struct smu_simple_cmd *scmd, u8 command, + unsigned int data_len, + void (*done)(struct smu_cmd *cmd, void *misc), + void *misc, + ...); + +/* + * Completion helper. Pass it to smu_queue_simple or as 'done' + * member to smu_queue_cmd, it will call complete() on the struct + * completion passed in the "misc" argument + */ +extern void smu_done_complete(struct smu_cmd *cmd, void *misc); /* - * Basic routines for use by architecture. To be extended as - * we understand more of the chip + * Synchronous helpers. 
Will spin-wait for completion of a command + */ +extern void smu_spinwait_cmd(struct smu_cmd *cmd); + +static inline void smu_spinwait_simple(struct smu_simple_cmd *scmd) +{ + smu_spinwait_cmd(&scmd->cmd); +} + +/* + * Poll routine to call if blocked with irqs off + */ +extern void smu_poll(void); + + +/* + * Init routine, presence check.... */ extern int smu_init(void); extern int smu_present(void); +struct of_device; +extern struct of_device *smu_get_ofdev(void); + + +/* + * Common command wrappers + */ extern void smu_shutdown(void); extern void smu_restart(void); -extern int smu_get_rtc_time(struct rtc_time *time); -extern int smu_set_rtc_time(struct rtc_time *time); +struct rtc_time; +extern int smu_get_rtc_time(struct rtc_time *time, int spinwait); +extern int smu_set_rtc_time(struct rtc_time *time, int spinwait); /* * SMU command buffer absolute address, exported by pmac_setup, * this is allocated very early during boot. */ extern unsigned long smu_cmdbuf_abs; + + +/* + * Kenrel asynchronous i2c interface + */ + +/* SMU i2c header, exactly matches i2c header on wire */ +struct smu_i2c_param +{ + u8 bus; /* SMU bus ID (from device tree) */ + u8 type; /* i2c transfer type */ + u8 devaddr; /* device address (includes direction) */ + u8 sublen; /* subaddress length */ + u8 subaddr[3]; /* subaddress */ + u8 caddr; /* combined address, filled by SMU driver */ + u8 datalen; /* length of transfer */ + u8 data[7]; /* data */ +}; + +#define SMU_I2C_READ_MAX 0x0d +#define SMU_I2C_WRITE_MAX 0x05 + +struct smu_i2c_cmd +{ + /* public */ + struct smu_i2c_param info; + void (*done)(struct smu_i2c_cmd *cmd, void *misc); + void *misc; + int status; /* 1 = pending, 0 = ok, <0 = fail */ + + /* private */ + struct smu_cmd scmd; + int read; + int stage; + int retries; + u8 pdata[0x10]; + struct list_head link; +}; + +/* + * Call this to queue an i2c command to the SMU. You must fill info, + * including info.data for a write, done and misc. + * For now, no polling interface is provided so you have to use completion + * callback. + */ +extern int smu_queue_i2c(struct smu_i2c_cmd *cmd); + + +#endif /* __KERNEL__ */ + +/* + * - Userland interface - + */ + +/* + * A given instance of the device can be configured for 2 different + * things at the moment: + * + * - sending SMU commands (default at open() time) + * - receiving SMU events (not yet implemented) + * + * Commands are written with write() of a command block. They can be + * "driver" commands (for example to switch to event reception mode) + * or real SMU commands. They are made of a header followed by command + * data if any. + * + * For SMU commands (not for driver commands), you can then read() back + * a reply. The reader will be blocked or not depending on how the device + * file is opened. poll() isn't implemented yet. The reply will consist + * of a header as well, followed by the reply data if any. You should + * always provide a buffer large enough for the maximum reply data, I + * recommand one page. 
+ * + * It is illegal to send SMU commands through a file descriptor configured + * for events reception + * + */ +struct smu_user_cmd_hdr +{ + __u32 cmdtype; +#define SMU_CMDTYPE_SMU 0 /* SMU command */ +#define SMU_CMDTYPE_WANTS_EVENTS 1 /* switch fd to events mode */ + + __u8 cmd; /* SMU command byte */ + __u32 data_len; /* Lenght of data following */ +}; + +struct smu_user_reply_hdr +{ + __u32 status; /* Command status */ + __u32 reply_len; /* Lenght of data follwing */ +}; + +#endif /* _SMU_H */ -- cgit v1.2.3 From 69e1e688f5698287b45fbff22a01de91b20804cd Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Thu, 22 Sep 2005 21:44:15 -0700 Subject: [PATCH] uml: don't redundantly mark pte as newpage in pte_modify pte_modify marks a page as needing flush, which is redundant because the resulting PTE is still set with set_pte, which already handles that. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-um/pgtable.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h index ed06170e0edd..616d02b57ea9 100644 --- a/include/asm-um/pgtable.h +++ b/include/asm-um/pgtable.h @@ -346,7 +346,6 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_set_val(pte, (pte_val(pte) & _PAGE_CHG_MASK), newprot); - if(pte_present(pte)) pte = pte_mknewpage(pte_mknewprot(pte)); return pte; } -- cgit v1.2.3 From 288a60cf4d7cc35f84f46cd8ffd0b34f9d8e7346 Mon Sep 17 00:00:00 2001 From: Chris Zankel Date: Thu, 22 Sep 2005 21:44:23 -0700 Subject: [PATCH] xtensa: remove io_remap_page_range and minor clean-ups Remove io_remap_page_range() from all of Linux 2.6.x (as requested and suggested by Randy Dunlap) and minor clean-ups. 
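For reference, the conversion pattern is the same as on other architectures: io_remap_pfn_range() takes a page frame number where io_remap_page_range() took a physical byte address, so callers either shift the address right by PAGE_SHIFT or, as in pci_mmap_page_range() below, pass vma->vm_pgoff straight through since it is already a PFN. A minimal sketch of the conversion in a driver mmap handler follows; foo_mmap() and phys_base are made-up names used only for illustration and are not part of this patch:

	/* Illustrative sketch only, not from this patch.  Assumes the usual
	 * <linux/mm.h> and <linux/fs.h> includes.  io_remap_pfn_range() wants
	 * a PFN, so the physical base address is shifted down by PAGE_SHIFT.
	 */
	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
	{
		unsigned long phys_base = 0xf0000000;	/* device registers (example) */
		unsigned long size = vma->vm_end - vma->vm_start;

		/* old 2.6 code:
		 *	io_remap_page_range(vma, vma->vm_start, phys_base,
		 *			    size, vma->vm_page_prot);
		 */
		return io_remap_pfn_range(vma, vma->vm_start,
					  phys_base >> PAGE_SHIFT,
					  size, vma->vm_page_prot);
	}
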
Signed-off-by: Chris Zankel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/xtensa/kernel/pci.c | 4 ++-- arch/xtensa/kernel/platform.c | 2 +- arch/xtensa/kernel/process.c | 2 +- arch/xtensa/kernel/setup.c | 2 +- arch/xtensa/kernel/signal.c | 2 +- arch/xtensa/kernel/time.c | 2 +- arch/xtensa/mm/init.c | 2 +- include/asm-xtensa/atomic.h | 2 +- include/asm-xtensa/bitops.h | 2 +- include/asm-xtensa/hardirq.h | 1 + include/asm-xtensa/semaphore.h | 49 ++++++++++-------------------------------- include/asm-xtensa/system.h | 16 -------------- 12 files changed, 22 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 09887c96e9a1..de19501aa809 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -402,8 +402,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, __pci_mmap_set_flags(dev, vma, mmap_state); __pci_mmap_set_pgprot(dev, vma, mmap_state, write_combine); - ret = io_remap_page_range(vma, vma->vm_start, vma->vm_pgoff<vm_end - vma->vm_start, vma->vm_page_prot); + ret = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start,vma->vm_page_prot); return ret; } diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c index cf1362784443..03674daabc66 100644 --- a/arch/xtensa/kernel/platform.c +++ b/arch/xtensa/kernel/platform.c @@ -39,7 +39,7 @@ _F(int, pcibios_fixup, (void), { return 0; }); _F(int, get_rtc_time, (time_t* t), { return 0; }); _F(int, set_rtc_time, (time_t t), { return 0; }); -#if CONFIG_XTENSA_CALIBRATE_CCOUNT +#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT _F(void, calibrate_ccount, (void), { printk ("ERROR: Cannot calibrate cpu frequency! Assuming 100MHz.\n"); diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index c83bb0d41787..08ef6d82ee51 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -457,7 +457,7 @@ int dump_task_fpu(struct pt_regs *regs, struct task_struct *task, elf_fpregset_t *r) { /* see asm/coprocessor.h for this magic number 16 */ -#if TOTAL_CPEXTRA_SIZE > 16 +#if XTENSA_CP_EXTRA_SIZE > 16 do_save_fpregs (r, regs, task); /* For now, bit 16 means some extra state may be present: */ diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 1f5bf5d624e4..513ed8d67766 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -304,7 +304,7 @@ void __init setup_arch(char **cmdline_p) # endif #endif -#if CONFIG_PCI +#ifdef CONFIG_PCI platform_pcibios_init(); #endif } diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index dc42cede9394..e252b61e45a5 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -182,7 +182,7 @@ restore_cpextra (struct _cpstate *buf) struct task_struct *tsk = current; release_all_cp(tsk); - return __copy_from_user(tsk->thread.cpextra, buf, TOTAL_CPEXTRA_SIZE); + return __copy_from_user(tsk->thread.cpextra, buf, XTENSA_CP_EXTRA_SIZE); #endif return 0; } diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 1ac7d5ce7456..8e423d1335ce 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -68,7 +68,7 @@ void __init time_init(void) * speed for the CALIBRATE. 
*/ -#if CONFIG_XTENSA_CALIBRATE_CCOUNT +#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT printk("Calibrating CPU frequency "); platform_calibrate_ccount(); printk("%d.%02d MHz\n", (int)ccount_per_jiffy/(1000000/HZ), diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 56aace84aaeb..5a91d6c9e66d 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -239,7 +239,7 @@ void __init mem_init(void) high_memory = (void *) __va(max_mapnr << PAGE_SHIFT); highmemsize = 0; -#if CONFIG_HIGHMEM +#ifdef CONFIG_HIGHMEM #error HIGHGMEM not implemented in init.c #endif diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h index 24f86f0e43cf..12b5732dc6e5 100644 --- a/include/asm-xtensa/atomic.h +++ b/include/asm-xtensa/atomic.h @@ -22,7 +22,7 @@ typedef struct { volatile int counter; } atomic_t; #include #include -#define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) +#define ATOMIC_INIT(i) { (i) } /* * This Xtensa implementation assumes that the right mechanism diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h index d395ef226c32..e76ee889e21d 100644 --- a/include/asm-xtensa/bitops.h +++ b/include/asm-xtensa/bitops.h @@ -174,7 +174,7 @@ static __inline__ int test_bit(int nr, const volatile void *addr) return 1UL & (((const volatile unsigned int *)addr)[nr>>5] >> (nr&31)); } -#if XCHAL_HAVE_NSAU +#if XCHAL_HAVE_NSA static __inline__ int __cntlz (unsigned long x) { diff --git a/include/asm-xtensa/hardirq.h b/include/asm-xtensa/hardirq.h index e07c76c36b95..aa9c1adf68d7 100644 --- a/include/asm-xtensa/hardirq.h +++ b/include/asm-xtensa/hardirq.h @@ -23,6 +23,7 @@ typedef struct { unsigned int __nmi_count; /* arch dependent */ } ____cacheline_aligned irq_cpustat_t; +void ack_bad_irq(unsigned int irq); #include /* Standard mappings for irq_cpustat_t above */ #endif /* _XTENSA_HARDIRQ_H */ diff --git a/include/asm-xtensa/semaphore.h b/include/asm-xtensa/semaphore.h index db740b8bc6f0..09e89ab3eb61 100644 --- a/include/asm-xtensa/semaphore.h +++ b/include/asm-xtensa/semaphore.h @@ -20,28 +20,19 @@ struct semaphore { atomic_t count; int sleepers; wait_queue_head_t wait; -#if WAITQUEUE_DEBUG - long __magic; -#endif }; -#if WAITQUEUE_DEBUG -# define __SEM_DEBUG_INIT(name) \ - , (int)&(name).__magic -#else -# define __SEM_DEBUG_INIT(name) -#endif - -#define __SEMAPHORE_INITIALIZER(name,count) \ - { ATOMIC_INIT(count), \ - 0, \ - __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ - __SEM_DEBUG_INIT(name) } +#define __SEMAPHORE_INITIALIZER(name,n) \ +{ \ + .count = ATOMIC_INIT(n), \ + .sleepers = 0, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ +} -#define __MUTEX_INITIALIZER(name) \ +#define __MUTEX_INITIALIZER(name) \ __SEMAPHORE_INITIALIZER(name, 1) -#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ +#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) #define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) @@ -49,17 +40,8 @@ struct semaphore { static inline void sema_init (struct semaphore *sem, int val) { -/* - * *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val); - * - * i'd rather use the more flexible initialization above, but sadly - * GCC 2.7.2.3 emits a bogus warning. EGCS doesnt. Oh well. 
- */ atomic_set(&sem->count, val); init_waitqueue_head(&sem->wait); -#if WAITQUEUE_DEBUG - sem->__magic = (int)&sem->__magic; -#endif } static inline void init_MUTEX (struct semaphore *sem) @@ -81,9 +63,7 @@ extern spinlock_t semaphore_wake_lock; static inline void down(struct semaphore * sem) { -#if WAITQUEUE_DEBUG - CHECK_MAGIC(sem->__magic); -#endif + might_sleep(); if (atomic_sub_return(1, &sem->count) < 0) __down(sem); @@ -92,9 +72,8 @@ static inline void down(struct semaphore * sem) static inline int down_interruptible(struct semaphore * sem) { int ret = 0; -#if WAITQUEUE_DEBUG - CHECK_MAGIC(sem->__magic); -#endif + + might_sleep(); if (atomic_sub_return(1, &sem->count) < 0) ret = __down_interruptible(sem); @@ -104,9 +83,6 @@ static inline int down_interruptible(struct semaphore * sem) static inline int down_trylock(struct semaphore * sem) { int ret = 0; -#if WAITQUEUE_DEBUG - CHECK_MAGIC(sem->__magic); -#endif if (atomic_sub_return(1, &sem->count) < 0) ret = __down_trylock(sem); @@ -119,9 +95,6 @@ static inline int down_trylock(struct semaphore * sem) */ static inline void up(struct semaphore * sem) { -#if WAITQUEUE_DEBUG - CHECK_MAGIC(sem->__magic); -#endif if (atomic_add_return(1, &sem->count) <= 0) __up(sem); } diff --git a/include/asm-xtensa/system.h b/include/asm-xtensa/system.h index f09393232e5e..9284867f1cb9 100644 --- a/include/asm-xtensa/system.h +++ b/include/asm-xtensa/system.h @@ -189,20 +189,6 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val) #define tas(ptr) (xchg((ptr),1)) -#if ( __XCC__ == 1 ) - -/* xt-xcc processes __inline__ differently than xt-gcc and decides to - * insert an out-of-line copy of function __xchg. This presents the - * unresolved symbol at link time of __xchg_called_with_bad_pointer, - * even though such a function would never be called at run-time. - * xt-gcc always inlines __xchg, and optimizes away the undefined - * bad_pointer function. - */ - -#define xchg(ptr,x) xchg_u32(ptr,x) - -#else /* assume xt-gcc */ - #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) /* @@ -224,8 +210,6 @@ __xchg(unsigned long x, volatile void * ptr, int size) return x; } -#endif - extern void set_except_vector(int n, void *addr); static inline void spill_registers(void) -- cgit v1.2.3 From 67497205b12e3cb408259cc09b50c3a9d12cd935 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 22 Sep 2005 23:45:24 -0700 Subject: [NETFILTER] Fix sparse endian warnings in pptp helper Signed-off-by: Alexey Dobriyan Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 118 +++++++++++----------- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 6 +- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 14 +-- 3 files changed, 70 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 389e3851d52f..50a761db5a04 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -60,8 +60,8 @@ struct ip_ct_pptp_expect { struct pptp_pkt_hdr { __u16 packetLength; - __u16 packetType; - __u32 magicCookie; + __be16 packetType; + __be32 magicCookie; }; /* PptpControlMessageType values */ @@ -93,7 +93,7 @@ struct pptp_pkt_hdr { #define PPTP_REMOVE_DEVICE_ERROR 6 struct PptpControlHeader { - __u16 messageType; + __be16 messageType; __u16 reserved; }; @@ -106,13 +106,13 @@ struct PptpControlHeader { #define PPTP_BEARER_CAP_DIGITAL 0x2 struct PptpStartSessionRequest { - __u16 protocolVersion; + __be16 protocolVersion; __u8 reserved1; __u8 reserved2; - __u32 framingCapability; - __u32 bearerCapability; - __u16 maxChannels; - __u16 firmwareRevision; + __be32 framingCapability; + __be32 bearerCapability; + __be16 maxChannels; + __be16 firmwareRevision; __u8 hostName[64]; __u8 vendorString[64]; }; @@ -125,13 +125,13 @@ struct PptpStartSessionRequest { #define PPTP_START_UNKNOWN_PROTOCOL 5 struct PptpStartSessionReply { - __u16 protocolVersion; + __be16 protocolVersion; __u8 resultCode; __u8 generalErrorCode; - __u32 framingCapability; - __u32 bearerCapability; - __u16 maxChannels; - __u16 firmwareRevision; + __be32 framingCapability; + __be32 bearerCapability; + __be16 maxChannels; + __be16 firmwareRevision; __u8 hostName[64]; __u8 vendorString[64]; }; @@ -155,7 +155,7 @@ struct PptpStopSessionReply { }; struct PptpEchoRequest { - __u32 identNumber; + __be32 identNumber; }; /* PptpEchoReplyResultCode */ @@ -163,7 +163,7 @@ struct PptpEchoRequest { #define PPTP_ECHO_GENERAL_ERROR 2 struct PptpEchoReply { - __u32 identNumber; + __be32 identNumber; __u8 resultCode; __u8 generalErrorCode; __u16 reserved; @@ -180,16 +180,16 @@ struct PptpEchoReply { #define PPTP_DONT_CARE_BEARER_TYPE 3 struct PptpOutCallRequest { - __u16 callID; - __u16 callSerialNumber; - __u32 minBPS; - __u32 maxBPS; - __u32 bearerType; - __u32 framingType; - __u16 packetWindow; - __u16 packetProcDelay; + __be16 callID; + __be16 callSerialNumber; + __be32 minBPS; + __be32 maxBPS; + __be32 bearerType; + __be32 framingType; + __be16 packetWindow; + __be16 packetProcDelay; __u16 reserved1; - __u16 phoneNumberLength; + __be16 phoneNumberLength; __u16 reserved2; __u8 phoneNumber[64]; __u8 subAddress[64]; @@ -205,24 +205,24 @@ struct PptpOutCallRequest { #define PPTP_OUTCALL_DONT_ACCEPT 7 struct PptpOutCallReply { - __u16 callID; - __u16 peersCallID; + __be16 callID; + __be16 peersCallID; __u8 resultCode; __u8 generalErrorCode; - __u16 causeCode; - __u32 connectSpeed; - __u16 packetWindow; - __u16 packetProcDelay; - __u32 physChannelID; + __be16 causeCode; + __be32 connectSpeed; + __be16 packetWindow; + __be16 packetProcDelay; + __be32 physChannelID; }; struct PptpInCallRequest { - __u16 callID; - __u16 callSerialNumber; - __u32 callBearerType; - __u32 physChannelID; - __u16 dialedNumberLength; - __u16 dialingNumberLength; + __be16 callID; + __be16 callSerialNumber; + __be32 callBearerType; + __be32 physChannelID; + __be16 dialedNumberLength; + __be16 dialingNumberLength; __u8 
dialedNumber[64]; __u8 dialingNumber[64]; __u8 subAddress[64]; @@ -234,54 +234,54 @@ struct PptpInCallRequest { #define PPTP_INCALL_DONT_ACCEPT 3 struct PptpInCallReply { - __u16 callID; - __u16 peersCallID; + __be16 callID; + __be16 peersCallID; __u8 resultCode; __u8 generalErrorCode; - __u16 packetWindow; - __u16 packetProcDelay; + __be16 packetWindow; + __be16 packetProcDelay; __u16 reserved; }; struct PptpInCallConnected { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 connectSpeed; - __u16 packetWindow; - __u16 packetProcDelay; - __u32 callFramingType; + __be32 connectSpeed; + __be16 packetWindow; + __be16 packetProcDelay; + __be32 callFramingType; }; struct PptpClearCallRequest { - __u16 callID; + __be16 callID; __u16 reserved; }; struct PptpCallDisconnectNotify { - __u16 callID; + __be16 callID; __u8 resultCode; __u8 generalErrorCode; - __u16 causeCode; + __be16 causeCode; __u16 reserved; __u8 callStatistics[128]; }; struct PptpWanErrorNotify { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 crcErrors; - __u32 framingErrors; - __u32 hardwareOverRuns; - __u32 bufferOverRuns; - __u32 timeoutErrors; - __u32 alignmentErrors; + __be32 crcErrors; + __be32 framingErrors; + __be32 hardwareOverRuns; + __be32 bufferOverRuns; + __be32 timeoutErrors; + __be32 alignmentErrors; }; struct PptpSetLinkInfo { - __u16 peersCallID; + __be16 peersCallID; __u16 reserved; - __u32 sendAccm; - __u32 recvAccm; + __be32 sendAccm; + __be32 recvAccm; }; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 14dc0f7b6556..20e43f018b7c 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -17,7 +17,7 @@ union ip_conntrack_manip_proto u_int16_t all; struct { - u_int16_t port; + __be16 port; } tcp; struct { u_int16_t port; @@ -29,7 +29,7 @@ union ip_conntrack_manip_proto u_int16_t port; } sctp; struct { - u_int16_t key; /* key is 32bit, pptp only uses 16 */ + __be16 key; /* key is 32bit, pptp only uses 16 */ } gre; }; @@ -65,7 +65,7 @@ struct ip_conntrack_tuple u_int16_t port; } sctp; struct { - u_int16_t key; /* key is 32bit, + __be16 key; /* key is 32bit, * pptp only uses 16 */ } gre; } u; diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 117587978700..8236ee0fb090 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -223,8 +223,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) static inline int exp_gre(struct ip_conntrack *master, u_int32_t seq, - u_int16_t callid, - u_int16_t peer_callid) + __be16 callid, + __be16 peer_callid) { struct ip_conntrack_tuple inv_tuple; struct ip_conntrack_tuple exp_tuples[] = { @@ -263,7 +263,7 @@ exp_gre(struct ip_conntrack *master, exp_orig->mask.src.ip = 0xffffffff; exp_orig->mask.src.u.all = 0; exp_orig->mask.dst.u.all = 0; - exp_orig->mask.dst.u.gre.key = 0xffff; + exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; @@ -340,7 +340,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, unsigned int reqlen; union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; + u_int16_t msg; + __be16 *cid, *pcid; u_int32_t seq; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); @@ -551,7 +552,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, 
unsigned int reqlen; union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; + u_int16_t msg; + __be16 *cid, *pcid; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) @@ -755,7 +757,7 @@ static struct ip_conntrack_helper pptp = { } }, .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = 0xffff } } + .u = { .tcp = { .port = __constant_htons(0xffff) } } }, .dst = { .ip = 0, .u = { .all = 0 }, -- cgit v1.2.3 From a82b748930fce0dab22c64075c38c830ae116904 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 22 Sep 2005 23:45:44 -0700 Subject: [NETFILTER] remove unneeded structure definition from conntrack helper Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 50a761db5a04..816144c75de0 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -284,13 +284,6 @@ struct PptpSetLinkInfo { __be32 recvAccm; }; - -struct pptp_priv_data { - __u16 call_id; - __u16 mcall_id; - __u16 pcall_id; -}; - union pptp_ctrl_union { struct PptpStartSessionRequest sreq; struct PptpStartSessionReply srep; -- cgit v1.2.3 From 1dfbab59498d6f227c91988bab6c71af049a5333 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 22 Sep 2005 23:46:57 -0700 Subject: [NETFILTER] Fix conntrack event cache deadlock/oops This patch fixes a number of bugs. It cannot be reasonably split up in multiple fixes, since all bugs interact with each other and affect the same function: Bug #1: The event cache code cannot be called while a lock is held. Therefore, the call to ip_conntrack_event_cache() within ip_ct_refresh_acct() needs to be moved outside of the locked section. This fixes a number of 2.6.14-rcX oops and deadlock reports. Bug #2: We used to call ct_add_counters() for unconfirmed connections without holding a lock. Since the add operations are not atomic, we could race with another CPU. Bug #3: ip_ct_refresh_acct() lost REFRESH events in some cases where refresh (and the corresponding event) are desired, but no accounting shall be performed. Both, evenst and accounting implicitly depended on the skb parameter bein non-null. We now re-introduce a non-accounting "ip_ct_refresh()" variant to explicitly state the desired behaviour. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 25 +++++++++++--- net/ipv4/netfilter/ip_conntrack_amanda.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 49 ++++++++++++++------------- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 1 - net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 2 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 2 +- 6 files changed, 49 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index bace72a76cc4..4ced38736813 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -332,11 +332,28 @@ extern void need_ip_conntrack(void); extern int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig); +extern void __ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies, + int do_acct); + +/* Refresh conntrack for this many jiffies and do accounting */ +static inline void ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __ip_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1); +} + /* Refresh conntrack for this many jiffies */ -extern void ip_ct_refresh_acct(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - unsigned long extra_jiffies); +static inline void ip_ct_refresh(struct ip_conntrack *ct, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __ip_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); +} /* These are for NAT. Icky. */ /* Update TCP window tracking data when NAT mangles the packet */ diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index dc20881004bc..fa3f914117ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); + ip_ct_refresh(ct, *pskb, master_timeout * HZ); /* No data? 
*/ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index c1f82e0c81cf..ea65dd3e517a 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1112,45 +1112,46 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -static inline void ct_add_counters(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ -#ifdef CONFIG_IP_NF_CT_ACCT - if (skb) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - ntohs(skb->nh.iph->tot_len); - } -#endif -} - -/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ -void ip_ct_refresh_acct(struct ip_conntrack *ct, +/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ +void __ip_ct_refresh_acct(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + unsigned long extra_jiffies, + int do_acct) { + int do_event = 0; + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + IP_NF_ASSERT(skb); + + write_lock_bh(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - ct_add_counters(ct, ctinfo, skb); + do_event = 1; } else { - write_lock_bh(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - /* FIXME: We loose some REFRESH events if this function - * is called without an skb. I'll fix this later -HW */ - if (skb) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + do_event = 1; } - ct_add_counters(ct, ctinfo, skb); - write_unlock_bh(&ip_conntrack_lock); } + +#ifdef CONFIG_IP_NF_CT_ACCT + if (do_acct) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + } +#endif + + write_unlock_bh(&ip_conntrack_lock); + + /* must be unlocked when calling event cache */ + if (do_event) + ip_conntrack_event_cache(IPCT_REFRESH, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 8236ee0fb090..926a6684643d 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -172,7 +172,6 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) DEBUGP("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; - /* refresh_acct will not modify counters if skb == NULL */ if (del_timer(&sibling->timeout)) sibling->timeout.function((unsigned long)sibling); ip_conntrack_put(sibling); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 71ef19d126d0..577bac22dcc6 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, ip_conntrack_expect_related(exp); ip_conntrack_expect_put(exp); - ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ); + ip_ct_refresh(ct, *pskb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index d3c7808010ec..dd476b191f4b 
100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -989,7 +989,7 @@ EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); -EXPORT_SYMBOL(ip_ct_refresh_acct); +EXPORT_SYMBOL(__ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); -- cgit v1.2.3 From d2f607484f8210cf19b1730dee26d426a5a770a5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 24 Sep 2005 10:42:06 +0100 Subject: [ARM] Fix compiler warnings for memcpy_toio/memcpy_fromio/memset_io Add 'volatile' to the __iomem pointers for these functions as per x86. Signed-off-by: Russell King --- arch/arm/kernel/io.c | 6 +++--- include/asm-arm/io.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c index 6c20c1188b60..1f6822dfae74 100644 --- a/arch/arm/kernel/io.c +++ b/arch/arm/kernel/io.c @@ -7,7 +7,7 @@ * Copy data from IO memory space to "real" memory space. * This needs to be optimized. */ -void _memcpy_fromio(void *to, void __iomem *from, size_t count) +void _memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { unsigned char *t = to; while (count) { @@ -22,7 +22,7 @@ void _memcpy_fromio(void *to, void __iomem *from, size_t count) * Copy data from "real" memory space to IO memory space. * This needs to be optimized. */ -void _memcpy_toio(void __iomem *to, const void *from, size_t count) +void _memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { const unsigned char *f = from; while (count) { @@ -37,7 +37,7 @@ void _memcpy_toio(void __iomem *to, const void *from, size_t count) * "memset" on IO memory space. * This needs to be optimized. */ -void _memset_io(void __iomem *dst, int c, size_t count) +void _memset_io(volatile void __iomem *dst, int c, size_t count) { while (count) { count--; diff --git a/include/asm-arm/io.h b/include/asm-arm/io.h index cfa71a0dffb6..5c4ae8f5dbb0 100644 --- a/include/asm-arm/io.h +++ b/include/asm-arm/io.h @@ -136,9 +136,9 @@ extern void __readwrite_bug(const char *fn); /* * String version of IO memory access ops: */ -extern void _memcpy_fromio(void *, void __iomem *, size_t); -extern void _memcpy_toio(void __iomem *, const void *, size_t); -extern void _memset_io(void __iomem *, int, size_t); +extern void _memcpy_fromio(void *, const volatile void __iomem *, size_t); +extern void _memcpy_toio(volatile void __iomem *, const void *, size_t); +extern void _memset_io(volatile void __iomem *, int, size_t); #define mmiowb() -- cgit v1.2.3 From 6f3a20242db2597312c50abc11f1e747c5d2326a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 22 Sep 2005 20:33:28 -0500 Subject: [SCSI] allow REPORT LUN scanning even for LUN 0 PQ of 3 Currently we just ignore the device, which means there are a few arrays out there that we don't find. This patch updates the scsi_report_lun_scan() to take a target instead of a device so it can be called on a return of SCSI_SCAN_TARGET_PRESENT, which is what a PQ 3 device returns. 
Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 96 +++++++++++++++++++++++----------------------- include/scsi/scsi_device.h | 1 + 2 files changed, 48 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index fcf9f6cbb142..327c5d7e5bd2 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -587,6 +587,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result, if (sdev->scsi_level >= 2 || (sdev->scsi_level == 1 && (inq_result[3] & 0x0f) == 1)) sdev->scsi_level++; + sdev->sdev_target->scsi_level = sdev->scsi_level; return 0; } @@ -771,6 +772,15 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) return SCSI_SCAN_LUN_PRESENT; } +static inline void scsi_destroy_sdev(struct scsi_device *sdev) +{ + if (sdev->host->hostt->slave_destroy) + sdev->host->hostt->slave_destroy(sdev); + transport_destroy_device(&sdev->sdev_gendev); + put_device(&sdev->sdev_gendev); +} + + /** * scsi_probe_and_add_lun - probe a LUN, if a LUN is found add it * @starget: pointer to target device structure @@ -803,9 +813,9 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, * The rescan flag is used as an optimization, the first scan of a * host adapter calls into here with rescan == 0. */ - if (rescan) { - sdev = scsi_device_lookup_by_target(starget, lun); - if (sdev) { + sdev = scsi_device_lookup_by_target(starget, lun); + if (sdev) { + if (rescan || sdev->sdev_state != SDEV_CREATED) { SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO "scsi scan: device exists on %s\n", sdev->sdev_gendev.bus_id)); @@ -820,9 +830,9 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, sdev->model); return SCSI_SCAN_LUN_PRESENT; } - } - - sdev = scsi_alloc_sdev(starget, lun, hostdata); + scsi_device_put(sdev); + } else + sdev = scsi_alloc_sdev(starget, lun, hostdata); if (!sdev) goto out; @@ -877,12 +887,8 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, res = SCSI_SCAN_NO_RESPONSE; } } - } else { - if (sdev->host->hostt->slave_destroy) - sdev->host->hostt->slave_destroy(sdev); - transport_destroy_device(&sdev->sdev_gendev); - put_device(&sdev->sdev_gendev); - } + } else + scsi_destroy_sdev(sdev); out: return res; } @@ -1054,7 +1060,7 @@ EXPORT_SYMBOL(int_to_scsilun); * 0: scan completed (or no memory, so further scanning is futile) * 1: no report lun scan, or not configured **/ -static int scsi_report_lun_scan(struct scsi_device *sdev, int bflags, +static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, int rescan) { char devname[64]; @@ -1067,7 +1073,8 @@ static int scsi_report_lun_scan(struct scsi_device *sdev, int bflags, struct scsi_lun *lunp, *lun_data; u8 *data; struct scsi_sense_hdr sshdr; - struct scsi_target *starget = scsi_target(sdev); + struct scsi_device *sdev; + struct Scsi_Host *shost = dev_to_shost(&starget->dev); /* * Only support SCSI-3 and up devices if BLIST_NOREPORTLUN is not set. @@ -1075,15 +1082,23 @@ static int scsi_report_lun_scan(struct scsi_device *sdev, int bflags, * support more than 8 LUNs. 
*/ if ((bflags & BLIST_NOREPORTLUN) || - sdev->scsi_level < SCSI_2 || - (sdev->scsi_level < SCSI_3 && - (!(bflags & BLIST_REPORTLUN2) || sdev->host->max_lun <= 8)) ) + starget->scsi_level < SCSI_2 || + (starget->scsi_level < SCSI_3 && + (!(bflags & BLIST_REPORTLUN2) || shost->max_lun <= 8)) ) return 1; if (bflags & BLIST_NOLUN) return 0; + if (!(sdev = scsi_device_lookup_by_target(starget, 0))) { + sdev = scsi_alloc_sdev(starget, 0, NULL); + if (!sdev) + return 0; + if (scsi_device_get(sdev)) + return 0; + } + sprintf(devname, "host %d channel %d id %d", - sdev->host->host_no, sdev->channel, sdev->id); + shost->host_no, sdev->channel, sdev->id); /* * Allocate enough to hold the header (the same size as one scsi_lun) @@ -1098,8 +1113,10 @@ static int scsi_report_lun_scan(struct scsi_device *sdev, int bflags, length = (max_scsi_report_luns + 1) * sizeof(struct scsi_lun); lun_data = kmalloc(length, GFP_ATOMIC | (sdev->host->unchecked_isa_dma ? __GFP_DMA : 0)); - if (!lun_data) + if (!lun_data) { + printk(ALLOC_FAILURE_MSG, __FUNCTION__); goto out; + } scsi_cmd[0] = REPORT_LUNS; @@ -1201,10 +1218,6 @@ static int scsi_report_lun_scan(struct scsi_device *sdev, int bflags, for (i = 0; i < sizeof(struct scsi_lun); i++) printk("%02x", data[i]); printk(" has a LUN larger than currently supported.\n"); - } else if (lun == 0) { - /* - * LUN 0 has already been scanned. - */ } else if (lun > sdev->host->max_lun) { printk(KERN_WARNING "scsi: %s lun%d has a LUN larger" " than allowed by the host adapter\n", @@ -1227,13 +1240,13 @@ static int scsi_report_lun_scan(struct scsi_device *sdev, int bflags, } kfree(lun_data); - return 0; - out: - /* - * We are out of memory, don't try scanning any further. - */ - printk(ALLOC_FAILURE_MSG, __FUNCTION__); + scsi_device_put(sdev); + if (sdev->sdev_state == SDEV_CREATED) + /* + * the sdev we used didn't appear in the report luns scan + */ + scsi_destroy_sdev(sdev); return 0; } @@ -1299,7 +1312,6 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel, struct Scsi_Host *shost = dev_to_shost(parent); int bflags = 0; int res; - struct scsi_device *sdev = NULL; struct scsi_target *starget; if (shost->this_id == id) @@ -1325,27 +1337,16 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel, * Scan LUN 0, if there is some response, scan further. Ideally, we * would not configure LUN 0 until all LUNs are scanned. */ - res = scsi_probe_and_add_lun(starget, 0, &bflags, &sdev, rescan, NULL); - if (res == SCSI_SCAN_LUN_PRESENT) { - if (scsi_report_lun_scan(sdev, bflags, rescan) != 0) + res = scsi_probe_and_add_lun(starget, 0, &bflags, NULL, rescan, NULL); + if (res == SCSI_SCAN_LUN_PRESENT || res == SCSI_SCAN_TARGET_PRESENT) { + if (scsi_report_lun_scan(starget, bflags, rescan) != 0) /* * The REPORT LUN did not scan the target, * do a sequential scan. */ scsi_sequential_lun_scan(starget, bflags, - res, sdev->scsi_level, rescan); - } else if (res == SCSI_SCAN_TARGET_PRESENT) { - /* - * There's a target here, but lun 0 is offline so we - * can't use the report_lun scan. 
Fall back to a - sequential lun scan with a bflags of SPARSELUN and - a default scsi level of SCSI_2 - */ - scsi_sequential_lun_scan(starget, BLIST_SPARSELUN, - SCSI_SCAN_TARGET_PRESENT, SCSI_2, rescan); + res, starget->scsi_level, rescan); } - if (sdev) - scsi_device_put(sdev); out_reap: /* now determine if the target has any children at all @@ -1542,10 +1543,7 @@ void scsi_free_host_dev(struct scsi_device *sdev) { BUG_ON(sdev->id != sdev->host->this_id); - if (sdev->host->hostt->slave_destroy) - sdev->host->hostt->slave_destroy(sdev); - transport_destroy_device(&sdev->sdev_gendev); - put_device(&sdev->sdev_gendev); + scsi_destroy_sdev(sdev); } EXPORT_SYMBOL(scsi_free_host_dev); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index c0e4c67d836f..7ece05666feb 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -163,6 +163,7 @@ struct scsi_target { unsigned int id; /* target id ... replace * scsi_device.id eventually */ unsigned long create:1; /* signal that it needs to be added */ + char scsi_level; void *hostdata; /* available to low-level driver */ unsigned long starget_data[0]; /* for the transport */ /* starget_data must be the last element!!!! */ -- cgit v1.2.3 From 5b58745203f16ab83e50f4a015eea84c416d9279 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Sun, 25 Sep 2005 23:04:48 +0100 Subject: [ARM] 2934/1: Anubis - fix VA offsets for CPLD registers Patch from Ben Dooks The VA addresses of the Anubis CPLD registers conflict with the addresses for the ISA space maps used by the rest of the s3c2410 architecture. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/anubis-map.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-s3c2410/anubis-map.h b/include/asm-arm/arch-s3c2410/anubis-map.h index 97741d6e506a..d529ffda8599 100644 --- a/include/asm-arm/arch-s3c2410/anubis-map.h +++ b/include/asm-arm/arch-s3c2410/anubis-map.h @@ -20,22 +20,22 @@ /* start peripherals off after the S3C2410 */ -#define ANUBIS_IOADDR(x) (S3C2410_ADDR((x) + 0x02000000)) +#define ANUBIS_IOADDR(x) (S3C2410_ADDR((x) + 0x01800000)) #define ANUBIS_PA_CPLD (S3C2410_CS1 | (1<<26)) /* we put the CPLD registers next, to get them out of the way */ -#define ANUBIS_VA_CTRL1 ANUBIS_IOADDR(0x00000000) /* 0x01300000 */ +#define ANUBIS_VA_CTRL1 ANUBIS_IOADDR(0x00000000) /* 0x01800000 */ #define ANUBIS_PA_CTRL1 (ANUBIS_PA_CPLD) -#define ANUBIS_VA_CTRL2 ANUBIS_IOADDR(0x00100000) /* 0x01400000 */ +#define ANUBIS_VA_CTRL2 ANUBIS_IOADDR(0x00100000) /* 0x01900000 */ #define ANUBIS_PA_CTRL2 (ANUBIS_PA_CPLD) -#define ANUBIS_VA_CTRL3 ANUBIS_IOADDR(0x00200000) /* 0x01500000 */ +#define ANUBIS_VA_CTRL3 ANUBIS_IOADDR(0x00200000) /* 0x01A00000 */ #define ANUBIS_PA_CTRL3 (ANUBIS_PA_CPLD) -#define ANUBIS_VA_CTRL4 ANUBIS_IOADDR(0x00300000) /* 0x01600000 */ +#define ANUBIS_VA_CTRL4 ANUBIS_IOADDR(0x00300000) /* 0x01B00000 */ #define ANUBIS_PA_CTRL4 (ANUBIS_PA_CPLD) #define ANUBIS_IDEPRI ANUBIS_IOADDR(0x01000000) -- cgit v1.2.3 From 56425306517ef28a9b480161cdb96d182172bc1d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 25 Sep 2005 16:46:57 -0700 Subject: [SPARC64]: Add CONFIG_DEBUG_PAGEALLOC support. The trick is that we do the kernel linear mapping TLB miss starting with an instruction sequence like this: ba,pt %xcc, kvmap_load xor %g2, %g4, %g5 succeeded by an instruction sequence which performs a full page table walk starting at swapper_pg_dir.
We first take over the trap table from the firmware. Then, using this constant PTE generation for the linear mapping area above, we build the kernel page tables for the linear mapping. After this is setup, we patch that branch above into a "nop", which will cause TLB misses to fall through to the full page table walk. With this, the page unmapping for CONFIG_DEBUG_PAGEALLOC is trivial. Signed-off-by: David S. Miller --- arch/sparc64/Kconfig.debug | 8 +++ arch/sparc64/kernel/head.S | 6 --- arch/sparc64/kernel/ktlb.S | 33 +++++++++++- arch/sparc64/kernel/vmlinux.lds.S | 3 +- arch/sparc64/mm/init.c | 109 ++++++++++++++++++++++++++++++++++++-- include/asm-sparc64/cacheflush.h | 5 ++ include/asm-sparc64/pgtable.h | 7 +-- 7 files changed, 156 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug index af0e9411b83e..fa06ea04837b 100644 --- a/arch/sparc64/Kconfig.debug +++ b/arch/sparc64/Kconfig.debug @@ -33,6 +33,14 @@ config DEBUG_BOOTMEM depends on DEBUG_KERNEL bool "Debug BOOTMEM initialization" +config DEBUG_PAGEALLOC + bool "Page alloc debugging" + depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND + help + Unmap pages from the kernel linear mapping after free_pages(). + This results in a large slowdown, but helps to find certain types + of memory corruptions. + config MCOUNT bool depends on STACK_DEBUG diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 56af714f5f1b..ecc748fb9ad7 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -525,12 +525,6 @@ bootup_user_stack_end: #include "ttable.S" #include "systbls.S" - - .align 1024 - .globl swapper_pg_dir -swapper_pg_dir: - .word 0 - #include "ktlb.S" #include "etrap.S" #include "rtrap.S" diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index a591bc0ebc7b..7796b37f478c 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -132,9 +132,40 @@ kvmap_do_obp: */ .align 32 kvmap: - brlz,pt %g4, kvmap_load + brgez,pn %g4, kvmap_nonlinear + nop + +#ifdef CONFIG_DEBUG_PAGEALLOC + .globl kvmap_linear_patch +kvmap_linear_patch: +#endif + ba,pt %xcc, kvmap_load xor %g2, %g4, %g5 +#ifdef CONFIG_DEBUG_PAGEALLOC + sethi %hi(swapper_pg_dir), %g5 + or %g5, %lo(swapper_pg_dir), %g5 + sllx %g4, 64 - (PGDIR_SHIFT + PGDIR_BITS), %g6 + srlx %g6, 64 - PAGE_SHIFT, %g6 + andn %g6, 0x3, %g6 + lduw [%g5 + %g6], %g5 + brz,pn %g5, longpath + sllx %g4, 64 - (PMD_SHIFT + PMD_BITS), %g6 + srlx %g6, 64 - PAGE_SHIFT, %g6 + sllx %g5, 11, %g5 + andn %g6, 0x3, %g6 + lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 + brz,pn %g5, longpath + sllx %g4, 64 - PMD_SHIFT, %g6 + srlx %g6, 64 - PAGE_SHIFT, %g6 + sllx %g5, 11, %g5 + andn %g6, 0x7, %g6 + ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 + brz,pn %g5, longpath + nop + ba,a,pt %xcc, kvmap_load +#endif + kvmap_nonlinear: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index f47d0be39378..2af0cf0a8640 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -9,8 +9,7 @@ ENTRY(_start) jiffies = jiffies_64; SECTIONS { - swapper_pmd_dir = 0x0000000000402000; - empty_pg_dir = 0x0000000000403000; + swapper_low_pmd_dir = 0x0000000000402000; . 
= 0x4000; .text 0x0000000000404000 : { diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index cf747372f0c9..8d72f8a1268e 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1332,15 +1332,114 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) return end_pfn; } +#ifdef CONFIG_DEBUG_PAGEALLOC +static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot) +{ + unsigned long vstart = PAGE_OFFSET + pstart; + unsigned long vend = PAGE_OFFSET + pend; + unsigned long alloc_bytes = 0UL; + + if ((vstart & ~PAGE_MASK) || (vend & ~PAGE_MASK)) { + prom_printf("kernel_map: Unaligned sp_banks[%lx:%lx]\n", + vstart, vend); + prom_halt(); + } + + while (vstart < vend) { + unsigned long this_end, paddr = __pa(vstart); + pgd_t *pgd = pgd_offset_k(vstart); + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pud = pud_offset(pgd, vstart); + if (pud_none(*pud)) { + pmd_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, vstart); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pmd_populate_kernel(&init_mm, pmd, new); + } + + pte = pte_offset_kernel(pmd, vstart); + this_end = (vstart + PMD_SIZE) & PMD_MASK; + if (this_end > vend) + this_end = vend; + + while (vstart < this_end) { + pte_val(*pte) = (paddr | pgprot_val(prot)); + + vstart += PAGE_SIZE; + paddr += PAGE_SIZE; + pte++; + } + } + + return alloc_bytes; +} + +extern struct linux_mlist_p1275 *prom_ptot_ptr; +extern unsigned int kvmap_linear_patch[1]; + +static void __init kernel_physical_mapping_init(void) +{ + struct linux_mlist_p1275 *p = prom_ptot_ptr; + unsigned long mem_alloced = 0UL; + + while (p) { + unsigned long phys_start, phys_end; + + phys_start = p->start_adr; + phys_end = phys_start + p->num_bytes; + mem_alloced += kernel_map_range(phys_start, phys_end, + PAGE_KERNEL); + + p = p->theres_more; + } + + printk("Allocated %ld bytes for kernel page tables.\n", + mem_alloced); + + kvmap_linear_patch[0] = 0x01000000; /* nop */ + flushi(&kvmap_linear_patch[0]); + + __flush_tlb_all(); +} + +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT; + unsigned long phys_end = phys_start + (numpages * PAGE_SIZE); + + kernel_map_range(phys_start, phys_end, + (enable ? PAGE_KERNEL : __pgprot(0))); + + /* we should perform an IPI and flush all tlbs, + * but that can deadlock->flush only current cpu. + */ + __flush_tlb_kernel_range(PAGE_OFFSET + phys_start, + PAGE_OFFSET + phys_end); +} +#endif + /* paging_init() sets up the page tables */ extern void cheetah_ecache_flush_init(void); static unsigned long last_valid_pfn; +pgd_t swapper_pg_dir[2048]; void __init paging_init(void) { - extern pmd_t swapper_pmd_dir[1024]; unsigned long end_pfn, pages_avail, shift; unsigned long real_end; @@ -1361,11 +1460,11 @@ void __init paging_init(void) */ init_mm.pgd += ((shift) / (sizeof(pgd_t))); - memset(swapper_pmd_dir, 0, sizeof(swapper_pmd_dir)); + memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir)); /* Now can init the kernel/bad page tables. 
*/ pud_set(pud_offset(&swapper_pg_dir[0], 0), - swapper_pmd_dir + (shift / sizeof(pgd_t))); + swapper_low_pmd_dir + (shift / sizeof(pgd_t))); swapper_pgd_zero = pgd_val(swapper_pg_dir[0]); @@ -1390,6 +1489,10 @@ void __init paging_init(void) pages_avail = 0; last_valid_pfn = end_pfn = bootmem_init(&pages_avail); +#ifdef CONFIG_DEBUG_PAGEALLOC + kernel_physical_mapping_init(); +#endif + { unsigned long zones_size[MAX_NR_ZONES]; unsigned long zholes_size[MAX_NR_ZONES]; diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h index ededd2659eab..b3f61659ba81 100644 --- a/include/asm-sparc64/cacheflush.h +++ b/include/asm-sparc64/cacheflush.h @@ -66,6 +66,11 @@ extern void flush_ptrace_access(struct vm_area_struct *, struct page *, #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) +#ifdef CONFIG_DEBUG_PAGEALLOC +/* internal debugging function */ +void kernel_map_pages(struct page *page, int numpages, int enable); +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _SPARC64_CACHEFLUSH_H */ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index a297f6144f0f..43cbb089cde2 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -60,13 +60,13 @@ * table can map */ #define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) -#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) #define PMD_BITS (PAGE_SHIFT - 2) /* PGDIR_SHIFT determines what a third-level page table entry can map */ #define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PGDIR_BITS (PAGE_SHIFT - 2) @@ -336,7 +336,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p #define pte_clear(mm,addr,ptep) \ set_pte_at((mm), (addr), (ptep), __pte(0UL)) -extern pgd_t swapper_pg_dir[1]; +extern pgd_t swapper_pg_dir[2048]; +extern pmd_t swapper_low_pmd_dir[2048]; /* These do nothing with the way I have things setup. */ #define mmu_lockarea(vaddr, len) (vaddr) -- cgit v1.2.3 From 80dc0d6b44ce0f01df58d8899e46612690ed7d81 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 26 Sep 2005 00:32:17 -0700 Subject: [SPARC64]: Probe D/I/E-cache config and use. At boot time, determine the D-cache, I-cache and E-cache size and line-size. Use them in cache flushes when appropriate. This change was motivated by discovering that the D-cache on UltraSparc-IIIi and later are 64K not 32K, and the flushes done by the Cheetah error handlers were assuming a 32K size. There are still some pieces of code that are hard coding things and will need to be fixed up at some point. While we're here, fix the D-cache and I-cache parity error handlers to run with interrupts disabled, and when the trap occurs at trap level > 1 log the event via a counter displayed in /proc/cpuinfo. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/devices.c | 22 ++++++++++++++++++++++ arch/sparc64/kernel/entry.S | 27 ++++++++++++++++++--------- arch/sparc64/kernel/setup.c | 9 ++++++++- arch/sparc64/kernel/smp.c | 21 +++++++++++++++++++++ arch/sparc64/kernel/traps.c | 40 +++++++++++++++++++++++++++------------- include/asm-sparc64/cpudata.h | 10 ++++++++++ 6 files changed, 106 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/devices.c b/arch/sparc64/kernel/devices.c index d710274e516b..df9a1ca8fd77 100644 --- a/arch/sparc64/kernel/devices.c +++ b/arch/sparc64/kernel/devices.c @@ -135,6 +135,28 @@ void __init device_scan(void) cpu_data(0).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); + cpu_data(0).dcache_size = prom_getintdefault(cpu_node, + "dcache-size", + 16 * 1024); + cpu_data(0).dcache_line_size = + prom_getintdefault(cpu_node, "dcache-line-size", 32); + cpu_data(0).icache_size = prom_getintdefault(cpu_node, + "icache-size", + 16 * 1024); + cpu_data(0).icache_line_size = + prom_getintdefault(cpu_node, "icache-line-size", 32); + cpu_data(0).ecache_size = prom_getintdefault(cpu_node, + "ecache-size", + 4 * 1024 * 1024); + cpu_data(0).ecache_line_size = + prom_getintdefault(cpu_node, "ecache-line-size", 64); + printk("CPU[0]: Caches " + "D[sz(%d):line_sz(%d)] " + "I[sz(%d):line_sz(%d)] " + "E[sz(%d):line_sz(%d)]\n", + cpu_data(0).dcache_size, cpu_data(0).dcache_line_size, + cpu_data(0).icache_size, cpu_data(0).icache_line_size, + cpu_data(0).ecache_size, cpu_data(0).ecache_line_size); } #endif diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 45cd3bbdb7e0..2879b1072921 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -372,14 +372,13 @@ cheetah_plus_patch_fpdis: * * DATA 0: [low 32-bits] Address of function to call, jmp to this * [high 32-bits] MMU Context Argument 0, place in %g5 - * DATA 1: Address Argument 1, place in %g6 + * DATA 1: Address Argument 1, place in %g1 * DATA 2: Address Argument 2, place in %g7 * * With this method we can do most of the cross-call tlb/cache * flushing very quickly. * - * Current CPU's IRQ worklist table is locked into %g1, - * don't touch. + * Current CPU's IRQ worklist table is locked into %g6, don't touch. */ .text .align 32 @@ -853,13 +852,14 @@ cheetah_plus_dcpe_trap_vector: nop do_cheetah_plus_data_parity: - ba,pt %xcc, etrap + rdpr %pil, %g2 + wrpr %g0, 15, %pil + ba,pt %xcc, etrap_irq rd %pc, %g7 mov 0x0, %o0 call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 - ba,pt %xcc, rtrap - clr %l6 + ba,a,pt %xcc, rtrap_irq cheetah_plus_dcpe_trap_vector_tl1: membar #Sync @@ -883,13 +883,14 @@ cheetah_plus_icpe_trap_vector: nop do_cheetah_plus_insn_parity: - ba,pt %xcc, etrap + rdpr %pil, %g2 + wrpr %g0, 15, %pil + ba,pt %xcc, etrap_irq rd %pc, %g7 mov 0x1, %o0 call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 - ba,pt %xcc, rtrap - clr %l6 + ba,a,pt %xcc, rtrap_irq cheetah_plus_icpe_trap_vector_tl1: membar #Sync @@ -922,6 +923,10 @@ do_dcpe_tl1: nop wrpr %g1, %tl ! Restore original trap level do_dcpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ + sethi %hi(dcache_parity_tl1_occurred), %g2 + lduw [%g2 + %lo(dcache_parity_tl1_occurred)], %g1 + add %g1, 1, %g1 + stw %g1, [%g2 + %lo(dcache_parity_tl1_occurred)] /* Reset D-cache parity */ sethi %hi(1 << 16), %g1 ! D-cache size mov (1 << 5), %g2 ! D-cache line size @@ -968,6 +973,10 @@ do_icpe_tl1: nop wrpr %g1, %tl ! 
Restore original trap level do_icpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ + sethi %hi(icache_parity_tl1_occurred), %g2 + lduw [%g2 + %lo(icache_parity_tl1_occurred)], %g1 + add %g1, 1, %g1 + stw %g1, [%g2 + %lo(icache_parity_tl1_occurred)] /* Flush I-cache */ sethi %hi(1 << 15), %g1 ! I-cache size mov (1 << 5), %g2 ! I-cache line size diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index f4345d837284..8e8baf2354df 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -605,6 +605,9 @@ extern void smp_info(struct seq_file *); extern void smp_bogo(struct seq_file *); extern void mmu_info(struct seq_file *); +unsigned int dcache_parity_tl1_occurred; +unsigned int icache_parity_tl1_occurred; + static int show_cpuinfo(struct seq_file *m, void *__unused) { seq_printf(m, @@ -615,6 +618,8 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) "type\t\t: sun4u\n" "ncpus probed\t: %ld\n" "ncpus active\t: %ld\n" + "D$ parity tl1\t: %u\n" + "I$ parity tl1\t: %u\n" #ifndef CONFIG_SMP "Cpu0Bogo\t: %lu.%02lu\n" "Cpu0ClkTck\t: %016lx\n" @@ -627,7 +632,9 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) (prom_prev >> 8) & 0xff, prom_prev & 0xff, (long)num_possible_cpus(), - (long)num_online_cpus() + (long)num_online_cpus(), + dcache_parity_tl1_occurred, + icache_parity_tl1_occurred #ifndef CONFIG_SMP , cpu_data(0).udelay_val/(500000/HZ), (cpu_data(0).udelay_val/(5000/HZ)) % 100, diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index b4fc6a5462b2..590df5a16f5a 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -93,6 +93,27 @@ void __init smp_store_cpu_info(int id) cpu_data(id).pte_cache[1] = NULL; cpu_data(id).pgd_cache = NULL; cpu_data(id).idle_volume = 1; + + cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size", + 16 * 1024); + cpu_data(id).dcache_line_size = + prom_getintdefault(cpu_node, "dcache-line-size", 32); + cpu_data(id).icache_size = prom_getintdefault(cpu_node, "icache-size", + 16 * 1024); + cpu_data(id).icache_line_size = + prom_getintdefault(cpu_node, "icache-line-size", 32); + cpu_data(id).ecache_size = prom_getintdefault(cpu_node, "ecache-size", + 4 * 1024 * 1024); + cpu_data(id).ecache_line_size = + prom_getintdefault(cpu_node, "ecache-line-size", 64); + printk("CPU[%d]: Caches " + "D[sz(%d):line_sz(%d)] " + "I[sz(%d):line_sz(%d)] " + "E[sz(%d):line_sz(%d)]\n", + id, + cpu_data(id).dcache_size, cpu_data(id).dcache_line_size, + cpu_data(id).icache_size, cpu_data(id).icache_line_size, + cpu_data(id).ecache_size, cpu_data(id).ecache_line_size); } static void smp_setup_percpu_timer(void); diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index b280b2ef674f..f8e7005fede9 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -869,14 +869,19 @@ static void cheetah_flush_ecache_line(unsigned long physaddr) */ static void __cheetah_flush_icache(void) { - unsigned long i; + unsigned int icache_size, icache_line_size; + unsigned long addr; + + icache_size = local_cpu_data().icache_size; + icache_line_size = local_cpu_data().icache_line_size; /* Clear the valid bits in all the tags. 
*/ - for (i = 0; i < (1 << 15); i += (1 << 5)) { + for (addr = 0; addr < icache_size; addr += icache_line_size) { __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" "membar #Sync" : /* no outputs */ - : "r" (i | (2 << 3)), "i" (ASI_IC_TAG)); + : "r" (addr | (2 << 3)), + "i" (ASI_IC_TAG)); } } @@ -904,13 +909,17 @@ static void cheetah_flush_icache(void) static void cheetah_flush_dcache(void) { - unsigned long i; + unsigned int dcache_size, dcache_line_size; + unsigned long addr; + + dcache_size = local_cpu_data().dcache_size; + dcache_line_size = local_cpu_data().dcache_line_size; - for (i = 0; i < (1 << 16); i += (1 << 5)) { + for (addr = 0; addr < dcache_size; addr += dcache_line_size) { __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" "membar #Sync" : /* no outputs */ - : "r" (i), "i" (ASI_DCACHE_TAG)); + : "r" (addr), "i" (ASI_DCACHE_TAG)); } } @@ -921,24 +930,29 @@ static void cheetah_flush_dcache(void) */ static void cheetah_plus_zap_dcache_parity(void) { - unsigned long i; + unsigned int dcache_size, dcache_line_size; + unsigned long addr; + + dcache_size = local_cpu_data().dcache_size; + dcache_line_size = local_cpu_data().dcache_line_size; - for (i = 0; i < (1 << 16); i += (1 << 5)) { - unsigned long tag = (i >> 14); - unsigned long j; + for (addr = 0; addr < dcache_size; addr += dcache_line_size) { + unsigned long tag = (addr >> 14); + unsigned long line; __asm__ __volatile__("membar #Sync\n\t" "stxa %0, [%1] %2\n\t" "membar #Sync" : /* no outputs */ - : "r" (tag), "r" (i), + : "r" (tag), "r" (addr), "i" (ASI_DCACHE_UTAG)); - for (j = i; j < i + (1 << 5); j += (1 << 3)) + for (line = addr; line < addr + dcache_line_size; line += 8) __asm__ __volatile__("membar #Sync\n\t" "stxa %%g0, [%0] %1\n\t" "membar #Sync" : /* no outputs */ - : "r" (j), "i" (ASI_DCACHE_DATA)); + : "r" (line), + "i" (ASI_DCACHE_DATA)); } } diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 9a3a81f1cc58..74de79dca915 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -22,6 +22,16 @@ typedef struct { unsigned int __pad1; unsigned long *pte_cache[2]; unsigned long *pgd_cache; + + /* Dcache line 3, rarely used */ + unsigned int dcache_size; + unsigned int dcache_line_size; + unsigned int icache_size; + unsigned int icache_line_size; + unsigned int ecache_size; + unsigned int ecache_line_size; + unsigned int __pad2; + unsigned int __pad3; } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); -- cgit v1.2.3 From 8c3520d4eb3b1bbf2e45fbae8dcfb8db06d5e775 Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Sun, 21 Aug 2005 22:29:26 -0700 Subject: [PATCH] yenta: auto-tune EnE bridges for CardBus cards Echo Audio cardbus products are known to be incompatible with EnE bridges. in order to maybe solve the problem a EnE specific test bit has to be set, another cleared...but other setups have a good chance to break when just forcing the bits. so do the whole thingy automatically. The patch adds a hook in cb_alloc() that allows special tuning for the different chipsets. for ene just match the Echo products and set/clear the test bits, defaults to do the same thing as w/o the patch to not break working setups. 
Signed-off-by: Daniel Ritz Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Dominik Brodowski --- drivers/pcmcia/cardbus.c | 5 +++ drivers/pcmcia/ti113x.h | 86 ++++++++++++++++++++++++++++++++++++++----- drivers/pcmcia/yenta_socket.c | 15 ++++++-- include/linux/pci_ids.h | 5 +++ include/pcmcia/ss.h | 9 ++++- 5 files changed, 105 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index 1d755e20880c..3f6d51d11374 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -228,6 +228,11 @@ int cb_alloc(struct pcmcia_socket * s) pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); cardbus_assign_irqs(bus, s->pci_irq); + + /* socket specific tune function */ + if (s->tune_bridge) + s->tune_bridge(s, bus); + pci_enable_bridges(bus); pci_bus_add_devices(bus); diff --git a/drivers/pcmcia/ti113x.h b/drivers/pcmcia/ti113x.h index fbe233e19ceb..d319f2e7d053 100644 --- a/drivers/pcmcia/ti113x.h +++ b/drivers/pcmcia/ti113x.h @@ -153,6 +153,12 @@ /* EnE test register */ #define ENE_TEST_C9 0xc9 /* 8bit */ #define ENE_TEST_C9_TLTENABLE 0x02 +#define ENE_TEST_C9_PFENABLE_F0 0x04 +#define ENE_TEST_C9_PFENABLE_F1 0x08 +#define ENE_TEST_C9_PFENABLE (ENE_TEST_C9_PFENABLE_F0 | ENE_TEST_C9_PFENABLE_F0) +#define ENE_TEST_C9_WPDISALBLE_F0 0x40 +#define ENE_TEST_C9_WPDISALBLE_F1 0x80 +#define ENE_TEST_C9_WPDISALBLE (ENE_TEST_C9_WPDISALBLE_F0 | ENE_TEST_C9_WPDISALBLE_F1) /* * Texas Instruments CardBus controller overrides. @@ -790,16 +796,6 @@ static int ti12xx_override(struct yenta_socket *socket) if (val_orig != val) config_writel(socket, TI113X_SYSTEM_CONTROL, val); - /* - * for EnE bridges only: clear testbit TLTEnable. this makes the - * RME Hammerfall DSP sound card working. - */ - if (socket->dev->vendor == PCI_VENDOR_ID_ENE) { - u8 test_c9 = config_readb(socket, ENE_TEST_C9); - test_c9 &= ~ENE_TEST_C9_TLTENABLE; - config_writeb(socket, ENE_TEST_C9, test_c9); - } - /* * Yenta expects controllers to use CSCINT to route * CSC interrupts to PCI rather than INTVAL. @@ -841,5 +837,75 @@ static int ti1250_override(struct yenta_socket *socket) return ti12xx_override(socket); } + +/** + * EnE specific part. EnE bridges are register compatible with TI bridges but + * have their own test registers and more important their own little problems. + * Some fixup code to make everybody happy (TM). + */ + +/** + * set/clear various test bits: + * Defaults to clear the bit. 
+ * - mask (u8) defines what bits to change + * - bits (u8) is the values to change them to + * -> it's + * current = (current & ~mask) | bits + */ +/* pci ids of devices that wants to have the bit set */ +#define DEVID(_vend,_dev,_subvend,_subdev,mask,bits) { \ + .vendor = _vend, \ + .device = _dev, \ + .subvendor = _subvend, \ + .subdevice = _subdev, \ + .driver_data = ((mask) << 8 | (bits)), \ + } +static struct pci_device_id ene_tune_tbl[] = { + /* Echo Audio products based on motorola DSP56301 and DSP56361 */ + DEVID(PCI_VENDOR_ID_MOTOROLA, 0x1801, 0xECC0, PCI_ANY_ID, + ENE_TEST_C9_TLTENABLE | ENE_TEST_C9_PFENABLE, ENE_TEST_C9_TLTENABLE), + DEVID(PCI_VENDOR_ID_MOTOROLA, 0x3410, 0xECC0, PCI_ANY_ID, + ENE_TEST_C9_TLTENABLE | ENE_TEST_C9_PFENABLE, ENE_TEST_C9_TLTENABLE), + + {} +}; + +static void ene_tune_bridge(struct pcmcia_socket *sock, struct pci_bus *bus) +{ + struct yenta_socket *socket = container_of(sock, struct yenta_socket, socket); + struct pci_dev *dev; + struct pci_device_id *id = NULL; + u8 test_c9, old_c9, mask, bits; + + list_for_each_entry(dev, &bus->devices, bus_list) { + id = (struct pci_device_id *) pci_match_id(ene_tune_tbl, dev); + if (id) + break; + } + + test_c9 = old_c9 = config_readb(socket, ENE_TEST_C9); + if (id) { + mask = (id->driver_data >> 8) & 0xFF; + bits = id->driver_data & 0xFF; + + test_c9 = (test_c9 & ~mask) | bits; + } + else + /* default to clear TLTEnable bit, old behaviour */ + test_c9 &= ~ENE_TEST_C9_TLTENABLE; + + printk(KERN_INFO "yenta EnE: chaning testregister 0xC9, %02x -> %02x\n", old_c9, test_c9); + config_writeb(socket, ENE_TEST_C9, test_c9); +} + + +static int ene_override(struct yenta_socket *socket) +{ + /* install tune_bridge() function */ + socket->socket.tune_bridge = ene_tune_bridge; + + return ti1250_override(socket); +} + #endif /* _LINUX_TI113X_H */ diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index ba4d78e5b121..fd2a6f892c41 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -819,6 +819,7 @@ enum { CARDBUS_TYPE_TOPIC95, CARDBUS_TYPE_TOPIC97, CARDBUS_TYPE_O2MICRO, + CARDBUS_TYPE_ENE, }; /* @@ -865,6 +866,12 @@ static struct cardbus_type cardbus_type[] = { .override = o2micro_override, .restore_state = o2micro_restore_state, }, + [CARDBUS_TYPE_ENE] = { + .override = ene_override, + .save_state = ti_save_state, + .restore_state = ti_restore_state, + .sock_init = ti_init, + }, }; @@ -1265,10 +1272,10 @@ static struct pci_device_id yenta_table [] = { CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1250, TI1250), CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1410, TI1250), - CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1211, TI12XX), - CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1225, TI12XX), - CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1410, TI1250), - CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1420, TI12XX), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1211, ENE), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1225, ENE), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1410, ENE), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1420, ENE), CB_ID(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C465, RICOH), CB_ID(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C466, RICOH), diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b86a4b77007e..92efb2c767f9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2187,7 +2187,12 @@ #define PCI_DEVICE_ID_ENE_1211 0x1211 #define PCI_DEVICE_ID_ENE_1225 0x1225 #define PCI_DEVICE_ID_ENE_1410 0x1410 +#define PCI_DEVICE_ID_ENE_710 
0x1411 +#define PCI_DEVICE_ID_ENE_712 0x1412 #define PCI_DEVICE_ID_ENE_1420 0x1420 +#define PCI_DEVICE_ID_ENE_720 0x1421 +#define PCI_DEVICE_ID_ENE_722 0x1422 + #define PCI_VENDOR_ID_CHELSIO 0x1425 #define PCI_VENDOR_ID_MIPS 0x153f diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 0f7aacc33fe9..c8592c7e8eaa 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -21,6 +21,9 @@ #include #include #include +#ifdef CONFIG_CARDBUS +#include +#endif /* Definitions for card status flags for GetStatus */ #define SS_WRPROT 0x0001 @@ -233,7 +236,11 @@ struct pcmcia_socket { /* so is power hook */ int (*power_hook)(struct pcmcia_socket *sock, int operation); - +#ifdef CONFIG_CARDBUS + /* allows tuning the CB bridge before loading driver for the CB card */ + void (*tune_bridge)(struct pcmcia_socket *sock, struct pci_bus *bus); +#endif + /* state thread */ struct semaphore skt_sem; /* protects socket h/w state */ -- cgit v1.2.3 From 6c1a10dba92cbacb58563f5eacf93807125b488a Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Tue, 20 Sep 2005 14:12:17 -0700 Subject: [PATCH] yenta: add support for more TI bridges Support some more TI cardbus bridges. most of them are multifunction devices which adds 1394 controllers, smartcard readers etc. this could also help with the various problems with the XX21 controllers seen on the linux-pcmcia list. Signed-off-by: Daniel Ritz Signed-off-by: Dominik Brodowski --- drivers/pcmcia/ti113x.h | 29 +++++++++++++++++++++++++++++ drivers/pcmcia/yenta_socket.c | 8 ++++++++ include/linux/pci_ids.h | 7 +++++++ 3 files changed, 44 insertions(+) (limited to 'include') diff --git a/drivers/pcmcia/ti113x.h b/drivers/pcmcia/ti113x.h index d319f2e7d053..da0b404561c9 100644 --- a/drivers/pcmcia/ti113x.h +++ b/drivers/pcmcia/ti113x.h @@ -59,6 +59,7 @@ #define TI122X_SCR_SER_STEP 0xc0000000 #define TI122X_SCR_INTRTIE 0x20000000 +#define TIXX21_SCR_TIEALL 0x10000000 #define TI122X_SCR_CBRSVD 0x00400000 #define TI122X_SCR_MRBURSTDN 0x00008000 #define TI122X_SCR_MRBURSTUP 0x00004000 @@ -624,6 +625,7 @@ static int ti12xx_2nd_slot_empty(struct yenta_socket *socket) int devfn; unsigned int state; int ret = 1; + u32 sysctl; /* catch the two-slot controllers */ switch (socket->dev->device) { @@ -646,6 +648,24 @@ static int ti12xx_2nd_slot_empty(struct yenta_socket *socket) */ break; + case PCI_DEVICE_ID_TI_X515: + case PCI_DEVICE_ID_TI_X420: + case PCI_DEVICE_ID_TI_X620: + case PCI_DEVICE_ID_TI_XX21_XX11: + case PCI_DEVICE_ID_TI_7410: + case PCI_DEVICE_ID_TI_7610: + /* + * those are either single or dual slot CB with additional functions + * like 1394, smartcard reader, etc. check the TIEALL flag for them + * the TIEALL flag binds the IRQ of all functions toghether. + * we catch the single slot variants later. + */ + sysctl = config_readl(socket, TI113X_SYSTEM_CONTROL); + if (sysctl & TIXX21_SCR_TIEALL) + return 0; + + break; + /* single-slot controllers have the 2nd slot empty always :) */ default: return 1; @@ -658,6 +678,15 @@ static int ti12xx_2nd_slot_empty(struct yenta_socket *socket) if (!func) return 1; + /* + * check that the device id of both slots match. this is needed for the + * XX21 and the XX11 controller that share the same device id for single + * and dual slot controllers. return '2nd slot empty'. we already checked + * if the interrupt is tied to another function. 
+ */ + if (socket->dev->device != func->device) + goto out; + slot2 = pci_get_drvdata(func); if (!slot2) goto out; diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 80806c9b43ad..c3e22fca105a 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -1249,6 +1249,14 @@ static struct pci_device_id yenta_table [] = { CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1250, TI1250), CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1410, TI1250), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_X515, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_X420, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_X620, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_7410, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_7510, TI12XX), + CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_7610, TI12XX), + CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1211, ENE), CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1225, ENE), CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1410, ENE), diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 92efb2c767f9..68f11ac1a314 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -769,6 +769,8 @@ #define PCI_DEVICE_ID_TI_TVP4010 0x3d04 #define PCI_DEVICE_ID_TI_TVP4020 0x3d07 #define PCI_DEVICE_ID_TI_4450 0x8011 +#define PCI_DEVICE_ID_TI_XX21_XX11 0x8031 +#define PCI_DEVICE_ID_TI_X515 0x8036 #define PCI_DEVICE_ID_TI_1130 0xac12 #define PCI_DEVICE_ID_TI_1031 0xac13 #define PCI_DEVICE_ID_TI_1131 0xac15 @@ -785,12 +787,17 @@ #define PCI_DEVICE_ID_TI_4451 0xac42 #define PCI_DEVICE_ID_TI_4510 0xac44 #define PCI_DEVICE_ID_TI_4520 0xac46 +#define PCI_DEVICE_ID_TI_7510 0xac47 +#define PCI_DEVICE_ID_TI_7610 0xac48 +#define PCI_DEVICE_ID_TI_7410 0xac49 #define PCI_DEVICE_ID_TI_1410 0xac50 #define PCI_DEVICE_ID_TI_1420 0xac51 #define PCI_DEVICE_ID_TI_1451A 0xac52 #define PCI_DEVICE_ID_TI_1620 0xac54 #define PCI_DEVICE_ID_TI_1520 0xac55 #define PCI_DEVICE_ID_TI_1510 0xac56 +#define PCI_DEVICE_ID_TI_X620 0xac8d +#define PCI_DEVICE_ID_TI_X420 0xac8e #define PCI_VENDOR_ID_SONY 0x104d #define PCI_DEVICE_ID_SONY_CXD3222 0x8039 -- cgit v1.2.3 From 4fb7edce52e5b6cf41e3375822d74a27f0b6f2dd Mon Sep 17 00:00:00 2001 From: Kars de Jong Date: Sun, 25 Sep 2005 14:39:46 +0200 Subject: [PATCH] pcmcia: fix cross-platform issues with pcmcia module aliases - Added a missing TO_NATIVE call to scripts/mod/file2alias.c:do_pcmcia_entry() - Add an alignment attribute to struct pcmcia_device_no to solve an alignment issue seen when cross-compiling on x86 for m68k. 
Signed-off-by: Kars de Jong Signed-off-by: Dominik Brodowski --- include/linux/mod_devicetable.h | 5 +++-- scripts/mod/file2alias.c | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 47da39ba3f03..4ed2107bc020 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -209,10 +209,11 @@ struct pcmcia_device_id { /* for real multi-function devices */ __u8 function; - /* for pseude multi-function devices */ + /* for pseudo multi-function devices */ __u8 device_no; - __u32 prod_id_hash[4]; + __u32 prod_id_hash[4] + __attribute__((aligned(sizeof(__u32)))); /* not matched against in kernelspace*/ #ifdef __KERNEL__ diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index d8ee38aede26..f2ee673329a7 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -295,11 +295,13 @@ static int do_pcmcia_entry(const char *filename, { unsigned int i; + id->match_flags = TO_NATIVE(id->match_flags); id->manf_id = TO_NATIVE(id->manf_id); id->card_id = TO_NATIVE(id->card_id); id->func_id = TO_NATIVE(id->func_id); id->function = TO_NATIVE(id->function); id->device_no = TO_NATIVE(id->device_no); + for (i=0; i<4; i++) { id->prod_id_hash[i] = TO_NATIVE(id->prod_id_hash[i]); } -- cgit v1.2.3 From cbf8fd9f5aa5164e05cb04d4a34fcbe82f60beeb Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 26 Sep 2005 15:30:20 +0100 Subject: [ARM] Remove SA_IRQNOMASK SA_IRQNOMASK is unused, remove it. Signed-off-by: Russell King --- include/asm-arm/signal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/signal.h b/include/asm-arm/signal.h index 760f6e65af05..ced69161917b 100644 --- a/include/asm-arm/signal.h +++ b/include/asm-arm/signal.h @@ -115,7 +115,6 @@ typedef unsigned long sigset_t; #ifdef __KERNEL__ #define SA_TIMER 0x40000000 -#define SA_IRQNOMASK 0x08000000 #endif #include -- cgit v1.2.3 From 63c47c286d062d93e0501d60797274c84a587e97 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 26 Sep 2005 13:01:03 -0700 Subject: [IB] uverbs: Close some exploitable races Al Viro pointed out that the current IB userspace verbs interface allows userspace to cause mischief by closing file descriptors before we're ready, or issuing the same command twice at the same time. This patch closes those races, and fixes other obvious problems such as a module reference leak. Some other interface bogosities will require an ABI change to fix properly, so I'm deferring those fixes until 2.6.15. 
Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 120 +++++++++++++++++++--------------- drivers/infiniband/core/uverbs_main.c | 27 +++++--- include/rdma/ib_verbs.h | 1 - 4 files changed, 86 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b1897bed14ad..cc124344dd2c 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -69,6 +69,7 @@ struct ib_uverbs_event_file { struct ib_uverbs_file { struct kref ref; + struct semaphore mutex; struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e91ebde46481..562445165d2b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -76,8 +76,9 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_uverbs_get_context_resp resp; struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; + struct ib_ucontext *ucontext; int i; - int ret = in_len; + int ret; if (out_len < sizeof resp) return -ENOSPC; @@ -85,45 +86,56 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + down(&file->mutex); + + if (file->ucontext) { + ret = -EINVAL; + goto err; + } + INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - file->ucontext = ibdev->alloc_ucontext(ibdev, &udata); - if (IS_ERR(file->ucontext)) { - ret = PTR_ERR(file->ucontext); - file->ucontext = NULL; - return ret; - } + ucontext = ibdev->alloc_ucontext(ibdev, &udata); + if (IS_ERR(ucontext)) + return PTR_ERR(file->ucontext); - file->ucontext->device = ibdev; - INIT_LIST_HEAD(&file->ucontext->pd_list); - INIT_LIST_HEAD(&file->ucontext->mr_list); - INIT_LIST_HEAD(&file->ucontext->mw_list); - INIT_LIST_HEAD(&file->ucontext->cq_list); - INIT_LIST_HEAD(&file->ucontext->qp_list); - INIT_LIST_HEAD(&file->ucontext->srq_list); - INIT_LIST_HEAD(&file->ucontext->ah_list); - spin_lock_init(&file->ucontext->lock); + ucontext->device = ibdev; + INIT_LIST_HEAD(&ucontext->pd_list); + INIT_LIST_HEAD(&ucontext->mr_list); + INIT_LIST_HEAD(&ucontext->mw_list); + INIT_LIST_HEAD(&ucontext->cq_list); + INIT_LIST_HEAD(&ucontext->qp_list); + INIT_LIST_HEAD(&ucontext->srq_list); + INIT_LIST_HEAD(&ucontext->ah_list); resp.async_fd = file->async_file.fd; for (i = 0; i < file->device->num_comp; ++i) if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + i * sizeof (__u32), - &file->comp_file[i].fd, sizeof (__u32))) - goto err; + &file->comp_file[i].fd, sizeof (__u32))) { + ret = -EFAULT; + goto err_free; + } if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - goto err; + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_free; + } + + file->ucontext = ucontext; + up(&file->mutex); return in_len; -err: - ibdev->dealloc_ucontext(file->ucontext); - file->ucontext = NULL; +err_free: + ibdev->dealloc_ucontext(ucontext); - return -EFAULT; +err: + up(&file->mutex); + return ret; } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, @@ -352,9 +364,9 @@ retry: if (ret) goto err_pd; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->pd_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); memset(&resp, 
0, sizeof resp); resp.pd_handle = uobj->id; @@ -368,9 +380,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); down(&ib_uverbs_idr_mutex); idr_remove(&ib_uverbs_pd_idr, uobj->id); @@ -410,9 +422,9 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); kfree(uobj); @@ -512,9 +524,9 @@ retry: resp.mr_handle = obj->uobject.id; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -527,9 +539,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&obj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); err_unreg: ib_dereg_mr(mr); @@ -570,9 +582,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&memobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); ib_umem_release(file->device->ib_dev, &memobj->umem); kfree(memobj); @@ -647,9 +659,9 @@ retry: if (ret) goto err_cq; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); memset(&resp, 0, sizeof resp); resp.cq_handle = uobj->uobject.id; @@ -664,9 +676,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); down(&ib_uverbs_idr_mutex); idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); @@ -712,9 +724,9 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); spin_lock_irq(&file->comp_file[0].lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { @@ -847,9 +859,9 @@ retry: resp.qp_handle = uobj->uobject.id; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -862,9 +874,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); err_destroy: ib_destroy_qp(qp); @@ -989,9 +1001,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); spin_lock_irq(&file->async_file.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { @@ -1136,9 +1148,9 @@ retry: resp.srq_handle = uobj->uobject.id; - spin_lock_irq(&file->ucontext->lock); + 
down(&file->mutex); list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -1151,9 +1163,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); err_destroy: ib_destroy_srq(srq); @@ -1227,9 +1239,9 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); spin_lock_irq(&file->async_file.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index ce5bdb7af306..12511808de21 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -448,7 +448,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.in_words * 4 != count) return -EINVAL; - if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table)) + if (hdr.command < 0 || + hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || + !uverbs_cmd_table[hdr.command]) return -EINVAL; if (!file->ucontext && @@ -484,27 +486,29 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file = kmalloc(sizeof *file + (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), GFP_KERNEL); - if (!file) - return -ENOMEM; + if (!file) { + ret = -ENOMEM; + goto err; + } file->device = dev; kref_init(&file->ref); + init_MUTEX(&file->mutex); file->ucontext = NULL; + kref_get(&file->ref); ret = ib_uverbs_event_init(&file->async_file, file); if (ret) - goto err; + goto err_kref; file->async_file.is_async = 1; - kref_get(&file->ref); - for (i = 0; i < dev->num_comp; ++i) { + kref_get(&file->ref); ret = ib_uverbs_event_init(&file->comp_file[i], file); if (ret) goto err_async; - kref_get(&file->ref); file->comp_file[i].is_async = 0; } @@ -524,9 +528,16 @@ err_async: ib_uverbs_event_release(&file->async_file); -err: +err_kref: + /* + * One extra kref_put() because we took a reference before the + * event file creation that failed and got us here. + */ + kref_put(&file->ref, ib_uverbs_release_file); kref_put(&file->ref, ib_uverbs_release_file); +err: + module_put(dev->ib_dev->owner); return ret; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e16cf94870f2..e6f4c9e55df7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -665,7 +665,6 @@ struct ib_ucontext { struct list_head qp_list; struct list_head srq_list; struct list_head ah_list; - spinlock_t lock; }; struct ib_uobject { -- cgit v1.2.3 From acd042bb2de50d4e6fb969281a00cc8b8b71e46d Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Mon, 26 Sep 2005 15:06:50 -0700 Subject: [CONNECTOR]: async connector mode. If input message rate from userspace is too high, do not drop them, but try to deliver using work queue allocation. Failing there is some kind of congestion control. It also removes warn_on on this condition, which scares people. Signed-off-by: Evgeniy Polyakov Signed-off-by: David S. 
Miller --- drivers/connector/cn_queue.c | 32 +++++++++++-------- drivers/connector/connector.c | 74 ++++++++++++++++++++++--------------------- include/linux/connector.h | 21 ++++++++---- 3 files changed, 72 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c index 966632182e2d..9f2f00d82917 100644 --- a/drivers/connector/cn_queue.c +++ b/drivers/connector/cn_queue.c @@ -31,16 +31,19 @@ #include #include -static void cn_queue_wrapper(void *data) +void cn_queue_wrapper(void *data) { - struct cn_callback_entry *cbq = data; + struct cn_callback_data *d = data; - cbq->cb->callback(cbq->cb->priv); - cbq->destruct_data(cbq->ddata); - cbq->ddata = NULL; + d->callback(d->callback_priv); + + d->destruct_data(d->ddata); + d->ddata = NULL; + + kfree(d->free); } -static struct cn_callback_entry *cn_queue_alloc_callback_entry(struct cn_callback *cb) +static struct cn_callback_entry *cn_queue_alloc_callback_entry(char *name, struct cb_id *id, void (*callback)(void *)) { struct cn_callback_entry *cbq; @@ -50,8 +53,11 @@ static struct cn_callback_entry *cn_queue_alloc_callback_entry(struct cn_callbac return NULL; } - cbq->cb = cb; - INIT_WORK(&cbq->work, &cn_queue_wrapper, cbq); + snprintf(cbq->id.name, sizeof(cbq->id.name), "%s", name); + memcpy(&cbq->id.id, id, sizeof(struct cb_id)); + cbq->data.callback = callback; + + INIT_WORK(&cbq->work, &cn_queue_wrapper, &cbq->data); return cbq; } @@ -68,12 +74,12 @@ int cn_cb_equal(struct cb_id *i1, struct cb_id *i2) return ((i1->idx == i2->idx) && (i1->val == i2->val)); } -int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb) +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)) { struct cn_callback_entry *cbq, *__cbq; int found = 0; - cbq = cn_queue_alloc_callback_entry(cb); + cbq = cn_queue_alloc_callback_entry(name, id, callback); if (!cbq) return -ENOMEM; @@ -82,7 +88,7 @@ int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb) spin_lock_bh(&dev->queue_lock); list_for_each_entry(__cbq, &dev->queue_list, callback_entry) { - if (cn_cb_equal(&__cbq->cb->id, &cb->id)) { + if (cn_cb_equal(&__cbq->id.id, id)) { found = 1; break; } @@ -99,7 +105,7 @@ int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb) cbq->nls = dev->nls; cbq->seq = 0; - cbq->group = cbq->cb->id.idx; + cbq->group = cbq->id.id.idx; return 0; } @@ -111,7 +117,7 @@ void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id) spin_lock_bh(&dev->queue_lock); list_for_each_entry_safe(cbq, n, &dev->queue_list, callback_entry) { - if (cn_cb_equal(&cbq->cb->id, id)) { + if (cn_cb_equal(&cbq->id.id, id)) { list_del(&cbq->callback_entry); found = 1; break; diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index aaf6d468a8b9..bb0b3a8de14b 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -84,7 +84,7 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask) spin_lock_bh(&dev->cbdev->queue_lock); list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) { - if (cn_cb_equal(&__cbq->cb->id, &msg->id)) { + if (cn_cb_equal(&__cbq->id.id, &msg->id)) { found = 1; group = __cbq->group; } @@ -127,42 +127,56 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v { struct cn_callback_entry *__cbq; struct cn_dev *dev = &cdev; - int found = 0; + int err = -ENODEV; spin_lock_bh(&dev->cbdev->queue_lock); 
list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) { - if (cn_cb_equal(&__cbq->cb->id, &msg->id)) { - /* - * Let's scream if there is some magic and the - * data will arrive asynchronously here. - * [i.e. netlink messages will be queued]. - * After the first warning I will fix it - * quickly, but now I think it is - * impossible. --zbr (2004_04_27). - */ + if (cn_cb_equal(&__cbq->id.id, &msg->id)) { if (likely(!test_bit(0, &__cbq->work.pending) && - __cbq->ddata == NULL)) { - __cbq->cb->priv = msg; + __cbq->data.ddata == NULL)) { + __cbq->data.callback_priv = msg; - __cbq->ddata = data; - __cbq->destruct_data = destruct_data; + __cbq->data.ddata = data; + __cbq->data.destruct_data = destruct_data; if (queue_work(dev->cbdev->cn_queue, &__cbq->work)) - found = 1; + err = 0; } else { - printk("%s: cbq->data=%p, " - "work->pending=%08lx.\n", - __func__, __cbq->ddata, - __cbq->work.pending); - WARN_ON(1); + struct work_struct *w; + struct cn_callback_data *d; + + w = kzalloc(sizeof(*w) + sizeof(*d), GFP_ATOMIC); + if (w) { + d = (struct cn_callback_data *)(w+1); + + d->callback_priv = msg; + d->callback = __cbq->data.callback; + d->ddata = data; + d->destruct_data = destruct_data; + d->free = w; + + INIT_LIST_HEAD(&w->entry); + w->pending = 0; + w->func = &cn_queue_wrapper; + w->data = d; + init_timer(&w->timer); + + if (queue_work(dev->cbdev->cn_queue, w)) + err = 0; + else { + kfree(w); + err = -EINVAL; + } + } else + err = -ENOMEM; } break; } } spin_unlock_bh(&dev->cbdev->queue_lock); - return found ? 0 : -ENODEV; + return err; } /* @@ -291,22 +305,10 @@ int cn_add_callback(struct cb_id *id, char *name, void (*callback)(void *)) { int err; struct cn_dev *dev = &cdev; - struct cn_callback *cb; - - cb = kzalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - scnprintf(cb->name, sizeof(cb->name), "%s", name); - memcpy(&cb->id, id, sizeof(cb->id)); - cb->callback = callback; - - err = cn_queue_add_callback(dev->cbdev, cb); - if (err) { - kfree(cb); + err = cn_queue_add_callback(dev->cbdev, name, id, callback); + if (err) return err; - } cn_notify(id, 0); diff --git a/include/linux/connector.h b/include/linux/connector.h index 96de26301f84..86d4b0a81713 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -104,12 +104,19 @@ struct cn_queue_dev { struct sock *nls; }; -struct cn_callback { +struct cn_callback_id { unsigned char name[CN_CBQ_NAMELEN]; - struct cb_id id; +}; + +struct cn_callback_data { + void (*destruct_data) (void *); + void *ddata; + + void *callback_priv; void (*callback) (void *); - void *priv; + + void *free; }; struct cn_callback_entry { @@ -118,8 +125,8 @@ struct cn_callback_entry { struct work_struct work; struct cn_queue_dev *pdev; - void (*destruct_data) (void *); - void *ddata; + struct cn_callback_id id; + struct cn_callback_data data; int seq, group; struct sock *nls; @@ -144,7 +151,7 @@ int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); int cn_netlink_send(struct cn_msg *, u32, int); -int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb); +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *); @@ -152,6 +159,8 @@ void cn_queue_free_dev(struct cn_queue_dev *dev); int cn_cb_equal(struct cb_id *, struct cb_id *); +void cn_queue_wrapper(void *data); + 
extern int cn_already_initialized; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 188bab3ae0ed164bc18f98be932512d777dd038b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 26 Sep 2005 15:25:11 -0700 Subject: [NETFILTER]: Fix invalid module autoloading by splitting iptable_nat When you've enabled conntrack and NAT as a module (standard case in all distributions), and you've also enabled the new conntrack netlink interface, loading ip_conntrack_netlink.ko will auto-load iptable_nat.ko. This causes a huge performance penalty, since for every packet you iterate the nat code, even if you don't want it. This patch splits iptable_nat.ko into the NAT core (ip_nat.ko) and the iptables frontend (iptable_nat.ko). Threfore, ip_conntrack_netlink.ko will only pull ip_nat.ko, but not the frontend. ip_nat.ko will "only" allocate some resources, but not affect runtime performance. This separation is also a nice step in anticipation of new packet filters (nf-hipac, ipset, pkttables) being able to use the NAT core. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat_core.h | 12 +++++----- net/ipv4/netfilter/Makefile | 5 +++-- net/ipv4/netfilter/ip_nat_core.c | 35 ++++++++++++++++++++---------- net/ipv4/netfilter/ip_nat_helper.c | 4 ++++ net/ipv4/netfilter/ip_nat_standalone.c | 25 ++++----------------- 5 files changed, 40 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h index 3b50eb91f007..30db23f06b03 100644 --- a/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -5,16 +5,14 @@ /* This header used to share core functionality between the standalone NAT module, and the compatibility layer's use of NAT for masquerading. 
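
The core/front-end split described above boils down to a standard kernel pattern: a "library" module that only exports symbols and allocates state, plus a thin consumer whose symbol references create the modprobe dependency. The sketch below uses invented example_* names purely for illustration; it is not the actual ip_nat/iptable_nat code.

/* --- example_core.c: the library module, analogous to ip_nat.ko --- */
#include <linux/module.h>

int example_core_translate(int pkt)
{
	/* placeholder for the real per-packet translation work */
	return pkt;
}
EXPORT_SYMBOL_GPL(example_core_translate);

static int __init example_core_init(void)
{
	return 0;	/* set up tables only; no netfilter hooks registered here */
}

static void __exit example_core_exit(void)
{
}

module_init(example_core_init);
module_exit(example_core_exit);
MODULE_LICENSE("GPL");

/* --- example_frontend.c: the consumer, analogous to iptable_nat.ko --- */
#include <linux/module.h>

extern int example_core_translate(int pkt);

static int __init example_frontend_init(void)
{
	/*
	 * Referencing the exported symbol is what creates the module
	 * dependency: loading the front end pulls in the core, while a
	 * netlink-only user can load the core alone and never pay the
	 * per-packet cost of the front end's hooks.
	 */
	return example_core_translate(0);
}

static void __exit example_frontend_exit(void)
{
}

module_init(example_frontend_init);
module_exit(example_frontend_exit);
MODULE_LICENSE("GPL");
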
*/ -extern int ip_nat_init(void); -extern void ip_nat_cleanup(void); -extern unsigned int nat_packet(struct ip_conntrack *ct, +extern unsigned int ip_nat_packet(struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo, unsigned int hooknum, struct sk_buff **pskb); -extern int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir); +extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir); #endif /* _IP_NAT_CORE_H */ diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 89002533f2a2..dab4b58dd31e 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,7 +4,8 @@ # objects for the standalone - connection tracking / NAT ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o -iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o @@ -40,7 +41,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index c3ea891d38e7..c5e3abd24672 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -74,12 +74,14 @@ ip_nat_proto_find_get(u_int8_t protonum) return p; } +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); void ip_nat_proto_put(struct ip_nat_protocol *p) { module_put(p->me); } +EXPORT_SYMBOL_GPL(ip_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -111,6 +113,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) return csum_fold(csum_partial((char *)diffs, sizeof(diffs), oldcheck^0xFFFF)); } +EXPORT_SYMBOL(ip_nat_cheat_check); /* Is this tuple already taken? (not by us) */ int @@ -127,6 +130,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, invert_tuplepr(&reply, tuple); return ip_conntrack_tuple_taken(&reply, ignored_conntrack); } +EXPORT_SYMBOL(ip_nat_used_tuple); /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ @@ -347,6 +351,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, return NF_ACCEPT; } +EXPORT_SYMBOL(ip_nat_setup_info); /* Returns true if succeeded. */ static int @@ -387,10 +392,10 @@ manip_pkt(u_int16_t proto, } /* Do packet manipulations according to ip_nat_setup_info. 
*/ -unsigned int nat_packet(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +unsigned int ip_nat_packet(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -417,12 +422,13 @@ unsigned int nat_packet(struct ip_conntrack *ct, } return NF_ACCEPT; } +EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir) { struct { struct icmphdr icmp; @@ -509,6 +515,7 @@ int icmp_reply_translation(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation); /* Protocol registration. */ int ip_nat_protocol_register(struct ip_nat_protocol *proto) @@ -525,6 +532,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto) write_unlock_bh(&ip_nat_lock); return ret; } +EXPORT_SYMBOL(ip_nat_protocol_register); /* Noone stores the protocol anywhere; simply delete it. */ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) @@ -536,6 +544,7 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) /* Someone could be still looking at the proto in a bh. */ synchronize_net(); } +EXPORT_SYMBOL(ip_nat_protocol_unregister); #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) @@ -582,7 +591,7 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); #endif -int __init ip_nat_init(void) +static int __init ip_nat_init(void) { size_t i; @@ -624,10 +633,14 @@ static int clean_nat(struct ip_conntrack *i, void *data) return 0; } -/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ -void ip_nat_cleanup(void) +static void __exit ip_nat_cleanup(void) { ip_ct_iterate_cleanup(&clean_nat, NULL); ip_conntrack_destroyed = NULL; vfree(bysource); } + +MODULE_LICENSE("GPL"); + +module_init(ip_nat_init); +module_exit(ip_nat_cleanup); diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index d2dd5d313556..5d506e0564d5 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -199,6 +199,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, } return 1; } +EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX @@ -256,6 +257,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL(ip_nat_mangle_udp_packet); /* Adjust one found SACK option including checksum correction */ static void @@ -399,6 +401,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL(ip_nat_seq_adjust); /* Setup NAT on this expected conntrack so it follows master. 
*/ /* If we fail to get a free NAT slot, we'll get dropped on confirm */ @@ -425,3 +428,4 @@ void ip_nat_follow_master(struct ip_conntrack *ct, /* hook doesn't matter, but it has to do destination manip */ ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } +EXPORT_SYMBOL(ip_nat_follow_master); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 0ff368b131f6..30cd4e18c129 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -108,8 +108,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) + if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, + CTINFO2DIR(ctinfo))) return NF_DROP; else return NF_ACCEPT; @@ -152,7 +152,7 @@ ip_nat_fn(unsigned int hooknum, } IP_NF_ASSERT(info); - return nat_packet(ct, ctinfo, hooknum, pskb); + return ip_nat_packet(ct, ctinfo, hooknum, pskb); } static unsigned int @@ -325,15 +325,10 @@ static int init_or_cleanup(int init) printk("ip_nat_init: can't setup rules.\n"); goto cleanup_nothing; } - ret = ip_nat_init(); - if (ret < 0) { - printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_rule_init; - } ret = nf_register_hook(&ip_nat_in_ops); if (ret < 0) { printk("ip_nat_init: can't register in hook.\n"); - goto cleanup_nat; + goto cleanup_rule_init; } ret = nf_register_hook(&ip_nat_out_ops); if (ret < 0) { @@ -374,8 +369,6 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_nat_out_ops); cleanup_inops: nf_unregister_hook(&ip_nat_in_ops); - cleanup_nat: - ip_nat_cleanup(); cleanup_rule_init: ip_nat_rule_cleanup(); cleanup_nothing: @@ -395,14 +388,4 @@ static void __exit fini(void) module_init(init); module_exit(fini); -EXPORT_SYMBOL(ip_nat_setup_info); -EXPORT_SYMBOL(ip_nat_protocol_register); -EXPORT_SYMBOL(ip_nat_protocol_unregister); -EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); -EXPORT_SYMBOL_GPL(ip_nat_proto_put); -EXPORT_SYMBOL(ip_nat_cheat_check); -EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); -EXPORT_SYMBOL(ip_nat_mangle_udp_packet); -EXPORT_SYMBOL(ip_nat_used_tuple); -EXPORT_SYMBOL(ip_nat_follow_master); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c4a3e0a529ab3e65223e81681c7c6b1bc188fa58 Mon Sep 17 00:00:00 2001 From: "Bagalkote, Sreenivas" Date: Tue, 20 Sep 2005 17:46:58 -0400 Subject: [SCSI] MegaRAID SAS RAID: new driver Signed-off-by: Sreenivas Bagalkote Signed-off-by: James Bottomley --- drivers/scsi/Makefile | 1 + drivers/scsi/megaraid/Kconfig.megaraid | 9 + drivers/scsi/megaraid/Makefile | 1 + drivers/scsi/megaraid/megaraid_sas.c | 2805 ++++++++++++++++++++++++++++++++ drivers/scsi/megaraid/megaraid_sas.h | 1142 +++++++++++++ include/linux/pci_ids.h | 2 + 6 files changed, 3960 insertions(+) create mode 100644 drivers/scsi/megaraid/megaraid_sas.c create mode 100644 drivers/scsi/megaraid/megaraid_sas.h (limited to 'include') diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 1e4edbdf2730..48529d180ca8 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_SCSI_DC395x) += dc395x.o obj-$(CONFIG_SCSI_DC390T) += tmscsim.o obj-$(CONFIG_MEGARAID_LEGACY) += megaraid.o obj-$(CONFIG_MEGARAID_NEWGEN) += megaraid/ +obj-$(CONFIG_MEGARAID_SAS) += megaraid/ obj-$(CONFIG_SCSI_ACARD) += atp870u.o obj-$(CONFIG_SCSI_SUNESP) += esp.o obj-$(CONFIG_SCSI_GDTH) += gdth.o diff --git a/drivers/scsi/megaraid/Kconfig.megaraid 
b/drivers/scsi/megaraid/Kconfig.megaraid index 917d591d90b2..7363e12663ac 100644 --- a/drivers/scsi/megaraid/Kconfig.megaraid +++ b/drivers/scsi/megaraid/Kconfig.megaraid @@ -76,3 +76,12 @@ config MEGARAID_LEGACY To compile this driver as a module, choose M here: the module will be called megaraid endif + +config MEGARAID_SAS + tristate "LSI Logic MegaRAID SAS RAID Module" + depends on PCI && SCSI + help + Module for LSI Logic's SAS based RAID controllers. + To compile this driver as a module, choose 'm' here. + Module will be called megaraid_sas + diff --git a/drivers/scsi/megaraid/Makefile b/drivers/scsi/megaraid/Makefile index 6dd99f275722..f469915b97c3 100644 --- a/drivers/scsi/megaraid/Makefile +++ b/drivers/scsi/megaraid/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_MEGARAID_MM) += megaraid_mm.o obj-$(CONFIG_MEGARAID_MAILBOX) += megaraid_mbox.o +obj-$(CONFIG_MEGARAID_SAS) += megaraid_sas.o diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c new file mode 100644 index 000000000000..1b3148e842af --- /dev/null +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -0,0 +1,2805 @@ +/* + * + * Linux MegaRAID driver for SAS based RAID controllers + * + * Copyright (c) 2003-2005 LSI Logic Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * FILE : megaraid_sas.c + * Version : v00.00.02.00-rc4 + * + * Authors: + * Sreenivas Bagalkote + * Sumant Patro + * + * List of supported controllers + * + * OEM Product Name VID DID SSVID SSID + * --- ------------ --- --- ---- ---- + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "megaraid_sas.h" + +MODULE_LICENSE("GPL"); +MODULE_VERSION(MEGASAS_VERSION); +MODULE_AUTHOR("sreenivas.bagalkote@lsil.com"); +MODULE_DESCRIPTION("LSI Logic MegaRAID SAS Driver"); + +/* + * PCI ID table for all supported controllers + */ +static struct pci_device_id megasas_pci_table[] = { + + { + PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS1064R, + PCI_ANY_ID, + PCI_ANY_ID, + }, + { + PCI_VENDOR_ID_DELL, + PCI_DEVICE_ID_DELL_PERC5, + PCI_ANY_ID, + PCI_ANY_ID, + }, + {0} /* Terminating entry */ +}; + +MODULE_DEVICE_TABLE(pci, megasas_pci_table); + +static int megasas_mgmt_majorno; +static struct megasas_mgmt_info megasas_mgmt_info; +static struct fasync_struct *megasas_async_queue; +static DECLARE_MUTEX(megasas_async_queue_mutex); + +/** + * megasas_get_cmd - Get a command from the free pool + * @instance: Adapter soft state + * + * Returns a free command from the pool + */ +static inline struct megasas_cmd *megasas_get_cmd(struct megasas_instance + *instance) +{ + unsigned long flags; + struct megasas_cmd *cmd = NULL; + + spin_lock_irqsave(&instance->cmd_pool_lock, flags); + + if (!list_empty(&instance->cmd_pool)) { + cmd = list_entry((&instance->cmd_pool)->next, + struct megasas_cmd, list); + list_del_init(&cmd->list); + } else { + printk(KERN_ERR "megasas: Command pool empty!\n"); + } + + spin_unlock_irqrestore(&instance->cmd_pool_lock, flags); + return cmd; +} + +/** + * megasas_return_cmd - Return a cmd to free command pool + * @instance: Adapter soft state + * @cmd: Command packet to be returned to free command pool + */ +static inline void +megasas_return_cmd(struct megasas_instance 
*instance, struct megasas_cmd *cmd) +{ + unsigned long flags; + + spin_lock_irqsave(&instance->cmd_pool_lock, flags); + + cmd->scmd = NULL; + list_add_tail(&cmd->list, &instance->cmd_pool); + + spin_unlock_irqrestore(&instance->cmd_pool_lock, flags); +} + +/** + * megasas_enable_intr - Enables interrupts + * @regs: MFI register set + */ +static inline void +megasas_enable_intr(struct megasas_register_set __iomem * regs) +{ + writel(1, &(regs)->outbound_intr_mask); + + /* Dummy readl to force pci flush */ + readl(®s->outbound_intr_mask); +} + +/** + * megasas_disable_intr - Disables interrupts + * @regs: MFI register set + */ +static inline void +megasas_disable_intr(struct megasas_register_set __iomem * regs) +{ + u32 mask = readl(®s->outbound_intr_mask) & (~0x00000001); + writel(mask, ®s->outbound_intr_mask); + + /* Dummy readl to force pci flush */ + readl(®s->outbound_intr_mask); +} + +/** + * megasas_issue_polled - Issues a polling command + * @instance: Adapter soft state + * @cmd: Command packet to be issued + * + * For polling, MFI requires the cmd_status to be set to 0xFF before posting. + */ +static int +megasas_issue_polled(struct megasas_instance *instance, struct megasas_cmd *cmd) +{ + int i; + u32 msecs = MFI_POLL_TIMEOUT_SECS * 1000; + + struct megasas_header *frame_hdr = &cmd->frame->hdr; + + frame_hdr->cmd_status = 0xFF; + frame_hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; + + /* + * Issue the frame using inbound queue port + */ + writel(cmd->frame_phys_addr >> 3, + &instance->reg_set->inbound_queue_port); + + /* + * Wait for cmd_status to change + */ + for (i = 0; (i < msecs) && (frame_hdr->cmd_status == 0xff); i++) { + rmb(); + msleep(1); + } + + if (frame_hdr->cmd_status == 0xff) + return -ETIME; + + return 0; +} + +/** + * megasas_issue_blocked_cmd - Synchronous wrapper around regular FW cmds + * @instance: Adapter soft state + * @cmd: Command to be issued + * + * This function waits on an event for the command to be returned from ISR. + * Used to issue ioctl commands. + */ +static int +megasas_issue_blocked_cmd(struct megasas_instance *instance, + struct megasas_cmd *cmd) +{ + cmd->cmd_status = ENODATA; + + writel(cmd->frame_phys_addr >> 3, + &instance->reg_set->inbound_queue_port); + + wait_event(instance->int_cmd_wait_q, (cmd->cmd_status != ENODATA)); + + return 0; +} + +/** + * megasas_issue_blocked_abort_cmd - Aborts previously issued cmd + * @instance: Adapter soft state + * @cmd_to_abort: Previously issued cmd to be aborted + * + * MFI firmware can abort previously issued AEN comamnd (automatic event + * notification). The megasas_issue_blocked_abort_cmd() issues such abort + * cmd and blocks till it is completed. 
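
The handshake behind megasas_issue_blocked_cmd() and the abort variant above is the usual wait_event()/wake_up() pairing; the completion side appears later in megasas_complete_int_cmd() and megasas_complete_abort(). A minimal sketch, with invented toy_* names and assuming the driver's ENODATA sentinel, looks like this:

/*
 * Simplified sketch (not the driver's exact code) of the blocked-command
 * handshake: the issuer sleeps on a wait queue until the interrupt path
 * overwrites the sentinel status and wakes it.
 */
#include <linux/wait.h>
#include <linux/errno.h>

static DECLARE_WAIT_QUEUE_HEAD(int_cmd_wait_q);

struct toy_cmd {
	int cmd_status;		/* ENODATA while the command is in flight */
};

/* process context: post the frame to hardware, then block */
static int toy_issue_blocked(struct toy_cmd *cmd)
{
	cmd->cmd_status = ENODATA;
	/* ... write the frame address to the inbound queue port here ... */
	wait_event(int_cmd_wait_q, cmd->cmd_status != ENODATA);
	return 0;
}

/* interrupt context: firmware reported a status for this command */
static void toy_complete(struct toy_cmd *cmd, int fw_status)
{
	cmd->cmd_status = fw_status;
	wake_up(&int_cmd_wait_q);
}
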
+ */ +static int +megasas_issue_blocked_abort_cmd(struct megasas_instance *instance, + struct megasas_cmd *cmd_to_abort) +{ + struct megasas_cmd *cmd; + struct megasas_abort_frame *abort_fr; + + cmd = megasas_get_cmd(instance); + + if (!cmd) + return -1; + + abort_fr = &cmd->frame->abort; + + /* + * Prepare and issue the abort frame + */ + abort_fr->cmd = MFI_CMD_ABORT; + abort_fr->cmd_status = 0xFF; + abort_fr->flags = 0; + abort_fr->abort_context = cmd_to_abort->index; + abort_fr->abort_mfi_phys_addr_lo = cmd_to_abort->frame_phys_addr; + abort_fr->abort_mfi_phys_addr_hi = 0; + + cmd->sync_cmd = 1; + cmd->cmd_status = 0xFF; + + writel(cmd->frame_phys_addr >> 3, + &instance->reg_set->inbound_queue_port); + + /* + * Wait for this cmd to complete + */ + wait_event(instance->abort_cmd_wait_q, (cmd->cmd_status != 0xFF)); + + megasas_return_cmd(instance, cmd); + return 0; +} + +/** + * megasas_make_sgl32 - Prepares 32-bit SGL + * @instance: Adapter soft state + * @scp: SCSI command from the mid-layer + * @mfi_sgl: SGL to be filled in + * + * If successful, this function returns the number of SG elements. Otherwise, + * it returnes -1. + */ +static inline int +megasas_make_sgl32(struct megasas_instance *instance, struct scsi_cmnd *scp, + union megasas_sgl *mfi_sgl) +{ + int i; + int sge_count; + struct scatterlist *os_sgl; + + /* + * Return 0 if there is no data transfer + */ + if (!scp->request_buffer || !scp->request_bufflen) + return 0; + + if (!scp->use_sg) { + mfi_sgl->sge32[0].phys_addr = pci_map_single(instance->pdev, + scp-> + request_buffer, + scp-> + request_bufflen, + scp-> + sc_data_direction); + mfi_sgl->sge32[0].length = scp->request_bufflen; + + return 1; + } + + os_sgl = (struct scatterlist *)scp->request_buffer; + sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg, + scp->sc_data_direction); + + for (i = 0; i < sge_count; i++, os_sgl++) { + mfi_sgl->sge32[i].length = sg_dma_len(os_sgl); + mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl); + } + + return sge_count; +} + +/** + * megasas_make_sgl64 - Prepares 64-bit SGL + * @instance: Adapter soft state + * @scp: SCSI command from the mid-layer + * @mfi_sgl: SGL to be filled in + * + * If successful, this function returns the number of SG elements. Otherwise, + * it returnes -1. + */ +static inline int +megasas_make_sgl64(struct megasas_instance *instance, struct scsi_cmnd *scp, + union megasas_sgl *mfi_sgl) +{ + int i; + int sge_count; + struct scatterlist *os_sgl; + + /* + * Return 0 if there is no data transfer + */ + if (!scp->request_buffer || !scp->request_bufflen) + return 0; + + if (!scp->use_sg) { + mfi_sgl->sge64[0].phys_addr = pci_map_single(instance->pdev, + scp-> + request_buffer, + scp-> + request_bufflen, + scp-> + sc_data_direction); + + mfi_sgl->sge64[0].length = scp->request_bufflen; + + return 1; + } + + os_sgl = (struct scatterlist *)scp->request_buffer; + sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg, + scp->sc_data_direction); + + for (i = 0; i < sge_count; i++, os_sgl++) { + mfi_sgl->sge64[i].length = sg_dma_len(os_sgl); + mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl); + } + + return sge_count; +} + +/** + * megasas_build_dcdb - Prepares a direct cdb (DCDB) command + * @instance: Adapter soft state + * @scp: SCSI command + * @cmd: Command to be prepared in + * + * This function prepares CDB commands. These are typcially pass-through + * commands to the devices. 
+ */ +static inline int +megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp, + struct megasas_cmd *cmd) +{ + u32 sge_sz; + int sge_bytes; + u32 is_logical; + u32 device_id; + u16 flags = 0; + struct megasas_pthru_frame *pthru; + + is_logical = MEGASAS_IS_LOGICAL(scp); + device_id = MEGASAS_DEV_INDEX(instance, scp); + pthru = (struct megasas_pthru_frame *)cmd->frame; + + if (scp->sc_data_direction == PCI_DMA_TODEVICE) + flags = MFI_FRAME_DIR_WRITE; + else if (scp->sc_data_direction == PCI_DMA_FROMDEVICE) + flags = MFI_FRAME_DIR_READ; + else if (scp->sc_data_direction == PCI_DMA_NONE) + flags = MFI_FRAME_DIR_NONE; + + /* + * Prepare the DCDB frame + */ + pthru->cmd = (is_logical) ? MFI_CMD_LD_SCSI_IO : MFI_CMD_PD_SCSI_IO; + pthru->cmd_status = 0x0; + pthru->scsi_status = 0x0; + pthru->target_id = device_id; + pthru->lun = scp->device->lun; + pthru->cdb_len = scp->cmd_len; + pthru->timeout = 0; + pthru->flags = flags; + pthru->data_xfer_len = scp->request_bufflen; + + memcpy(pthru->cdb, scp->cmnd, scp->cmd_len); + + /* + * Construct SGL + */ + sge_sz = (IS_DMA64) ? sizeof(struct megasas_sge64) : + sizeof(struct megasas_sge32); + + if (IS_DMA64) { + pthru->flags |= MFI_FRAME_SGL64; + pthru->sge_count = megasas_make_sgl64(instance, scp, + &pthru->sgl); + } else + pthru->sge_count = megasas_make_sgl32(instance, scp, + &pthru->sgl); + + /* + * Sense info specific + */ + pthru->sense_len = SCSI_SENSE_BUFFERSIZE; + pthru->sense_buf_phys_addr_hi = 0; + pthru->sense_buf_phys_addr_lo = cmd->sense_phys_addr; + + sge_bytes = sge_sz * pthru->sge_count; + + /* + * Compute the total number of frames this command consumes. FW uses + * this number to pull sufficient number of frames from host memory. + */ + cmd->frame_count = (sge_bytes / MEGAMFI_FRAME_SIZE) + + ((sge_bytes % MEGAMFI_FRAME_SIZE) ? 1 : 0) + 1; + + if (cmd->frame_count > 7) + cmd->frame_count = 8; + + return cmd->frame_count; +} + +/** + * megasas_build_ldio - Prepares IOs to logical devices + * @instance: Adapter soft state + * @scp: SCSI command + * @cmd: Command to to be prepared + * + * Frames (and accompanying SGLs) for regular SCSI IOs use this function. + */ +static inline int +megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp, + struct megasas_cmd *cmd) +{ + u32 sge_sz; + int sge_bytes; + u32 device_id; + u8 sc = scp->cmnd[0]; + u16 flags = 0; + struct megasas_io_frame *ldio; + + device_id = MEGASAS_DEV_INDEX(instance, scp); + ldio = (struct megasas_io_frame *)cmd->frame; + + if (scp->sc_data_direction == PCI_DMA_TODEVICE) + flags = MFI_FRAME_DIR_WRITE; + else if (scp->sc_data_direction == PCI_DMA_FROMDEVICE) + flags = MFI_FRAME_DIR_READ; + + /* + * Preare the Logical IO frame: 2nd bit is zero for all read cmds + */ + ldio->cmd = (sc & 0x02) ? MFI_CMD_LD_WRITE : MFI_CMD_LD_READ; + ldio->cmd_status = 0x0; + ldio->scsi_status = 0x0; + ldio->target_id = device_id; + ldio->timeout = 0; + ldio->reserved_0 = 0; + ldio->pad_0 = 0; + ldio->flags = flags; + ldio->start_lba_hi = 0; + ldio->access_byte = (scp->cmd_len != 6) ? 
scp->cmnd[1] : 0; + + /* + * 6-byte READ(0x08) or WRITE(0x0A) cdb + */ + if (scp->cmd_len == 6) { + ldio->lba_count = (u32) scp->cmnd[4]; + ldio->start_lba_lo = ((u32) scp->cmnd[1] << 16) | + ((u32) scp->cmnd[2] << 8) | (u32) scp->cmnd[3]; + + ldio->start_lba_lo &= 0x1FFFFF; + } + + /* + * 10-byte READ(0x28) or WRITE(0x2A) cdb + */ + else if (scp->cmd_len == 10) { + ldio->lba_count = (u32) scp->cmnd[8] | + ((u32) scp->cmnd[7] << 8); + ldio->start_lba_lo = ((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + } + + /* + * 12-byte READ(0xA8) or WRITE(0xAA) cdb + */ + else if (scp->cmd_len == 12) { + ldio->lba_count = ((u32) scp->cmnd[6] << 24) | + ((u32) scp->cmnd[7] << 16) | + ((u32) scp->cmnd[8] << 8) | (u32) scp->cmnd[9]; + + ldio->start_lba_lo = ((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + } + + /* + * 16-byte READ(0x88) or WRITE(0x8A) cdb + */ + else if (scp->cmd_len == 16) { + ldio->lba_count = ((u32) scp->cmnd[10] << 24) | + ((u32) scp->cmnd[11] << 16) | + ((u32) scp->cmnd[12] << 8) | (u32) scp->cmnd[13]; + + ldio->start_lba_lo = ((u32) scp->cmnd[6] << 24) | + ((u32) scp->cmnd[7] << 16) | + ((u32) scp->cmnd[8] << 8) | (u32) scp->cmnd[9]; + + ldio->start_lba_hi = ((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + + } + + /* + * Construct SGL + */ + sge_sz = (IS_DMA64) ? sizeof(struct megasas_sge64) : + sizeof(struct megasas_sge32); + + if (IS_DMA64) { + ldio->flags |= MFI_FRAME_SGL64; + ldio->sge_count = megasas_make_sgl64(instance, scp, &ldio->sgl); + } else + ldio->sge_count = megasas_make_sgl32(instance, scp, &ldio->sgl); + + /* + * Sense info specific + */ + ldio->sense_len = SCSI_SENSE_BUFFERSIZE; + ldio->sense_buf_phys_addr_hi = 0; + ldio->sense_buf_phys_addr_lo = cmd->sense_phys_addr; + + sge_bytes = sge_sz * ldio->sge_count; + + cmd->frame_count = (sge_bytes / MEGAMFI_FRAME_SIZE) + + ((sge_bytes % MEGAMFI_FRAME_SIZE) ? 1 : 0) + 1; + + if (cmd->frame_count > 7) + cmd->frame_count = 8; + + return cmd->frame_count; +} + +/** + * megasas_build_cmd - Prepares a command packet + * @instance: Adapter soft state + * @scp: SCSI command + * @frame_count: [OUT] Number of frames used to prepare this command + */ +static inline struct megasas_cmd *megasas_build_cmd(struct megasas_instance + *instance, + struct scsi_cmnd *scp, + int *frame_count) +{ + u32 logical_cmd; + struct megasas_cmd *cmd; + + /* + * Find out if this is logical or physical drive command. 
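
The CDB decoding in megasas_build_ldio() and the frame-count arithmetic shared with megasas_build_dcdb() above can be checked with a small stand-alone program; the sample CDB and SGE count below are invented for illustration and are not taken from the driver:

/* Host-side illustration only, not driver code. */
#include <stdio.h>
#include <stdint.h>

#define MEGAMFI_FRAME_SIZE 64	/* one MFI frame, as used above */

int main(void)
{
	/* READ(10): opcode 0x28, LBA 0x00001000, transfer length 8 blocks */
	uint8_t cdb[10] = { 0x28, 0, 0x00, 0x00, 0x10, 0x00, 0, 0x00, 0x08, 0 };
	uint32_t start_lba, lba_count;
	int sge_count = 10;	/* say, ten 64-bit SGEs of 16 bytes each */
	int sge_sz = 16;
	int sge_bytes, frame_count;

	start_lba = ((uint32_t)cdb[2] << 24) | ((uint32_t)cdb[3] << 16) |
		    ((uint32_t)cdb[4] << 8)  |  (uint32_t)cdb[5];
	lba_count = ((uint32_t)cdb[7] << 8) | (uint32_t)cdb[8];

	/* same formula as the driver: SGL frames plus one MFI frame */
	sge_bytes = sge_sz * sge_count;
	frame_count = sge_bytes / MEGAMFI_FRAME_SIZE +
		      (sge_bytes % MEGAMFI_FRAME_SIZE ? 1 : 0) + 1;
	if (frame_count > 7)
		frame_count = 8;

	/* prints: lba=4096 count=8 frames=4 */
	printf("lba=%u count=%u frames=%d\n", start_lba, lba_count, frame_count);
	return 0;
}
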
+ */ + logical_cmd = MEGASAS_IS_LOGICAL(scp); + + /* + * Logical drive command + */ + if (logical_cmd) { + + if (scp->device->id >= MEGASAS_MAX_LD) { + scp->result = DID_BAD_TARGET << 16; + return NULL; + } + + switch (scp->cmnd[0]) { + + case READ_10: + case WRITE_10: + case READ_12: + case WRITE_12: + case READ_6: + case WRITE_6: + case READ_16: + case WRITE_16: + /* + * Fail for LUN > 0 + */ + if (scp->device->lun) { + scp->result = DID_BAD_TARGET << 16; + return NULL; + } + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + scp->result = DID_IMM_RETRY << 16; + return NULL; + } + + *frame_count = megasas_build_ldio(instance, scp, cmd); + + if (!(*frame_count)) { + megasas_return_cmd(instance, cmd); + return NULL; + } + + return cmd; + + default: + /* + * Fail for LUN > 0 + */ + if (scp->device->lun) { + scp->result = DID_BAD_TARGET << 16; + return NULL; + } + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + scp->result = DID_IMM_RETRY << 16; + return NULL; + } + + *frame_count = megasas_build_dcdb(instance, scp, cmd); + + if (!(*frame_count)) { + megasas_return_cmd(instance, cmd); + return NULL; + } + + return cmd; + } + } else { + cmd = megasas_get_cmd(instance); + + if (!cmd) { + scp->result = DID_IMM_RETRY << 16; + return NULL; + } + + *frame_count = megasas_build_dcdb(instance, scp, cmd); + + if (!(*frame_count)) { + megasas_return_cmd(instance, cmd); + return NULL; + } + + return cmd; + } + + return NULL; +} + +/** + * megasas_queue_command - Queue entry point + * @scmd: SCSI command to be queued + * @done: Callback entry point + */ +static int +megasas_queue_command(struct scsi_cmnd *scmd, void (*done) (struct scsi_cmnd *)) +{ + u32 frame_count; + unsigned long flags; + struct megasas_cmd *cmd; + struct megasas_instance *instance; + + instance = (struct megasas_instance *) + scmd->device->host->hostdata; + scmd->scsi_done = done; + scmd->result = 0; + + cmd = megasas_build_cmd(instance, scmd, &frame_count); + + if (!cmd) { + done(scmd); + return 0; + } + + cmd->scmd = scmd; + scmd->SCp.ptr = (char *)cmd; + scmd->SCp.sent_command = jiffies; + + /* + * Issue the command to the FW + */ + spin_lock_irqsave(&instance->instance_lock, flags); + instance->fw_outstanding++; + spin_unlock_irqrestore(&instance->instance_lock, flags); + + writel(((cmd->frame_phys_addr >> 3) | (cmd->frame_count - 1)), + &instance->reg_set->inbound_queue_port); + + return 0; +} + +/** + * megasas_wait_for_outstanding - Wait for all outstanding cmds + * @instance: Adapter soft state + * + * This function waits for upto MEGASAS_RESET_WAIT_TIME seconds for FW to + * complete all its outstanding commands. Returns error if one or more IOs + * are pending after this time period. It also marks the controller dead. + */ +static int megasas_wait_for_outstanding(struct megasas_instance *instance) +{ + int i; + u32 wait_time = MEGASAS_RESET_WAIT_TIME; + + for (i = 0; i < wait_time; i++) { + + if (!instance->fw_outstanding) + break; + + if (!(i % MEGASAS_RESET_NOTICE_INTERVAL)) { + printk(KERN_NOTICE "megasas: [%2d]waiting for %d " + "commands to complete\n", i, + instance->fw_outstanding); + } + + msleep(1000); + } + + if (instance->fw_outstanding) { + instance->hw_crit_error = 1; + return FAILED; + } + + return SUCCESS; +} + +/** + * megasas_generic_reset - Generic reset routine + * @scmd: Mid-layer SCSI command + * + * This routine implements a generic reset handler for device, bus and host + * reset requests. 
Device, bus and host specific reset handlers can use this + * function after they do their specific tasks. + */ +static int megasas_generic_reset(struct scsi_cmnd *scmd) +{ + int ret_val; + struct megasas_instance *instance; + + instance = (struct megasas_instance *)scmd->device->host->hostdata; + + printk(KERN_NOTICE "megasas: RESET -%ld cmd=%x \n", + scmd->serial_number, scmd->cmnd[0], scmd->device->channel, + scmd->device->id, scmd->device->lun); + + if (instance->hw_crit_error) { + printk(KERN_ERR "megasas: cannot recover from previous reset " + "failures\n"); + return FAILED; + } + + spin_unlock(scmd->device->host->host_lock); + + ret_val = megasas_wait_for_outstanding(instance); + + if (ret_val == SUCCESS) + printk(KERN_NOTICE "megasas: reset successful \n"); + else + printk(KERN_ERR "megasas: failed to do reset\n"); + + spin_lock(scmd->device->host->host_lock); + + return ret_val; +} + +static enum scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) +{ + unsigned long seconds; + + if (scmd->SCp.ptr) { + seconds = (jiffies - scmd->SCp.sent_command) / HZ; + + if (seconds < 90) { + return EH_RESET_TIMER; + } else { + return EH_NOT_HANDLED; + } + } + + return EH_HANDLED; +} + +/** + * megasas_reset_device - Device reset handler entry point + */ +static int megasas_reset_device(struct scsi_cmnd *scmd) +{ + int ret; + + /* + * First wait for all commands to complete + */ + ret = megasas_generic_reset(scmd); + + return ret; +} + +/** + * megasas_reset_bus_host - Bus & host reset handler entry point + */ +static int megasas_reset_bus_host(struct scsi_cmnd *scmd) +{ + int ret; + + /* + * Frist wait for all commands to complete + */ + ret = megasas_generic_reset(scmd); + + return ret; +} + +/** + * megasas_service_aen - Processes an event notification + * @instance: Adapter soft state + * @cmd: AEN command completed by the ISR + * + * For AEN, driver sends a command down to FW that is held by the FW till an + * event occurs. When an event of interest occurs, FW completes the command + * that it was previously holding. + * + * This routines sends SIGIO signal to processes that have registered with the + * driver for AEN. + */ +static void +megasas_service_aen(struct megasas_instance *instance, struct megasas_cmd *cmd) +{ + /* + * Don't signal app if it is just an aborted previously registered aen + */ + if (!cmd->abort_aen) + kill_fasync(&megasas_async_queue, SIGIO, POLL_IN); + else + cmd->abort_aen = 0; + + instance->aen_cmd = NULL; + megasas_return_cmd(instance, cmd); +} + +/* + * Scsi host template for megaraid_sas driver + */ +static struct scsi_host_template megasas_template = { + + .module = THIS_MODULE, + .name = "LSI Logic SAS based MegaRAID driver", + .proc_name = "megaraid_sas", + .queuecommand = megasas_queue_command, + .eh_device_reset_handler = megasas_reset_device, + .eh_bus_reset_handler = megasas_reset_bus_host, + .eh_host_reset_handler = megasas_reset_bus_host, + .eh_timed_out = megasas_reset_timer, + .use_clustering = ENABLE_CLUSTERING, +}; + +/** + * megasas_complete_int_cmd - Completes an internal command + * @instance: Adapter soft state + * @cmd: Command to be completed + * + * The megasas_issue_blocked_cmd() function waits for a command to complete + * after it issues a command. This function wakes up that waiting routine by + * calling wake_up() on the wait queue. 
+ */ +static void +megasas_complete_int_cmd(struct megasas_instance *instance, + struct megasas_cmd *cmd) +{ + cmd->cmd_status = cmd->frame->io.cmd_status; + + if (cmd->cmd_status == ENODATA) { + cmd->cmd_status = 0; + } + wake_up(&instance->int_cmd_wait_q); +} + +/** + * megasas_complete_abort - Completes aborting a command + * @instance: Adapter soft state + * @cmd: Cmd that was issued to abort another cmd + * + * The megasas_issue_blocked_abort_cmd() function waits on abort_cmd_wait_q + * after it issues an abort on a previously issued command. This function + * wakes up all functions waiting on the same wait queue. + */ +static void +megasas_complete_abort(struct megasas_instance *instance, + struct megasas_cmd *cmd) +{ + if (cmd->sync_cmd) { + cmd->sync_cmd = 0; + cmd->cmd_status = 0; + wake_up(&instance->abort_cmd_wait_q); + } + + return; +} + +/** + * megasas_unmap_sgbuf - Unmap SG buffers + * @instance: Adapter soft state + * @cmd: Completed command + */ +static inline void +megasas_unmap_sgbuf(struct megasas_instance *instance, struct megasas_cmd *cmd) +{ + dma_addr_t buf_h; + u8 opcode; + + if (cmd->scmd->use_sg) { + pci_unmap_sg(instance->pdev, cmd->scmd->request_buffer, + cmd->scmd->use_sg, cmd->scmd->sc_data_direction); + return; + } + + if (!cmd->scmd->request_bufflen) + return; + + opcode = cmd->frame->hdr.cmd; + + if ((opcode == MFI_CMD_LD_READ) || (opcode == MFI_CMD_LD_WRITE)) { + if (IS_DMA64) + buf_h = cmd->frame->io.sgl.sge64[0].phys_addr; + else + buf_h = cmd->frame->io.sgl.sge32[0].phys_addr; + } else { + if (IS_DMA64) + buf_h = cmd->frame->pthru.sgl.sge64[0].phys_addr; + else + buf_h = cmd->frame->pthru.sgl.sge32[0].phys_addr; + } + + pci_unmap_single(instance->pdev, buf_h, cmd->scmd->request_bufflen, + cmd->scmd->sc_data_direction); + return; +} + +/** + * megasas_complete_cmd - Completes a command + * @instance: Adapter soft state + * @cmd: Command to be completed + * @alt_status: If non-zero, use this value as status to + * SCSI mid-layer instead of the value returned + * by the FW. This should be used if caller wants + * an alternate status (as in the case of aborted + * commands) + */ +static inline void +megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, + u8 alt_status) +{ + int exception = 0; + struct megasas_header *hdr = &cmd->frame->hdr; + unsigned long flags; + + if (cmd->scmd) { + cmd->scmd->SCp.ptr = (char *)0; + } + + switch (hdr->cmd) { + + case MFI_CMD_PD_SCSI_IO: + case MFI_CMD_LD_SCSI_IO: + + /* + * MFI_CMD_PD_SCSI_IO and MFI_CMD_LD_SCSI_IO could have been + * issued either through an IO path or an IOCTL path. If it + * was via IOCTL, we will send it to internal completion. + */ + if (cmd->sync_cmd) { + cmd->sync_cmd = 0; + megasas_complete_int_cmd(instance, cmd); + break; + } + + /* + * Don't export physical disk devices to mid-layer. 
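
(The check that follows relies on standard INQUIRY data: bits 4:0 of byte 0 carry the peripheral device type, where 0x00, TYPE_DISK, means a direct-access disk and 0x05, for example, would be a CD/DVD device. INQUIRY replies from physical disks reached through the pass-through path are therefore turned into DID_BAD_TARGET instead of being surfaced to the SCSI mid-layer.)
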
+ */ + if (!MEGASAS_IS_LOGICAL(cmd->scmd) && + (hdr->cmd_status == MFI_STAT_OK) && + (cmd->scmd->cmnd[0] == INQUIRY)) { + + if (((*(u8 *) cmd->scmd->request_buffer) & 0x1F) == + TYPE_DISK) { + cmd->scmd->result = DID_BAD_TARGET << 16; + exception = 1; + } + } + + case MFI_CMD_LD_READ: + case MFI_CMD_LD_WRITE: + + if (alt_status) { + cmd->scmd->result = alt_status << 16; + exception = 1; + } + + if (exception) { + + spin_lock_irqsave(&instance->instance_lock, flags); + instance->fw_outstanding--; + spin_unlock_irqrestore(&instance->instance_lock, flags); + + megasas_unmap_sgbuf(instance, cmd); + cmd->scmd->scsi_done(cmd->scmd); + megasas_return_cmd(instance, cmd); + + break; + } + + switch (hdr->cmd_status) { + + case MFI_STAT_OK: + cmd->scmd->result = DID_OK << 16; + break; + + case MFI_STAT_SCSI_IO_FAILED: + case MFI_STAT_LD_INIT_IN_PROGRESS: + cmd->scmd->result = + (DID_ERROR << 16) | hdr->scsi_status; + break; + + case MFI_STAT_SCSI_DONE_WITH_ERROR: + + cmd->scmd->result = (DID_OK << 16) | hdr->scsi_status; + + if (hdr->scsi_status == SAM_STAT_CHECK_CONDITION) { + memset(cmd->scmd->sense_buffer, 0, + SCSI_SENSE_BUFFERSIZE); + memcpy(cmd->scmd->sense_buffer, cmd->sense, + hdr->sense_len); + + cmd->scmd->result |= DRIVER_SENSE << 24; + } + + break; + + case MFI_STAT_LD_OFFLINE: + case MFI_STAT_DEVICE_NOT_FOUND: + cmd->scmd->result = DID_BAD_TARGET << 16; + break; + + default: + printk(KERN_DEBUG "megasas: MFI FW status %#x\n", + hdr->cmd_status); + cmd->scmd->result = DID_ERROR << 16; + break; + } + + spin_lock_irqsave(&instance->instance_lock, flags); + instance->fw_outstanding--; + spin_unlock_irqrestore(&instance->instance_lock, flags); + + megasas_unmap_sgbuf(instance, cmd); + cmd->scmd->scsi_done(cmd->scmd); + megasas_return_cmd(instance, cmd); + + break; + + case MFI_CMD_SMP: + case MFI_CMD_STP: + case MFI_CMD_DCMD: + + /* + * See if got an event notification + */ + if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_WAIT) + megasas_service_aen(instance, cmd); + else + megasas_complete_int_cmd(instance, cmd); + + break; + + case MFI_CMD_ABORT: + /* + * Cmd issued to abort another cmd returned + */ + megasas_complete_abort(instance, cmd); + break; + + default: + printk("megasas: Unknown command completed! 
[0x%X]\n", + hdr->cmd); + break; + } +} + +/** + * megasas_deplete_reply_queue - Processes all completed commands + * @instance: Adapter soft state + * @alt_status: Alternate status to be returned to + * SCSI mid-layer instead of the status + * returned by the FW + */ +static inline int +megasas_deplete_reply_queue(struct megasas_instance *instance, u8 alt_status) +{ + u32 status; + u32 producer; + u32 consumer; + u32 context; + struct megasas_cmd *cmd; + + /* + * Check if it is our interrupt + */ + status = readl(&instance->reg_set->outbound_intr_status); + + if (!(status & MFI_OB_INTR_STATUS_MASK)) { + return IRQ_NONE; + } + + /* + * Clear the interrupt by writing back the same value + */ + writel(status, &instance->reg_set->outbound_intr_status); + + producer = *instance->producer; + consumer = *instance->consumer; + + while (consumer != producer) { + context = instance->reply_queue[consumer]; + + cmd = instance->cmd_list[context]; + + megasas_complete_cmd(instance, cmd, alt_status); + + consumer++; + if (consumer == (instance->max_fw_cmds + 1)) { + consumer = 0; + } + } + + *instance->consumer = producer; + + return IRQ_HANDLED; +} + +/** + * megasas_isr - isr entry point + */ +static irqreturn_t megasas_isr(int irq, void *devp, struct pt_regs *regs) +{ + return megasas_deplete_reply_queue((struct megasas_instance *)devp, + DID_OK); +} + +/** + * megasas_transition_to_ready - Move the FW to READY state + * @reg_set: MFI register set + * + * During the initialization, FW passes can potentially be in any one of + * several possible states. If the FW in operational, waiting-for-handshake + * states, driver must take steps to bring it to ready state. Otherwise, it + * has to wait for the ready state. + */ +static int +megasas_transition_to_ready(struct megasas_register_set __iomem * reg_set) +{ + int i; + u8 max_wait; + u32 fw_state; + u32 cur_state; + + fw_state = readl(®_set->outbound_msg_0) & MFI_STATE_MASK; + + while (fw_state != MFI_STATE_READY) { + + printk(KERN_INFO "megasas: Waiting for FW to come to ready" + " state\n"); + switch (fw_state) { + + case MFI_STATE_FAULT: + + printk(KERN_DEBUG "megasas: FW in FAULT state!!\n"); + return -ENODEV; + + case MFI_STATE_WAIT_HANDSHAKE: + /* + * Set the CLR bit in inbound doorbell + */ + writel(MFI_INIT_CLEAR_HANDSHAKE, + ®_set->inbound_doorbell); + + max_wait = 2; + cur_state = MFI_STATE_WAIT_HANDSHAKE; + break; + + case MFI_STATE_OPERATIONAL: + /* + * Bring it to READY state; assuming max wait 2 secs + */ + megasas_disable_intr(reg_set); + writel(MFI_INIT_READY, ®_set->inbound_doorbell); + + max_wait = 10; + cur_state = MFI_STATE_OPERATIONAL; + break; + + case MFI_STATE_UNDEFINED: + /* + * This state should not last for more than 2 seconds + */ + max_wait = 2; + cur_state = MFI_STATE_UNDEFINED; + break; + + case MFI_STATE_BB_INIT: + max_wait = 2; + cur_state = MFI_STATE_BB_INIT; + break; + + case MFI_STATE_FW_INIT: + max_wait = 20; + cur_state = MFI_STATE_FW_INIT; + break; + + case MFI_STATE_FW_INIT_2: + max_wait = 20; + cur_state = MFI_STATE_FW_INIT_2; + break; + + case MFI_STATE_DEVICE_SCAN: + max_wait = 20; + cur_state = MFI_STATE_DEVICE_SCAN; + break; + + case MFI_STATE_FLUSH_CACHE: + max_wait = 20; + cur_state = MFI_STATE_FLUSH_CACHE; + break; + + default: + printk(KERN_DEBUG "megasas: Unknown state 0x%x\n", + fw_state); + return -ENODEV; + } + + /* + * The cur_state should not last for more than max_wait secs + */ + for (i = 0; i < (max_wait * 1000); i++) { + fw_state = MFI_STATE_MASK & + readl(®_set->outbound_msg_0); + + if 
(fw_state == cur_state) { + msleep(1); + } else + break; + } + + /* + * Return error if fw_state hasn't changed after max_wait + */ + if (fw_state == cur_state) { + printk(KERN_DEBUG "FW state [%d] hasn't changed " + "in %d secs\n", fw_state, max_wait); + return -ENODEV; + } + }; + + return 0; +} + +/** + * megasas_teardown_frame_pool - Destroy the cmd frame DMA pool + * @instance: Adapter soft state + */ +static void megasas_teardown_frame_pool(struct megasas_instance *instance) +{ + int i; + u32 max_cmd = instance->max_fw_cmds; + struct megasas_cmd *cmd; + + if (!instance->frame_dma_pool) + return; + + /* + * Return all frames to pool + */ + for (i = 0; i < max_cmd; i++) { + + cmd = instance->cmd_list[i]; + + if (cmd->frame) + pci_pool_free(instance->frame_dma_pool, cmd->frame, + cmd->frame_phys_addr); + + if (cmd->sense) + pci_pool_free(instance->sense_dma_pool, cmd->frame, + cmd->sense_phys_addr); + } + + /* + * Now destroy the pool itself + */ + pci_pool_destroy(instance->frame_dma_pool); + pci_pool_destroy(instance->sense_dma_pool); + + instance->frame_dma_pool = NULL; + instance->sense_dma_pool = NULL; +} + +/** + * megasas_create_frame_pool - Creates DMA pool for cmd frames + * @instance: Adapter soft state + * + * Each command packet has an embedded DMA memory buffer that is used for + * filling MFI frame and the SG list that immediately follows the frame. This + * function creates those DMA memory buffers for each command packet by using + * PCI pool facility. + */ +static int megasas_create_frame_pool(struct megasas_instance *instance) +{ + int i; + u32 max_cmd; + u32 sge_sz; + u32 sgl_sz; + u32 total_sz; + u32 frame_count; + struct megasas_cmd *cmd; + + max_cmd = instance->max_fw_cmds; + + /* + * Size of our frame is 64 bytes for MFI frame, followed by max SG + * elements and finally SCSI_SENSE_BUFFERSIZE bytes for sense buffer + */ + sge_sz = (IS_DMA64) ? sizeof(struct megasas_sge64) : + sizeof(struct megasas_sge32); + + /* + * Calculated the number of 64byte frames required for SGL + */ + sgl_sz = sge_sz * instance->max_num_sge; + frame_count = (sgl_sz + MEGAMFI_FRAME_SIZE - 1) / MEGAMFI_FRAME_SIZE; + + /* + * We need one extra frame for the MFI command + */ + frame_count++; + + total_sz = MEGAMFI_FRAME_SIZE * frame_count; + /* + * Use DMA pool facility provided by PCI layer + */ + instance->frame_dma_pool = pci_pool_create("megasas frame pool", + instance->pdev, total_sz, 64, + 0); + + if (!instance->frame_dma_pool) { + printk(KERN_DEBUG "megasas: failed to setup frame pool\n"); + return -ENOMEM; + } + + instance->sense_dma_pool = pci_pool_create("megasas sense pool", + instance->pdev, 128, 4, 0); + + if (!instance->sense_dma_pool) { + printk(KERN_DEBUG "megasas: failed to setup sense pool\n"); + + pci_pool_destroy(instance->frame_dma_pool); + instance->frame_dma_pool = NULL; + + return -ENOMEM; + } + + /* + * Allocate and attach a frame to each of the commands in cmd_list. 
+ * By making cmd->index as the context instead of the &cmd, we can + * always use 32bit context regardless of the architecture + */ + for (i = 0; i < max_cmd; i++) { + + cmd = instance->cmd_list[i]; + + cmd->frame = pci_pool_alloc(instance->frame_dma_pool, + GFP_KERNEL, &cmd->frame_phys_addr); + + cmd->sense = pci_pool_alloc(instance->sense_dma_pool, + GFP_KERNEL, &cmd->sense_phys_addr); + + /* + * megasas_teardown_frame_pool() takes care of freeing + * whatever has been allocated + */ + if (!cmd->frame || !cmd->sense) { + printk(KERN_DEBUG "megasas: pci_pool_alloc failed \n"); + megasas_teardown_frame_pool(instance); + return -ENOMEM; + } + + cmd->frame->io.context = cmd->index; + } + + return 0; +} + +/** + * megasas_free_cmds - Free all the cmds in the free cmd pool + * @instance: Adapter soft state + */ +static void megasas_free_cmds(struct megasas_instance *instance) +{ + int i; + /* First free the MFI frame pool */ + megasas_teardown_frame_pool(instance); + + /* Free all the commands in the cmd_list */ + for (i = 0; i < instance->max_fw_cmds; i++) + kfree(instance->cmd_list[i]); + + /* Free the cmd_list buffer itself */ + kfree(instance->cmd_list); + instance->cmd_list = NULL; + + INIT_LIST_HEAD(&instance->cmd_pool); +} + +/** + * megasas_alloc_cmds - Allocates the command packets + * @instance: Adapter soft state + * + * Each command that is issued to the FW, whether IO commands from the OS or + * internal commands like IOCTLs, are wrapped in local data structure called + * megasas_cmd. The frame embedded in this megasas_cmd is actually issued to + * the FW. + * + * Each frame has a 32-bit field called context (tag). This context is used + * to get back the megasas_cmd from the frame when a frame gets completed in + * the ISR. Typically the address of the megasas_cmd itself would be used as + * the context. But we wanted to keep the differences between 32 and 64 bit + * systems to the mininum. We always use 32 bit integers for the context. In + * this driver, the 32 bit values are the indices into an array cmd_list. + * This array is used only to look up the megasas_cmd given the context. The + * free commands themselves are maintained in a linked list called cmd_pool. + */ +static int megasas_alloc_cmds(struct megasas_instance *instance) +{ + int i; + int j; + u32 max_cmd; + struct megasas_cmd *cmd; + + max_cmd = instance->max_fw_cmds; + + /* + * instance->cmd_list is an array of struct megasas_cmd pointers. + * Allocate the dynamic array first and then allocate individual + * commands. 
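
To make the sizing in megasas_create_frame_pool() above concrete (numbers invented for illustration): with 16-byte 64-bit SGEs and a firmware-reported max_num_sge of 80, sgl_sz is 1280 bytes, frame_count becomes 1280/64 + 1 = 21, and every buffer in the frame pool is therefore 21 * 64 = 1344 bytes, while sense buffers come from the separate fixed 128-byte pool.
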
+ */ + instance->cmd_list = kmalloc(sizeof(struct megasas_cmd *) * max_cmd, + GFP_KERNEL); + + if (!instance->cmd_list) { + printk(KERN_DEBUG "megasas: out of memory\n"); + return -ENOMEM; + } + + memset(instance->cmd_list, 0, sizeof(struct megasas_cmd *) * max_cmd); + + for (i = 0; i < max_cmd; i++) { + instance->cmd_list[i] = kmalloc(sizeof(struct megasas_cmd), + GFP_KERNEL); + + if (!instance->cmd_list[i]) { + + for (j = 0; j < i; j++) + kfree(instance->cmd_list[j]); + + kfree(instance->cmd_list); + instance->cmd_list = NULL; + + return -ENOMEM; + } + } + + /* + * Add all the commands to command pool (instance->cmd_pool) + */ + for (i = 0; i < max_cmd; i++) { + cmd = instance->cmd_list[i]; + memset(cmd, 0, sizeof(struct megasas_cmd)); + cmd->index = i; + cmd->instance = instance; + + list_add_tail(&cmd->list, &instance->cmd_pool); + } + + /* + * Create a frame pool and assign one frame to each cmd + */ + if (megasas_create_frame_pool(instance)) { + printk(KERN_DEBUG "megasas: Error creating frame DMA pool\n"); + megasas_free_cmds(instance); + } + + return 0; +} + +/** + * megasas_get_controller_info - Returns FW's controller structure + * @instance: Adapter soft state + * @ctrl_info: Controller information structure + * + * Issues an internal command (DCMD) to get the FW's controller structure. + * This information is mainly used to find out the maximum IO transfer per + * command supported by the FW. + */ +static int +megasas_get_ctrl_info(struct megasas_instance *instance, + struct megasas_ctrl_info *ctrl_info) +{ + int ret = 0; + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + struct megasas_ctrl_info *ci; + dma_addr_t ci_h = 0; + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + printk(KERN_DEBUG "megasas: Failed to get a free cmd\n"); + return -ENOMEM; + } + + dcmd = &cmd->frame->dcmd; + + ci = pci_alloc_consistent(instance->pdev, + sizeof(struct megasas_ctrl_info), &ci_h); + + if (!ci) { + printk(KERN_DEBUG "Failed to alloc mem for ctrl info\n"); + megasas_return_cmd(instance, cmd); + return -ENOMEM; + } + + memset(ci, 0, sizeof(*ci)); + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0xFF; + dcmd->sge_count = 1; + dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->timeout = 0; + dcmd->data_xfer_len = sizeof(struct megasas_ctrl_info); + dcmd->opcode = MR_DCMD_CTRL_GET_INFO; + dcmd->sgl.sge32[0].phys_addr = ci_h; + dcmd->sgl.sge32[0].length = sizeof(struct megasas_ctrl_info); + + if (!megasas_issue_polled(instance, cmd)) { + ret = 0; + memcpy(ctrl_info, ci, sizeof(struct megasas_ctrl_info)); + } else { + ret = -1; + } + + pci_free_consistent(instance->pdev, sizeof(struct megasas_ctrl_info), + ci, ci_h); + + megasas_return_cmd(instance, cmd); + return ret; +} + +/** + * megasas_init_mfi - Initializes the FW + * @instance: Adapter soft state + * + * This is the main function for initializing MFI firmware. 
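+ * Roughly: map the register window, wait for the firmware to reach the
+ * READY state, size the command pool from outbound_msg_0, allocate the
+ * reply queue (max_fw_cmds + 1 context slots) and hand its addresses to
+ * the firmware with an MFI INIT frame issued in polled mode.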
+ */ +static int megasas_init_mfi(struct megasas_instance *instance) +{ + u32 context_sz; + u32 reply_q_sz; + u32 max_sectors_1; + u32 max_sectors_2; + struct megasas_register_set __iomem *reg_set; + + struct megasas_cmd *cmd; + struct megasas_ctrl_info *ctrl_info; + + struct megasas_init_frame *init_frame; + struct megasas_init_queue_info *initq_info; + dma_addr_t init_frame_h; + dma_addr_t initq_info_h; + + /* + * Map the message registers + */ + instance->base_addr = pci_resource_start(instance->pdev, 0); + + if (pci_request_regions(instance->pdev, "megasas: LSI Logic")) { + printk(KERN_DEBUG "megasas: IO memory region busy!\n"); + return -EBUSY; + } + + instance->reg_set = ioremap_nocache(instance->base_addr, 8192); + + if (!instance->reg_set) { + printk(KERN_DEBUG "megasas: Failed to map IO mem\n"); + goto fail_ioremap; + } + + reg_set = instance->reg_set; + + /* + * We expect the FW state to be READY + */ + if (megasas_transition_to_ready(instance->reg_set)) + goto fail_ready_state; + + /* + * Get various operational parameters from status register + */ + instance->max_fw_cmds = readl(®_set->outbound_msg_0) & 0x00FFFF; + instance->max_num_sge = (readl(®_set->outbound_msg_0) & 0xFF0000) >> + 0x10; + /* + * Create a pool of commands + */ + if (megasas_alloc_cmds(instance)) + goto fail_alloc_cmds; + + /* + * Allocate memory for reply queue. Length of reply queue should + * be _one_ more than the maximum commands handled by the firmware. + * + * Note: When FW completes commands, it places corresponding contex + * values in this circular reply queue. This circular queue is a fairly + * typical producer-consumer queue. FW is the producer (of completed + * commands) and the driver is the consumer. + */ + context_sz = sizeof(u32); + reply_q_sz = context_sz * (instance->max_fw_cmds + 1); + + instance->reply_queue = pci_alloc_consistent(instance->pdev, + reply_q_sz, + &instance->reply_queue_h); + + if (!instance->reply_queue) { + printk(KERN_DEBUG "megasas: Out of DMA mem for reply queue\n"); + goto fail_reply_queue; + } + + /* + * Prepare a init frame. Note the init frame points to queue info + * structure. Each frame has SGL allocated after first 64 bytes. For + * this frame - since we don't need any SGL - we use SGL's space as + * queue info structure + * + * We will not get a NULL command below. We just created the pool. 
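+ * Because the queue info is carved out of the frame's own SGL area, its
+ * bus address is simply the frame address plus 64; that is why
+ * initq_info_h below is computed as init_frame_h + 64 instead of being
+ * allocated separately.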
+ */ + cmd = megasas_get_cmd(instance); + + init_frame = (struct megasas_init_frame *)cmd->frame; + initq_info = (struct megasas_init_queue_info *) + ((unsigned long)init_frame + 64); + + init_frame_h = cmd->frame_phys_addr; + initq_info_h = init_frame_h + 64; + + memset(init_frame, 0, MEGAMFI_FRAME_SIZE); + memset(initq_info, 0, sizeof(struct megasas_init_queue_info)); + + initq_info->reply_queue_entries = instance->max_fw_cmds + 1; + initq_info->reply_queue_start_phys_addr_lo = instance->reply_queue_h; + + initq_info->producer_index_phys_addr_lo = instance->producer_h; + initq_info->consumer_index_phys_addr_lo = instance->consumer_h; + + init_frame->cmd = MFI_CMD_INIT; + init_frame->cmd_status = 0xFF; + init_frame->queue_info_new_phys_addr_lo = initq_info_h; + + init_frame->data_xfer_len = sizeof(struct megasas_init_queue_info); + + /* + * Issue the init frame in polled mode + */ + if (megasas_issue_polled(instance, cmd)) { + printk(KERN_DEBUG "megasas: Failed to init firmware\n"); + goto fail_fw_init; + } + + megasas_return_cmd(instance, cmd); + + ctrl_info = kmalloc(sizeof(struct megasas_ctrl_info), GFP_KERNEL); + + /* + * Compute the max allowed sectors per IO: The controller info has two + * limits on max sectors. Driver should use the minimum of these two. + * + * 1 << stripe_sz_ops.min = max sectors per strip + * + * Note that older firmwares ( < FW ver 30) didn't report information + * to calculate max_sectors_1. So the number ended up as zero always. + */ + if (ctrl_info && !megasas_get_ctrl_info(instance, ctrl_info)) { + + max_sectors_1 = (1 << ctrl_info->stripe_sz_ops.min) * + ctrl_info->max_strips_per_io; + max_sectors_2 = ctrl_info->max_request_size; + + instance->max_sectors_per_req = (max_sectors_1 < max_sectors_2) + ? max_sectors_1 : max_sectors_2; + } else + instance->max_sectors_per_req = instance->max_num_sge * + PAGE_SIZE / 512; + + kfree(ctrl_info); + + return 0; + + fail_fw_init: + megasas_return_cmd(instance, cmd); + + pci_free_consistent(instance->pdev, reply_q_sz, + instance->reply_queue, instance->reply_queue_h); + fail_reply_queue: + megasas_free_cmds(instance); + + fail_alloc_cmds: + fail_ready_state: + iounmap(instance->reg_set); + + fail_ioremap: + pci_release_regions(instance->pdev); + + return -EINVAL; +} + +/** + * megasas_release_mfi - Reverses the FW initialization + * @intance: Adapter soft state + */ +static void megasas_release_mfi(struct megasas_instance *instance) +{ + u32 reply_q_sz = sizeof(u32) * (instance->max_fw_cmds + 1); + + pci_free_consistent(instance->pdev, reply_q_sz, + instance->reply_queue, instance->reply_queue_h); + + megasas_free_cmds(instance); + + iounmap(instance->reg_set); + + pci_release_regions(instance->pdev); +} + +/** + * megasas_get_seq_num - Gets latest event sequence numbers + * @instance: Adapter soft state + * @eli: FW event log sequence numbers information + * + * FW maintains a log of all events in a non-volatile area. Upper layers would + * usually find out the latest sequence number of the events, the seq number at + * the boot etc. They would "read" all the events below the latest seq number + * by issuing a direct fw cmd (DCMD). For the future events (beyond latest seq + * number), they would subsribe to AEN (asynchronous event notification) and + * wait for the events to happen. 
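+ * In DCMD terms the usual sequence is: MR_DCMD_CTRL_EVENT_GET_INFO to
+ * learn newest_seq_num (this function), MR_DCMD_CTRL_EVENT_GET to read
+ * the backlog, and finally MR_DCMD_CTRL_EVENT_WAIT to register an AEN
+ * for anything newer (megasas_register_aen() below).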
+ */ +static int +megasas_get_seq_num(struct megasas_instance *instance, + struct megasas_evt_log_info *eli) +{ + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + struct megasas_evt_log_info *el_info; + dma_addr_t el_info_h = 0; + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + return -ENOMEM; + } + + dcmd = &cmd->frame->dcmd; + el_info = pci_alloc_consistent(instance->pdev, + sizeof(struct megasas_evt_log_info), + &el_info_h); + + if (!el_info) { + megasas_return_cmd(instance, cmd); + return -ENOMEM; + } + + memset(el_info, 0, sizeof(*el_info)); + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0x0; + dcmd->sge_count = 1; + dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->timeout = 0; + dcmd->data_xfer_len = sizeof(struct megasas_evt_log_info); + dcmd->opcode = MR_DCMD_CTRL_EVENT_GET_INFO; + dcmd->sgl.sge32[0].phys_addr = el_info_h; + dcmd->sgl.sge32[0].length = sizeof(struct megasas_evt_log_info); + + megasas_issue_blocked_cmd(instance, cmd); + + /* + * Copy the data back into callers buffer + */ + memcpy(eli, el_info, sizeof(struct megasas_evt_log_info)); + + pci_free_consistent(instance->pdev, sizeof(struct megasas_evt_log_info), + el_info, el_info_h); + + megasas_return_cmd(instance, cmd); + + return 0; +} + +/** + * megasas_register_aen - Registers for asynchronous event notification + * @instance: Adapter soft state + * @seq_num: The starting sequence number + * @class_locale: Class of the event + * + * This function subscribes for AEN for events beyond the @seq_num. It requests + * to be notified if and only if the event is of type @class_locale + */ +static int +megasas_register_aen(struct megasas_instance *instance, u32 seq_num, + u32 class_locale_word) +{ + int ret_val; + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + union megasas_evt_class_locale curr_aen; + union megasas_evt_class_locale prev_aen; + + /* + * If there an AEN pending already (aen_cmd), check if the + * class_locale of that pending AEN is inclusive of the new + * AEN request we currently have. If it is, then we don't have + * to do anything. In other words, whichever events the current + * AEN request is subscribing to, have already been subscribed + * to. + * + * If the old_cmd is _not_ inclusive, then we have to abort + * that command, form a class_locale that is superset of both + * old and current and re-issue to the FW + */ + + curr_aen.word = class_locale_word; + + if (instance->aen_cmd) { + + prev_aen.word = instance->aen_cmd->frame->dcmd.mbox.w[1]; + + /* + * A class whose enum value is smaller is inclusive of all + * higher values. If a PROGRESS (= -1) was previously + * registered, then a new registration requests for higher + * classes need not be sent to FW. They are automatically + * included. + * + * Locale numbers don't have such hierarchy. They are bitmap + * values + */ + if ((prev_aen.members.class <= curr_aen.members.class) && + !((prev_aen.members.locale & curr_aen.members.locale) ^ + curr_aen.members.locale)) { + /* + * Previously issued event registration includes + * current request. Nothing to do. 
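+ * For example, a pending registration with class PROGRESS (-1) and
+ * locale 0xffff already covers a new request for class CRITICAL and
+ * locale MR_EVT_LOCALE_LD. A pending locale of 0x0001, however, does not
+ * cover a request for 0x0002, so the pending AEN is aborted below and
+ * re-registered with the merged locale 0x0003.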
+ */ + return 0; + } else { + curr_aen.members.locale |= prev_aen.members.locale; + + if (prev_aen.members.class < curr_aen.members.class) + curr_aen.members.class = prev_aen.members.class; + + instance->aen_cmd->abort_aen = 1; + ret_val = megasas_issue_blocked_abort_cmd(instance, + instance-> + aen_cmd); + + if (ret_val) { + printk(KERN_DEBUG "megasas: Failed to abort " + "previous AEN command\n"); + return ret_val; + } + } + } + + cmd = megasas_get_cmd(instance); + + if (!cmd) + return -ENOMEM; + + dcmd = &cmd->frame->dcmd; + + memset(instance->evt_detail, 0, sizeof(struct megasas_evt_detail)); + + /* + * Prepare DCMD for aen registration + */ + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0x0; + dcmd->sge_count = 1; + dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->timeout = 0; + dcmd->data_xfer_len = sizeof(struct megasas_evt_detail); + dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT; + dcmd->mbox.w[0] = seq_num; + dcmd->mbox.w[1] = curr_aen.word; + dcmd->sgl.sge32[0].phys_addr = (u32) instance->evt_detail_h; + dcmd->sgl.sge32[0].length = sizeof(struct megasas_evt_detail); + + /* + * Store reference to the cmd used to register for AEN. When an + * application wants us to register for AEN, we have to abort this + * cmd and re-register with a new EVENT LOCALE supplied by that app + */ + instance->aen_cmd = cmd; + + /* + * Issue the aen registration frame + */ + writel(cmd->frame_phys_addr >> 3, + &instance->reg_set->inbound_queue_port); + + return 0; +} + +/** + * megasas_start_aen - Subscribes to AEN during driver load time + * @instance: Adapter soft state + */ +static int megasas_start_aen(struct megasas_instance *instance) +{ + struct megasas_evt_log_info eli; + union megasas_evt_class_locale class_locale; + + /* + * Get the latest sequence number from FW + */ + memset(&eli, 0, sizeof(eli)); + + if (megasas_get_seq_num(instance, &eli)) + return -1; + + /* + * Register AEN with FW for latest sequence number plus 1 + */ + class_locale.members.reserved = 0; + class_locale.members.locale = MR_EVT_LOCALE_ALL; + class_locale.members.class = MR_EVT_CLASS_DEBUG; + + return megasas_register_aen(instance, eli.newest_seq_num + 1, + class_locale.word); +} + +/** + * megasas_io_attach - Attaches this driver to SCSI mid-layer + * @instance: Adapter soft state + */ +static int megasas_io_attach(struct megasas_instance *instance) +{ + struct Scsi_Host *host = instance->host; + + /* + * Export parameters required by SCSI mid-layer + */ + host->irq = instance->pdev->irq; + host->unique_id = instance->unique_id; + host->can_queue = instance->max_fw_cmds - MEGASAS_INT_CMDS; + host->this_id = instance->init_id; + host->sg_tablesize = instance->max_num_sge; + host->max_sectors = instance->max_sectors_per_req; + host->cmd_per_lun = 128; + host->max_channel = MEGASAS_MAX_CHANNELS - 1; + host->max_id = MEGASAS_MAX_DEV_PER_CHANNEL; + host->max_lun = MEGASAS_MAX_LUN; + + /* + * Notify the mid-layer about the new controller + */ + if (scsi_add_host(host, &instance->pdev->dev)) { + printk(KERN_DEBUG "megasas: scsi_add_host failed\n"); + return -ENODEV; + } + + /* + * Trigger SCSI to scan our drives + */ + scsi_scan_host(host); + return 0; +} + +/** + * megasas_probe_one - PCI hotplug entry point + * @pdev: PCI device structure + * @id: PCI ids of supported hotplugged adapter + */ +static int __devinit +megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int rval; + struct Scsi_Host *host; + struct megasas_instance *instance; + + /* + * Announce PCI 
information + */ + printk(KERN_INFO "megasas: %#4.04x:%#4.04x:%#4.04x:%#4.04x: ", + pdev->vendor, pdev->device, pdev->subsystem_vendor, + pdev->subsystem_device); + + printk("bus %d:slot %d:func %d\n", + pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* + * PCI prepping: enable device set bus mastering and dma mask + */ + rval = pci_enable_device(pdev); + + if (rval) { + return rval; + } + + pci_set_master(pdev); + + /* + * All our contollers are capable of performing 64-bit DMA + */ + if (IS_DMA64) { + if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) != 0) { + + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) + goto fail_set_dma_mask; + } + } else { + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) + goto fail_set_dma_mask; + } + + host = scsi_host_alloc(&megasas_template, + sizeof(struct megasas_instance)); + + if (!host) { + printk(KERN_DEBUG "megasas: scsi_host_alloc failed\n"); + goto fail_alloc_instance; + } + + instance = (struct megasas_instance *)host->hostdata; + memset(instance, 0, sizeof(*instance)); + + instance->producer = pci_alloc_consistent(pdev, sizeof(u32), + &instance->producer_h); + instance->consumer = pci_alloc_consistent(pdev, sizeof(u32), + &instance->consumer_h); + + if (!instance->producer || !instance->consumer) { + printk(KERN_DEBUG "megasas: Failed to allocate memory for " + "producer, consumer\n"); + goto fail_alloc_dma_buf; + } + + *instance->producer = 0; + *instance->consumer = 0; + + instance->evt_detail = pci_alloc_consistent(pdev, + sizeof(struct + megasas_evt_detail), + &instance->evt_detail_h); + + if (!instance->evt_detail) { + printk(KERN_DEBUG "megasas: Failed to allocate memory for " + "event detail structure\n"); + goto fail_alloc_dma_buf; + } + + /* + * Initialize locks and queues + */ + INIT_LIST_HEAD(&instance->cmd_pool); + + init_waitqueue_head(&instance->int_cmd_wait_q); + init_waitqueue_head(&instance->abort_cmd_wait_q); + + spin_lock_init(&instance->cmd_pool_lock); + spin_lock_init(&instance->instance_lock); + + sema_init(&instance->aen_mutex, 1); + sema_init(&instance->ioctl_sem, MEGASAS_INT_CMDS); + + /* + * Initialize PCI related and misc parameters + */ + instance->pdev = pdev; + instance->host = host; + instance->unique_id = pdev->bus->number << 8 | pdev->devfn; + instance->init_id = MEGASAS_DEFAULT_INIT_ID; + + /* + * Initialize MFI Firmware + */ + if (megasas_init_mfi(instance)) + goto fail_init_mfi; + + /* + * Register IRQ + */ + if (request_irq(pdev->irq, megasas_isr, SA_SHIRQ, "megasas", instance)) { + printk(KERN_DEBUG "megasas: Failed to register IRQ\n"); + goto fail_irq; + } + + megasas_enable_intr(instance->reg_set); + + /* + * Store instance in PCI softstate + */ + pci_set_drvdata(pdev, instance); + + /* + * Add this controller to megasas_mgmt_info structure so that it + * can be exported to management applications + */ + megasas_mgmt_info.count++; + megasas_mgmt_info.instance[megasas_mgmt_info.max_index] = instance; + megasas_mgmt_info.max_index++; + + /* + * Initiate AEN (Asynchronous Event Notification) + */ + if (megasas_start_aen(instance)) { + printk(KERN_DEBUG "megasas: start aen failed\n"); + goto fail_start_aen; + } + + /* + * Register with SCSI mid-layer + */ + if (megasas_io_attach(instance)) + goto fail_io_attach; + + return 0; + + fail_start_aen: + fail_io_attach: + megasas_mgmt_info.count--; + megasas_mgmt_info.instance[megasas_mgmt_info.max_index] = NULL; + megasas_mgmt_info.max_index--; + + pci_set_drvdata(pdev, NULL); + megasas_disable_intr(instance->reg_set); + 
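+ /*
+ * The teardown below mirrors, in reverse, the setup done above; the
+ * fail_* labels further down catch progressively earlier failures.
+ */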
free_irq(instance->pdev->irq, instance); + + megasas_release_mfi(instance); + + fail_irq: + fail_init_mfi: + fail_alloc_dma_buf: + if (instance->evt_detail) + pci_free_consistent(pdev, sizeof(struct megasas_evt_detail), + instance->evt_detail, + instance->evt_detail_h); + + if (instance->producer) + pci_free_consistent(pdev, sizeof(u32), instance->producer, + instance->producer_h); + if (instance->consumer) + pci_free_consistent(pdev, sizeof(u32), instance->consumer, + instance->consumer_h); + scsi_host_put(host); + + fail_alloc_instance: + fail_set_dma_mask: + pci_disable_device(pdev); + + return -ENODEV; +} + +/** + * megasas_flush_cache - Requests FW to flush all its caches + * @instance: Adapter soft state + */ +static void megasas_flush_cache(struct megasas_instance *instance) +{ + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + + cmd = megasas_get_cmd(instance); + + if (!cmd) + return; + + dcmd = &cmd->frame->dcmd; + + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0x0; + dcmd->sge_count = 0; + dcmd->flags = MFI_FRAME_DIR_NONE; + dcmd->timeout = 0; + dcmd->data_xfer_len = 0; + dcmd->opcode = MR_DCMD_CTRL_CACHE_FLUSH; + dcmd->mbox.b[0] = MR_FLUSH_CTRL_CACHE | MR_FLUSH_DISK_CACHE; + + megasas_issue_blocked_cmd(instance, cmd); + + megasas_return_cmd(instance, cmd); + + return; +} + +/** + * megasas_shutdown_controller - Instructs FW to shutdown the controller + * @instance: Adapter soft state + */ +static void megasas_shutdown_controller(struct megasas_instance *instance) +{ + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + + cmd = megasas_get_cmd(instance); + + if (!cmd) + return; + + if (instance->aen_cmd) + megasas_issue_blocked_abort_cmd(instance, instance->aen_cmd); + + dcmd = &cmd->frame->dcmd; + + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0x0; + dcmd->sge_count = 0; + dcmd->flags = MFI_FRAME_DIR_NONE; + dcmd->timeout = 0; + dcmd->data_xfer_len = 0; + dcmd->opcode = MR_DCMD_CTRL_SHUTDOWN; + + megasas_issue_blocked_cmd(instance, cmd); + + megasas_return_cmd(instance, cmd); + + return; +} + +/** + * megasas_detach_one - PCI hot"un"plug entry point + * @pdev: PCI device structure + */ +static void megasas_detach_one(struct pci_dev *pdev) +{ + int i; + struct Scsi_Host *host; + struct megasas_instance *instance; + + instance = pci_get_drvdata(pdev); + host = instance->host; + + scsi_remove_host(instance->host); + megasas_flush_cache(instance); + megasas_shutdown_controller(instance); + + /* + * Take the instance off the instance array. Note that we will not + * decrement the max_index. 
We let this array be sparse array + */ + for (i = 0; i < megasas_mgmt_info.max_index; i++) { + if (megasas_mgmt_info.instance[i] == instance) { + megasas_mgmt_info.count--; + megasas_mgmt_info.instance[i] = NULL; + + break; + } + } + + pci_set_drvdata(instance->pdev, NULL); + + megasas_disable_intr(instance->reg_set); + + free_irq(instance->pdev->irq, instance); + + megasas_release_mfi(instance); + + pci_free_consistent(pdev, sizeof(struct megasas_evt_detail), + instance->evt_detail, instance->evt_detail_h); + + pci_free_consistent(pdev, sizeof(u32), instance->producer, + instance->producer_h); + + pci_free_consistent(pdev, sizeof(u32), instance->consumer, + instance->consumer_h); + + scsi_host_put(host); + + pci_set_drvdata(pdev, NULL); + + pci_disable_device(pdev); + + return; +} + +/** + * megasas_shutdown - Shutdown entry point + * @device: Generic device structure + */ +static void megasas_shutdown(struct pci_dev *pdev) +{ + struct megasas_instance *instance = pci_get_drvdata(pdev); + megasas_flush_cache(instance); +} + +/** + * megasas_mgmt_open - char node "open" entry point + */ +static int megasas_mgmt_open(struct inode *inode, struct file *filep) +{ + /* + * Allow only those users with admin rights + */ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + return 0; +} + +/** + * megasas_mgmt_release - char node "release" entry point + */ +static int megasas_mgmt_release(struct inode *inode, struct file *filep) +{ + filep->private_data = NULL; + fasync_helper(-1, filep, 0, &megasas_async_queue); + + return 0; +} + +/** + * megasas_mgmt_fasync - Async notifier registration from applications + * + * This function adds the calling process to a driver global queue. When an + * event occurs, SIGIO will be sent to all processes in this queue. + */ +static int megasas_mgmt_fasync(int fd, struct file *filep, int mode) +{ + int rc; + + down(&megasas_async_queue_mutex); + + rc = fasync_helper(fd, filep, mode, &megasas_async_queue); + + up(&megasas_async_queue_mutex); + + if (rc >= 0) { + /* For sanity check when we get ioctl */ + filep->private_data = filep; + return 0; + } + + printk(KERN_DEBUG "megasas: fasync_helper failed [%d]\n", rc); + + return rc; +} + +/** + * megasas_mgmt_fw_ioctl - Issues management ioctls to FW + * @instance: Adapter soft state + * @argp: User's ioctl packet + */ +static int +megasas_mgmt_fw_ioctl(struct megasas_instance *instance, + struct megasas_iocpacket __user * user_ioc, + struct megasas_iocpacket *ioc) +{ + struct megasas_sge32 *kern_sge32; + struct megasas_cmd *cmd; + void *kbuff_arr[MAX_IOCTL_SGE]; + dma_addr_t buf_handle = 0; + int error = 0, i; + void *sense = NULL; + dma_addr_t sense_handle; + u32 *sense_ptr; + + memset(kbuff_arr, 0, sizeof(kbuff_arr)); + + if (ioc->sge_count > MAX_IOCTL_SGE) { + printk(KERN_DEBUG "megasas: SGE count [%d] > max limit [%d]\n", + ioc->sge_count, MAX_IOCTL_SGE); + return -EINVAL; + } + + cmd = megasas_get_cmd(instance); + if (!cmd) { + printk(KERN_DEBUG "megasas: Failed to get a cmd packet\n"); + return -ENOMEM; + } + + /* + * User's IOCTL packet has 2 frames (maximum). Copy those two + * frames into our cmd's frames. cmd->frame's context will get + * overwritten when we copy from user's frames. So set that value + * alone separately + */ + memcpy(cmd->frame, ioc->frame.raw, 2 * MEGAMFI_FRAME_SIZE); + cmd->frame->hdr.context = cmd->index; + + /* + * The management interface between applications and the fw uses + * MFI frames. 
E.g, RAID configuration changes, LD property changes + * etc are accomplishes through different kinds of MFI frames. The + * driver needs to care only about substituting user buffers with + * kernel buffers in SGLs. The location of SGL is embedded in the + * struct iocpacket itself. + */ + kern_sge32 = (struct megasas_sge32 *) + ((unsigned long)cmd->frame + ioc->sgl_off); + + /* + * For each user buffer, create a mirror buffer and copy in + */ + for (i = 0; i < ioc->sge_count; i++) { + kbuff_arr[i] = pci_alloc_consistent(instance->pdev, + ioc->sgl[i].iov_len, + &buf_handle); + if (!kbuff_arr[i]) { + printk(KERN_DEBUG "megasas: Failed to alloc " + "kernel SGL buffer for IOCTL \n"); + error = -ENOMEM; + goto out; + } + + /* + * We don't change the dma_coherent_mask, so + * pci_alloc_consistent only returns 32bit addresses + */ + kern_sge32[i].phys_addr = (u32) buf_handle; + kern_sge32[i].length = ioc->sgl[i].iov_len; + + /* + * We created a kernel buffer corresponding to the + * user buffer. Now copy in from the user buffer + */ + if (copy_from_user(kbuff_arr[i], ioc->sgl[i].iov_base, + (u32) (ioc->sgl[i].iov_len))) { + error = -EFAULT; + goto out; + } + } + + if (ioc->sense_len) { + sense = pci_alloc_consistent(instance->pdev, ioc->sense_len, + &sense_handle); + if (!sense) { + error = -ENOMEM; + goto out; + } + + sense_ptr = + (u32 *) ((unsigned long)cmd->frame + ioc->sense_off); + *sense_ptr = sense_handle; + } + + /* + * Set the sync_cmd flag so that the ISR knows not to complete this + * cmd to the SCSI mid-layer + */ + cmd->sync_cmd = 1; + megasas_issue_blocked_cmd(instance, cmd); + cmd->sync_cmd = 0; + + /* + * copy out the kernel buffers to user buffers + */ + for (i = 0; i < ioc->sge_count; i++) { + if (copy_to_user(ioc->sgl[i].iov_base, kbuff_arr[i], + ioc->sgl[i].iov_len)) { + error = -EFAULT; + goto out; + } + } + + /* + * copy out the sense + */ + if (ioc->sense_len) { + /* + * sense_ptr points to the location that has the user + * sense buffer address + */ + sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw + + ioc->sense_off); + + if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)), + sense, ioc->sense_len)) { + error = -EFAULT; + goto out; + } + } + + /* + * copy the status codes returned by the fw + */ + if (copy_to_user(&user_ioc->frame.hdr.cmd_status, + &cmd->frame->hdr.cmd_status, sizeof(u8))) { + printk(KERN_DEBUG "megasas: Error copying out cmd_status\n"); + error = -EFAULT; + } + + out: + if (sense) { + pci_free_consistent(instance->pdev, ioc->sense_len, + sense, sense_handle); + } + + for (i = 0; i < ioc->sge_count && kbuff_arr[i]; i++) { + pci_free_consistent(instance->pdev, + kern_sge32[i].length, + kbuff_arr[i], kern_sge32[i].phys_addr); + } + + megasas_return_cmd(instance, cmd); + return error; +} + +static struct megasas_instance *megasas_lookup_instance(u16 host_no) +{ + int i; + + for (i = 0; i < megasas_mgmt_info.max_index; i++) { + + if ((megasas_mgmt_info.instance[i]) && + (megasas_mgmt_info.instance[i]->host->host_no == host_no)) + return megasas_mgmt_info.instance[i]; + } + + return NULL; +} + +static int megasas_mgmt_ioctl_fw(struct file *file, unsigned long arg) +{ + struct megasas_iocpacket __user *user_ioc = + (struct megasas_iocpacket __user *)arg; + struct megasas_iocpacket *ioc; + struct megasas_instance *instance; + int error; + + ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); + if (!ioc) + return -ENOMEM; + + if (copy_from_user(ioc, user_ioc, sizeof(*ioc))) { + error = -EFAULT; + goto out_kfree_ioc; + } + + instance = 
megasas_lookup_instance(ioc->host_no); + if (!instance) { + error = -ENODEV; + goto out_kfree_ioc; + } + + /* + * We will allow only MEGASAS_INT_CMDS number of parallel ioctl cmds + */ + if (down_interruptible(&instance->ioctl_sem)) { + error = -ERESTARTSYS; + goto out_kfree_ioc; + } + error = megasas_mgmt_fw_ioctl(instance, user_ioc, ioc); + up(&instance->ioctl_sem); + + out_kfree_ioc: + kfree(ioc); + return error; +} + +static int megasas_mgmt_ioctl_aen(struct file *file, unsigned long arg) +{ + struct megasas_instance *instance; + struct megasas_aen aen; + int error; + + if (file->private_data != file) { + printk(KERN_DEBUG "megasas: fasync_helper was not " + "called first\n"); + return -EINVAL; + } + + if (copy_from_user(&aen, (void __user *)arg, sizeof(aen))) + return -EFAULT; + + instance = megasas_lookup_instance(aen.host_no); + + if (!instance) + return -ENODEV; + + down(&instance->aen_mutex); + error = megasas_register_aen(instance, aen.seq_num, + aen.class_locale_word); + up(&instance->aen_mutex); + return error; +} + +/** + * megasas_mgmt_ioctl - char node ioctl entry point + */ +static long +megasas_mgmt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case MEGASAS_IOC_FIRMWARE: + return megasas_mgmt_ioctl_fw(file, arg); + + case MEGASAS_IOC_GET_AEN: + return megasas_mgmt_ioctl_aen(file, arg); + } + + return -ENOTTY; +} + +#ifdef CONFIG_COMPAT +static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg) +{ + struct compat_megasas_iocpacket __user *cioc = + (struct compat_megasas_iocpacket __user *)arg; + struct megasas_iocpacket __user *ioc = + compat_alloc_user_space(sizeof(struct megasas_iocpacket)); + int i; + int error = 0; + + clear_user(ioc, sizeof(*ioc)); + + if (copy_in_user(&ioc->host_no, &cioc->host_no, sizeof(u16)) || + copy_in_user(&ioc->sgl_off, &cioc->sgl_off, sizeof(u32)) || + copy_in_user(&ioc->sense_off, &cioc->sense_off, sizeof(u32)) || + copy_in_user(&ioc->sense_len, &cioc->sense_len, sizeof(u32)) || + copy_in_user(ioc->frame.raw, cioc->frame.raw, 128) || + copy_in_user(&ioc->sge_count, &cioc->sge_count, sizeof(u32))) + return -EFAULT; + + for (i = 0; i < MAX_IOCTL_SGE; i++) { + compat_uptr_t ptr; + + if (get_user(ptr, &cioc->sgl[i].iov_base) || + put_user(compat_ptr(ptr), &ioc->sgl[i].iov_base) || + copy_in_user(&ioc->sgl[i].iov_len, + &cioc->sgl[i].iov_len, sizeof(compat_size_t))) + return -EFAULT; + } + + error = megasas_mgmt_ioctl_fw(file, (unsigned long)ioc); + + if (copy_in_user(&cioc->frame.hdr.cmd_status, + &ioc->frame.hdr.cmd_status, sizeof(u8))) { + printk(KERN_DEBUG "megasas: error copy_in_user cmd_status\n"); + return -EFAULT; + } + return error; +} + +static long +megasas_mgmt_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case MEGASAS_IOC_FIRMWARE:{ + return megasas_mgmt_compat_ioctl_fw(file, arg); + } + case MEGASAS_IOC_GET_AEN: + return megasas_mgmt_ioctl_aen(file, arg); + } + + return -ENOTTY; +} +#endif + +/* + * File operations structure for management interface + */ +static struct file_operations megasas_mgmt_fops = { + .owner = THIS_MODULE, + .open = megasas_mgmt_open, + .release = megasas_mgmt_release, + .fasync = megasas_mgmt_fasync, + .unlocked_ioctl = megasas_mgmt_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = megasas_mgmt_compat_ioctl, +#endif +}; + +/* + * PCI hotplug support registration structure + */ +static struct pci_driver megasas_pci_driver = { + + .name = "megaraid_sas", + .id_table = megasas_pci_table, + .probe = 
megasas_probe_one, + .remove = __devexit_p(megasas_detach_one), + .shutdown = megasas_shutdown, +}; + +/* + * Sysfs driver attributes + */ +static ssize_t megasas_sysfs_show_version(struct device_driver *dd, char *buf) +{ + return snprintf(buf, strlen(MEGASAS_VERSION) + 2, "%s\n", + MEGASAS_VERSION); +} + +static DRIVER_ATTR(version, S_IRUGO, megasas_sysfs_show_version, NULL); + +static ssize_t +megasas_sysfs_show_release_date(struct device_driver *dd, char *buf) +{ + return snprintf(buf, strlen(MEGASAS_RELDATE) + 2, "%s\n", + MEGASAS_RELDATE); +} + +static DRIVER_ATTR(release_date, S_IRUGO, megasas_sysfs_show_release_date, + NULL); + +/** + * megasas_init - Driver load entry point + */ +static int __init megasas_init(void) +{ + int rval; + + /* + * Announce driver version and other information + */ + printk(KERN_INFO "megasas: %s %s\n", MEGASAS_VERSION, + MEGASAS_EXT_VERSION); + + memset(&megasas_mgmt_info, 0, sizeof(megasas_mgmt_info)); + + /* + * Register character device node + */ + rval = register_chrdev(0, "megaraid_sas_ioctl", &megasas_mgmt_fops); + + if (rval < 0) { + printk(KERN_DEBUG "megasas: failed to open device node\n"); + return rval; + } + + megasas_mgmt_majorno = rval; + + /* + * Register ourselves as PCI hotplug module + */ + rval = pci_module_init(&megasas_pci_driver); + + if (rval) { + printk(KERN_DEBUG "megasas: PCI hotplug regisration failed \n"); + unregister_chrdev(megasas_mgmt_majorno, "megaraid_sas_ioctl"); + } + + driver_create_file(&megasas_pci_driver.driver, &driver_attr_version); + driver_create_file(&megasas_pci_driver.driver, + &driver_attr_release_date); + + return rval; +} + +/** + * megasas_exit - Driver unload entry point + */ +static void __exit megasas_exit(void) +{ + driver_remove_file(&megasas_pci_driver.driver, &driver_attr_version); + driver_remove_file(&megasas_pci_driver.driver, + &driver_attr_release_date); + + pci_unregister_driver(&megasas_pci_driver); + unregister_chrdev(megasas_mgmt_majorno, "megaraid_sas_ioctl"); +} + +module_init(megasas_init); +module_exit(megasas_exit); diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h new file mode 100644 index 000000000000..eaec9d531424 --- /dev/null +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -0,0 +1,1142 @@ +/* + * + * Linux MegaRAID driver for SAS based RAID controllers + * + * Copyright (c) 2003-2005 LSI Logic Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * FILE : megaraid_sas.h + */ + +#ifndef LSI_MEGARAID_SAS_H +#define LSI_MEGARAID_SAS_H + +/** + * MegaRAID SAS Driver meta data + */ +#define MEGASAS_VERSION "00.00.02.00-rc4" +#define MEGASAS_RELDATE "Sep 16, 2005" +#define MEGASAS_EXT_VERSION "Fri Sep 16 12:37:08 EDT 2005" + +/* + * ===================================== + * MegaRAID SAS MFI firmware definitions + * ===================================== + */ + +/* + * MFI stands for MegaRAID SAS FW Interface. This is just a moniker for + * protocol between the software and firmware. 
Commands are issued using + * "message frames" + */ + +/** + * FW posts its state in upper 4 bits of outbound_msg_0 register + */ +#define MFI_STATE_MASK 0xF0000000 +#define MFI_STATE_UNDEFINED 0x00000000 +#define MFI_STATE_BB_INIT 0x10000000 +#define MFI_STATE_FW_INIT 0x40000000 +#define MFI_STATE_WAIT_HANDSHAKE 0x60000000 +#define MFI_STATE_FW_INIT_2 0x70000000 +#define MFI_STATE_DEVICE_SCAN 0x80000000 +#define MFI_STATE_FLUSH_CACHE 0xA0000000 +#define MFI_STATE_READY 0xB0000000 +#define MFI_STATE_OPERATIONAL 0xC0000000 +#define MFI_STATE_FAULT 0xF0000000 + +#define MEGAMFI_FRAME_SIZE 64 + +/** + * During FW init, clear pending cmds & reset state using inbound_msg_0 + * + * ABORT : Abort all pending cmds + * READY : Move from OPERATIONAL to READY state; discard queue info + * MFIMODE : Discard (possible) low MFA posted in 64-bit mode (??) + * CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver + */ +#define MFI_INIT_ABORT 0x00000000 +#define MFI_INIT_READY 0x00000002 +#define MFI_INIT_MFIMODE 0x00000004 +#define MFI_INIT_CLEAR_HANDSHAKE 0x00000008 +#define MFI_RESET_FLAGS MFI_INIT_READY|MFI_INIT_MFIMODE + +/** + * MFI frame flags + */ +#define MFI_FRAME_POST_IN_REPLY_QUEUE 0x0000 +#define MFI_FRAME_DONT_POST_IN_REPLY_QUEUE 0x0001 +#define MFI_FRAME_SGL32 0x0000 +#define MFI_FRAME_SGL64 0x0002 +#define MFI_FRAME_SENSE32 0x0000 +#define MFI_FRAME_SENSE64 0x0004 +#define MFI_FRAME_DIR_NONE 0x0000 +#define MFI_FRAME_DIR_WRITE 0x0008 +#define MFI_FRAME_DIR_READ 0x0010 +#define MFI_FRAME_DIR_BOTH 0x0018 + +/** + * Definition for cmd_status + */ +#define MFI_CMD_STATUS_POLL_MODE 0xFF + +/** + * MFI command opcodes + */ +#define MFI_CMD_INIT 0x00 +#define MFI_CMD_LD_READ 0x01 +#define MFI_CMD_LD_WRITE 0x02 +#define MFI_CMD_LD_SCSI_IO 0x03 +#define MFI_CMD_PD_SCSI_IO 0x04 +#define MFI_CMD_DCMD 0x05 +#define MFI_CMD_ABORT 0x06 +#define MFI_CMD_SMP 0x07 +#define MFI_CMD_STP 0x08 + +#define MR_DCMD_CTRL_GET_INFO 0x01010000 + +#define MR_DCMD_CTRL_CACHE_FLUSH 0x01101000 +#define MR_FLUSH_CTRL_CACHE 0x01 +#define MR_FLUSH_DISK_CACHE 0x02 + +#define MR_DCMD_CTRL_SHUTDOWN 0x01050000 +#define MR_ENABLE_DRIVE_SPINDOWN 0x01 + +#define MR_DCMD_CTRL_EVENT_GET_INFO 0x01040100 +#define MR_DCMD_CTRL_EVENT_GET 0x01040300 +#define MR_DCMD_CTRL_EVENT_WAIT 0x01040500 +#define MR_DCMD_LD_GET_PROPERTIES 0x03030000 + +#define MR_DCMD_CLUSTER 0x08000000 +#define MR_DCMD_CLUSTER_RESET_ALL 0x08010100 +#define MR_DCMD_CLUSTER_RESET_LD 0x08010200 + +/** + * MFI command completion codes + */ +enum MFI_STAT { + MFI_STAT_OK = 0x00, + MFI_STAT_INVALID_CMD = 0x01, + MFI_STAT_INVALID_DCMD = 0x02, + MFI_STAT_INVALID_PARAMETER = 0x03, + MFI_STAT_INVALID_SEQUENCE_NUMBER = 0x04, + MFI_STAT_ABORT_NOT_POSSIBLE = 0x05, + MFI_STAT_APP_HOST_CODE_NOT_FOUND = 0x06, + MFI_STAT_APP_IN_USE = 0x07, + MFI_STAT_APP_NOT_INITIALIZED = 0x08, + MFI_STAT_ARRAY_INDEX_INVALID = 0x09, + MFI_STAT_ARRAY_ROW_NOT_EMPTY = 0x0a, + MFI_STAT_CONFIG_RESOURCE_CONFLICT = 0x0b, + MFI_STAT_DEVICE_NOT_FOUND = 0x0c, + MFI_STAT_DRIVE_TOO_SMALL = 0x0d, + MFI_STAT_FLASH_ALLOC_FAIL = 0x0e, + MFI_STAT_FLASH_BUSY = 0x0f, + MFI_STAT_FLASH_ERROR = 0x10, + MFI_STAT_FLASH_IMAGE_BAD = 0x11, + MFI_STAT_FLASH_IMAGE_INCOMPLETE = 0x12, + MFI_STAT_FLASH_NOT_OPEN = 0x13, + MFI_STAT_FLASH_NOT_STARTED = 0x14, + MFI_STAT_FLUSH_FAILED = 0x15, + MFI_STAT_HOST_CODE_NOT_FOUNT = 0x16, + MFI_STAT_LD_CC_IN_PROGRESS = 0x17, + MFI_STAT_LD_INIT_IN_PROGRESS = 0x18, + MFI_STAT_LD_LBA_OUT_OF_RANGE = 0x19, + MFI_STAT_LD_MAX_CONFIGURED = 0x1a, + MFI_STAT_LD_NOT_OPTIMAL = 0x1b, + 
MFI_STAT_LD_RBLD_IN_PROGRESS = 0x1c, + MFI_STAT_LD_RECON_IN_PROGRESS = 0x1d, + MFI_STAT_LD_WRONG_RAID_LEVEL = 0x1e, + MFI_STAT_MAX_SPARES_EXCEEDED = 0x1f, + MFI_STAT_MEMORY_NOT_AVAILABLE = 0x20, + MFI_STAT_MFC_HW_ERROR = 0x21, + MFI_STAT_NO_HW_PRESENT = 0x22, + MFI_STAT_NOT_FOUND = 0x23, + MFI_STAT_NOT_IN_ENCL = 0x24, + MFI_STAT_PD_CLEAR_IN_PROGRESS = 0x25, + MFI_STAT_PD_TYPE_WRONG = 0x26, + MFI_STAT_PR_DISABLED = 0x27, + MFI_STAT_ROW_INDEX_INVALID = 0x28, + MFI_STAT_SAS_CONFIG_INVALID_ACTION = 0x29, + MFI_STAT_SAS_CONFIG_INVALID_DATA = 0x2a, + MFI_STAT_SAS_CONFIG_INVALID_PAGE = 0x2b, + MFI_STAT_SAS_CONFIG_INVALID_TYPE = 0x2c, + MFI_STAT_SCSI_DONE_WITH_ERROR = 0x2d, + MFI_STAT_SCSI_IO_FAILED = 0x2e, + MFI_STAT_SCSI_RESERVATION_CONFLICT = 0x2f, + MFI_STAT_SHUTDOWN_FAILED = 0x30, + MFI_STAT_TIME_NOT_SET = 0x31, + MFI_STAT_WRONG_STATE = 0x32, + MFI_STAT_LD_OFFLINE = 0x33, + MFI_STAT_PEER_NOTIFICATION_REJECTED = 0x34, + MFI_STAT_PEER_NOTIFICATION_FAILED = 0x35, + MFI_STAT_RESERVATION_IN_PROGRESS = 0x36, + MFI_STAT_I2C_ERRORS_DETECTED = 0x37, + MFI_STAT_PCI_ERRORS_DETECTED = 0x38, + + MFI_STAT_INVALID_STATUS = 0xFF +}; + +/* + * Number of mailbox bytes in DCMD message frame + */ +#define MFI_MBOX_SIZE 12 + +enum MR_EVT_CLASS { + + MR_EVT_CLASS_DEBUG = -2, + MR_EVT_CLASS_PROGRESS = -1, + MR_EVT_CLASS_INFO = 0, + MR_EVT_CLASS_WARNING = 1, + MR_EVT_CLASS_CRITICAL = 2, + MR_EVT_CLASS_FATAL = 3, + MR_EVT_CLASS_DEAD = 4, + +}; + +enum MR_EVT_LOCALE { + + MR_EVT_LOCALE_LD = 0x0001, + MR_EVT_LOCALE_PD = 0x0002, + MR_EVT_LOCALE_ENCL = 0x0004, + MR_EVT_LOCALE_BBU = 0x0008, + MR_EVT_LOCALE_SAS = 0x0010, + MR_EVT_LOCALE_CTRL = 0x0020, + MR_EVT_LOCALE_CONFIG = 0x0040, + MR_EVT_LOCALE_CLUSTER = 0x0080, + MR_EVT_LOCALE_ALL = 0xffff, + +}; + +enum MR_EVT_ARGS { + + MR_EVT_ARGS_NONE, + MR_EVT_ARGS_CDB_SENSE, + MR_EVT_ARGS_LD, + MR_EVT_ARGS_LD_COUNT, + MR_EVT_ARGS_LD_LBA, + MR_EVT_ARGS_LD_OWNER, + MR_EVT_ARGS_LD_LBA_PD_LBA, + MR_EVT_ARGS_LD_PROG, + MR_EVT_ARGS_LD_STATE, + MR_EVT_ARGS_LD_STRIP, + MR_EVT_ARGS_PD, + MR_EVT_ARGS_PD_ERR, + MR_EVT_ARGS_PD_LBA, + MR_EVT_ARGS_PD_LBA_LD, + MR_EVT_ARGS_PD_PROG, + MR_EVT_ARGS_PD_STATE, + MR_EVT_ARGS_PCI, + MR_EVT_ARGS_RATE, + MR_EVT_ARGS_STR, + MR_EVT_ARGS_TIME, + MR_EVT_ARGS_ECC, + +}; + +/* + * SAS controller properties + */ +struct megasas_ctrl_prop { + + u16 seq_num; + u16 pred_fail_poll_interval; + u16 intr_throttle_count; + u16 intr_throttle_timeouts; + u8 rebuild_rate; + u8 patrol_read_rate; + u8 bgi_rate; + u8 cc_rate; + u8 recon_rate; + u8 cache_flush_interval; + u8 spinup_drv_count; + u8 spinup_delay; + u8 cluster_enable; + u8 coercion_mode; + u8 alarm_enable; + u8 disable_auto_rebuild; + u8 disable_battery_warn; + u8 ecc_bucket_size; + u16 ecc_bucket_leak_rate; + u8 restore_hotspare_on_insertion; + u8 expose_encl_devices; + u8 reserved[38]; + +} __attribute__ ((packed)); + +/* + * SAS controller information + */ +struct megasas_ctrl_info { + + /* + * PCI device information + */ + struct { + + u16 vendor_id; + u16 device_id; + u16 sub_vendor_id; + u16 sub_device_id; + u8 reserved[24]; + + } __attribute__ ((packed)) pci; + + /* + * Host interface information + */ + struct { + + u8 PCIX:1; + u8 PCIE:1; + u8 iSCSI:1; + u8 SAS_3G:1; + u8 reserved_0:4; + u8 reserved_1[6]; + u8 port_count; + u64 port_addr[8]; + + } __attribute__ ((packed)) host_interface; + + /* + * Device (backend) interface information + */ + struct { + + u8 SPI:1; + u8 SAS_3G:1; + u8 SATA_1_5G:1; + u8 SATA_3G:1; + u8 reserved_0:4; + u8 reserved_1[6]; + u8 port_count; + u64 port_addr[8]; + + } 
__attribute__ ((packed)) device_interface; + + /* + * List of components residing in flash. All str are null terminated + */ + u32 image_check_word; + u32 image_component_count; + + struct { + + char name[8]; + char version[32]; + char build_date[16]; + char built_time[16]; + + } __attribute__ ((packed)) image_component[8]; + + /* + * List of flash components that have been flashed on the card, but + * are not in use, pending reset of the adapter. This list will be + * empty if a flash operation has not occurred. All stings are null + * terminated + */ + u32 pending_image_component_count; + + struct { + + char name[8]; + char version[32]; + char build_date[16]; + char build_time[16]; + + } __attribute__ ((packed)) pending_image_component[8]; + + u8 max_arms; + u8 max_spans; + u8 max_arrays; + u8 max_lds; + + char product_name[80]; + char serial_no[32]; + + /* + * Other physical/controller/operation information. Indicates the + * presence of the hardware + */ + struct { + + u32 bbu:1; + u32 alarm:1; + u32 nvram:1; + u32 uart:1; + u32 reserved:28; + + } __attribute__ ((packed)) hw_present; + + u32 current_fw_time; + + /* + * Maximum data transfer sizes + */ + u16 max_concurrent_cmds; + u16 max_sge_count; + u32 max_request_size; + + /* + * Logical and physical device counts + */ + u16 ld_present_count; + u16 ld_degraded_count; + u16 ld_offline_count; + + u16 pd_present_count; + u16 pd_disk_present_count; + u16 pd_disk_pred_failure_count; + u16 pd_disk_failed_count; + + /* + * Memory size information + */ + u16 nvram_size; + u16 memory_size; + u16 flash_size; + + /* + * Error counters + */ + u16 mem_correctable_error_count; + u16 mem_uncorrectable_error_count; + + /* + * Cluster information + */ + u8 cluster_permitted; + u8 cluster_active; + + /* + * Additional max data transfer sizes + */ + u16 max_strips_per_io; + + /* + * Controller capabilities structures + */ + struct { + + u32 raid_level_0:1; + u32 raid_level_1:1; + u32 raid_level_5:1; + u32 raid_level_1E:1; + u32 raid_level_6:1; + u32 reserved:27; + + } __attribute__ ((packed)) raid_levels; + + struct { + + u32 rbld_rate:1; + u32 cc_rate:1; + u32 bgi_rate:1; + u32 recon_rate:1; + u32 patrol_rate:1; + u32 alarm_control:1; + u32 cluster_supported:1; + u32 bbu:1; + u32 spanning_allowed:1; + u32 dedicated_hotspares:1; + u32 revertible_hotspares:1; + u32 foreign_config_import:1; + u32 self_diagnostic:1; + u32 mixed_redundancy_arr:1; + u32 global_hot_spares:1; + u32 reserved:17; + + } __attribute__ ((packed)) adapter_operations; + + struct { + + u32 read_policy:1; + u32 write_policy:1; + u32 io_policy:1; + u32 access_policy:1; + u32 disk_cache_policy:1; + u32 reserved:27; + + } __attribute__ ((packed)) ld_operations; + + struct { + + u8 min; + u8 max; + u8 reserved[2]; + + } __attribute__ ((packed)) stripe_sz_ops; + + struct { + + u32 force_online:1; + u32 force_offline:1; + u32 force_rebuild:1; + u32 reserved:29; + + } __attribute__ ((packed)) pd_operations; + + struct { + + u32 ctrl_supports_sas:1; + u32 ctrl_supports_sata:1; + u32 allow_mix_in_encl:1; + u32 allow_mix_in_ld:1; + u32 allow_sata_in_cluster:1; + u32 reserved:27; + + } __attribute__ ((packed)) pd_mix_support; + + /* + * Define ECC single-bit-error bucket information + */ + u8 ecc_bucket_count; + u8 reserved_2[11]; + + /* + * Include the controller properties (changeable items) + */ + struct megasas_ctrl_prop properties; + + /* + * Define FW pkg version (set in envt v'bles on OEM basis) + */ + char package_version[0x60]; + + u8 pad[0x800 - 0x6a0]; + +} __attribute__ ((packed)); 
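+
+/*
+ * A rough illustration (the numbers are examples, real values come from
+ * the firmware via MR_DCMD_CTRL_GET_INFO) of how megasas_init_mfi()
+ * combines the limits above:
+ *
+ *   sectors per strip   = 1 << stripe_sz_ops.min        e.g. 1 << 7   = 128
+ *   max_sectors_1       = sectors per strip * max_strips_per_io
+ *                                                       e.g. 128 * 42 = 5376
+ *   max_sectors_2       = max_request_size              e.g.            2048
+ *   max_sectors_per_req = min(max_sectors_1, max_sectors_2)     ->      2048
+ */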
+ +/* + * =============================== + * MegaRAID SAS driver definitions + * =============================== + */ +#define MEGASAS_MAX_PD_CHANNELS 2 +#define MEGASAS_MAX_LD_CHANNELS 2 +#define MEGASAS_MAX_CHANNELS (MEGASAS_MAX_PD_CHANNELS + \ + MEGASAS_MAX_LD_CHANNELS) +#define MEGASAS_MAX_DEV_PER_CHANNEL 128 +#define MEGASAS_DEFAULT_INIT_ID -1 +#define MEGASAS_MAX_LUN 8 +#define MEGASAS_MAX_LD 64 + +/* + * When SCSI mid-layer calls driver's reset routine, driver waits for + * MEGASAS_RESET_WAIT_TIME seconds for all outstanding IO to complete. Note + * that the driver cannot _actually_ abort or reset pending commands. While + * it is waiting for the commands to complete, it prints a diagnostic message + * every MEGASAS_RESET_NOTICE_INTERVAL seconds + */ +#define MEGASAS_RESET_WAIT_TIME 180 +#define MEGASAS_RESET_NOTICE_INTERVAL 5 + +#define MEGASAS_IOCTL_CMD 0 + +/* + * FW reports the maximum of number of commands that it can accept (maximum + * commands that can be outstanding) at any time. The driver must report a + * lower number to the mid layer because it can issue a few internal commands + * itself (E.g, AEN, abort cmd, IOCTLs etc). The number of commands it needs + * is shown below + */ +#define MEGASAS_INT_CMDS 32 + +/* + * FW can accept both 32 and 64 bit SGLs. We want to allocate 32/64 bit + * SGLs based on the size of dma_addr_t + */ +#define IS_DMA64 (sizeof(dma_addr_t) == 8) + +#define MFI_OB_INTR_STATUS_MASK 0x00000002 +#define MFI_POLL_TIMEOUT_SECS 10 + +struct megasas_register_set { + + u32 reserved_0[4]; /*0000h */ + + u32 inbound_msg_0; /*0010h */ + u32 inbound_msg_1; /*0014h */ + u32 outbound_msg_0; /*0018h */ + u32 outbound_msg_1; /*001Ch */ + + u32 inbound_doorbell; /*0020h */ + u32 inbound_intr_status; /*0024h */ + u32 inbound_intr_mask; /*0028h */ + + u32 outbound_doorbell; /*002Ch */ + u32 outbound_intr_status; /*0030h */ + u32 outbound_intr_mask; /*0034h */ + + u32 reserved_1[2]; /*0038h */ + + u32 inbound_queue_port; /*0040h */ + u32 outbound_queue_port; /*0044h */ + + u32 reserved_2; /*004Ch */ + + u32 index_registers[1004]; /*0050h */ + +} __attribute__ ((packed)); + +struct megasas_sge32 { + + u32 phys_addr; + u32 length; + +} __attribute__ ((packed)); + +struct megasas_sge64 { + + u64 phys_addr; + u32 length; + +} __attribute__ ((packed)); + +union megasas_sgl { + + struct megasas_sge32 sge32[1]; + struct megasas_sge64 sge64[1]; + +} __attribute__ ((packed)); + +struct megasas_header { + + u8 cmd; /*00h */ + u8 sense_len; /*01h */ + u8 cmd_status; /*02h */ + u8 scsi_status; /*03h */ + + u8 target_id; /*04h */ + u8 lun; /*05h */ + u8 cdb_len; /*06h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + u32 data_xferlen; /*14h */ + +} __attribute__ ((packed)); + +union megasas_sgl_frame { + + struct megasas_sge32 sge32[8]; + struct megasas_sge64 sge64[5]; + +} __attribute__ ((packed)); + +struct megasas_init_frame { + + u8 cmd; /*00h */ + u8 reserved_0; /*01h */ + u8 cmd_status; /*02h */ + + u8 reserved_1; /*03h */ + u32 reserved_2; /*04h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 reserved_3; /*12h */ + u32 data_xfer_len; /*14h */ + + u32 queue_info_new_phys_addr_lo; /*18h */ + u32 queue_info_new_phys_addr_hi; /*1Ch */ + u32 queue_info_old_phys_addr_lo; /*20h */ + u32 queue_info_old_phys_addr_hi; /*24h */ + + u32 reserved_4[6]; /*28h */ + +} __attribute__ ((packed)); + +struct megasas_init_queue_info { + + u32 init_flags; /*00h */ + u32 
reply_queue_entries; /*04h */ + + u32 reply_queue_start_phys_addr_lo; /*08h */ + u32 reply_queue_start_phys_addr_hi; /*0Ch */ + u32 producer_index_phys_addr_lo; /*10h */ + u32 producer_index_phys_addr_hi; /*14h */ + u32 consumer_index_phys_addr_lo; /*18h */ + u32 consumer_index_phys_addr_hi; /*1Ch */ + +} __attribute__ ((packed)); + +struct megasas_io_frame { + + u8 cmd; /*00h */ + u8 sense_len; /*01h */ + u8 cmd_status; /*02h */ + u8 scsi_status; /*03h */ + + u8 target_id; /*04h */ + u8 access_byte; /*05h */ + u8 reserved_0; /*06h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + u32 lba_count; /*14h */ + + u32 sense_buf_phys_addr_lo; /*18h */ + u32 sense_buf_phys_addr_hi; /*1Ch */ + + u32 start_lba_lo; /*20h */ + u32 start_lba_hi; /*24h */ + + union megasas_sgl sgl; /*28h */ + +} __attribute__ ((packed)); + +struct megasas_pthru_frame { + + u8 cmd; /*00h */ + u8 sense_len; /*01h */ + u8 cmd_status; /*02h */ + u8 scsi_status; /*03h */ + + u8 target_id; /*04h */ + u8 lun; /*05h */ + u8 cdb_len; /*06h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + u32 data_xfer_len; /*14h */ + + u32 sense_buf_phys_addr_lo; /*18h */ + u32 sense_buf_phys_addr_hi; /*1Ch */ + + u8 cdb[16]; /*20h */ + union megasas_sgl sgl; /*30h */ + +} __attribute__ ((packed)); + +struct megasas_dcmd_frame { + + u8 cmd; /*00h */ + u8 reserved_0; /*01h */ + u8 cmd_status; /*02h */ + u8 reserved_1[4]; /*03h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + + u32 data_xfer_len; /*14h */ + u32 opcode; /*18h */ + + union { /*1Ch */ + u8 b[12]; + u16 s[6]; + u32 w[3]; + } mbox; + + union megasas_sgl sgl; /*28h */ + +} __attribute__ ((packed)); + +struct megasas_abort_frame { + + u8 cmd; /*00h */ + u8 reserved_0; /*01h */ + u8 cmd_status; /*02h */ + + u8 reserved_1; /*03h */ + u32 reserved_2; /*04h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 reserved_3; /*12h */ + u32 reserved_4; /*14h */ + + u32 abort_context; /*18h */ + u32 pad_1; /*1Ch */ + + u32 abort_mfi_phys_addr_lo; /*20h */ + u32 abort_mfi_phys_addr_hi; /*24h */ + + u32 reserved_5[6]; /*28h */ + +} __attribute__ ((packed)); + +struct megasas_smp_frame { + + u8 cmd; /*00h */ + u8 reserved_1; /*01h */ + u8 cmd_status; /*02h */ + u8 connection_status; /*03h */ + + u8 reserved_2[3]; /*04h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + + u32 data_xfer_len; /*14h */ + u64 sas_addr; /*18h */ + + union { + struct megasas_sge32 sge32[2]; /* [0]: resp [1]: req */ + struct megasas_sge64 sge64[2]; /* [0]: resp [1]: req */ + } sgl; + +} __attribute__ ((packed)); + +struct megasas_stp_frame { + + u8 cmd; /*00h */ + u8 reserved_1; /*01h */ + u8 cmd_status; /*02h */ + u8 reserved_2; /*03h */ + + u8 target_id; /*04h */ + u8 reserved_3[2]; /*05h */ + u8 sge_count; /*07h */ + + u32 context; /*08h */ + u32 pad_0; /*0Ch */ + + u16 flags; /*10h */ + u16 timeout; /*12h */ + + u32 data_xfer_len; /*14h */ + + u16 fis[10]; /*18h */ + u32 stp_flags; + + union { + struct megasas_sge32 sge32[2]; /* [0]: resp [1]: data */ + struct megasas_sge64 sge64[2]; /* [0]: resp [1]: data */ + } sgl; + +} __attribute__ ((packed)); + +union megasas_frame { + + struct megasas_header hdr; + struct megasas_init_frame init; + struct megasas_io_frame io; + struct megasas_pthru_frame 
pthru; + struct megasas_dcmd_frame dcmd; + struct megasas_abort_frame abort; + struct megasas_smp_frame smp; + struct megasas_stp_frame stp; + + u8 raw_bytes[64]; +}; + +struct megasas_cmd; + +union megasas_evt_class_locale { + + struct { + u16 locale; + u8 reserved; + s8 class; + } __attribute__ ((packed)) members; + + u32 word; + +} __attribute__ ((packed)); + +struct megasas_evt_log_info { + u32 newest_seq_num; + u32 oldest_seq_num; + u32 clear_seq_num; + u32 shutdown_seq_num; + u32 boot_seq_num; + +} __attribute__ ((packed)); + +struct megasas_progress { + + u16 progress; + u16 elapsed_seconds; + +} __attribute__ ((packed)); + +struct megasas_evtarg_ld { + + u16 target_id; + u8 ld_index; + u8 reserved; + +} __attribute__ ((packed)); + +struct megasas_evtarg_pd { + u16 device_id; + u8 encl_index; + u8 slot_number; + +} __attribute__ ((packed)); + +struct megasas_evt_detail { + + u32 seq_num; + u32 time_stamp; + u32 code; + union megasas_evt_class_locale cl; + u8 arg_type; + u8 reserved1[15]; + + union { + struct { + struct megasas_evtarg_pd pd; + u8 cdb_length; + u8 sense_length; + u8 reserved[2]; + u8 cdb[16]; + u8 sense[64]; + } __attribute__ ((packed)) cdbSense; + + struct megasas_evtarg_ld ld; + + struct { + struct megasas_evtarg_ld ld; + u64 count; + } __attribute__ ((packed)) ld_count; + + struct { + u64 lba; + struct megasas_evtarg_ld ld; + } __attribute__ ((packed)) ld_lba; + + struct { + struct megasas_evtarg_ld ld; + u32 prevOwner; + u32 newOwner; + } __attribute__ ((packed)) ld_owner; + + struct { + u64 ld_lba; + u64 pd_lba; + struct megasas_evtarg_ld ld; + struct megasas_evtarg_pd pd; + } __attribute__ ((packed)) ld_lba_pd_lba; + + struct { + struct megasas_evtarg_ld ld; + struct megasas_progress prog; + } __attribute__ ((packed)) ld_prog; + + struct { + struct megasas_evtarg_ld ld; + u32 prev_state; + u32 new_state; + } __attribute__ ((packed)) ld_state; + + struct { + u64 strip; + struct megasas_evtarg_ld ld; + } __attribute__ ((packed)) ld_strip; + + struct megasas_evtarg_pd pd; + + struct { + struct megasas_evtarg_pd pd; + u32 err; + } __attribute__ ((packed)) pd_err; + + struct { + u64 lba; + struct megasas_evtarg_pd pd; + } __attribute__ ((packed)) pd_lba; + + struct { + u64 lba; + struct megasas_evtarg_pd pd; + struct megasas_evtarg_ld ld; + } __attribute__ ((packed)) pd_lba_ld; + + struct { + struct megasas_evtarg_pd pd; + struct megasas_progress prog; + } __attribute__ ((packed)) pd_prog; + + struct { + struct megasas_evtarg_pd pd; + u32 prevState; + u32 newState; + } __attribute__ ((packed)) pd_state; + + struct { + u16 vendorId; + u16 deviceId; + u16 subVendorId; + u16 subDeviceId; + } __attribute__ ((packed)) pci; + + u32 rate; + char str[96]; + + struct { + u32 rtc; + u32 elapsedSeconds; + } __attribute__ ((packed)) time; + + struct { + u32 ecar; + u32 elog; + char str[64]; + } __attribute__ ((packed)) ecc; + + u8 b[96]; + u16 s[48]; + u32 w[24]; + u64 d[12]; + } args; + + char description[128]; + +} __attribute__ ((packed)); + +struct megasas_instance { + + u32 *producer; + dma_addr_t producer_h; + u32 *consumer; + dma_addr_t consumer_h; + + u32 *reply_queue; + dma_addr_t reply_queue_h; + + unsigned long base_addr; + struct megasas_register_set __iomem *reg_set; + + s8 init_id; + u8 reserved[3]; + + u16 max_num_sge; + u16 max_fw_cmds; + u32 max_sectors_per_req; + + struct megasas_cmd **cmd_list; + struct list_head cmd_pool; + spinlock_t cmd_pool_lock; + struct dma_pool *frame_dma_pool; + struct dma_pool *sense_dma_pool; + + struct megasas_evt_detail 
*evt_detail; + dma_addr_t evt_detail_h; + struct megasas_cmd *aen_cmd; + struct semaphore aen_mutex; + struct semaphore ioctl_sem; + + struct Scsi_Host *host; + + wait_queue_head_t int_cmd_wait_q; + wait_queue_head_t abort_cmd_wait_q; + + struct pci_dev *pdev; + u32 unique_id; + + u32 fw_outstanding; + u32 hw_crit_error; + spinlock_t instance_lock; +}; + +#define MEGASAS_IS_LOGICAL(scp) \ + (scp->device->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1 + +#define MEGASAS_DEV_INDEX(inst, scp) \ + ((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ + scp->device->id + +struct megasas_cmd { + + union megasas_frame *frame; + dma_addr_t frame_phys_addr; + u8 *sense; + dma_addr_t sense_phys_addr; + + u32 index; + u8 sync_cmd; + u8 cmd_status; + u16 abort_aen; + + struct list_head list; + struct scsi_cmnd *scmd; + struct megasas_instance *instance; + u32 frame_count; +}; + +#define MAX_MGMT_ADAPTERS 1024 +#define MAX_IOCTL_SGE 16 + +struct megasas_iocpacket { + + u16 host_no; + u16 __pad1; + u32 sgl_off; + u32 sge_count; + u32 sense_off; + u32 sense_len; + union { + u8 raw[128]; + struct megasas_header hdr; + } frame; + + struct iovec sgl[MAX_IOCTL_SGE]; + +} __attribute__ ((packed)); + +struct megasas_aen { + u16 host_no; + u16 __pad1; + u32 seq_num; + u32 class_locale_word; +} __attribute__ ((packed)); + +#ifdef CONFIG_COMPAT +struct compat_megasas_iocpacket { + u16 host_no; + u16 __pad1; + u32 sgl_off; + u32 sge_count; + u32 sense_off; + u32 sense_len; + union { + u8 raw[128]; + struct megasas_header hdr; + } frame; + struct compat_iovec sgl[MAX_IOCTL_SGE]; +} __attribute__ ((packed)); + +#define MEGASAS_IOC_FIRMWARE _IOWR('M', 1, struct compat_megasas_iocpacket) +#else +#define MEGASAS_IOC_FIRMWARE _IOWR('M', 1, struct megasas_iocpacket) +#endif + +#define MEGASAS_IOC_GET_AEN _IOW('M', 3, struct megasas_aen) + +struct megasas_mgmt_info { + + u16 count; + struct megasas_instance *instance[MAX_MGMT_ADAPTERS]; + int max_index; +}; + +#endif /*LSI_MEGARAID_SAS_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c49d28eca561..20fb79810c3c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -185,6 +185,7 @@ #define PCI_DEVICE_ID_LSI_61C102 0x0901 #define PCI_DEVICE_ID_LSI_63C815 0x1000 #define PCI_DEVICE_ID_LSI_SAS1064 0x0050 +#define PCI_DEVICE_ID_LSI_SAS1064R 0x0411 #define PCI_DEVICE_ID_LSI_SAS1066 0x005E #define PCI_DEVICE_ID_LSI_SAS1068 0x0054 #define PCI_DEVICE_ID_LSI_SAS1064A 0x005C @@ -559,6 +560,7 @@ #define PCI_VENDOR_ID_DELL 0x1028 #define PCI_DEVICE_ID_DELL_RACIII 0x0008 #define PCI_DEVICE_ID_DELL_RAC4 0x0012 +#define PCI_DEVICE_ID_DELL_PERC5 0x0015 #define PCI_VENDOR_ID_MATROX 0x102B #define PCI_DEVICE_ID_MATROX_MGA_2 0x0518 -- cgit v1.2.3 From 24558a0f7a00fccd19a6e6502956463f056ce90e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Sep 2005 06:19:28 +0100 Subject: [PATCH] m32r: missing __iomem in ioremap() declaration Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/asm-m32r/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-m32r/io.h b/include/asm-m32r/io.h index 8e9e481e6996..70ad1c949c2b 100644 --- a/include/asm-m32r/io.h +++ b/include/asm-m32r/io.h @@ -60,7 +60,7 @@ __ioremap(unsigned long offset, unsigned long size, unsigned long flags); * address. 
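 * A typical driver-side use, purely for illustration:
 *
 *     void __iomem *regs = ioremap(phys_base, 0x100);
 *     u32 val = readl(regs + 0x10);
 *     iounmap(regs);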
*/ -static inline void * ioremap(unsigned long offset, unsigned long size) +static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { return __ioremap(offset, size, 0); } -- cgit v1.2.3 From a880948b2b88c05af9a471ca5c52883e64d3f7b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Sep 2005 06:19:28 +0100 Subject: [PATCH] m32r: more basic __user annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/m32r/lib/usercopy.c | 16 ++++++++-------- include/asm-m32r/uaccess.h | 10 ++++++---- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/m32r/lib/usercopy.c b/arch/m32r/lib/usercopy.c index 6c6855f1aa05..ce16bbe26a52 100644 --- a/arch/m32r/lib/usercopy.c +++ b/arch/m32r/lib/usercopy.c @@ -13,7 +13,7 @@ #include unsigned long -__generic_copy_to_user(void *to, const void *from, unsigned long n) +__generic_copy_to_user(void __user *to, const void *from, unsigned long n) { prefetch(from); if (access_ok(VERIFY_WRITE, to, n)) @@ -22,7 +22,7 @@ __generic_copy_to_user(void *to, const void *from, unsigned long n) } unsigned long -__generic_copy_from_user(void *to, const void *from, unsigned long n) +__generic_copy_from_user(void *to, const void __user *from, unsigned long n) { prefetchw(to); if (access_ok(VERIFY_READ, from, n)) @@ -111,7 +111,7 @@ do { \ #endif /* CONFIG_ISA_DUAL_ISSUE */ long -__strncpy_from_user(char *dst, const char *src, long count) +__strncpy_from_user(char *dst, const char __user *src, long count) { long res; __do_strncpy_from_user(dst, src, count, res); @@ -119,7 +119,7 @@ __strncpy_from_user(char *dst, const char *src, long count) } long -strncpy_from_user(char *dst, const char *src, long count) +strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) @@ -222,7 +222,7 @@ do { \ #endif /* not CONFIG_ISA_DUAL_ISSUE */ unsigned long -clear_user(void *to, unsigned long n) +clear_user(void __user *to, unsigned long n) { if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); @@ -230,7 +230,7 @@ clear_user(void *to, unsigned long n) } unsigned long -__clear_user(void *to, unsigned long n) +__clear_user(void __user *to, unsigned long n) { __do_clear_user(to, n); return n; @@ -244,7 +244,7 @@ __clear_user(void *to, unsigned long n) #ifdef CONFIG_ISA_DUAL_ISSUE -long strnlen_user(const char *s, long n) +long strnlen_user(const char __user *s, long n) { unsigned long mask = -__addr_ok(s); unsigned long res; @@ -313,7 +313,7 @@ long strnlen_user(const char *s, long n) #else /* not CONFIG_ISA_DUAL_ISSUE */ -long strnlen_user(const char *s, long n) +long strnlen_user(const char __user *s, long n) { unsigned long mask = -__addr_ok(s); unsigned long res; diff --git a/include/asm-m32r/uaccess.h b/include/asm-m32r/uaccess.h index 93d863c455a1..0da7c47d2f01 100644 --- a/include/asm-m32r/uaccess.h +++ b/include/asm-m32r/uaccess.h @@ -208,7 +208,8 @@ extern void __get_user_4(void); * On error, the variable @x is set to zero. 
*/ #define get_user(x,ptr) \ -({ int __ret_gu,__val_gu; \ +({ int __ret_gu; \ + unsigned long __val_gu; \ __chk_user_ptr(ptr); \ switch(sizeof (*(ptr))) { \ case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \ @@ -403,7 +404,8 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user_nocheck(x,ptr,size) \ ({ \ - long __gu_err, __gu_val; \ + long __gu_err; \ + unsigned long __gu_val; \ __get_user_size(__gu_val,(ptr),(size),__gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ @@ -594,8 +596,8 @@ static inline unsigned long __generic_copy_to_user_nocheck(void __user *to, return n; } -unsigned long __generic_copy_to_user(void *, const void *, unsigned long); -unsigned long __generic_copy_from_user(void *, const void *, unsigned long); +unsigned long __generic_copy_to_user(void __user *, const void *, unsigned long); +unsigned long __generic_copy_from_user(void *, const void __user *, unsigned long); /** * __copy_to_user: - Copy a block of data into user space, with less checking. -- cgit v1.2.3 From 9356b8fc07dc126cd91d2b12f314d760ab48996e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Sep 2005 15:23:16 -0700 Subject: [NET]: Reorder some hot fields of struct net_device Place them on separate cache lines in SMP to lower memory bouncing between multiple CPU accessing the device. - One part is mostly used on receive path (including eth_type_trans()) (poll_list, poll, quota, weight, last_rx, dev_addr, broadcast) - One part is mostly used on queue transmit path (qdisc) (queue_lock, qdisc, qdisc_sleeping, qdisc_list, tx_queue_len) - One part is mostly used on xmit path (device) (xmit_lock, xmit_lock_owner, priv, hard_start_xmit, trans_start) 'features' is placed outside of these hot points, in a location that may be shared by all cpus (because mostly read) name_hlist is moved close to name[IFNAMSIZ] to speedup __dev_get_by_name() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 89 +++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c717907896d..368e4c825ff1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -265,6 +265,8 @@ struct net_device * the interface. */ char name[IFNAMSIZ]; + /* device name hash chain */ + struct hlist_node name_hlist; /* * I/O specific fields @@ -292,6 +294,21 @@ struct net_device /* ------- Fields preinitialized in Space.c finish here ------- */ + /* Net device features */ + unsigned long features; +#define NETIF_F_SG 1 /* Scatter/gather IO. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ +#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ +#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ +#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ +#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ +#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ +#define NETIF_F_LLTX 4096 /* LockLess TX */ + struct net_device *next_sched; /* Interface index. Unique device identifier */ @@ -316,9 +333,6 @@ struct net_device * will (read: may be cleaned up at will). 
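 * (trans_start, last_rx, dev_addr, broadcast, priv and hard_start_xmit
 *  move from here into the ____cacheline_aligned_in_smp groups further
 *  down, so receive, qdisc and xmit hot paths each get their own line.)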
*/ - /* These may be needed for future network-power-down code. */ - unsigned long trans_start; /* Time (in jiffies) of last Tx */ - unsigned long last_rx; /* Time of last Rx */ unsigned short flags; /* interface flags (a la BSD) */ unsigned short gflags; @@ -328,15 +342,12 @@ struct net_device unsigned mtu; /* interface MTU value */ unsigned short type; /* interface hardware type */ unsigned short hard_header_len; /* hardware hdr length */ - void *priv; /* pointer to private data */ struct net_device *master; /* Pointer to master device of a group, * which this device is member of. */ /* Interface address info. */ - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ - unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ @@ -346,8 +357,6 @@ struct net_device int promiscuity; int allmulti; - int watchdog_timeo; - struct timer_list watchdog_timer; /* Protocol specific pointers */ @@ -358,32 +367,62 @@ struct net_device void *ec_ptr; /* Econet specific data */ void *ax25_ptr; /* AX.25 specific data */ - struct list_head poll_list; /* Link to poll list */ +/* + * Cache line mostly used on receive path (including eth_type_trans()) + */ + struct list_head poll_list ____cacheline_aligned_in_smp; + /* Link to poll list */ + + int (*poll) (struct net_device *dev, int *quota); int quota; int weight; + unsigned long last_rx; /* Time of last Rx */ + /* Interface address info used in eth_type_trans() */ + unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast + because most packets are unicast) */ + + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ +/* + * Cache line mostly used on queue transmit path (qdisc) + */ + /* device queue lock */ + spinlock_t queue_lock ____cacheline_aligned_in_smp; struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; - struct Qdisc *qdisc_ingress; struct list_head qdisc_list; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* ingress path synchronizer */ spinlock_t ingress_lock; + struct Qdisc *qdisc_ingress; + +/* + * One part is mostly used on xmit path (device) + */ /* hard_start_xmit synchronizer */ - spinlock_t xmit_lock; + spinlock_t xmit_lock ____cacheline_aligned_in_smp; /* cpu id of processor entered to hard_start_xmit or -1, if nobody entered there. */ int xmit_lock_owner; - /* device queue lock */ - spinlock_t queue_lock; + void *priv; /* pointer to private data */ + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + /* These may be needed for future network-power-down code. */ + unsigned long trans_start; /* Time (in jiffies) of last Tx */ + + int watchdog_timeo; /* used by dev_watchdog() */ + struct timer_list watchdog_timer; + +/* + * refcnt is a very hot point, so align it on SMP + */ /* Number of references to this device */ - atomic_t refcnt; + atomic_t refcnt ____cacheline_aligned_in_smp; + /* delayed register/unregister */ struct list_head todo_list; - /* device name hash chain */ - struct hlist_node name_hlist; /* device index hash chain */ struct hlist_node index_hlist; @@ -396,21 +435,6 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Net device features */ - unsigned long features; -#define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ -#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. 
*/ -#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ -#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ -#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ -#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ -#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ -#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ -#define NETIF_F_LLTX 4096 /* LockLess TX */ - /* Called after device is detached from network. */ void (*uninit)(struct net_device *dev); /* Called after last user reference disappears. */ @@ -419,10 +443,7 @@ struct net_device /* Pointers to interface service routines. */ int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); #define HAVE_NETDEV_POLL - int (*poll) (struct net_device *dev, int *quota); int (*hard_header) (struct sk_buff *skb, struct net_device *dev, unsigned short type, -- cgit v1.2.3 From 1f26dac32057baaf67d10b45c6b5277db862911d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Sep 2005 15:24:13 -0700 Subject: [NET]: Add Sun Cassini driver. Written by Adrian Sun (asun@darksunrising.com). Ported to 2.6.x by Tom 'spot' Callaway . Further cleaned up and integrated by David S. Miller Signed-off-by: David S. Miller --- drivers/net/Kconfig | 8 + drivers/net/Makefile | 1 + drivers/net/cassini.c | 5311 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/net/cassini.h | 4425 +++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 2 + 5 files changed, 9747 insertions(+) create mode 100644 drivers/net/cassini.c create mode 100644 drivers/net/cassini.h (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 96f14ab1c1f5..2a908c4690a7 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -548,6 +548,14 @@ config SUNGEM Support for the Sun GEM chip, aka Sun GigabitEthernet/P 2.0. See also . +config CASSINI + tristate "Sun Cassini support" + depends on NET_ETHERNET && PCI + select CRC32 + help + Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also + + config NET_VENDOR_3COM bool "3COM cards" depends on NET_ETHERNET && (ISA || EISA || MCA || PCI) diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 8645c843cf4d..8aeec9f2495b 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_SUNQE) += sunqe.o obj-$(CONFIG_SUNBMAC) += sunbmac.o obj-$(CONFIG_MYRI_SBUS) += myri_sbus.o obj-$(CONFIG_SUNGEM) += sungem.o sungem_phy.o +obj-$(CONFIG_CASSINI) += cassini.o obj-$(CONFIG_MACE) += mace.o obj-$(CONFIG_BMAC) += bmac.o diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c new file mode 100644 index 000000000000..69cb368247e7 --- /dev/null +++ b/drivers/net/cassini.c @@ -0,0 +1,5311 @@ +/* cassini.c: Sun Microsystems Cassini(+) ethernet driver. + * + * Copyright (C) 2004 Sun Microsystems Inc. + * Copyright (C) 2003 Adrian Sun (asun@darksunrising.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + * This driver uses the sungem driver (c) David Miller + * (davem@redhat.com) as its basis. + * + * The cassini chip has a number of features that distinguish it from + * the gem chip: + * 4 transmit descriptor rings that are used for either QoS (VLAN) or + * load balancing (non-VLAN mode) + * batching of multiple packets + * multiple CPU dispatching + * page-based RX descriptor engine with separate completion rings + * Gigabit support (GMII and PCS interface) + * MIF link up/down detection works + * + * RX is handled by page sized buffers that are attached as fragments to + * the skb. here's what's done: + * -- driver allocates pages at a time and keeps reference counts + * on them. + * -- the upper protocol layers assume that the header is in the skb + * itself. as a result, cassini will copy a small amount (64 bytes) + * to make them happy. + * -- driver appends the rest of the data pages as frags to skbuffs + * and increments the reference count + * -- on page reclamation, the driver swaps the page with a spare page. + * if that page is still in use, it frees its reference to that page, + * and allocates a new page for use. otherwise, it just recycles the + * the page. + * + * NOTE: cassini can parse the header. however, it's not worth it + * as long as the network stack requires a header copy. + * + * TX has 4 queues. currently these queues are used in a round-robin + * fashion for load balancing. They can also be used for QoS. for that + * to work, however, QoS information needs to be exposed down to the driver + * level so that subqueues get targetted to particular transmit rings. + * alternatively, the queues can be configured via use of the all-purpose + * ioctl. + * + * RX DATA: the rx completion ring has all the info, but the rx desc + * ring has all of the data. RX can conceivably come in under multiple + * interrupts, but the INT# assignment needs to be set up properly by + * the BIOS and conveyed to the driver. PCI BIOSes don't know how to do + * that. also, the two descriptor rings are designed to distinguish between + * encrypted and non-encrypted packets, but we use them for buffering + * instead. + * + * by default, the selective clear mask is set up to process rx packets. 
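+ *
+ * As a rough sketch (the real RX path below also handles flows, swivels
+ * and multiple pages), attaching a packet that lives in one of these
+ * pages looks like:
+ *	void *va = cas_page_map(page->buffer);
+ *	memcpy(skb_put(skb, RX_COPY_MIN), va + off, RX_COPY_MIN);
+ *	cas_page_unmap(va);
+ *	get_page(page->buffer);
+ *	skb_shinfo(skb)->frags[0].page        = page->buffer;
+ *	skb_shinfo(skb)->frags[0].page_offset = off + RX_COPY_MIN;
+ *	skb_shinfo(skb)->frags[0].size        = len - RX_COPY_MIN;
+ *	skb_shinfo(skb)->nr_frags = 1;
+ * plus the usual skb->len/data_len/truesize accounting. on reclaim the
+ * page is either swapped with a spare or left until its last reference
+ * is dropped.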
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#define cas_page_map(x) kmap_atomic((x), KM_SKB_DATA_SOFTIRQ) +#define cas_page_unmap(x) kunmap_atomic((x), KM_SKB_DATA_SOFTIRQ) +#define CAS_NCPUS num_online_cpus() + +#if defined(CONFIG_CASSINI_NAPI) && defined(HAVE_NETDEV_POLL) +#define USE_NAPI +#define cas_skb_release(x) netif_receive_skb(x) +#else +#define cas_skb_release(x) netif_rx(x) +#endif + +/* select which firmware to use */ +#define USE_HP_WORKAROUND +#define HP_WORKAROUND_DEFAULT /* select which firmware to use as default */ +#define CAS_HP_ALT_FIRMWARE cas_prog_null /* alternate firmware */ + +#include "cassini.h" + +#define USE_TX_COMPWB /* use completion writeback registers */ +#define USE_CSMA_CD_PROTO /* standard CSMA/CD */ +#define USE_RX_BLANK /* hw interrupt mitigation */ +#undef USE_ENTROPY_DEV /* don't test for entropy device */ + +/* NOTE: these aren't useable unless PCI interrupts can be assigned. + * also, we need to make cp->lock finer-grained. + */ +#undef USE_PCI_INTB +#undef USE_PCI_INTC +#undef USE_PCI_INTD +#undef USE_QOS + +#undef USE_VPD_DEBUG /* debug vpd information if defined */ + +/* rx processing options */ +#define USE_PAGE_ORDER /* specify to allocate large rx pages */ +#define RX_DONT_BATCH 0 /* if 1, don't batch flows */ +#define RX_COPY_ALWAYS 0 /* if 0, use frags */ +#define RX_COPY_MIN 64 /* copy a little to make upper layers happy */ +#undef RX_COUNT_BUFFERS /* define to calculate RX buffer stats */ + +#define DRV_MODULE_NAME "cassini" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "1.4" +#define DRV_MODULE_RELDATE "1 July 2004" + +#define CAS_DEF_MSG_ENABLE \ + (NETIF_MSG_DRV | \ + NETIF_MSG_PROBE | \ + NETIF_MSG_LINK | \ + NETIF_MSG_TIMER | \ + NETIF_MSG_IFDOWN | \ + NETIF_MSG_IFUP | \ + NETIF_MSG_RX_ERR | \ + NETIF_MSG_TX_ERR) + +/* length of time before we decide the hardware is borked, + * and dev->tx_timeout() should be called to fix the problem + */ +#define CAS_TX_TIMEOUT (HZ) +#define CAS_LINK_TIMEOUT (22*HZ/10) +#define CAS_LINK_FAST_TIMEOUT (1) + +/* timeout values for state changing. these specify the number + * of 10us delays to be used before giving up. + */ +#define STOP_TRIES_PHY 1000 +#define STOP_TRIES 5000 + +/* specify a minimum frame size to deal with some fifo issues + * max mtu == 2 * page size - ethernet header - 64 - swivel = + * 2 * page_size - 0x50 + */ +#define CAS_MIN_FRAME 97 +#define CAS_1000MB_MIN_FRAME 255 +#define CAS_MIN_MTU 60 +#define CAS_MAX_MTU min(((cp->page_size << 1) - 0x50), 9000) + +#if 1 +/* + * Eliminate these and use separate atomic counters for each, to + * avoid a race condition. 
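+ * (i.e. the code below does
+ *	atomic_inc(&cp->reset_task_pending);
+ *	atomic_inc(&cp->reset_task_pending_all);
+ * and friends, rather than collapsing the reason into one value with
+ *	atomic_set(&cp->reset_task_pending, CAS_RESET_ALL);
+ * as the #else paths do.)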
+ */ +#else +#define CAS_RESET_MTU 1 +#define CAS_RESET_ALL 2 +#define CAS_RESET_SPARE 3 +#endif + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + +MODULE_AUTHOR("Adrian Sun (asun@darksunrising.com)"); +MODULE_DESCRIPTION("Sun Cassini(+) ethernet driver"); +MODULE_LICENSE("GPL"); +MODULE_PARM(cassini_debug, "i"); +MODULE_PARM_DESC(cassini_debug, "Cassini bitmapped debugging message enable value"); +MODULE_PARM(link_mode, "i"); +MODULE_PARM_DESC(link_mode, "default link mode"); + +/* + * Work around for a PCS bug in which the link goes down due to the chip + * being confused and never showing a link status of "up." + */ +#define DEFAULT_LINKDOWN_TIMEOUT 5 +/* + * Value in seconds, for user input. + */ +static int linkdown_timeout = DEFAULT_LINKDOWN_TIMEOUT; +MODULE_PARM(linkdown_timeout, "i"); +MODULE_PARM_DESC(linkdown_timeout, +"min reset interval in sec. for PCS linkdown issue; disabled if not positive"); + +/* + * value in 'ticks' (units used by jiffies). Set when we init the + * module because 'HZ' in actually a function call on some flavors of + * Linux. This will default to DEFAULT_LINKDOWN_TIMEOUT * HZ. + */ +static int link_transition_timeout; + + +static int cassini_debug = -1; /* -1 == use CAS_DEF_MSG_ENABLE as value */ +static int link_mode; + +static u16 link_modes[] __devinitdata = { + BMCR_ANENABLE, /* 0 : autoneg */ + 0, /* 1 : 10bt half duplex */ + BMCR_SPEED100, /* 2 : 100bt half duplex */ + BMCR_FULLDPLX, /* 3 : 10bt full duplex */ + BMCR_SPEED100|BMCR_FULLDPLX, /* 4 : 100bt full duplex */ + CAS_BMCR_SPEED1000|BMCR_FULLDPLX /* 5 : 1000bt full duplex */ +}; + +static struct pci_device_id cas_pci_tbl[] __devinitdata = { + { PCI_VENDOR_ID_SUN, PCI_DEVICE_ID_SUN_CASSINI, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SATURN, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, cas_pci_tbl); + +static void cas_set_link_modes(struct cas *cp); + +static inline void cas_lock_tx(struct cas *cp) +{ + int i; + + for (i = 0; i < N_TX_RINGS; i++) + spin_lock(&cp->tx_lock[i]); +} + +static inline void cas_lock_all(struct cas *cp) +{ + spin_lock_irq(&cp->lock); + cas_lock_tx(cp); +} + +/* WTZ: QA was finding deadlock problems with the previous + * versions after long test runs with multiple cards per machine. + * See if replacing cas_lock_all with safer versions helps. The + * symptoms QA is reporting match those we'd expect if interrupts + * aren't being properly restored, and we fixed a previous deadlock + * with similar symptoms by using save/restore versions in other + * places. 
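+ *
+ * The save/restore pair is used like (sketch):
+ *	unsigned long flags;
+ *	cas_lock_all_save(cp, flags);
+ *	... touch shared state ...
+ *	cas_unlock_all_restore(cp, flags);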
+ */ +#define cas_lock_all_save(cp, flags) \ +do { \ + struct cas *xxxcp = (cp); \ + spin_lock_irqsave(&xxxcp->lock, flags); \ + cas_lock_tx(xxxcp); \ +} while (0) + +static inline void cas_unlock_tx(struct cas *cp) +{ + int i; + + for (i = N_TX_RINGS; i > 0; i--) + spin_unlock(&cp->tx_lock[i - 1]); +} + +static inline void cas_unlock_all(struct cas *cp) +{ + cas_unlock_tx(cp); + spin_unlock_irq(&cp->lock); +} + +#define cas_unlock_all_restore(cp, flags) \ +do { \ + struct cas *xxxcp = (cp); \ + cas_unlock_tx(xxxcp); \ + spin_unlock_irqrestore(&xxxcp->lock, flags); \ +} while (0) + +static void cas_disable_irq(struct cas *cp, const int ring) +{ + /* Make sure we won't get any more interrupts */ + if (ring == 0) { + writel(0xFFFFFFFF, cp->regs + REG_INTR_MASK); + return; + } + + /* disable completion interrupts and selectively mask */ + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + switch (ring) { +#if defined (USE_PCI_INTB) || defined(USE_PCI_INTC) || defined(USE_PCI_INTD) +#ifdef USE_PCI_INTB + case 1: +#endif +#ifdef USE_PCI_INTC + case 2: +#endif +#ifdef USE_PCI_INTD + case 3: +#endif + writel(INTRN_MASK_CLEAR_ALL | INTRN_MASK_RX_EN, + cp->regs + REG_PLUS_INTRN_MASK(ring)); + break; +#endif + default: + writel(INTRN_MASK_CLEAR_ALL, cp->regs + + REG_PLUS_INTRN_MASK(ring)); + break; + } + } +} + +static inline void cas_mask_intr(struct cas *cp) +{ + int i; + + for (i = 0; i < N_RX_COMP_RINGS; i++) + cas_disable_irq(cp, i); +} + +static void cas_enable_irq(struct cas *cp, const int ring) +{ + if (ring == 0) { /* all but TX_DONE */ + writel(INTR_TX_DONE, cp->regs + REG_INTR_MASK); + return; + } + + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + switch (ring) { +#if defined (USE_PCI_INTB) || defined(USE_PCI_INTC) || defined(USE_PCI_INTD) +#ifdef USE_PCI_INTB + case 1: +#endif +#ifdef USE_PCI_INTC + case 2: +#endif +#ifdef USE_PCI_INTD + case 3: +#endif + writel(INTRN_MASK_RX_EN, cp->regs + + REG_PLUS_INTRN_MASK(ring)); + break; +#endif + default: + break; + } + } +} + +static inline void cas_unmask_intr(struct cas *cp) +{ + int i; + + for (i = 0; i < N_RX_COMP_RINGS; i++) + cas_enable_irq(cp, i); +} + +static inline void cas_entropy_gather(struct cas *cp) +{ +#ifdef USE_ENTROPY_DEV + if ((cp->cas_flags & CAS_FLAG_ENTROPY_DEV) == 0) + return; + + batch_entropy_store(readl(cp->regs + REG_ENTROPY_IV), + readl(cp->regs + REG_ENTROPY_IV), + sizeof(uint64_t)*8); +#endif +} + +static inline void cas_entropy_reset(struct cas *cp) +{ +#ifdef USE_ENTROPY_DEV + if ((cp->cas_flags & CAS_FLAG_ENTROPY_DEV) == 0) + return; + + writel(BIM_LOCAL_DEV_PAD | BIM_LOCAL_DEV_PROM | BIM_LOCAL_DEV_EXT, + cp->regs + REG_BIM_LOCAL_DEV_EN); + writeb(ENTROPY_RESET_STC_MODE, cp->regs + REG_ENTROPY_RESET); + writeb(0x55, cp->regs + REG_ENTROPY_RAND_REG); + + /* if we read back 0x0, we don't have an entropy device */ + if (readb(cp->regs + REG_ENTROPY_RAND_REG) == 0) + cp->cas_flags &= ~CAS_FLAG_ENTROPY_DEV; +#endif +} + +/* access to the phy. 
the following assumes that we've initialized the MIF to + * be in frame rather than bit-bang mode + */ +static u16 cas_phy_read(struct cas *cp, int reg) +{ + u32 cmd; + int limit = STOP_TRIES_PHY; + + cmd = MIF_FRAME_ST | MIF_FRAME_OP_READ; + cmd |= CAS_BASE(MIF_FRAME_PHY_ADDR, cp->phy_addr); + cmd |= CAS_BASE(MIF_FRAME_REG_ADDR, reg); + cmd |= MIF_FRAME_TURN_AROUND_MSB; + writel(cmd, cp->regs + REG_MIF_FRAME); + + /* poll for completion */ + while (limit-- > 0) { + udelay(10); + cmd = readl(cp->regs + REG_MIF_FRAME); + if (cmd & MIF_FRAME_TURN_AROUND_LSB) + return (cmd & MIF_FRAME_DATA_MASK); + } + return 0xFFFF; /* -1 */ +} + +static int cas_phy_write(struct cas *cp, int reg, u16 val) +{ + int limit = STOP_TRIES_PHY; + u32 cmd; + + cmd = MIF_FRAME_ST | MIF_FRAME_OP_WRITE; + cmd |= CAS_BASE(MIF_FRAME_PHY_ADDR, cp->phy_addr); + cmd |= CAS_BASE(MIF_FRAME_REG_ADDR, reg); + cmd |= MIF_FRAME_TURN_AROUND_MSB; + cmd |= val & MIF_FRAME_DATA_MASK; + writel(cmd, cp->regs + REG_MIF_FRAME); + + /* poll for completion */ + while (limit-- > 0) { + udelay(10); + cmd = readl(cp->regs + REG_MIF_FRAME); + if (cmd & MIF_FRAME_TURN_AROUND_LSB) + return 0; + } + return -1; +} + +static void cas_phy_powerup(struct cas *cp) +{ + u16 ctl = cas_phy_read(cp, MII_BMCR); + + if ((ctl & BMCR_PDOWN) == 0) + return; + ctl &= ~BMCR_PDOWN; + cas_phy_write(cp, MII_BMCR, ctl); +} + +static void cas_phy_powerdown(struct cas *cp) +{ + u16 ctl = cas_phy_read(cp, MII_BMCR); + + if (ctl & BMCR_PDOWN) + return; + ctl |= BMCR_PDOWN; + cas_phy_write(cp, MII_BMCR, ctl); +} + +/* cp->lock held. note: the last put_page will free the buffer */ +static int cas_page_free(struct cas *cp, cas_page_t *page) +{ + pci_unmap_page(cp->pdev, page->dma_addr, cp->page_size, + PCI_DMA_FROMDEVICE); + __free_pages(page->buffer, cp->page_order); + kfree(page); + return 0; +} + +#ifdef RX_COUNT_BUFFERS +#define RX_USED_ADD(x, y) ((x)->used += (y)) +#define RX_USED_SET(x, y) ((x)->used = (y)) +#else +#define RX_USED_ADD(x, y) +#define RX_USED_SET(x, y) +#endif + +/* local page allocation routines for the receive buffers. jumbo pages + * require at least 8K contiguous and 8K aligned buffers. + */ +static cas_page_t *cas_page_alloc(struct cas *cp, const int flags) +{ + cas_page_t *page; + + page = kmalloc(sizeof(cas_page_t), flags); + if (!page) + return NULL; + + INIT_LIST_HEAD(&page->list); + RX_USED_SET(page, 0); + page->buffer = alloc_pages(flags, cp->page_order); + if (!page->buffer) + goto page_err; + page->dma_addr = pci_map_page(cp->pdev, page->buffer, 0, + cp->page_size, PCI_DMA_FROMDEVICE); + return page; + +page_err: + kfree(page); + return NULL; +} + +/* initialize spare pool of rx buffers, but allocate during the open */ +static void cas_spare_init(struct cas *cp) +{ + spin_lock(&cp->rx_inuse_lock); + INIT_LIST_HEAD(&cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + + spin_lock(&cp->rx_spare_lock); + INIT_LIST_HEAD(&cp->rx_spare_list); + cp->rx_spares_needed = RX_SPARE_COUNT; + spin_unlock(&cp->rx_spare_lock); +} + +/* used on close. free all the spare buffers. 
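+ * both rx_spare_list and whatever is still parked on rx_inuse_list get
+ * spliced onto a private list and released through cas_page_free().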
*/ +static void cas_spare_free(struct cas *cp) +{ + struct list_head list, *elem, *tmp; + + /* free spare buffers */ + INIT_LIST_HEAD(&list); + spin_lock(&cp->rx_spare_lock); + list_splice(&cp->rx_spare_list, &list); + INIT_LIST_HEAD(&cp->rx_spare_list); + spin_unlock(&cp->rx_spare_lock); + list_for_each_safe(elem, tmp, &list) { + cas_page_free(cp, list_entry(elem, cas_page_t, list)); + } + + INIT_LIST_HEAD(&list); +#if 1 + /* + * Looks like Adrian had protected this with a different + * lock than used everywhere else to manipulate this list. + */ + spin_lock(&cp->rx_inuse_lock); + list_splice(&cp->rx_inuse_list, &list); + INIT_LIST_HEAD(&cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); +#else + spin_lock(&cp->rx_spare_lock); + list_splice(&cp->rx_inuse_list, &list); + INIT_LIST_HEAD(&cp->rx_inuse_list); + spin_unlock(&cp->rx_spare_lock); +#endif + list_for_each_safe(elem, tmp, &list) { + cas_page_free(cp, list_entry(elem, cas_page_t, list)); + } +} + +/* replenish spares if needed */ +static void cas_spare_recover(struct cas *cp, const int flags) +{ + struct list_head list, *elem, *tmp; + int needed, i; + + /* check inuse list. if we don't need any more free buffers, + * just free it + */ + + /* make a local copy of the list */ + INIT_LIST_HEAD(&list); + spin_lock(&cp->rx_inuse_lock); + list_splice(&cp->rx_inuse_list, &list); + INIT_LIST_HEAD(&cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + + list_for_each_safe(elem, tmp, &list) { + cas_page_t *page = list_entry(elem, cas_page_t, list); + + if (page_count(page->buffer) > 1) + continue; + + list_del(elem); + spin_lock(&cp->rx_spare_lock); + if (cp->rx_spares_needed > 0) { + list_add(elem, &cp->rx_spare_list); + cp->rx_spares_needed--; + spin_unlock(&cp->rx_spare_lock); + } else { + spin_unlock(&cp->rx_spare_lock); + cas_page_free(cp, page); + } + } + + /* put any inuse buffers back on the list */ + if (!list_empty(&list)) { + spin_lock(&cp->rx_inuse_lock); + list_splice(&list, &cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + } + + spin_lock(&cp->rx_spare_lock); + needed = cp->rx_spares_needed; + spin_unlock(&cp->rx_spare_lock); + if (!needed) + return; + + /* we still need spares, so try to allocate some */ + INIT_LIST_HEAD(&list); + i = 0; + while (i < needed) { + cas_page_t *spare = cas_page_alloc(cp, flags); + if (!spare) + break; + list_add(&spare->list, &list); + i++; + } + + spin_lock(&cp->rx_spare_lock); + list_splice(&list, &cp->rx_spare_list); + cp->rx_spares_needed -= i; + spin_unlock(&cp->rx_spare_lock); +} + +/* pull a page from the list. 
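+ * if the spare list is empty a GFP_ATOMIC cas_spare_recover() is tried
+ * inline; once rx_spares_needed crosses a multiple of RX_SPARE_RECOVER_VAL
+ * the reset task is scheduled so the pool gets replenished outside the
+ * hot path.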
*/ +static cas_page_t *cas_page_dequeue(struct cas *cp) +{ + struct list_head *entry; + int recover; + + spin_lock(&cp->rx_spare_lock); + if (list_empty(&cp->rx_spare_list)) { + /* try to do a quick recovery */ + spin_unlock(&cp->rx_spare_lock); + cas_spare_recover(cp, GFP_ATOMIC); + spin_lock(&cp->rx_spare_lock); + if (list_empty(&cp->rx_spare_list)) { + if (netif_msg_rx_err(cp)) + printk(KERN_ERR "%s: no spare buffers " + "available.\n", cp->dev->name); + spin_unlock(&cp->rx_spare_lock); + return NULL; + } + } + + entry = cp->rx_spare_list.next; + list_del(entry); + recover = ++cp->rx_spares_needed; + spin_unlock(&cp->rx_spare_lock); + + /* trigger the timer to do the recovery */ + if ((recover & (RX_SPARE_RECOVER_VAL - 1)) == 0) { +#if 1 + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_spare); + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, CAS_RESET_SPARE); + schedule_work(&cp->reset_task); +#endif + } + return list_entry(entry, cas_page_t, list); +} + + +static void cas_mif_poll(struct cas *cp, const int enable) +{ + u32 cfg; + + cfg = readl(cp->regs + REG_MIF_CFG); + cfg &= (MIF_CFG_MDIO_0 | MIF_CFG_MDIO_1); + + if (cp->phy_type & CAS_PHY_MII_MDIO1) + cfg |= MIF_CFG_PHY_SELECT; + + /* poll and interrupt on link status change. */ + if (enable) { + cfg |= MIF_CFG_POLL_EN; + cfg |= CAS_BASE(MIF_CFG_POLL_REG, MII_BMSR); + cfg |= CAS_BASE(MIF_CFG_POLL_PHY, cp->phy_addr); + } + writel((enable) ? ~(BMSR_LSTATUS | BMSR_ANEGCOMPLETE) : 0xFFFF, + cp->regs + REG_MIF_MASK); + writel(cfg, cp->regs + REG_MIF_CFG); +} + +/* Must be invoked under cp->lock */ +static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep) +{ + u16 ctl; +#if 1 + int lcntl; + int changed = 0; + int oldstate = cp->lstate; + int link_was_not_down = !(oldstate == link_down); +#endif + /* Setup link parameters */ + if (!ep) + goto start_aneg; + lcntl = cp->link_cntl; + if (ep->autoneg == AUTONEG_ENABLE) + cp->link_cntl = BMCR_ANENABLE; + else { + cp->link_cntl = 0; + if (ep->speed == SPEED_100) + cp->link_cntl |= BMCR_SPEED100; + else if (ep->speed == SPEED_1000) + cp->link_cntl |= CAS_BMCR_SPEED1000; + if (ep->duplex == DUPLEX_FULL) + cp->link_cntl |= BMCR_FULLDPLX; + } +#if 1 + changed = (lcntl != cp->link_cntl); +#endif +start_aneg: + if (cp->lstate == link_up) { + printk(KERN_INFO "%s: PCS link down.\n", + cp->dev->name); + } else { + if (changed) { + printk(KERN_INFO "%s: link configuration changed\n", + cp->dev->name); + } + } + cp->lstate = link_down; + cp->link_transition = LINK_TRANSITION_LINK_DOWN; + if (!cp->hw_running) + return; +#if 1 + /* + * WTZ: If the old state was link_up, we turn off the carrier + * to replicate everything we do elsewhere on a link-down + * event when we were already in a link-up state.. + */ + if (oldstate == link_up) + netif_carrier_off(cp->dev); + if (changed && link_was_not_down) { + /* + * WTZ: This branch will simply schedule a full reset after + * we explicitly changed link modes in an ioctl. See if this + * fixes the link-problems we were having for forced mode. 
+ */ + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_all); + schedule_work(&cp->reset_task); + cp->timer_ticks = 0; + mod_timer(&cp->link_timer, jiffies + CAS_LINK_TIMEOUT); + return; + } +#endif + if (cp->phy_type & CAS_PHY_SERDES) { + u32 val = readl(cp->regs + REG_PCS_MII_CTRL); + + if (cp->link_cntl & BMCR_ANENABLE) { + val |= (PCS_MII_RESTART_AUTONEG | PCS_MII_AUTONEG_EN); + cp->lstate = link_aneg; + } else { + if (cp->link_cntl & BMCR_FULLDPLX) + val |= PCS_MII_CTRL_DUPLEX; + val &= ~PCS_MII_AUTONEG_EN; + cp->lstate = link_force_ok; + } + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + writel(val, cp->regs + REG_PCS_MII_CTRL); + + } else { + cas_mif_poll(cp, 0); + ctl = cas_phy_read(cp, MII_BMCR); + ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 | + CAS_BMCR_SPEED1000 | BMCR_ANENABLE); + ctl |= cp->link_cntl; + if (ctl & BMCR_ANENABLE) { + ctl |= BMCR_ANRESTART; + cp->lstate = link_aneg; + } else { + cp->lstate = link_force_ok; + } + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + cas_phy_write(cp, MII_BMCR, ctl); + cas_mif_poll(cp, 1); + } + + cp->timer_ticks = 0; + mod_timer(&cp->link_timer, jiffies + CAS_LINK_TIMEOUT); +} + +/* Must be invoked under cp->lock. */ +static int cas_reset_mii_phy(struct cas *cp) +{ + int limit = STOP_TRIES_PHY; + u16 val; + + cas_phy_write(cp, MII_BMCR, BMCR_RESET); + udelay(100); + while (limit--) { + val = cas_phy_read(cp, MII_BMCR); + if ((val & BMCR_RESET) == 0) + break; + udelay(10); + } + return (limit <= 0); +} + +static void cas_saturn_firmware_load(struct cas *cp) +{ + cas_saturn_patch_t *patch = cas_saturn_patch; + + cas_phy_powerdown(cp); + + /* expanded memory access mode */ + cas_phy_write(cp, DP83065_MII_MEM, 0x0); + + /* pointer configuration for new firmware */ + cas_phy_write(cp, DP83065_MII_REGE, 0x8ff9); + cas_phy_write(cp, DP83065_MII_REGD, 0xbd); + cas_phy_write(cp, DP83065_MII_REGE, 0x8ffa); + cas_phy_write(cp, DP83065_MII_REGD, 0x82); + cas_phy_write(cp, DP83065_MII_REGE, 0x8ffb); + cas_phy_write(cp, DP83065_MII_REGD, 0x0); + cas_phy_write(cp, DP83065_MII_REGE, 0x8ffc); + cas_phy_write(cp, DP83065_MII_REGD, 0x39); + + /* download new firmware */ + cas_phy_write(cp, DP83065_MII_MEM, 0x1); + cas_phy_write(cp, DP83065_MII_REGE, patch->addr); + while (patch->addr) { + cas_phy_write(cp, DP83065_MII_REGD, patch->val); + patch++; + } + + /* enable firmware */ + cas_phy_write(cp, DP83065_MII_REGE, 0x8ff8); + cas_phy_write(cp, DP83065_MII_REGD, 0x1); +} + + +/* phy initialization */ +static void cas_phy_init(struct cas *cp) +{ + u16 val; + + /* if we're in MII/GMII mode, set up phy */ + if (CAS_PHY_MII(cp->phy_type)) { + writel(PCS_DATAPATH_MODE_MII, + cp->regs + REG_PCS_DATAPATH_MODE); + + cas_mif_poll(cp, 0); + cas_reset_mii_phy(cp); /* take out of isolate mode */ + + if (PHY_LUCENT_B0 == cp->phy_id) { + /* workaround link up/down issue with lucent */ + cas_phy_write(cp, LUCENT_MII_REG, 0x8000); + cas_phy_write(cp, MII_BMCR, 0x00f1); + cas_phy_write(cp, LUCENT_MII_REG, 0x0); + + } else if (PHY_BROADCOM_B0 == (cp->phy_id & 0xFFFFFFFC)) { + /* workarounds for broadcom phy */ + cas_phy_write(cp, BROADCOM_MII_REG8, 0x0C20); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x0012); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x1804); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x0013); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x1204); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x8006); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x0132); + cas_phy_write(cp, BROADCOM_MII_REG7, 0x8006); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x0232); + 
cas_phy_write(cp, BROADCOM_MII_REG7, 0x201F); + cas_phy_write(cp, BROADCOM_MII_REG5, 0x0A20); + + } else if (PHY_BROADCOM_5411 == cp->phy_id) { + val = cas_phy_read(cp, BROADCOM_MII_REG4); + val = cas_phy_read(cp, BROADCOM_MII_REG4); + if (val & 0x0080) { + /* link workaround */ + cas_phy_write(cp, BROADCOM_MII_REG4, + val & ~0x0080); + } + + } else if (cp->cas_flags & CAS_FLAG_SATURN) { + writel((cp->phy_type & CAS_PHY_MII_MDIO0) ? + SATURN_PCFG_FSI : 0x0, + cp->regs + REG_SATURN_PCFG); + + /* load firmware to address 10Mbps auto-negotiation + * issue. NOTE: this will need to be changed if the + * default firmware gets fixed. + */ + if (PHY_NS_DP83065 == cp->phy_id) { + cas_saturn_firmware_load(cp); + } + cas_phy_powerup(cp); + } + + /* advertise capabilities */ + val = cas_phy_read(cp, MII_BMCR); + val &= ~BMCR_ANENABLE; + cas_phy_write(cp, MII_BMCR, val); + udelay(10); + + cas_phy_write(cp, MII_ADVERTISE, + cas_phy_read(cp, MII_ADVERTISE) | + (ADVERTISE_10HALF | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_100FULL | + CAS_ADVERTISE_PAUSE | + CAS_ADVERTISE_ASYM_PAUSE)); + + if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { + /* make sure that we don't advertise half + * duplex to avoid a chip issue + */ + val = cas_phy_read(cp, CAS_MII_1000_CTRL); + val &= ~CAS_ADVERTISE_1000HALF; + val |= CAS_ADVERTISE_1000FULL; + cas_phy_write(cp, CAS_MII_1000_CTRL, val); + } + + } else { + /* reset pcs for serdes */ + u32 val; + int limit; + + writel(PCS_DATAPATH_MODE_SERDES, + cp->regs + REG_PCS_DATAPATH_MODE); + + /* enable serdes pins on saturn */ + if (cp->cas_flags & CAS_FLAG_SATURN) + writel(0, cp->regs + REG_SATURN_PCFG); + + /* Reset PCS unit. */ + val = readl(cp->regs + REG_PCS_MII_CTRL); + val |= PCS_MII_RESET; + writel(val, cp->regs + REG_PCS_MII_CTRL); + + limit = STOP_TRIES; + while (limit-- > 0) { + udelay(10); + if ((readl(cp->regs + REG_PCS_MII_CTRL) & + PCS_MII_RESET) == 0) + break; + } + if (limit <= 0) + printk(KERN_WARNING "%s: PCS reset bit would not " + "clear [%08x].\n", cp->dev->name, + readl(cp->regs + REG_PCS_STATE_MACHINE)); + + /* Make sure PCS is disabled while changing advertisement + * configuration. + */ + writel(0x0, cp->regs + REG_PCS_CFG); + + /* Advertise all capabilities except half-duplex. */ + val = readl(cp->regs + REG_PCS_MII_ADVERT); + val &= ~PCS_MII_ADVERT_HD; + val |= (PCS_MII_ADVERT_FD | PCS_MII_ADVERT_SYM_PAUSE | + PCS_MII_ADVERT_ASYM_PAUSE); + writel(val, cp->regs + REG_PCS_MII_ADVERT); + + /* enable PCS */ + writel(PCS_CFG_EN, cp->regs + REG_PCS_CFG); + + /* pcs workaround: enable sync detect */ + writel(PCS_SERDES_CTRL_SYNCD_EN, + cp->regs + REG_PCS_SERDES_CTRL); + } +} + + +static int cas_pcs_link_check(struct cas *cp) +{ + u32 stat, state_machine; + int retval = 0; + + /* The link status bit latches on zero, so you must + * read it twice in such a case to see a transition + * to the link being up. + */ + stat = readl(cp->regs + REG_PCS_MII_STATUS); + if ((stat & PCS_MII_STATUS_LINK_STATUS) == 0) + stat = readl(cp->regs + REG_PCS_MII_STATUS); + + /* The remote-fault indication is only valid + * when autoneg has completed. + */ + if ((stat & (PCS_MII_STATUS_AUTONEG_COMP | + PCS_MII_STATUS_REMOTE_FAULT)) == + (PCS_MII_STATUS_AUTONEG_COMP | PCS_MII_STATUS_REMOTE_FAULT)) { + if (netif_msg_link(cp)) + printk(KERN_INFO "%s: PCS RemoteFault\n", + cp->dev->name); + } + + /* work around link detection issue by querying the PCS state + * machine directly. 
+ */ + state_machine = readl(cp->regs + REG_PCS_STATE_MACHINE); + if ((state_machine & PCS_SM_LINK_STATE_MASK) != SM_LINK_STATE_UP) { + stat &= ~PCS_MII_STATUS_LINK_STATUS; + } else if (state_machine & PCS_SM_WORD_SYNC_STATE_MASK) { + stat |= PCS_MII_STATUS_LINK_STATUS; + } + + if (stat & PCS_MII_STATUS_LINK_STATUS) { + if (cp->lstate != link_up) { + if (cp->opened) { + cp->lstate = link_up; + cp->link_transition = LINK_TRANSITION_LINK_UP; + + cas_set_link_modes(cp); + netif_carrier_on(cp->dev); + } + } + } else if (cp->lstate == link_up) { + cp->lstate = link_down; + if (link_transition_timeout != 0 && + cp->link_transition != LINK_TRANSITION_REQUESTED_RESET && + !cp->link_transition_jiffies_valid) { + /* + * force a reset, as a workaround for the + * link-failure problem. May want to move this to a + * point a bit earlier in the sequence. If we had + * generated a reset a short time ago, we'll wait for + * the link timer to check the status until a + * timer expires (link_transistion_jiffies_valid is + * true when the timer is running.) Instead of using + * a system timer, we just do a check whenever the + * link timer is running - this clears the flag after + * a suitable delay. + */ + retval = 1; + cp->link_transition = LINK_TRANSITION_REQUESTED_RESET; + cp->link_transition_jiffies = jiffies; + cp->link_transition_jiffies_valid = 1; + } else { + cp->link_transition = LINK_TRANSITION_ON_FAILURE; + } + netif_carrier_off(cp->dev); + if (cp->opened && netif_msg_link(cp)) { + printk(KERN_INFO "%s: PCS link down.\n", + cp->dev->name); + } + + /* Cassini only: if you force a mode, there can be + * sync problems on link down. to fix that, the following + * things need to be checked: + * 1) read serialink state register + * 2) read pcs status register to verify link down. + * 3) if link down and serial link == 0x03, then you need + * to global reset the chip. + */ + if ((cp->cas_flags & CAS_FLAG_REG_PLUS) == 0) { + /* should check to see if we're in a forced mode */ + stat = readl(cp->regs + REG_PCS_SERDES_STATE); + if (stat == 0x03) + return 1; + } + } else if (cp->lstate == link_down) { + if (link_transition_timeout != 0 && + cp->link_transition != LINK_TRANSITION_REQUESTED_RESET && + !cp->link_transition_jiffies_valid) { + /* force a reset, as a workaround for the + * link-failure problem. May want to move + * this to a point a bit earlier in the + * sequence. + */ + retval = 1; + cp->link_transition = LINK_TRANSITION_REQUESTED_RESET; + cp->link_transition_jiffies = jiffies; + cp->link_transition_jiffies_valid = 1; + } else { + cp->link_transition = LINK_TRANSITION_STILL_FAILED; + } + } + + return retval; +} + +static int cas_pcs_interrupt(struct net_device *dev, + struct cas *cp, u32 status) +{ + u32 stat = readl(cp->regs + REG_PCS_INTR_STATUS); + + if ((stat & PCS_INTR_STATUS_LINK_CHANGE) == 0) + return 0; + return cas_pcs_link_check(cp); +} + +static int cas_txmac_interrupt(struct net_device *dev, + struct cas *cp, u32 status) +{ + u32 txmac_stat = readl(cp->regs + REG_MAC_TX_STATUS); + + if (!txmac_stat) + return 0; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: txmac interrupt, txmac_stat: 0x%x\n", + cp->dev->name, txmac_stat); + + /* Defer timer expiration is quite normal, + * don't even log the event. 
+ */ + if ((txmac_stat & MAC_TX_DEFER_TIMER) && + !(txmac_stat & ~MAC_TX_DEFER_TIMER)) + return 0; + + spin_lock(&cp->stat_lock[0]); + if (txmac_stat & MAC_TX_UNDERRUN) { + printk(KERN_ERR "%s: TX MAC xmit underrun.\n", + dev->name); + cp->net_stats[0].tx_fifo_errors++; + } + + if (txmac_stat & MAC_TX_MAX_PACKET_ERR) { + printk(KERN_ERR "%s: TX MAC max packet size error.\n", + dev->name); + cp->net_stats[0].tx_errors++; + } + + /* The rest are all cases of one of the 16-bit TX + * counters expiring. + */ + if (txmac_stat & MAC_TX_COLL_NORMAL) + cp->net_stats[0].collisions += 0x10000; + + if (txmac_stat & MAC_TX_COLL_EXCESS) { + cp->net_stats[0].tx_aborted_errors += 0x10000; + cp->net_stats[0].collisions += 0x10000; + } + + if (txmac_stat & MAC_TX_COLL_LATE) { + cp->net_stats[0].tx_aborted_errors += 0x10000; + cp->net_stats[0].collisions += 0x10000; + } + spin_unlock(&cp->stat_lock[0]); + + /* We do not keep track of MAC_TX_COLL_FIRST and + * MAC_TX_PEAK_ATTEMPTS events. + */ + return 0; +} + +static void cas_load_firmware(struct cas *cp, cas_hp_inst_t *firmware) +{ + cas_hp_inst_t *inst; + u32 val; + int i; + + i = 0; + while ((inst = firmware) && inst->note) { + writel(i, cp->regs + REG_HP_INSTR_RAM_ADDR); + + val = CAS_BASE(HP_INSTR_RAM_HI_VAL, inst->val); + val |= CAS_BASE(HP_INSTR_RAM_HI_MASK, inst->mask); + writel(val, cp->regs + REG_HP_INSTR_RAM_DATA_HI); + + val = CAS_BASE(HP_INSTR_RAM_MID_OUTARG, inst->outarg >> 10); + val |= CAS_BASE(HP_INSTR_RAM_MID_OUTOP, inst->outop); + val |= CAS_BASE(HP_INSTR_RAM_MID_FNEXT, inst->fnext); + val |= CAS_BASE(HP_INSTR_RAM_MID_FOFF, inst->foff); + val |= CAS_BASE(HP_INSTR_RAM_MID_SNEXT, inst->snext); + val |= CAS_BASE(HP_INSTR_RAM_MID_SOFF, inst->soff); + val |= CAS_BASE(HP_INSTR_RAM_MID_OP, inst->op); + writel(val, cp->regs + REG_HP_INSTR_RAM_DATA_MID); + + val = CAS_BASE(HP_INSTR_RAM_LOW_OUTMASK, inst->outmask); + val |= CAS_BASE(HP_INSTR_RAM_LOW_OUTSHIFT, inst->outshift); + val |= CAS_BASE(HP_INSTR_RAM_LOW_OUTEN, inst->outenab); + val |= CAS_BASE(HP_INSTR_RAM_LOW_OUTARG, inst->outarg); + writel(val, cp->regs + REG_HP_INSTR_RAM_DATA_LOW); + ++firmware; + ++i; + } +} + +static void cas_init_rx_dma(struct cas *cp) +{ + u64 desc_dma = cp->block_dvma; + u32 val; + int i, size; + + /* rx free descriptors */ + val = CAS_BASE(RX_CFG_SWIVEL, RX_SWIVEL_OFF_VAL); + val |= CAS_BASE(RX_CFG_DESC_RING, RX_DESC_RINGN_INDEX(0)); + val |= CAS_BASE(RX_CFG_COMP_RING, RX_COMP_RINGN_INDEX(0)); + if ((N_RX_DESC_RINGS > 1) && + (cp->cas_flags & CAS_FLAG_REG_PLUS)) /* do desc 2 */ + val |= CAS_BASE(RX_CFG_DESC_RING1, RX_DESC_RINGN_INDEX(1)); + writel(val, cp->regs + REG_RX_CFG); + + val = (unsigned long) cp->init_rxds[0] - + (unsigned long) cp->init_block; + writel((desc_dma + val) >> 32, cp->regs + REG_RX_DB_HI); + writel((desc_dma + val) & 0xffffffff, cp->regs + REG_RX_DB_LOW); + writel(RX_DESC_RINGN_SIZE(0) - 4, cp->regs + REG_RX_KICK); + + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + /* rx desc 2 is for IPSEC packets. however, + * we don't it that for that purpose. 
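+ * (the ring is still programmed here; in this driver it ends up backing
+ * the spare-page scheme instead -- see the note above cas_page_spare()
+ * further down.)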
+ */ + val = (unsigned long) cp->init_rxds[1] - + (unsigned long) cp->init_block; + writel((desc_dma + val) >> 32, cp->regs + REG_PLUS_RX_DB1_HI); + writel((desc_dma + val) & 0xffffffff, cp->regs + + REG_PLUS_RX_DB1_LOW); + writel(RX_DESC_RINGN_SIZE(1) - 4, cp->regs + + REG_PLUS_RX_KICK1); + } + + /* rx completion registers */ + val = (unsigned long) cp->init_rxcs[0] - + (unsigned long) cp->init_block; + writel((desc_dma + val) >> 32, cp->regs + REG_RX_CB_HI); + writel((desc_dma + val) & 0xffffffff, cp->regs + REG_RX_CB_LOW); + + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + /* rx comp 2-4 */ + for (i = 1; i < MAX_RX_COMP_RINGS; i++) { + val = (unsigned long) cp->init_rxcs[i] - + (unsigned long) cp->init_block; + writel((desc_dma + val) >> 32, cp->regs + + REG_PLUS_RX_CBN_HI(i)); + writel((desc_dma + val) & 0xffffffff, cp->regs + + REG_PLUS_RX_CBN_LOW(i)); + } + } + + /* read selective clear regs to prevent spurious interrupts + * on reset because complete == kick. + * selective clear set up to prevent interrupts on resets + */ + readl(cp->regs + REG_INTR_STATUS_ALIAS); + writel(INTR_RX_DONE | INTR_RX_BUF_UNAVAIL, cp->regs + REG_ALIAS_CLEAR); + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + for (i = 1; i < N_RX_COMP_RINGS; i++) + readl(cp->regs + REG_PLUS_INTRN_STATUS_ALIAS(i)); + + /* 2 is different from 3 and 4 */ + if (N_RX_COMP_RINGS > 1) + writel(INTR_RX_DONE_ALT | INTR_RX_BUF_UNAVAIL_1, + cp->regs + REG_PLUS_ALIASN_CLEAR(1)); + + for (i = 2; i < N_RX_COMP_RINGS; i++) + writel(INTR_RX_DONE_ALT, + cp->regs + REG_PLUS_ALIASN_CLEAR(i)); + } + + /* set up pause thresholds */ + val = CAS_BASE(RX_PAUSE_THRESH_OFF, + cp->rx_pause_off / RX_PAUSE_THRESH_QUANTUM); + val |= CAS_BASE(RX_PAUSE_THRESH_ON, + cp->rx_pause_on / RX_PAUSE_THRESH_QUANTUM); + writel(val, cp->regs + REG_RX_PAUSE_THRESH); + + /* zero out dma reassembly buffers */ + for (i = 0; i < 64; i++) { + writel(i, cp->regs + REG_RX_TABLE_ADDR); + writel(0x0, cp->regs + REG_RX_TABLE_DATA_LOW); + writel(0x0, cp->regs + REG_RX_TABLE_DATA_MID); + writel(0x0, cp->regs + REG_RX_TABLE_DATA_HI); + } + + /* make sure address register is 0 for normal operation */ + writel(0x0, cp->regs + REG_RX_CTRL_FIFO_ADDR); + writel(0x0, cp->regs + REG_RX_IPP_FIFO_ADDR); + + /* interrupt mitigation */ +#ifdef USE_RX_BLANK + val = CAS_BASE(RX_BLANK_INTR_TIME, RX_BLANK_INTR_TIME_VAL); + val |= CAS_BASE(RX_BLANK_INTR_PKT, RX_BLANK_INTR_PKT_VAL); + writel(val, cp->regs + REG_RX_BLANK); +#else + writel(0x0, cp->regs + REG_RX_BLANK); +#endif + + /* interrupt generation as a function of low water marks for + * free desc and completion entries. these are used to trigger + * housekeeping for rx descs. we don't use the free interrupt + * as it's not very useful + */ + /* val = CAS_BASE(RX_AE_THRESH_FREE, RX_AE_FREEN_VAL(0)); */ + val = CAS_BASE(RX_AE_THRESH_COMP, RX_AE_COMP_VAL); + writel(val, cp->regs + REG_RX_AE_THRESH); + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + val = CAS_BASE(RX_AE1_THRESH_FREE, RX_AE_FREEN_VAL(1)); + writel(val, cp->regs + REG_PLUS_RX_AE1_THRESH); + } + + /* Random early detect registers. useful for congestion avoidance. + * this should be tunable. + */ + writel(0x0, cp->regs + REG_RX_RED); + + /* receive page sizes. default == 2K (0x800) */ + val = 0; + if (cp->page_size == 0x1000) + val = 0x1; + else if (cp->page_size == 0x2000) + val = 0x2; + else if (cp->page_size == 0x4000) + val = 0x3; + + /* round mtu + offset. constrain to page size. 
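+ * e.g. the default 1500 byte MTU gives size = 1564, which lands in the
+ * 0x400 < size <= 0x800 bucket below, so i = 1 and mtu_stride = 1 << 11
+ * = 2048.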
*/ + size = cp->dev->mtu + 64; + if (size > cp->page_size) + size = cp->page_size; + + if (size <= 0x400) + i = 0x0; + else if (size <= 0x800) + i = 0x1; + else if (size <= 0x1000) + i = 0x2; + else + i = 0x3; + + cp->mtu_stride = 1 << (i + 10); + val = CAS_BASE(RX_PAGE_SIZE, val); + val |= CAS_BASE(RX_PAGE_SIZE_MTU_STRIDE, i); + val |= CAS_BASE(RX_PAGE_SIZE_MTU_COUNT, cp->page_size >> (i + 10)); + val |= CAS_BASE(RX_PAGE_SIZE_MTU_OFF, 0x1); + writel(val, cp->regs + REG_RX_PAGE_SIZE); + + /* enable the header parser if desired */ + if (CAS_HP_FIRMWARE == cas_prog_null) + return; + + val = CAS_BASE(HP_CFG_NUM_CPU, CAS_NCPUS > 63 ? 0 : CAS_NCPUS); + val |= HP_CFG_PARSE_EN | HP_CFG_SYN_INC_MASK; + val |= CAS_BASE(HP_CFG_TCP_THRESH, HP_TCP_THRESH_VAL); + writel(val, cp->regs + REG_HP_CFG); +} + +static inline void cas_rxc_init(struct cas_rx_comp *rxc) +{ + memset(rxc, 0, sizeof(*rxc)); + rxc->word4 = cpu_to_le64(RX_COMP4_ZERO); +} + +/* NOTE: we use the ENC RX DESC ring for spares. the rx_page[0,1] + * flipping is protected by the fact that the chip will not + * hand back the same page index while it's being processed. + */ +static inline cas_page_t *cas_page_spare(struct cas *cp, const int index) +{ + cas_page_t *page = cp->rx_pages[1][index]; + cas_page_t *new; + + if (page_count(page->buffer) == 1) + return page; + + new = cas_page_dequeue(cp); + if (new) { + spin_lock(&cp->rx_inuse_lock); + list_add(&page->list, &cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + } + return new; +} + +/* this needs to be changed if we actually use the ENC RX DESC ring */ +static cas_page_t *cas_page_swap(struct cas *cp, const int ring, + const int index) +{ + cas_page_t **page0 = cp->rx_pages[0]; + cas_page_t **page1 = cp->rx_pages[1]; + + /* swap if buffer is in use */ + if (page_count(page0[index]->buffer) > 1) { + cas_page_t *new = cas_page_spare(cp, index); + if (new) { + page1[index] = page0[index]; + page0[index] = new; + } + } + RX_USED_SET(page0[index], 0); + return page0[index]; +} + +static void cas_clean_rxds(struct cas *cp) +{ + /* only clean ring 0 as ring 1 is used for spare buffers */ + struct cas_rx_desc *rxd = cp->init_rxds[0]; + int i, size; + + /* release all rx flows */ + for (i = 0; i < N_RX_FLOWS; i++) { + struct sk_buff *skb; + while ((skb = __skb_dequeue(&cp->rx_flows[i]))) { + cas_skb_release(skb); + } + } + + /* initialize descriptors */ + size = RX_DESC_RINGN_SIZE(0); + for (i = 0; i < size; i++) { + cas_page_t *page = cas_page_swap(cp, 0, i); + rxd[i].buffer = cpu_to_le64(page->dma_addr); + rxd[i].index = cpu_to_le64(CAS_BASE(RX_INDEX_NUM, i) | + CAS_BASE(RX_INDEX_RING, 0)); + } + + cp->rx_old[0] = RX_DESC_RINGN_SIZE(0) - 4; + cp->rx_last[0] = 0; + cp->cas_flags &= ~CAS_FLAG_RXD_POST(0); +} + +static void cas_clean_rxcs(struct cas *cp) +{ + int i, j; + + /* take ownership of rx comp descriptors */ + memset(cp->rx_cur, 0, sizeof(*cp->rx_cur)*N_RX_COMP_RINGS); + memset(cp->rx_new, 0, sizeof(*cp->rx_new)*N_RX_COMP_RINGS); + for (i = 0; i < N_RX_COMP_RINGS; i++) { + struct cas_rx_comp *rxc = cp->init_rxcs[i]; + for (j = 0; j < RX_COMP_RINGN_SIZE(i); j++) { + cas_rxc_init(rxc + j); + } + } +} + +#if 0 +/* When we get a RX fifo overflow, the RX unit is probably hung + * so we do the following. + * + * If any part of the reset goes wrong, we return 1 and that causes the + * whole chip to be reset. + */ +static int cas_rxmac_reset(struct cas *cp) +{ + struct net_device *dev = cp->dev; + int limit; + u32 val; + + /* First, reset MAC RX. 
*/ + writel(cp->mac_rx_cfg & ~MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG); + for (limit = 0; limit < STOP_TRIES; limit++) { + if (!(readl(cp->regs + REG_MAC_RX_CFG) & MAC_RX_CFG_EN)) + break; + udelay(10); + } + if (limit == STOP_TRIES) { + printk(KERN_ERR "%s: RX MAC will not disable, resetting whole " + "chip.\n", dev->name); + return 1; + } + + /* Second, disable RX DMA. */ + writel(0, cp->regs + REG_RX_CFG); + for (limit = 0; limit < STOP_TRIES; limit++) { + if (!(readl(cp->regs + REG_RX_CFG) & RX_CFG_DMA_EN)) + break; + udelay(10); + } + if (limit == STOP_TRIES) { + printk(KERN_ERR "%s: RX DMA will not disable, resetting whole " + "chip.\n", dev->name); + return 1; + } + + mdelay(5); + + /* Execute RX reset command. */ + writel(SW_RESET_RX, cp->regs + REG_SW_RESET); + for (limit = 0; limit < STOP_TRIES; limit++) { + if (!(readl(cp->regs + REG_SW_RESET) & SW_RESET_RX)) + break; + udelay(10); + } + if (limit == STOP_TRIES) { + printk(KERN_ERR "%s: RX reset command will not execute, " + "resetting whole chip.\n", dev->name); + return 1; + } + + /* reset driver rx state */ + cas_clean_rxds(cp); + cas_clean_rxcs(cp); + + /* Now, reprogram the rest of RX unit. */ + cas_init_rx_dma(cp); + + /* re-enable */ + val = readl(cp->regs + REG_RX_CFG); + writel(val | RX_CFG_DMA_EN, cp->regs + REG_RX_CFG); + writel(MAC_RX_FRAME_RECV, cp->regs + REG_MAC_RX_MASK); + val = readl(cp->regs + REG_MAC_RX_CFG); + writel(val | MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG); + return 0; +} +#endif + +static int cas_rxmac_interrupt(struct net_device *dev, struct cas *cp, + u32 status) +{ + u32 stat = readl(cp->regs + REG_MAC_RX_STATUS); + + if (!stat) + return 0; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: rxmac interrupt, stat: 0x%x\n", + cp->dev->name, stat); + + /* these are all rollovers */ + spin_lock(&cp->stat_lock[0]); + if (stat & MAC_RX_ALIGN_ERR) + cp->net_stats[0].rx_frame_errors += 0x10000; + + if (stat & MAC_RX_CRC_ERR) + cp->net_stats[0].rx_crc_errors += 0x10000; + + if (stat & MAC_RX_LEN_ERR) + cp->net_stats[0].rx_length_errors += 0x10000; + + if (stat & MAC_RX_OVERFLOW) { + cp->net_stats[0].rx_over_errors++; + cp->net_stats[0].rx_fifo_errors++; + } + + /* We do not track MAC_RX_FRAME_COUNT and MAC_RX_VIOL_ERR + * events. + */ + spin_unlock(&cp->stat_lock[0]); + return 0; +} + +static int cas_mac_interrupt(struct net_device *dev, struct cas *cp, + u32 status) +{ + u32 stat = readl(cp->regs + REG_MAC_CTRL_STATUS); + + if (!stat) + return 0; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: mac interrupt, stat: 0x%x\n", + cp->dev->name, stat); + + /* This interrupt is just for pause frame and pause + * tracking. It is useful for diagnostics and debug + * but probably by default we will mask these events. + */ + if (stat & MAC_CTRL_PAUSE_STATE) + cp->pause_entered++; + + if (stat & MAC_CTRL_PAUSE_RECEIVED) + cp->pause_last_time_recvd = (stat >> 16); + + return 0; +} + + +/* Must be invoked under cp->lock. */ +static inline int cas_mdio_link_not_up(struct cas *cp) +{ + u16 val; + + switch (cp->lstate) { + case link_force_ret: + if (netif_msg_link(cp)) + printk(KERN_INFO "%s: Autoneg failed again, keeping" + " forced mode\n", cp->dev->name); + cas_phy_write(cp, MII_BMCR, cp->link_fcntl); + cp->timer_ticks = 5; + cp->lstate = link_force_ok; + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + break; + + case link_aneg: + val = cas_phy_read(cp, MII_BMCR); + + /* Try forced modes. 
we try things in the following order: + * 1000 full -> 100 full/half -> 10 half + */ + val &= ~(BMCR_ANRESTART | BMCR_ANENABLE); + val |= BMCR_FULLDPLX; + val |= (cp->cas_flags & CAS_FLAG_1000MB_CAP) ? + CAS_BMCR_SPEED1000 : BMCR_SPEED100; + cas_phy_write(cp, MII_BMCR, val); + cp->timer_ticks = 5; + cp->lstate = link_force_try; + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + break; + + case link_force_try: + /* Downgrade from 1000 to 100 to 10 Mbps if necessary. */ + val = cas_phy_read(cp, MII_BMCR); + cp->timer_ticks = 5; + if (val & CAS_BMCR_SPEED1000) { /* gigabit */ + val &= ~CAS_BMCR_SPEED1000; + val |= (BMCR_SPEED100 | BMCR_FULLDPLX); + cas_phy_write(cp, MII_BMCR, val); + break; + } + + if (val & BMCR_SPEED100) { + if (val & BMCR_FULLDPLX) /* fd failed */ + val &= ~BMCR_FULLDPLX; + else { /* 100Mbps failed */ + val &= ~BMCR_SPEED100; + } + cas_phy_write(cp, MII_BMCR, val); + break; + } + default: + break; + } + return 0; +} + + +/* must be invoked with cp->lock held */ +static int cas_mii_link_check(struct cas *cp, const u16 bmsr) +{ + int restart; + + if (bmsr & BMSR_LSTATUS) { + /* Ok, here we got a link. If we had it due to a forced + * fallback, and we were configured for autoneg, we + * retry a short autoneg pass. If you know your hub is + * broken, use ethtool ;) + */ + if ((cp->lstate == link_force_try) && + (cp->link_cntl & BMCR_ANENABLE)) { + cp->lstate = link_force_ret; + cp->link_transition = LINK_TRANSITION_LINK_CONFIG; + cas_mif_poll(cp, 0); + cp->link_fcntl = cas_phy_read(cp, MII_BMCR); + cp->timer_ticks = 5; + if (cp->opened && netif_msg_link(cp)) + printk(KERN_INFO "%s: Got link after fallback, retrying" + " autoneg once...\n", cp->dev->name); + cas_phy_write(cp, MII_BMCR, + cp->link_fcntl | BMCR_ANENABLE | + BMCR_ANRESTART); + cas_mif_poll(cp, 1); + + } else if (cp->lstate != link_up) { + cp->lstate = link_up; + cp->link_transition = LINK_TRANSITION_LINK_UP; + + if (cp->opened) { + cas_set_link_modes(cp); + netif_carrier_on(cp->dev); + } + } + return 0; + } + + /* link not up. if the link was previously up, we restart the + * whole process + */ + restart = 0; + if (cp->lstate == link_up) { + cp->lstate = link_down; + cp->link_transition = LINK_TRANSITION_LINK_DOWN; + + netif_carrier_off(cp->dev); + if (cp->opened && netif_msg_link(cp)) + printk(KERN_INFO "%s: Link down\n", + cp->dev->name); + restart = 1; + + } else if (++cp->timer_ticks > 10) + cas_mdio_link_not_up(cp); + + return restart; +} + +static int cas_mif_interrupt(struct net_device *dev, struct cas *cp, + u32 status) +{ + u32 stat = readl(cp->regs + REG_MIF_STATUS); + u16 bmsr; + + /* check for a link change */ + if (CAS_VAL(MIF_STATUS_POLL_STATUS, stat) == 0) + return 0; + + bmsr = CAS_VAL(MIF_STATUS_POLL_DATA, stat); + return cas_mii_link_check(cp, bmsr); +} + +static int cas_pci_interrupt(struct net_device *dev, struct cas *cp, + u32 status) +{ + u32 stat = readl(cp->regs + REG_PCI_ERR_STATUS); + + if (!stat) + return 0; + + printk(KERN_ERR "%s: PCI error [%04x:%04x] ", dev->name, stat, + readl(cp->regs + REG_BIM_DIAG)); + + /* cassini+ has this reserved */ + if ((stat & PCI_ERR_BADACK) && + ((cp->cas_flags & CAS_FLAG_REG_PLUS) == 0)) + printk(" "); + + if (stat & PCI_ERR_DTRTO) + printk(" "); + if (stat & PCI_ERR_OTHER) + printk(" "); + if (stat & PCI_ERR_BIM_DMA_WRITE) + printk(" "); + if (stat & PCI_ERR_BIM_DMA_READ) + printk(" "); + printk("\n"); + + if (stat & PCI_ERR_OTHER) { + u16 cfg; + + /* Interrogate PCI config space for the + * true cause. 
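+		 * PCI_ERR_OTHER only says that something was latched in
+		 * config space, so read PCI_STATUS, log each error bit,
+		 * and write the bits back to clear them.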
+ */ + pci_read_config_word(cp->pdev, PCI_STATUS, &cfg); + printk(KERN_ERR "%s: Read PCI cfg space status [%04x]\n", + dev->name, cfg); + if (cfg & PCI_STATUS_PARITY) + printk(KERN_ERR "%s: PCI parity error detected.\n", + dev->name); + if (cfg & PCI_STATUS_SIG_TARGET_ABORT) + printk(KERN_ERR "%s: PCI target abort.\n", + dev->name); + if (cfg & PCI_STATUS_REC_TARGET_ABORT) + printk(KERN_ERR "%s: PCI master acks target abort.\n", + dev->name); + if (cfg & PCI_STATUS_REC_MASTER_ABORT) + printk(KERN_ERR "%s: PCI master abort.\n", dev->name); + if (cfg & PCI_STATUS_SIG_SYSTEM_ERROR) + printk(KERN_ERR "%s: PCI system error SERR#.\n", + dev->name); + if (cfg & PCI_STATUS_DETECTED_PARITY) + printk(KERN_ERR "%s: PCI parity error.\n", + dev->name); + + /* Write the error bits back to clear them. */ + cfg &= (PCI_STATUS_PARITY | + PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT | + PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR | + PCI_STATUS_DETECTED_PARITY); + pci_write_config_word(cp->pdev, PCI_STATUS, cfg); + } + + /* For all PCI errors, we should reset the chip. */ + return 1; +} + +/* All non-normal interrupt conditions get serviced here. + * Returns non-zero if we should just exit the interrupt + * handler right now (ie. if we reset the card which invalidates + * all of the other original irq status bits). + */ +static int cas_abnormal_irq(struct net_device *dev, struct cas *cp, + u32 status) +{ + if (status & INTR_RX_TAG_ERROR) { + /* corrupt RX tag framing */ + if (netif_msg_rx_err(cp)) + printk(KERN_DEBUG "%s: corrupt rx tag framing\n", + cp->dev->name); + spin_lock(&cp->stat_lock[0]); + cp->net_stats[0].rx_errors++; + spin_unlock(&cp->stat_lock[0]); + goto do_reset; + } + + if (status & INTR_RX_LEN_MISMATCH) { + /* length mismatch. */ + if (netif_msg_rx_err(cp)) + printk(KERN_DEBUG "%s: length mismatch for rx frame\n", + cp->dev->name); + spin_lock(&cp->stat_lock[0]); + cp->net_stats[0].rx_errors++; + spin_unlock(&cp->stat_lock[0]); + goto do_reset; + } + + if (status & INTR_PCS_STATUS) { + if (cas_pcs_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_TX_MAC_STATUS) { + if (cas_txmac_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_RX_MAC_STATUS) { + if (cas_rxmac_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_MAC_CTRL_STATUS) { + if (cas_mac_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_MIF_STATUS) { + if (cas_mif_interrupt(dev, cp, status)) + goto do_reset; + } + + if (status & INTR_PCI_ERROR_STATUS) { + if (cas_pci_interrupt(dev, cp, status)) + goto do_reset; + } + return 0; + +do_reset: +#if 1 + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_all); + printk(KERN_ERR "%s:reset called in cas_abnormal_irq [0x%x]\n", + dev->name, status); + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, CAS_RESET_ALL); + printk(KERN_ERR "reset called in cas_abnormal_irq\n"); + schedule_work(&cp->reset_task); +#endif + return 1; +} + +/* NOTE: CAS_TABORT returns 1 or 2 so that it can be used when + * determining whether to do a netif_stop/wakeup + */ +#define CAS_TABORT(x) (((x)->cas_flags & CAS_FLAG_TARGET_ABORT) ? 
2 : 1) +#define CAS_ROUND_PAGE(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK) +static inline int cas_calc_tabort(struct cas *cp, const unsigned long addr, + const int len) +{ + unsigned long off = addr + len; + + if (CAS_TABORT(cp) == 1) + return 0; + if ((CAS_ROUND_PAGE(off) - off) > TX_TARGET_ABORT_LEN) + return 0; + return TX_TARGET_ABORT_LEN; +} + +static inline void cas_tx_ringN(struct cas *cp, int ring, int limit) +{ + struct cas_tx_desc *txds; + struct sk_buff **skbs; + struct net_device *dev = cp->dev; + int entry, count; + + spin_lock(&cp->tx_lock[ring]); + txds = cp->init_txds[ring]; + skbs = cp->tx_skbs[ring]; + entry = cp->tx_old[ring]; + + count = TX_BUFF_COUNT(ring, entry, limit); + while (entry != limit) { + struct sk_buff *skb = skbs[entry]; + dma_addr_t daddr; + u32 dlen; + int frag; + + if (!skb) { + /* this should never occur */ + entry = TX_DESC_NEXT(ring, entry); + continue; + } + + /* however, we might get only a partial skb release. */ + count -= skb_shinfo(skb)->nr_frags + + + cp->tx_tiny_use[ring][entry].nbufs + 1; + if (count < 0) + break; + + if (netif_msg_tx_done(cp)) + printk(KERN_DEBUG "%s: tx[%d] done, slot %d\n", + cp->dev->name, ring, entry); + + skbs[entry] = NULL; + cp->tx_tiny_use[ring][entry].nbufs = 0; + + for (frag = 0; frag <= skb_shinfo(skb)->nr_frags; frag++) { + struct cas_tx_desc *txd = txds + entry; + + daddr = le64_to_cpu(txd->buffer); + dlen = CAS_VAL(TX_DESC_BUFLEN, + le64_to_cpu(txd->control)); + pci_unmap_page(cp->pdev, daddr, dlen, + PCI_DMA_TODEVICE); + entry = TX_DESC_NEXT(ring, entry); + + /* tiny buffer may follow */ + if (cp->tx_tiny_use[ring][entry].used) { + cp->tx_tiny_use[ring][entry].used = 0; + entry = TX_DESC_NEXT(ring, entry); + } + } + + spin_lock(&cp->stat_lock[ring]); + cp->net_stats[ring].tx_packets++; + cp->net_stats[ring].tx_bytes += skb->len; + spin_unlock(&cp->stat_lock[ring]); + dev_kfree_skb_irq(skb); + } + cp->tx_old[ring] = entry; + + /* this is wrong for multiple tx rings. the net device needs + * multiple queues for this to do the right thing. 
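+	 * (for now all tx rings share the one netif queue, so any ring
+	 * that frees enough descriptors may wake it.)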
we wait + * for 2*packets to be available when using tiny buffers + */ + if (netif_queue_stopped(dev) && + (TX_BUFFS_AVAIL(cp, ring) > CAS_TABORT(cp)*(MAX_SKB_FRAGS + 1))) + netif_wake_queue(dev); + spin_unlock(&cp->tx_lock[ring]); +} + +static void cas_tx(struct net_device *dev, struct cas *cp, + u32 status) +{ + int limit, ring; +#ifdef USE_TX_COMPWB + u64 compwb = le64_to_cpu(cp->init_block->tx_compwb); +#endif + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: tx interrupt, status: 0x%x, %lx\n", + cp->dev->name, status, compwb); + /* process all the rings */ + for (ring = 0; ring < N_TX_RINGS; ring++) { +#ifdef USE_TX_COMPWB + /* use the completion writeback registers */ + limit = (CAS_VAL(TX_COMPWB_MSB, compwb) << 8) | + CAS_VAL(TX_COMPWB_LSB, compwb); + compwb = TX_COMPWB_NEXT(compwb); +#else + limit = readl(cp->regs + REG_TX_COMPN(ring)); +#endif + if (cp->tx_old[ring] != limit) + cas_tx_ringN(cp, ring, limit); + } +} + + +static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, + int entry, const u64 *words, + struct sk_buff **skbref) +{ + int dlen, hlen, len, i, alloclen; + int off, swivel = RX_SWIVEL_OFF_VAL; + struct cas_page *page; + struct sk_buff *skb; + void *addr, *crcaddr; + char *p; + + hlen = CAS_VAL(RX_COMP2_HDR_SIZE, words[1]); + dlen = CAS_VAL(RX_COMP1_DATA_SIZE, words[0]); + len = hlen + dlen; + + if (RX_COPY_ALWAYS || (words[2] & RX_COMP3_SMALL_PKT)) + alloclen = len; + else + alloclen = max(hlen, RX_COPY_MIN); + + skb = dev_alloc_skb(alloclen + swivel + cp->crc_size); + if (skb == NULL) + return -1; + + *skbref = skb; + skb->dev = cp->dev; + skb_reserve(skb, swivel); + + p = skb->data; + addr = crcaddr = NULL; + if (hlen) { /* always copy header pages */ + i = CAS_VAL(RX_COMP2_HDR_INDEX, words[1]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + off = CAS_VAL(RX_COMP2_HDR_OFF, words[1]) * 0x100 + + swivel; + + i = hlen; + if (!dlen) /* attach FCS */ + i += cp->crc_size; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + addr = cas_page_map(page->buffer); + memcpy(p, addr + off, i); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + cas_page_unmap(addr); + RX_USED_ADD(page, 0x100); + p += hlen; + swivel = 0; + } + + + if (alloclen < (hlen + dlen)) { + skb_frag_t *frag = skb_shinfo(skb)->frags; + + /* normal or jumbo packets. 
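+		 * the data did not fit in the skb we allocated, so attach
+		 * the rx pages to the skb as page fragments instead of
+		 * copying.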
we use frags */ + i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + off = CAS_VAL(RX_COMP1_DATA_OFF, words[0]) + swivel; + + hlen = min(cp->page_size - off, dlen); + if (hlen < 0) { + if (netif_msg_rx_err(cp)) { + printk(KERN_DEBUG "%s: rx page overflow: " + "%d\n", cp->dev->name, hlen); + } + dev_kfree_skb_irq(skb); + return -1; + } + i = hlen; + if (i == dlen) /* attach FCS */ + i += cp->crc_size; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + + /* make sure we always copy a header */ + swivel = 0; + if (p == (char *) skb->data) { /* not split */ + addr = cas_page_map(page->buffer); + memcpy(p, addr + off, RX_COPY_MIN); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + cas_page_unmap(addr); + off += RX_COPY_MIN; + swivel = RX_COPY_MIN; + RX_USED_ADD(page, cp->mtu_stride); + } else { + RX_USED_ADD(page, hlen); + } + skb_put(skb, alloclen); + + skb_shinfo(skb)->nr_frags++; + skb->data_len += hlen - swivel; + skb->len += hlen - swivel; + + get_page(page->buffer); + frag->page = page->buffer; + frag->page_offset = off; + frag->size = hlen - swivel; + + /* any more data? */ + if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { + hlen = dlen; + off = 0; + + i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr, + hlen + cp->crc_size, + PCI_DMA_FROMDEVICE); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr, + hlen + cp->crc_size, + PCI_DMA_FROMDEVICE); + + skb_shinfo(skb)->nr_frags++; + skb->data_len += hlen; + skb->len += hlen; + frag++; + + get_page(page->buffer); + frag->page = page->buffer; + frag->page_offset = 0; + frag->size = hlen; + RX_USED_ADD(page, hlen + cp->crc_size); + } + + if (cp->crc_size) { + addr = cas_page_map(page->buffer); + crcaddr = addr + off + hlen; + } + + } else { + /* copying packet */ + if (!dlen) + goto end_copy_pkt; + + i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + off = CAS_VAL(RX_COMP1_DATA_OFF, words[0]) + swivel; + hlen = min(cp->page_size - off, dlen); + if (hlen < 0) { + if (netif_msg_rx_err(cp)) { + printk(KERN_DEBUG "%s: rx page overflow: " + "%d\n", cp->dev->name, hlen); + } + dev_kfree_skb_irq(skb); + return -1; + } + i = hlen; + if (i == dlen) /* attach FCS */ + i += cp->crc_size; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + addr = cas_page_map(page->buffer); + memcpy(p, addr + off, i); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i, + PCI_DMA_FROMDEVICE); + cas_page_unmap(addr); + if (p == (char *) skb->data) /* not split */ + RX_USED_ADD(page, cp->mtu_stride); + else + RX_USED_ADD(page, i); + + /* any more data? 
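+		 * a split packet continues in the next rx page; copy the
+		 * remainder (plus any FCS bytes) out of that page as well.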
*/ + if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { + p += hlen; + i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]); + page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; + pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr, + dlen + cp->crc_size, + PCI_DMA_FROMDEVICE); + addr = cas_page_map(page->buffer); + memcpy(p, addr, dlen + cp->crc_size); + pci_dma_sync_single_for_device(cp->pdev, page->dma_addr, + dlen + cp->crc_size, + PCI_DMA_FROMDEVICE); + cas_page_unmap(addr); + RX_USED_ADD(page, dlen + cp->crc_size); + } +end_copy_pkt: + if (cp->crc_size) { + addr = NULL; + crcaddr = skb->data + alloclen; + } + skb_put(skb, alloclen); + } + + i = CAS_VAL(RX_COMP4_TCP_CSUM, words[3]); + if (cp->crc_size) { + /* checksum includes FCS. strip it out. */ + i = csum_fold(csum_partial(crcaddr, cp->crc_size, i)); + if (addr) + cas_page_unmap(addr); + } + skb->csum = ntohs(i ^ 0xffff); + skb->ip_summed = CHECKSUM_HW; + skb->protocol = eth_type_trans(skb, cp->dev); + return len; +} + + +/* we can handle up to 64 rx flows at a time. we do the same thing + * as nonreassm except that we batch up the buffers. + * NOTE: we currently just treat each flow as a bunch of packets that + * we pass up. a better way would be to coalesce the packets + * into a jumbo packet. to do that, we need to do the following: + * 1) the first packet will have a clean split between header and + * data. save both. + * 2) each time the next flow packet comes in, extend the + * data length and merge the checksums. + * 3) on flow release, fix up the header. + * 4) make sure the higher layer doesn't care. + * because packets get coalesced, we shouldn't run into fragment count + * issues. + */ +static inline void cas_rx_flow_pkt(struct cas *cp, const u64 *words, + struct sk_buff *skb) +{ + int flowid = CAS_VAL(RX_COMP3_FLOWID, words[2]) & (N_RX_FLOWS - 1); + struct sk_buff_head *flow = &cp->rx_flows[flowid]; + + /* this is protected at a higher layer, so no need to + * do any additional locking here. stick the buffer + * at the end. + */ + __skb_insert(skb, flow->prev, (struct sk_buff *) flow, flow); + if (words[0] & RX_COMP1_RELEASE_FLOW) { + while ((skb = __skb_dequeue(flow))) { + cas_skb_release(skb); + } + } +} + +/* put rx descriptor back on ring. if a buffer is in use by a higher + * layer, this will need to put in a replacement. + */ +static void cas_post_page(struct cas *cp, const int ring, const int index) +{ + cas_page_t *new; + int entry; + + entry = cp->rx_old[ring]; + + new = cas_page_swap(cp, ring, index); + cp->init_rxds[ring][entry].buffer = cpu_to_le64(new->dma_addr); + cp->init_rxds[ring][entry].index = + cpu_to_le64(CAS_BASE(RX_INDEX_NUM, index) | + CAS_BASE(RX_INDEX_RING, ring)); + + entry = RX_DESC_ENTRY(ring, entry + 1); + cp->rx_old[ring] = entry; + + if (entry % 4) + return; + + if (ring == 0) + writel(entry, cp->regs + REG_RX_KICK); + else if ((N_RX_DESC_RINGS > 1) && + (cp->cas_flags & CAS_FLAG_REG_PLUS)) + writel(entry, cp->regs + REG_PLUS_RX_KICK1); +} + + +/* only when things are bad */ +static int cas_post_rxds_ringN(struct cas *cp, int ring, int num) +{ + unsigned int entry, last, count, released; + int cluster; + cas_page_t **page = cp->rx_pages[ring]; + + entry = cp->rx_old[ring]; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: rxd[%d] interrupt, done: %d\n", + cp->dev->name, ring, entry); + + cluster = -1; + count = entry & 0x3; + last = RX_DESC_ENTRY(ring, num ? 
entry + num - 4: entry - 4); + released = 0; + while (entry != last) { + /* make a new buffer if it's still in use */ + if (page_count(page[entry]->buffer) > 1) { + cas_page_t *new = cas_page_dequeue(cp); + if (!new) { + /* let the timer know that we need to + * do this again + */ + cp->cas_flags |= CAS_FLAG_RXD_POST(ring); + if (!timer_pending(&cp->link_timer)) + mod_timer(&cp->link_timer, jiffies + + CAS_LINK_FAST_TIMEOUT); + cp->rx_old[ring] = entry; + cp->rx_last[ring] = num ? num - released : 0; + return -ENOMEM; + } + spin_lock(&cp->rx_inuse_lock); + list_add(&page[entry]->list, &cp->rx_inuse_list); + spin_unlock(&cp->rx_inuse_lock); + cp->init_rxds[ring][entry].buffer = + cpu_to_le64(new->dma_addr); + page[entry] = new; + + } + + if (++count == 4) { + cluster = entry; + count = 0; + } + released++; + entry = RX_DESC_ENTRY(ring, entry + 1); + } + cp->rx_old[ring] = entry; + + if (cluster < 0) + return 0; + + if (ring == 0) + writel(cluster, cp->regs + REG_RX_KICK); + else if ((N_RX_DESC_RINGS > 1) && + (cp->cas_flags & CAS_FLAG_REG_PLUS)) + writel(cluster, cp->regs + REG_PLUS_RX_KICK1); + return 0; +} + + +/* process a completion ring. packets are set up in three basic ways: + * small packets: should be copied header + data in single buffer. + * large packets: header and data in a single buffer. + * split packets: header in a separate buffer from data. + * data may be in multiple pages. data may be > 256 + * bytes but in a single page. + * + * NOTE: RX page posting is done in this routine as well. while there's + * the capability of using multiple RX completion rings, it isn't + * really worthwhile due to the fact that the page posting will + * force serialization on the single descriptor ring. + */ +static int cas_rx_ringN(struct cas *cp, int ring, int budget) +{ + struct cas_rx_comp *rxcs = cp->init_rxcs[ring]; + int entry, drops; + int npackets = 0; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: rx[%d] interrupt, done: %d/%d\n", + cp->dev->name, ring, + readl(cp->regs + REG_RX_COMP_HEAD), + cp->rx_new[ring]); + + entry = cp->rx_new[ring]; + drops = 0; + while (1) { + struct cas_rx_comp *rxc = rxcs + entry; + struct sk_buff *skb; + int type, len; + u64 words[4]; + int i, dring; + + words[0] = le64_to_cpu(rxc->word1); + words[1] = le64_to_cpu(rxc->word2); + words[2] = le64_to_cpu(rxc->word3); + words[3] = le64_to_cpu(rxc->word4); + + /* don't touch if still owned by hw */ + type = CAS_VAL(RX_COMP1_TYPE, words[0]); + if (type == 0) + break; + + /* hw hasn't cleared the zero bit yet */ + if (words[3] & RX_COMP4_ZERO) { + break; + } + + /* get info on the packet */ + if (words[3] & (RX_COMP4_LEN_MISMATCH | RX_COMP4_BAD)) { + spin_lock(&cp->stat_lock[ring]); + cp->net_stats[ring].rx_errors++; + if (words[3] & RX_COMP4_LEN_MISMATCH) + cp->net_stats[ring].rx_length_errors++; + if (words[3] & RX_COMP4_BAD) + cp->net_stats[ring].rx_crc_errors++; + spin_unlock(&cp->stat_lock[ring]); + + /* We'll just return it to Cassini. */ + drop_it: + spin_lock(&cp->stat_lock[ring]); + ++cp->net_stats[ring].rx_dropped; + spin_unlock(&cp->stat_lock[ring]); + goto next; + } + + len = cas_rx_process_pkt(cp, rxc, entry, words, &skb); + if (len < 0) { + ++drops; + goto drop_it; + } + + /* see if it's a flow re-assembly or not. the driver + * itself handles release back up. 
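+		 * completion type 0x2 marks a non-reassembly packet, which
+		 * goes to the stack right away; flow packets are batched
+		 * until the hardware flags the flow for release.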
+ */ + if (RX_DONT_BATCH || (type == 0x2)) { + /* non-reassm: these always get released */ + cas_skb_release(skb); + } else { + cas_rx_flow_pkt(cp, words, skb); + } + + spin_lock(&cp->stat_lock[ring]); + cp->net_stats[ring].rx_packets++; + cp->net_stats[ring].rx_bytes += len; + spin_unlock(&cp->stat_lock[ring]); + cp->dev->last_rx = jiffies; + + next: + npackets++; + + /* should it be released? */ + if (words[0] & RX_COMP1_RELEASE_HDR) { + i = CAS_VAL(RX_COMP2_HDR_INDEX, words[1]); + dring = CAS_VAL(RX_INDEX_RING, i); + i = CAS_VAL(RX_INDEX_NUM, i); + cas_post_page(cp, dring, i); + } + + if (words[0] & RX_COMP1_RELEASE_DATA) { + i = CAS_VAL(RX_COMP1_DATA_INDEX, words[0]); + dring = CAS_VAL(RX_INDEX_RING, i); + i = CAS_VAL(RX_INDEX_NUM, i); + cas_post_page(cp, dring, i); + } + + if (words[0] & RX_COMP1_RELEASE_NEXT) { + i = CAS_VAL(RX_COMP2_NEXT_INDEX, words[1]); + dring = CAS_VAL(RX_INDEX_RING, i); + i = CAS_VAL(RX_INDEX_NUM, i); + cas_post_page(cp, dring, i); + } + + /* skip to the next entry */ + entry = RX_COMP_ENTRY(ring, entry + 1 + + CAS_VAL(RX_COMP1_SKIP, words[0])); +#ifdef USE_NAPI + if (budget && (npackets >= budget)) + break; +#endif + } + cp->rx_new[ring] = entry; + + if (drops) + printk(KERN_INFO "%s: Memory squeeze, deferring packet.\n", + cp->dev->name); + return npackets; +} + + +/* put completion entries back on the ring */ +static void cas_post_rxcs_ringN(struct net_device *dev, + struct cas *cp, int ring) +{ + struct cas_rx_comp *rxc = cp->init_rxcs[ring]; + int last, entry; + + last = cp->rx_cur[ring]; + entry = cp->rx_new[ring]; + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: rxc[%d] interrupt, done: %d/%d\n", + dev->name, ring, readl(cp->regs + REG_RX_COMP_HEAD), + entry); + + /* zero and re-mark descriptors */ + while (last != entry) { + cas_rxc_init(rxc + last); + last = RX_COMP_ENTRY(ring, last + 1); + } + cp->rx_cur[ring] = last; + + if (ring == 0) + writel(last, cp->regs + REG_RX_COMP_TAIL); + else if (cp->cas_flags & CAS_FLAG_REG_PLUS) + writel(last, cp->regs + REG_PLUS_RX_COMPN_TAIL(ring)); +} + + + +/* cassini can use all four PCI interrupts for the completion ring. + * rings 3 and 4 are identical + */ +#if defined(USE_PCI_INTC) || defined(USE_PCI_INTD) +static inline void cas_handle_irqN(struct net_device *dev, + struct cas *cp, const u32 status, + const int ring) +{ + if (status & (INTR_RX_COMP_FULL_ALT | INTR_RX_COMP_AF_ALT)) + cas_post_rxcs_ringN(dev, cp, ring); +} + +static irqreturn_t cas_interruptN(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct cas *cp = netdev_priv(dev); + unsigned long flags; + int ring; + u32 status = readl(cp->regs + REG_PLUS_INTRN_STATUS(ring)); + + /* check for shared irq */ + if (status == 0) + return IRQ_NONE; + + ring = (irq == cp->pci_irq_INTC) ? 2 : 3; + spin_lock_irqsave(&cp->lock, flags); + if (status & INTR_RX_DONE_ALT) { /* handle rx separately */ +#ifdef USE_NAPI + cas_mask_intr(cp); + netif_rx_schedule(dev); +#else + cas_rx_ringN(cp, ring, 0); +#endif + status &= ~INTR_RX_DONE_ALT; + } + + if (status) + cas_handle_irqN(dev, cp, status, ring); + spin_unlock_irqrestore(&cp->lock, flags); + return IRQ_HANDLED; +} +#endif + +#ifdef USE_PCI_INTB +/* everything but rx packets */ +static inline void cas_handle_irq1(struct cas *cp, const u32 status) +{ + if (status & INTR_RX_BUF_UNAVAIL_1) { + /* Frame arrived, no free RX buffers available. + * NOTE: we can get this on a link transition. 
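+		 * repost the ring 1 descriptors and account the event as
+		 * a drop.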
*/ + cas_post_rxds_ringN(cp, 1, 0); + spin_lock(&cp->stat_lock[1]); + cp->net_stats[1].rx_dropped++; + spin_unlock(&cp->stat_lock[1]); + } + + if (status & INTR_RX_BUF_AE_1) + cas_post_rxds_ringN(cp, 1, RX_DESC_RINGN_SIZE(1) - + RX_AE_FREEN_VAL(1)); + + if (status & (INTR_RX_COMP_AF | INTR_RX_COMP_FULL)) + cas_post_rxcs_ringN(cp, 1); +} + +/* ring 2 handles a few more events than 3 and 4 */ +static irqreturn_t cas_interrupt1(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct cas *cp = netdev_priv(dev); + unsigned long flags; + u32 status = readl(cp->regs + REG_PLUS_INTRN_STATUS(1)); + + /* check for shared interrupt */ + if (status == 0) + return IRQ_NONE; + + spin_lock_irqsave(&cp->lock, flags); + if (status & INTR_RX_DONE_ALT) { /* handle rx separately */ +#ifdef USE_NAPI + cas_mask_intr(cp); + netif_rx_schedule(dev); +#else + cas_rx_ringN(cp, 1, 0); +#endif + status &= ~INTR_RX_DONE_ALT; + } + if (status) + cas_handle_irq1(cp, status); + spin_unlock_irqrestore(&cp->lock, flags); + return IRQ_HANDLED; +} +#endif + +static inline void cas_handle_irq(struct net_device *dev, + struct cas *cp, const u32 status) +{ + /* housekeeping interrupts */ + if (status & INTR_ERROR_MASK) + cas_abnormal_irq(dev, cp, status); + + if (status & INTR_RX_BUF_UNAVAIL) { + /* Frame arrived, no free RX buffers available. + * NOTE: we can get this on a link transition. + */ + cas_post_rxds_ringN(cp, 0, 0); + spin_lock(&cp->stat_lock[0]); + cp->net_stats[0].rx_dropped++; + spin_unlock(&cp->stat_lock[0]); + } else if (status & INTR_RX_BUF_AE) { + cas_post_rxds_ringN(cp, 0, RX_DESC_RINGN_SIZE(0) - + RX_AE_FREEN_VAL(0)); + } + + if (status & (INTR_RX_COMP_AF | INTR_RX_COMP_FULL)) + cas_post_rxcs_ringN(dev, cp, 0); +} + +static irqreturn_t cas_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct cas *cp = netdev_priv(dev); + unsigned long flags; + u32 status = readl(cp->regs + REG_INTR_STATUS); + + if (status == 0) + return IRQ_NONE; + + spin_lock_irqsave(&cp->lock, flags); + if (status & (INTR_TX_ALL | INTR_TX_INTME)) { + cas_tx(dev, cp, status); + status &= ~(INTR_TX_ALL | INTR_TX_INTME); + } + + if (status & INTR_RX_DONE) { +#ifdef USE_NAPI + cas_mask_intr(cp); + netif_rx_schedule(dev); +#else + cas_rx_ringN(cp, 0, 0); +#endif + status &= ~INTR_RX_DONE; + } + + if (status) + cas_handle_irq(dev, cp, status); + spin_unlock_irqrestore(&cp->lock, flags); + return IRQ_HANDLED; +} + + +#ifdef USE_NAPI +static int cas_poll(struct net_device *dev, int *budget) +{ + struct cas *cp = netdev_priv(dev); + int i, enable_intr, todo, credits; + u32 status = readl(cp->regs + REG_INTR_STATUS); + unsigned long flags; + + spin_lock_irqsave(&cp->lock, flags); + cas_tx(dev, cp, status); + spin_unlock_irqrestore(&cp->lock, flags); + + /* NAPI rx packets. 
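+	 * the budget handed in by the NAPI core caps how much work this
+	 * poll may do;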
we spread the credits across all of the + * rxc rings + */ + todo = min(*budget, dev->quota); + + /* to make sure we're fair with the work we loop through each + * ring N_RX_COMP_RING times with a request of + * todo / N_RX_COMP_RINGS + */ + enable_intr = 1; + credits = 0; + for (i = 0; i < N_RX_COMP_RINGS; i++) { + int j; + for (j = 0; j < N_RX_COMP_RINGS; j++) { + credits += cas_rx_ringN(cp, j, todo / N_RX_COMP_RINGS); + if (credits >= todo) { + enable_intr = 0; + goto rx_comp; + } + } + } + +rx_comp: + *budget -= credits; + dev->quota -= credits; + + /* final rx completion */ + spin_lock_irqsave(&cp->lock, flags); + if (status) + cas_handle_irq(dev, cp, status); + +#ifdef USE_PCI_INTB + if (N_RX_COMP_RINGS > 1) { + status = readl(cp->regs + REG_PLUS_INTRN_STATUS(1)); + if (status) + cas_handle_irq1(dev, cp, status); + } +#endif + +#ifdef USE_PCI_INTC + if (N_RX_COMP_RINGS > 2) { + status = readl(cp->regs + REG_PLUS_INTRN_STATUS(2)); + if (status) + cas_handle_irqN(dev, cp, status, 2); + } +#endif + +#ifdef USE_PCI_INTD + if (N_RX_COMP_RINGS > 3) { + status = readl(cp->regs + REG_PLUS_INTRN_STATUS(3)); + if (status) + cas_handle_irqN(dev, cp, status, 3); + } +#endif + spin_unlock_irqrestore(&cp->lock, flags); + if (enable_intr) { + netif_rx_complete(dev); + cas_unmask_intr(cp); + return 0; + } + return 1; +} +#endif + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void cas_netpoll(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + + cas_disable_irq(cp, 0); + cas_interrupt(cp->pdev->irq, dev, NULL); + cas_enable_irq(cp, 0); + +#ifdef USE_PCI_INTB + if (N_RX_COMP_RINGS > 1) { + /* cas_interrupt1(); */ + } +#endif +#ifdef USE_PCI_INTC + if (N_RX_COMP_RINGS > 2) { + /* cas_interruptN(); */ + } +#endif +#ifdef USE_PCI_INTD + if (N_RX_COMP_RINGS > 3) { + /* cas_interruptN(); */ + } +#endif +} +#endif + +static void cas_tx_timeout(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + + printk(KERN_ERR "%s: transmit timed out, resetting\n", dev->name); + if (!cp->hw_running) { + printk("%s: hrm.. hw not running!\n", dev->name); + return; + } + + printk(KERN_ERR "%s: MIF_STATE[%08x]\n", + dev->name, readl(cp->regs + REG_MIF_STATE_MACHINE)); + + printk(KERN_ERR "%s: MAC_STATE[%08x]\n", + dev->name, readl(cp->regs + REG_MAC_STATE_MACHINE)); + + printk(KERN_ERR "%s: TX_STATE[%08x:%08x:%08x] " + "FIFO[%08x:%08x:%08x] SM1[%08x] SM2[%08x]\n", + dev->name, + readl(cp->regs + REG_TX_CFG), + readl(cp->regs + REG_MAC_TX_STATUS), + readl(cp->regs + REG_MAC_TX_CFG), + readl(cp->regs + REG_TX_FIFO_PKT_CNT), + readl(cp->regs + REG_TX_FIFO_WRITE_PTR), + readl(cp->regs + REG_TX_FIFO_READ_PTR), + readl(cp->regs + REG_TX_SM_1), + readl(cp->regs + REG_TX_SM_2)); + + printk(KERN_ERR "%s: RX_STATE[%08x:%08x:%08x]\n", + dev->name, + readl(cp->regs + REG_RX_CFG), + readl(cp->regs + REG_MAC_RX_STATUS), + readl(cp->regs + REG_MAC_RX_CFG)); + + printk(KERN_ERR "%s: HP_STATE[%08x:%08x:%08x:%08x]\n", + dev->name, + readl(cp->regs + REG_HP_STATE_MACHINE), + readl(cp->regs + REG_HP_STATUS0), + readl(cp->regs + REG_HP_STATUS1), + readl(cp->regs + REG_HP_STATUS2)); + +#if 1 + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_all); + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, CAS_RESET_ALL); + schedule_work(&cp->reset_task); +#endif +} + +static inline int cas_intme(int ring, int entry) +{ + /* Algorithm: IRQ every 1/2 of descriptors. 
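+	 * i.e. set INTME whenever the entry index is a multiple of half
+	 * the ring size, so we take at most two tx completion interrupts
+	 * per trip around the ring.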
*/ + if (!(entry & ((TX_DESC_RINGN_SIZE(ring) >> 1) - 1))) + return 1; + return 0; +} + + +static void cas_write_txd(struct cas *cp, int ring, int entry, + dma_addr_t mapping, int len, u64 ctrl, int last) +{ + struct cas_tx_desc *txd = cp->init_txds[ring] + entry; + + ctrl |= CAS_BASE(TX_DESC_BUFLEN, len); + if (cas_intme(ring, entry)) + ctrl |= TX_DESC_INTME; + if (last) + ctrl |= TX_DESC_EOF; + txd->control = cpu_to_le64(ctrl); + txd->buffer = cpu_to_le64(mapping); +} + +static inline void *tx_tiny_buf(struct cas *cp, const int ring, + const int entry) +{ + return cp->tx_tiny_bufs[ring] + TX_TINY_BUF_LEN*entry; +} + +static inline dma_addr_t tx_tiny_map(struct cas *cp, const int ring, + const int entry, const int tentry) +{ + cp->tx_tiny_use[ring][tentry].nbufs++; + cp->tx_tiny_use[ring][entry].used = 1; + return cp->tx_tiny_dvma[ring] + TX_TINY_BUF_LEN*entry; +} + +static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, + struct sk_buff *skb) +{ + struct net_device *dev = cp->dev; + int entry, nr_frags, frag, tabort, tentry; + dma_addr_t mapping; + unsigned long flags; + u64 ctrl; + u32 len; + + spin_lock_irqsave(&cp->tx_lock[ring], flags); + + /* This is a hard error, log it. */ + if (TX_BUFFS_AVAIL(cp, ring) <= + CAS_TABORT(cp)*(skb_shinfo(skb)->nr_frags + 1)) { + netif_stop_queue(dev); + spin_unlock_irqrestore(&cp->tx_lock[ring], flags); + printk(KERN_ERR PFX "%s: BUG! Tx Ring full when " + "queue awake!\n", dev->name); + return 1; + } + + ctrl = 0; + if (skb->ip_summed == CHECKSUM_HW) { + u64 csum_start_off, csum_stuff_off; + + csum_start_off = (u64) (skb->h.raw - skb->data); + csum_stuff_off = (u64) ((skb->h.raw + skb->csum) - skb->data); + + ctrl = TX_DESC_CSUM_EN | + CAS_BASE(TX_DESC_CSUM_START, csum_start_off) | + CAS_BASE(TX_DESC_CSUM_STUFF, csum_stuff_off); + } + + entry = cp->tx_new[ring]; + cp->tx_skbs[ring][entry] = skb; + + nr_frags = skb_shinfo(skb)->nr_frags; + len = skb_headlen(skb); + mapping = pci_map_page(cp->pdev, virt_to_page(skb->data), + offset_in_page(skb->data), len, + PCI_DMA_TODEVICE); + + tentry = entry; + tabort = cas_calc_tabort(cp, (unsigned long) skb->data, len); + if (unlikely(tabort)) { + /* NOTE: len is always > tabort */ + cas_write_txd(cp, ring, entry, mapping, len - tabort, + ctrl | TX_DESC_SOF, 0); + entry = TX_DESC_NEXT(ring, entry); + + memcpy(tx_tiny_buf(cp, ring, entry), skb->data + + len - tabort, tabort); + mapping = tx_tiny_map(cp, ring, entry, tentry); + cas_write_txd(cp, ring, entry, mapping, tabort, ctrl, + (nr_frags == 0)); + } else { + cas_write_txd(cp, ring, entry, mapping, len, ctrl | + TX_DESC_SOF, (nr_frags == 0)); + } + entry = TX_DESC_NEXT(ring, entry); + + for (frag = 0; frag < nr_frags; frag++) { + skb_frag_t *fragp = &skb_shinfo(skb)->frags[frag]; + + len = fragp->size; + mapping = pci_map_page(cp->pdev, fragp->page, + fragp->page_offset, len, + PCI_DMA_TODEVICE); + + tabort = cas_calc_tabort(cp, fragp->page_offset, len); + if (unlikely(tabort)) { + void *addr; + + /* NOTE: len is always > tabort */ + cas_write_txd(cp, ring, entry, mapping, len - tabort, + ctrl, 0); + entry = TX_DESC_NEXT(ring, entry); + + addr = cas_page_map(fragp->page); + memcpy(tx_tiny_buf(cp, ring, entry), + addr + fragp->page_offset + len - tabort, + tabort); + cas_page_unmap(addr); + mapping = tx_tiny_map(cp, ring, entry, tentry); + len = tabort; + } + + cas_write_txd(cp, ring, entry, mapping, len, ctrl, + (frag + 1 == nr_frags)); + entry = TX_DESC_NEXT(ring, entry); + } + + cp->tx_new[ring] = entry; + if (TX_BUFFS_AVAIL(cp, ring) <= 
CAS_TABORT(cp)*(MAX_SKB_FRAGS + 1)) + netif_stop_queue(dev); + + if (netif_msg_tx_queued(cp)) + printk(KERN_DEBUG "%s: tx[%d] queued, slot %d, skblen %d, " + "avail %d\n", + dev->name, ring, entry, skb->len, + TX_BUFFS_AVAIL(cp, ring)); + writel(entry, cp->regs + REG_TX_KICKN(ring)); + spin_unlock_irqrestore(&cp->tx_lock[ring], flags); + return 0; +} + +static int cas_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + + /* this is only used as a load-balancing hint, so it doesn't + * need to be SMP safe + */ + static int ring; + + skb = skb_padto(skb, cp->min_frame_size); + if (!skb) + return 0; + + /* XXX: we need some higher-level QoS hooks to steer packets to + * individual queues. + */ + if (cas_xmit_tx_ringN(cp, ring++ & N_TX_RINGS_MASK, skb)) + return 1; + dev->trans_start = jiffies; + return 0; +} + +static void cas_init_tx_dma(struct cas *cp) +{ + u64 desc_dma = cp->block_dvma; + unsigned long off; + u32 val; + int i; + + /* set up tx completion writeback registers. must be 8-byte aligned */ +#ifdef USE_TX_COMPWB + off = offsetof(struct cas_init_block, tx_compwb); + writel((desc_dma + off) >> 32, cp->regs + REG_TX_COMPWB_DB_HI); + writel((desc_dma + off) & 0xffffffff, cp->regs + REG_TX_COMPWB_DB_LOW); +#endif + + /* enable completion writebacks, enable paced mode, + * disable read pipe, and disable pre-interrupt compwbs + */ + val = TX_CFG_COMPWB_Q1 | TX_CFG_COMPWB_Q2 | + TX_CFG_COMPWB_Q3 | TX_CFG_COMPWB_Q4 | + TX_CFG_DMA_RDPIPE_DIS | TX_CFG_PACED_MODE | + TX_CFG_INTR_COMPWB_DIS; + + /* write out tx ring info and tx desc bases */ + for (i = 0; i < MAX_TX_RINGS; i++) { + off = (unsigned long) cp->init_txds[i] - + (unsigned long) cp->init_block; + + val |= CAS_TX_RINGN_BASE(i); + writel((desc_dma + off) >> 32, cp->regs + REG_TX_DBN_HI(i)); + writel((desc_dma + off) & 0xffffffff, cp->regs + + REG_TX_DBN_LOW(i)); + /* don't zero out the kick register here as the system + * will wedge + */ + } + writel(val, cp->regs + REG_TX_CFG); + + /* program max burst sizes. these numbers should be different + * if doing QoS. + */ +#ifdef USE_QOS + writel(0x800, cp->regs + REG_TX_MAXBURST_0); + writel(0x1600, cp->regs + REG_TX_MAXBURST_1); + writel(0x2400, cp->regs + REG_TX_MAXBURST_2); + writel(0x4800, cp->regs + REG_TX_MAXBURST_3); +#else + writel(0x800, cp->regs + REG_TX_MAXBURST_0); + writel(0x800, cp->regs + REG_TX_MAXBURST_1); + writel(0x800, cp->regs + REG_TX_MAXBURST_2); + writel(0x800, cp->regs + REG_TX_MAXBURST_3); +#endif +} + +/* Must be invoked under cp->lock. */ +static inline void cas_init_dma(struct cas *cp) +{ + cas_init_tx_dma(cp); + cas_init_rx_dma(cp); +} + +/* Must be invoked under cp->lock. 
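+ * builds the MAC RX config bits for the current filtering mode:
+ * promiscuous, hash-all-multicast, or up to 15 exact matches plus a
+ * hash table for the rest.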
*/ +static u32 cas_setup_multicast(struct cas *cp) +{ + u32 rxcfg = 0; + int i; + + if (cp->dev->flags & IFF_PROMISC) { + rxcfg |= MAC_RX_CFG_PROMISC_EN; + + } else if (cp->dev->flags & IFF_ALLMULTI) { + for (i=0; i < 16; i++) + writel(0xFFFF, cp->regs + REG_MAC_HASH_TABLEN(i)); + rxcfg |= MAC_RX_CFG_HASH_FILTER_EN; + + } else { + u16 hash_table[16]; + u32 crc; + struct dev_mc_list *dmi = cp->dev->mc_list; + int i; + + /* use the alternate mac address registers for the + * first 15 multicast addresses + */ + for (i = 1; i <= CAS_MC_EXACT_MATCH_SIZE; i++) { + if (!dmi) { + writel(0x0, cp->regs + REG_MAC_ADDRN(i*3 + 0)); + writel(0x0, cp->regs + REG_MAC_ADDRN(i*3 + 1)); + writel(0x0, cp->regs + REG_MAC_ADDRN(i*3 + 2)); + continue; + } + writel((dmi->dmi_addr[4] << 8) | dmi->dmi_addr[5], + cp->regs + REG_MAC_ADDRN(i*3 + 0)); + writel((dmi->dmi_addr[2] << 8) | dmi->dmi_addr[3], + cp->regs + REG_MAC_ADDRN(i*3 + 1)); + writel((dmi->dmi_addr[0] << 8) | dmi->dmi_addr[1], + cp->regs + REG_MAC_ADDRN(i*3 + 2)); + dmi = dmi->next; + } + + /* use hw hash table for the next series of + * multicast addresses + */ + memset(hash_table, 0, sizeof(hash_table)); + while (dmi) { + crc = ether_crc_le(ETH_ALEN, dmi->dmi_addr); + crc >>= 24; + hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf)); + dmi = dmi->next; + } + for (i=0; i < 16; i++) + writel(hash_table[i], cp->regs + + REG_MAC_HASH_TABLEN(i)); + rxcfg |= MAC_RX_CFG_HASH_FILTER_EN; + } + + return rxcfg; +} + +/* must be invoked under cp->stat_lock[N_TX_RINGS] */ +static void cas_clear_mac_err(struct cas *cp) +{ + writel(0, cp->regs + REG_MAC_COLL_NORMAL); + writel(0, cp->regs + REG_MAC_COLL_FIRST); + writel(0, cp->regs + REG_MAC_COLL_EXCESS); + writel(0, cp->regs + REG_MAC_COLL_LATE); + writel(0, cp->regs + REG_MAC_TIMER_DEFER); + writel(0, cp->regs + REG_MAC_ATTEMPTS_PEAK); + writel(0, cp->regs + REG_MAC_RECV_FRAME); + writel(0, cp->regs + REG_MAC_LEN_ERR); + writel(0, cp->regs + REG_MAC_ALIGN_ERR); + writel(0, cp->regs + REG_MAC_FCS_ERR); + writel(0, cp->regs + REG_MAC_RX_CODE_ERR); +} + + +static void cas_mac_reset(struct cas *cp) +{ + int i; + + /* do both TX and RX reset */ + writel(0x1, cp->regs + REG_MAC_TX_RESET); + writel(0x1, cp->regs + REG_MAC_RX_RESET); + + /* wait for TX */ + i = STOP_TRIES; + while (i-- > 0) { + if (readl(cp->regs + REG_MAC_TX_RESET) == 0) + break; + udelay(10); + } + + /* wait for RX */ + i = STOP_TRIES; + while (i-- > 0) { + if (readl(cp->regs + REG_MAC_RX_RESET) == 0) + break; + udelay(10); + } + + if (readl(cp->regs + REG_MAC_TX_RESET) | + readl(cp->regs + REG_MAC_RX_RESET)) + printk(KERN_ERR "%s: mac tx[%d]/rx[%d] reset failed [%08x]\n", + cp->dev->name, readl(cp->regs + REG_MAC_TX_RESET), + readl(cp->regs + REG_MAC_RX_RESET), + readl(cp->regs + REG_MAC_STATE_MACHINE)); +} + + +/* Must be invoked under cp->lock. */ +static void cas_init_mac(struct cas *cp) +{ + unsigned char *e = &cp->dev->dev_addr[0]; + int i; +#ifdef CONFIG_CASSINI_MULTICAST_REG_WRITE + u32 rxcfg; +#endif + cas_mac_reset(cp); + + /* setup core arbitration weight register */ + writel(CAWR_RR_DIS, cp->regs + REG_CAWR); + + /* XXX Use pci_dma_burst_advice() */ +#if !defined(CONFIG_SPARC64) && !defined(CONFIG_ALPHA) + /* set the infinite burst register for chips that don't have + * pci issues. 
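+	 * parts with the target-abort erratum keep the default burst
+	 * behaviour.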
+ */ + if ((cp->cas_flags & CAS_FLAG_TARGET_ABORT) == 0) + writel(INF_BURST_EN, cp->regs + REG_INF_BURST); +#endif + + writel(0x1BF0, cp->regs + REG_MAC_SEND_PAUSE); + + writel(0x00, cp->regs + REG_MAC_IPG0); + writel(0x08, cp->regs + REG_MAC_IPG1); + writel(0x04, cp->regs + REG_MAC_IPG2); + + /* change later for 802.3z */ + writel(0x40, cp->regs + REG_MAC_SLOT_TIME); + + /* min frame + FCS */ + writel(ETH_ZLEN + 4, cp->regs + REG_MAC_FRAMESIZE_MIN); + + /* Ethernet payload + header + FCS + optional VLAN tag. NOTE: we + * specify the maximum frame size to prevent RX tag errors on + * oversized frames. + */ + writel(CAS_BASE(MAC_FRAMESIZE_MAX_BURST, 0x2000) | + CAS_BASE(MAC_FRAMESIZE_MAX_FRAME, + (CAS_MAX_MTU + ETH_HLEN + 4 + 4)), + cp->regs + REG_MAC_FRAMESIZE_MAX); + + /* NOTE: crc_size is used as a surrogate for half-duplex. + * workaround saturn half-duplex issue by increasing preamble + * size to 65 bytes. + */ + if ((cp->cas_flags & CAS_FLAG_SATURN) && cp->crc_size) + writel(0x41, cp->regs + REG_MAC_PA_SIZE); + else + writel(0x07, cp->regs + REG_MAC_PA_SIZE); + writel(0x04, cp->regs + REG_MAC_JAM_SIZE); + writel(0x10, cp->regs + REG_MAC_ATTEMPT_LIMIT); + writel(0x8808, cp->regs + REG_MAC_CTRL_TYPE); + + writel((e[5] | (e[4] << 8)) & 0x3ff, cp->regs + REG_MAC_RANDOM_SEED); + + writel(0, cp->regs + REG_MAC_ADDR_FILTER0); + writel(0, cp->regs + REG_MAC_ADDR_FILTER1); + writel(0, cp->regs + REG_MAC_ADDR_FILTER2); + writel(0, cp->regs + REG_MAC_ADDR_FILTER2_1_MASK); + writel(0, cp->regs + REG_MAC_ADDR_FILTER0_MASK); + + /* setup mac address in perfect filter array */ + for (i = 0; i < 45; i++) + writel(0x0, cp->regs + REG_MAC_ADDRN(i)); + + writel((e[4] << 8) | e[5], cp->regs + REG_MAC_ADDRN(0)); + writel((e[2] << 8) | e[3], cp->regs + REG_MAC_ADDRN(1)); + writel((e[0] << 8) | e[1], cp->regs + REG_MAC_ADDRN(2)); + + writel(0x0001, cp->regs + REG_MAC_ADDRN(42)); + writel(0xc200, cp->regs + REG_MAC_ADDRN(43)); + writel(0x0180, cp->regs + REG_MAC_ADDRN(44)); + +#ifndef CONFIG_CASSINI_MULTICAST_REG_WRITE + cp->mac_rx_cfg = cas_setup_multicast(cp); +#else + /* WTZ: Do what Adrian did in cas_set_multicast. Doing + * a writel does not seem to be necessary because Cassini + * seems to preserve the configuration when we do the reset. + * If the chip is in trouble, though, it is not clear if we + * can really count on this behavior. cas_set_multicast uses + * spin_lock_irqsave, but we are called only in cas_init_hw and + * cas_init_hw is protected by cas_lock_all, which calls + * spin_lock_irq (so it doesn't need to save the flags, and + * we should be OK for the writel, as that is the only + * difference). + */ + cp->mac_rx_cfg = rxcfg = cas_setup_multicast(cp); + writel(rxcfg, cp->regs + REG_MAC_RX_CFG); +#endif + spin_lock(&cp->stat_lock[N_TX_RINGS]); + cas_clear_mac_err(cp); + spin_unlock(&cp->stat_lock[N_TX_RINGS]); + + /* Setup MAC interrupts. We want to get all of the interesting + * counter expiration events, but we do not want to hear about + * normal rx/tx as the DMA engine tells us that. + */ + writel(MAC_TX_FRAME_XMIT, cp->regs + REG_MAC_TX_MASK); + writel(MAC_RX_FRAME_RECV, cp->regs + REG_MAC_RX_MASK); + + /* Don't enable even the PAUSE interrupts for now, we + * make no use of those events other than to record them. + */ + writel(0xffffffff, cp->regs + REG_MAC_CTRL_MASK); +} + +/* Must be invoked under cp->lock. */ +static void cas_init_pause_thresholds(struct cas *cp) +{ + /* Calculate pause thresholds. 
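+	 * the ON/OFF watermarks are derived from how many maximum-sized
+	 * frames fit into the RX fifo.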
Setting the OFF threshold to the + * full RX fifo size effectively disables PAUSE generation + */ + if (cp->rx_fifo_size <= (2 * 1024)) { + cp->rx_pause_off = cp->rx_pause_on = cp->rx_fifo_size; + } else { + int max_frame = (cp->dev->mtu + ETH_HLEN + 4 + 4 + 64) & ~63; + if (max_frame * 3 > cp->rx_fifo_size) { + cp->rx_pause_off = 7104; + cp->rx_pause_on = 960; + } else { + int off = (cp->rx_fifo_size - (max_frame * 2)); + int on = off - max_frame; + cp->rx_pause_off = off; + cp->rx_pause_on = on; + } + } +} + +static int cas_vpd_match(const void __iomem *p, const char *str) +{ + int len = strlen(str) + 1; + int i; + + for (i = 0; i < len; i++) { + if (readb(p + i) != str[i]) + return 0; + } + return 1; +} + + +/* get the mac address by reading the vpd information in the rom. + * also get the phy type and determine if there's an entropy generator. + * NOTE: this is a bit convoluted for the following reasons: + * 1) vpd info has order-dependent mac addresses for multinic cards + * 2) the only way to determine the nic order is to use the slot + * number. + * 3) fiber cards don't have bridges, so their slot numbers don't + * mean anything. + * 4) we don't actually know we have a fiber card until after + * the mac addresses are parsed. + */ +static int cas_get_vpd_info(struct cas *cp, unsigned char *dev_addr, + const int offset) +{ + void __iomem *p = cp->regs + REG_EXPANSION_ROM_RUN_START; + void __iomem *base, *kstart; + int i, len; + int found = 0; +#define VPD_FOUND_MAC 0x01 +#define VPD_FOUND_PHY 0x02 + + int phy_type = CAS_PHY_MII_MDIO0; /* default phy type */ + int mac_off = 0; + + /* give us access to the PROM */ + writel(BIM_LOCAL_DEV_PROM | BIM_LOCAL_DEV_PAD, + cp->regs + REG_BIM_LOCAL_DEV_EN); + + /* check for an expansion rom */ + if (readb(p) != 0x55 || readb(p + 1) != 0xaa) + goto use_random_mac_addr; + + /* search for beginning of vpd */ + base = 0; + for (i = 2; i < EXPANSION_ROM_SIZE; i++) { + /* check for PCIR */ + if ((readb(p + i + 0) == 0x50) && + (readb(p + i + 1) == 0x43) && + (readb(p + i + 2) == 0x49) && + (readb(p + i + 3) == 0x52)) { + base = p + (readb(p + i + 8) | + (readb(p + i + 9) << 8)); + break; + } + } + + if (!base || (readb(base) != 0x82)) + goto use_random_mac_addr; + + i = (readb(base + 1) | (readb(base + 2) << 8)) + 3; + while (i < EXPANSION_ROM_SIZE) { + if (readb(base + i) != 0x90) /* no vpd found */ + goto use_random_mac_addr; + + /* found a vpd field */ + len = readb(base + i + 1) | (readb(base + i + 2) << 8); + + /* extract keywords */ + kstart = base + i + 3; + p = kstart; + while ((p - kstart) < len) { + int klen = readb(p + 2); + int j; + char type; + + p += 3; + + /* look for the following things: + * -- correct length == 29 + * 3 (type) + 2 (size) + + * 18 (strlen("local-mac-address") + 1) + + * 6 (mac addr) + * -- VPD Instance 'I' + * -- VPD Type Bytes 'B' + * -- VPD data length == 6 + * -- property string == local-mac-address + * + * -- correct length == 24 + * 3 (type) + 2 (size) + + * 12 (strlen("entropy-dev") + 1) + + * 7 (strlen("vms110") + 1) + * -- VPD Instance 'I' + * -- VPD Type String 'B' + * -- VPD data length == 7 + * -- property string == entropy-dev + * + * -- correct length == 18 + * 3 (type) + 2 (size) + + * 9 (strlen("phy-type") + 1) + + * 4 (strlen("pcs") + 1) + * -- VPD Instance 'I' + * -- VPD Type String 'S' + * -- VPD data length == 4 + * -- property string == phy-type + * + * -- correct length == 23 + * 3 (type) + 2 (size) + + * 14 (strlen("phy-interface") + 1) + + * 4 (strlen("pcs") + 1) + * -- VPD Instance 'I' + * -- 
VPD Type String 'S' + * -- VPD data length == 4 + * -- property string == phy-interface + */ + if (readb(p) != 'I') + goto next; + + /* finally, check string and length */ + type = readb(p + 3); + if (type == 'B') { + if ((klen == 29) && readb(p + 4) == 6 && + cas_vpd_match(p + 5, + "local-mac-address")) { + if (mac_off++ > offset) + goto next; + + /* set mac address */ + for (j = 0; j < 6; j++) + dev_addr[j] = + readb(p + 23 + j); + goto found_mac; + } + } + + if (type != 'S') + goto next; + +#ifdef USE_ENTROPY_DEV + if ((klen == 24) && + cas_vpd_match(p + 5, "entropy-dev") && + cas_vpd_match(p + 17, "vms110")) { + cp->cas_flags |= CAS_FLAG_ENTROPY_DEV; + goto next; + } +#endif + + if (found & VPD_FOUND_PHY) + goto next; + + if ((klen == 18) && readb(p + 4) == 4 && + cas_vpd_match(p + 5, "phy-type")) { + if (cas_vpd_match(p + 14, "pcs")) { + phy_type = CAS_PHY_SERDES; + goto found_phy; + } + } + + if ((klen == 23) && readb(p + 4) == 4 && + cas_vpd_match(p + 5, "phy-interface")) { + if (cas_vpd_match(p + 19, "pcs")) { + phy_type = CAS_PHY_SERDES; + goto found_phy; + } + } +found_mac: + found |= VPD_FOUND_MAC; + goto next; + +found_phy: + found |= VPD_FOUND_PHY; + +next: + p += klen; + } + i += len + 3; + } + +use_random_mac_addr: + if (found & VPD_FOUND_MAC) + goto done; + + /* Sun MAC prefix then 3 random bytes. */ + printk(PFX "MAC address not found in ROM VPD\n"); + dev_addr[0] = 0x08; + dev_addr[1] = 0x00; + dev_addr[2] = 0x20; + get_random_bytes(dev_addr + 3, 3); + +done: + writel(0, cp->regs + REG_BIM_LOCAL_DEV_EN); + return phy_type; +} + +/* check pci invariants */ +static void cas_check_pci_invariants(struct cas *cp) +{ + struct pci_dev *pdev = cp->pdev; + u8 rev; + + cp->cas_flags = 0; + pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); + if ((pdev->vendor == PCI_VENDOR_ID_SUN) && + (pdev->device == PCI_DEVICE_ID_SUN_CASSINI)) { + if (rev >= CAS_ID_REVPLUS) + cp->cas_flags |= CAS_FLAG_REG_PLUS; + if (rev < CAS_ID_REVPLUS02u) + cp->cas_flags |= CAS_FLAG_TARGET_ABORT; + + /* Original Cassini supports HW CSUM, but it's not + * enabled by default as it can trigger TX hangs. + */ + if (rev < CAS_ID_REV2) + cp->cas_flags |= CAS_FLAG_NO_HW_CSUM; + } else { + /* Only sun has original cassini chips. */ + cp->cas_flags |= CAS_FLAG_REG_PLUS; + + /* We use a flag because the same phy might be externally + * connected. + */ + if ((pdev->vendor == PCI_VENDOR_ID_NS) && + (pdev->device == PCI_DEVICE_ID_NS_SATURN)) + cp->cas_flags |= CAS_FLAG_SATURN; + } +} + + +static int cas_check_invariants(struct cas *cp) +{ + struct pci_dev *pdev = cp->pdev; + u32 cfg; + int i; + + /* get page size for rx buffers. */ + cp->page_order = 0; +#ifdef USE_PAGE_ORDER + if (PAGE_SHIFT < CAS_JUMBO_PAGE_SHIFT) { + /* see if we can allocate larger pages */ + struct page *page = alloc_pages(GFP_ATOMIC, + CAS_JUMBO_PAGE_SHIFT - + PAGE_SHIFT); + if (page) { + __free_pages(page, CAS_JUMBO_PAGE_SHIFT - PAGE_SHIFT); + cp->page_order = CAS_JUMBO_PAGE_SHIFT - PAGE_SHIFT; + } else { + printk(PFX "MTU limited to %d bytes\n", CAS_MAX_MTU); + } + } +#endif + cp->page_size = (PAGE_SIZE << cp->page_order); + + /* Fetch the FIFO configurations. */ + cp->tx_fifo_size = readl(cp->regs + REG_TX_FIFO_SIZE) * 64; + cp->rx_fifo_size = RX_FIFO_SIZE; + + /* finish phy determination. MDIO1 takes precedence over MDIO0 if + * they're both connected. 
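+ * a SERDES phy reported by the VPD short-circuits the MII probe
+ * below entirely.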
+ */ + cp->phy_type = cas_get_vpd_info(cp, cp->dev->dev_addr, + PCI_SLOT(pdev->devfn)); + if (cp->phy_type & CAS_PHY_SERDES) { + cp->cas_flags |= CAS_FLAG_1000MB_CAP; + return 0; /* no more checking needed */ + } + + /* MII */ + cfg = readl(cp->regs + REG_MIF_CFG); + if (cfg & MIF_CFG_MDIO_1) { + cp->phy_type = CAS_PHY_MII_MDIO1; + } else if (cfg & MIF_CFG_MDIO_0) { + cp->phy_type = CAS_PHY_MII_MDIO0; + } + + cas_mif_poll(cp, 0); + writel(PCS_DATAPATH_MODE_MII, cp->regs + REG_PCS_DATAPATH_MODE); + + for (i = 0; i < 32; i++) { + u32 phy_id; + int j; + + for (j = 0; j < 3; j++) { + cp->phy_addr = i; + phy_id = cas_phy_read(cp, MII_PHYSID1) << 16; + phy_id |= cas_phy_read(cp, MII_PHYSID2); + if (phy_id && (phy_id != 0xFFFFFFFF)) { + cp->phy_id = phy_id; + goto done; + } + } + } + printk(KERN_ERR PFX "MII phy did not respond [%08x]\n", + readl(cp->regs + REG_MIF_STATE_MACHINE)); + return -1; + +done: + /* see if we can do gigabit */ + cfg = cas_phy_read(cp, MII_BMSR); + if ((cfg & CAS_BMSR_1000_EXTEND) && + cas_phy_read(cp, CAS_MII_1000_EXTEND)) + cp->cas_flags |= CAS_FLAG_1000MB_CAP; + return 0; +} + +/* Must be invoked under cp->lock. */ +static inline void cas_start_dma(struct cas *cp) +{ + int i; + u32 val; + int txfailed = 0; + + /* enable dma */ + val = readl(cp->regs + REG_TX_CFG) | TX_CFG_DMA_EN; + writel(val, cp->regs + REG_TX_CFG); + val = readl(cp->regs + REG_RX_CFG) | RX_CFG_DMA_EN; + writel(val, cp->regs + REG_RX_CFG); + + /* enable the mac */ + val = readl(cp->regs + REG_MAC_TX_CFG) | MAC_TX_CFG_EN; + writel(val, cp->regs + REG_MAC_TX_CFG); + val = readl(cp->regs + REG_MAC_RX_CFG) | MAC_RX_CFG_EN; + writel(val, cp->regs + REG_MAC_RX_CFG); + + i = STOP_TRIES; + while (i-- > 0) { + val = readl(cp->regs + REG_MAC_TX_CFG); + if ((val & MAC_TX_CFG_EN)) + break; + udelay(10); + } + if (i < 0) txfailed = 1; + i = STOP_TRIES; + while (i-- > 0) { + val = readl(cp->regs + REG_MAC_RX_CFG); + if ((val & MAC_RX_CFG_EN)) { + if (txfailed) { + printk(KERN_ERR + "%s: enabling mac failed [tx:%08x:%08x].\n", + cp->dev->name, + readl(cp->regs + REG_MIF_STATE_MACHINE), + readl(cp->regs + REG_MAC_STATE_MACHINE)); + } + goto enable_rx_done; + } + udelay(10); + } + printk(KERN_ERR "%s: enabling mac failed [%s:%08x:%08x].\n", + cp->dev->name, + (txfailed? "tx,rx":"rx"), + readl(cp->regs + REG_MIF_STATE_MACHINE), + readl(cp->regs + REG_MAC_STATE_MACHINE)); + +enable_rx_done: + cas_unmask_intr(cp); /* enable interrupts */ + writel(RX_DESC_RINGN_SIZE(0) - 4, cp->regs + REG_RX_KICK); + writel(0, cp->regs + REG_RX_COMP_TAIL); + + if (cp->cas_flags & CAS_FLAG_REG_PLUS) { + if (N_RX_DESC_RINGS > 1) + writel(RX_DESC_RINGN_SIZE(1) - 4, + cp->regs + REG_PLUS_RX_KICK1); + + for (i = 1; i < N_RX_COMP_RINGS; i++) + writel(0, cp->regs + REG_PLUS_RX_COMPN_TAIL(i)); + } +} + +/* Must be invoked under cp->lock. */ +static void cas_read_pcs_link_mode(struct cas *cp, int *fd, int *spd, + int *pause) +{ + u32 val = readl(cp->regs + REG_PCS_MII_LPA); + *fd = (val & PCS_MII_LPA_FD) ? 1 : 0; + *pause = (val & PCS_MII_LPA_SYM_PAUSE) ? 0x01 : 0x00; + if (val & PCS_MII_LPA_ASYM_PAUSE) + *pause |= 0x10; + *spd = 1000; +} + +/* Must be invoked under cp->lock. 
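+ * decode the MII/GMII link partner ability into duplex, speed and
+ * pause flags.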
*/ +static void cas_read_mii_link_mode(struct cas *cp, int *fd, int *spd, + int *pause) +{ + u32 val; + + *fd = 0; + *spd = 10; + *pause = 0; + + /* use GMII registers */ + val = cas_phy_read(cp, MII_LPA); + if (val & CAS_LPA_PAUSE) + *pause = 0x01; + + if (val & CAS_LPA_ASYM_PAUSE) + *pause |= 0x10; + + if (val & LPA_DUPLEX) + *fd = 1; + if (val & LPA_100) + *spd = 100; + + if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { + val = cas_phy_read(cp, CAS_MII_1000_STATUS); + if (val & (CAS_LPA_1000FULL | CAS_LPA_1000HALF)) + *spd = 1000; + if (val & CAS_LPA_1000FULL) + *fd = 1; + } +} + +/* A link-up condition has occurred, initialize and enable the + * rest of the chip. + * + * Must be invoked under cp->lock. + */ +static void cas_set_link_modes(struct cas *cp) +{ + u32 val; + int full_duplex, speed, pause; + + full_duplex = 0; + speed = 10; + pause = 0; + + if (CAS_PHY_MII(cp->phy_type)) { + cas_mif_poll(cp, 0); + val = cas_phy_read(cp, MII_BMCR); + if (val & BMCR_ANENABLE) { + cas_read_mii_link_mode(cp, &full_duplex, &speed, + &pause); + } else { + if (val & BMCR_FULLDPLX) + full_duplex = 1; + + if (val & BMCR_SPEED100) + speed = 100; + else if (val & CAS_BMCR_SPEED1000) + speed = (cp->cas_flags & CAS_FLAG_1000MB_CAP) ? + 1000 : 100; + } + cas_mif_poll(cp, 1); + + } else { + val = readl(cp->regs + REG_PCS_MII_CTRL); + cas_read_pcs_link_mode(cp, &full_duplex, &speed, &pause); + if ((val & PCS_MII_AUTONEG_EN) == 0) { + if (val & PCS_MII_CTRL_DUPLEX) + full_duplex = 1; + } + } + + if (netif_msg_link(cp)) + printk(KERN_INFO "%s: Link up at %d Mbps, %s-duplex.\n", + cp->dev->name, speed, (full_duplex ? "full" : "half")); + + val = MAC_XIF_TX_MII_OUTPUT_EN | MAC_XIF_LINK_LED; + if (CAS_PHY_MII(cp->phy_type)) { + val |= MAC_XIF_MII_BUFFER_OUTPUT_EN; + if (!full_duplex) + val |= MAC_XIF_DISABLE_ECHO; + } + if (full_duplex) + val |= MAC_XIF_FDPLX_LED; + if (speed == 1000) + val |= MAC_XIF_GMII_MODE; + writel(val, cp->regs + REG_MAC_XIF_CFG); + + /* deal with carrier and collision detect. */ + val = MAC_TX_CFG_IPG_EN; + if (full_duplex) { + val |= MAC_TX_CFG_IGNORE_CARRIER; + val |= MAC_TX_CFG_IGNORE_COLL; + } else { +#ifndef USE_CSMA_CD_PROTO + val |= MAC_TX_CFG_NEVER_GIVE_UP_EN; + val |= MAC_TX_CFG_NEVER_GIVE_UP_LIM; +#endif + } + /* val now set up for REG_MAC_TX_CFG */ + + /* If gigabit and half-duplex, enable carrier extension + * mode. increase slot time to 512 bytes as well. + * else, disable it and make sure slot time is 64 bytes. + * also activate checksum bug workaround + */ + if ((speed == 1000) && !full_duplex) { + writel(val | MAC_TX_CFG_CARRIER_EXTEND, + cp->regs + REG_MAC_TX_CFG); + + val = readl(cp->regs + REG_MAC_RX_CFG); + val &= ~MAC_RX_CFG_STRIP_FCS; /* checksum workaround */ + writel(val | MAC_RX_CFG_CARRIER_EXTEND, + cp->regs + REG_MAC_RX_CFG); + + writel(0x200, cp->regs + REG_MAC_SLOT_TIME); + + cp->crc_size = 4; + /* minimum size gigabit frame at half duplex */ + cp->min_frame_size = CAS_1000MB_MIN_FRAME; + + } else { + writel(val, cp->regs + REG_MAC_TX_CFG); + + /* checksum bug workaround. 
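+		 * the hardware checksum includes the FCS, so keep the
+		 * trailing crc bytes and strip them from the checksum in
+		 * the rx path;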
don't strip FCS when in + * half-duplex mode + */ + val = readl(cp->regs + REG_MAC_RX_CFG); + if (full_duplex) { + val |= MAC_RX_CFG_STRIP_FCS; + cp->crc_size = 0; + cp->min_frame_size = CAS_MIN_MTU; + } else { + val &= ~MAC_RX_CFG_STRIP_FCS; + cp->crc_size = 4; + cp->min_frame_size = CAS_MIN_FRAME; + } + writel(val & ~MAC_RX_CFG_CARRIER_EXTEND, + cp->regs + REG_MAC_RX_CFG); + writel(0x40, cp->regs + REG_MAC_SLOT_TIME); + } + + if (netif_msg_link(cp)) { + if (pause & 0x01) { + printk(KERN_INFO "%s: Pause is enabled " + "(rxfifo: %d off: %d on: %d)\n", + cp->dev->name, + cp->rx_fifo_size, + cp->rx_pause_off, + cp->rx_pause_on); + } else if (pause & 0x10) { + printk(KERN_INFO "%s: TX pause enabled\n", + cp->dev->name); + } else { + printk(KERN_INFO "%s: Pause is disabled\n", + cp->dev->name); + } + } + + val = readl(cp->regs + REG_MAC_CTRL_CFG); + val &= ~(MAC_CTRL_CFG_SEND_PAUSE_EN | MAC_CTRL_CFG_RECV_PAUSE_EN); + if (pause) { /* symmetric or asymmetric pause */ + val |= MAC_CTRL_CFG_SEND_PAUSE_EN; + if (pause & 0x01) { /* symmetric pause */ + val |= MAC_CTRL_CFG_RECV_PAUSE_EN; + } + } + writel(val, cp->regs + REG_MAC_CTRL_CFG); + cas_start_dma(cp); +} + +/* Must be invoked under cp->lock. */ +static void cas_init_hw(struct cas *cp, int restart_link) +{ + if (restart_link) + cas_phy_init(cp); + + cas_init_pause_thresholds(cp); + cas_init_mac(cp); + cas_init_dma(cp); + + if (restart_link) { + /* Default aneg parameters */ + cp->timer_ticks = 0; + cas_begin_auto_negotiation(cp, NULL); + } else if (cp->lstate == link_up) { + cas_set_link_modes(cp); + netif_carrier_on(cp->dev); + } +} + +/* Must be invoked under cp->lock. on earlier cassini boards, + * SOFT_0 is tied to PCI reset. we use this to force a pci reset, + * let it settle out, and then restore pci state. + */ +static void cas_hard_reset(struct cas *cp) +{ + writel(BIM_LOCAL_DEV_SOFT_0, cp->regs + REG_BIM_LOCAL_DEV_EN); + udelay(20); + pci_restore_state(cp->pdev); +} + + +static void cas_global_reset(struct cas *cp, int blkflag) +{ + int limit; + + /* issue a global reset. don't use RSTOUT. */ + if (blkflag && !CAS_PHY_MII(cp->phy_type)) { + /* For PCS, when the blkflag is set, we should set the + * SW_REST_BLOCK_PCS_SLINK bit to prevent the results of + * the last autonegotiation from being cleared. We'll + * need some special handling if the chip is set into a + * loopback mode. + */ + writel((SW_RESET_TX | SW_RESET_RX | SW_RESET_BLOCK_PCS_SLINK), + cp->regs + REG_SW_RESET); + } else { + writel(SW_RESET_TX | SW_RESET_RX, cp->regs + REG_SW_RESET); + } + + /* need to wait at least 3ms before polling register */ + mdelay(3); + + limit = STOP_TRIES; + while (limit-- > 0) { + u32 val = readl(cp->regs + REG_SW_RESET); + if ((val & (SW_RESET_TX | SW_RESET_RX)) == 0) + goto done; + udelay(10); + } + printk(KERN_ERR "%s: sw reset failed.\n", cp->dev->name); + +done: + /* enable various BIM interrupts */ + writel(BIM_CFG_DPAR_INTR_ENABLE | BIM_CFG_RMA_INTR_ENABLE | + BIM_CFG_RTA_INTR_ENABLE, cp->regs + REG_BIM_CFG); + + /* clear out pci error status mask for handled errors. + * we don't deal with DMA counter overflows as they happen + * all the time. 
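+	 * everything else stays masked, so only errors that
+	 * cas_pci_interrupt knows how to report can raise an interrupt.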
+ */ + writel(0xFFFFFFFFU & ~(PCI_ERR_BADACK | PCI_ERR_DTRTO | + PCI_ERR_OTHER | PCI_ERR_BIM_DMA_WRITE | + PCI_ERR_BIM_DMA_READ), cp->regs + + REG_PCI_ERR_STATUS_MASK); + + /* set up for MII by default to address mac rx reset timeout + * issue + */ + writel(PCS_DATAPATH_MODE_MII, cp->regs + REG_PCS_DATAPATH_MODE); +} + +static void cas_reset(struct cas *cp, int blkflag) +{ + u32 val; + + cas_mask_intr(cp); + cas_global_reset(cp, blkflag); + cas_mac_reset(cp); + cas_entropy_reset(cp); + + /* disable dma engines. */ + val = readl(cp->regs + REG_TX_CFG); + val &= ~TX_CFG_DMA_EN; + writel(val, cp->regs + REG_TX_CFG); + + val = readl(cp->regs + REG_RX_CFG); + val &= ~RX_CFG_DMA_EN; + writel(val, cp->regs + REG_RX_CFG); + + /* program header parser */ + if ((cp->cas_flags & CAS_FLAG_TARGET_ABORT) || + (CAS_HP_ALT_FIRMWARE == cas_prog_null)) { + cas_load_firmware(cp, CAS_HP_FIRMWARE); + } else { + cas_load_firmware(cp, CAS_HP_ALT_FIRMWARE); + } + + /* clear out error registers */ + spin_lock(&cp->stat_lock[N_TX_RINGS]); + cas_clear_mac_err(cp); + spin_unlock(&cp->stat_lock[N_TX_RINGS]); +} + +/* Shut down the chip, must be called with pm_sem held. */ +static void cas_shutdown(struct cas *cp) +{ + unsigned long flags; + + /* Make us not-running to avoid timers respawning */ + cp->hw_running = 0; + + del_timer_sync(&cp->link_timer); + + /* Stop the reset task */ +#if 0 + while (atomic_read(&cp->reset_task_pending_mtu) || + atomic_read(&cp->reset_task_pending_spare) || + atomic_read(&cp->reset_task_pending_all)) + schedule(); + +#else + while (atomic_read(&cp->reset_task_pending)) + schedule(); +#endif + /* Actually stop the chip */ + cas_lock_all_save(cp, flags); + cas_reset(cp, 0); + if (cp->cas_flags & CAS_FLAG_SATURN) + cas_phy_powerdown(cp); + cas_unlock_all_restore(cp, flags); +} + +static int cas_change_mtu(struct net_device *dev, int new_mtu) +{ + struct cas *cp = netdev_priv(dev); + + if (new_mtu < CAS_MIN_MTU || new_mtu > CAS_MAX_MTU) + return -EINVAL; + + dev->mtu = new_mtu; + if (!netif_running(dev) || !netif_device_present(dev)) + return 0; + + /* let the reset task handle it */ +#if 1 + atomic_inc(&cp->reset_task_pending); + if ((cp->phy_type & CAS_PHY_SERDES)) { + atomic_inc(&cp->reset_task_pending_all); + } else { + atomic_inc(&cp->reset_task_pending_mtu); + } + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, (cp->phy_type & CAS_PHY_SERDES) ? + CAS_RESET_ALL : CAS_RESET_MTU); + printk(KERN_ERR "reset called in cas_change_mtu\n"); + schedule_work(&cp->reset_task); +#endif + + flush_scheduled_work(); + return 0; +} + +static void cas_clean_txd(struct cas *cp, int ring) +{ + struct cas_tx_desc *txd = cp->init_txds[ring]; + struct sk_buff *skb, **skbs = cp->tx_skbs[ring]; + u64 daddr, dlen; + int i, size; + + size = TX_DESC_RINGN_SIZE(ring); + for (i = 0; i < size; i++) { + int frag; + + if (skbs[i] == NULL) + continue; + + skb = skbs[i]; + skbs[i] = NULL; + + for (frag = 0; frag <= skb_shinfo(skb)->nr_frags; frag++) { + int ent = i & (size - 1); + + /* first buffer is never a tiny buffer and so + * needs to be unmapped. + */ + daddr = le64_to_cpu(txd[ent].buffer); + dlen = CAS_VAL(TX_DESC_BUFLEN, + le64_to_cpu(txd[ent].control)); + pci_unmap_page(cp->pdev, daddr, dlen, + PCI_DMA_TODEVICE); + + if (frag != skb_shinfo(skb)->nr_frags) { + i++; + + /* next buffer might by a tiny buffer. + * skip past it. 
+ */ + ent = i & (size - 1); + if (cp->tx_tiny_use[ring][ent].used) + i++; + } + } + dev_kfree_skb_any(skb); + } + + /* zero out tiny buf usage */ + memset(cp->tx_tiny_use[ring], 0, size*sizeof(*cp->tx_tiny_use[ring])); +} + +/* freed on close */ +static inline void cas_free_rx_desc(struct cas *cp, int ring) +{ + cas_page_t **page = cp->rx_pages[ring]; + int i, size; + + size = RX_DESC_RINGN_SIZE(ring); + for (i = 0; i < size; i++) { + if (page[i]) { + cas_page_free(cp, page[i]); + page[i] = NULL; + } + } +} + +static void cas_free_rxds(struct cas *cp) +{ + int i; + + for (i = 0; i < N_RX_DESC_RINGS; i++) + cas_free_rx_desc(cp, i); +} + +/* Must be invoked under cp->lock. */ +static void cas_clean_rings(struct cas *cp) +{ + int i; + + /* need to clean all tx rings */ + memset(cp->tx_old, 0, sizeof(*cp->tx_old)*N_TX_RINGS); + memset(cp->tx_new, 0, sizeof(*cp->tx_new)*N_TX_RINGS); + for (i = 0; i < N_TX_RINGS; i++) + cas_clean_txd(cp, i); + + /* zero out init block */ + memset(cp->init_block, 0, sizeof(struct cas_init_block)); + cas_clean_rxds(cp); + cas_clean_rxcs(cp); +} + +/* allocated on open */ +static inline int cas_alloc_rx_desc(struct cas *cp, int ring) +{ + cas_page_t **page = cp->rx_pages[ring]; + int size, i = 0; + + size = RX_DESC_RINGN_SIZE(ring); + for (i = 0; i < size; i++) { + if ((page[i] = cas_page_alloc(cp, GFP_KERNEL)) == NULL) + return -1; + } + return 0; +} + +static int cas_alloc_rxds(struct cas *cp) +{ + int i; + + for (i = 0; i < N_RX_DESC_RINGS; i++) { + if (cas_alloc_rx_desc(cp, i) < 0) { + cas_free_rxds(cp); + return -1; + } + } + return 0; +} + +static void cas_reset_task(void *data) +{ + struct cas *cp = (struct cas *) data; +#if 0 + int pending = atomic_read(&cp->reset_task_pending); +#else + int pending_all = atomic_read(&cp->reset_task_pending_all); + int pending_spare = atomic_read(&cp->reset_task_pending_spare); + int pending_mtu = atomic_read(&cp->reset_task_pending_mtu); + + if (pending_all == 0 && pending_spare == 0 && pending_mtu == 0) { + /* We can have more tasks scheduled than actually + * needed. + */ + atomic_dec(&cp->reset_task_pending); + return; + } +#endif + /* The link went down, we reset the ring, but keep + * DMA stopped. Use this function for reset + * on error as well. + */ + if (cp->hw_running) { + unsigned long flags; + + /* Make sure we don't get interrupts or tx packets */ + netif_device_detach(cp->dev); + cas_lock_all_save(cp, flags); + + if (cp->opened) { + /* We call cas_spare_recover when we call cas_open. + * but we do not initialize the lists cas_spare_recover + * uses until cas_open is called. + */ + cas_spare_recover(cp, GFP_ATOMIC); + } +#if 1 + /* test => only pending_spare set */ + if (!pending_all && !pending_mtu) + goto done; +#else + if (pending == CAS_RESET_SPARE) + goto done; +#endif + /* when pending == CAS_RESET_ALL, the following + * call to cas_init_hw will restart auto negotiation. + * Setting the second argument of cas_reset to + * !(pending == CAS_RESET_ALL) will set this argument + * to 1 (avoiding reinitializing the PHY for the normal + * PCS case) when auto negotiation is not restarted. 
+ */ +#if 1 + cas_reset(cp, !(pending_all > 0)); + if (cp->opened) + cas_clean_rings(cp); + cas_init_hw(cp, (pending_all > 0)); +#else + cas_reset(cp, !(pending == CAS_RESET_ALL)); + if (cp->opened) + cas_clean_rings(cp); + cas_init_hw(cp, pending == CAS_RESET_ALL); +#endif + +done: + cas_unlock_all_restore(cp, flags); + netif_device_attach(cp->dev); + } +#if 1 + atomic_sub(pending_all, &cp->reset_task_pending_all); + atomic_sub(pending_spare, &cp->reset_task_pending_spare); + atomic_sub(pending_mtu, &cp->reset_task_pending_mtu); + atomic_dec(&cp->reset_task_pending); +#else + atomic_set(&cp->reset_task_pending, 0); +#endif +} + +static void cas_link_timer(unsigned long data) +{ + struct cas *cp = (struct cas *) data; + int mask, pending = 0, reset = 0; + unsigned long flags; + + if (link_transition_timeout != 0 && + cp->link_transition_jiffies_valid && + ((jiffies - cp->link_transition_jiffies) > + (link_transition_timeout))) { + /* One-second counter so link-down workaround doesn't + * cause resets to occur so fast as to fool the switch + * into thinking the link is down. + */ + cp->link_transition_jiffies_valid = 0; + } + + if (!cp->hw_running) + return; + + spin_lock_irqsave(&cp->lock, flags); + cas_lock_tx(cp); + cas_entropy_gather(cp); + + /* If the link task is still pending, we just + * reschedule the link timer + */ +#if 1 + if (atomic_read(&cp->reset_task_pending_all) || + atomic_read(&cp->reset_task_pending_spare) || + atomic_read(&cp->reset_task_pending_mtu)) + goto done; +#else + if (atomic_read(&cp->reset_task_pending)) + goto done; +#endif + + /* check for rx cleaning */ + if ((mask = (cp->cas_flags & CAS_FLAG_RXD_POST_MASK))) { + int i, rmask; + + for (i = 0; i < MAX_RX_DESC_RINGS; i++) { + rmask = CAS_FLAG_RXD_POST(i); + if ((mask & rmask) == 0) + continue; + + /* post_rxds will do a mod_timer */ + if (cas_post_rxds_ringN(cp, i, cp->rx_last[i]) < 0) { + pending = 1; + continue; + } + cp->cas_flags &= ~rmask; + } + } + + if (CAS_PHY_MII(cp->phy_type)) { + u16 bmsr; + cas_mif_poll(cp, 0); + bmsr = cas_phy_read(cp, MII_BMSR); + /* WTZ: Solaris driver reads this twice, but that + * may be due to the PCS case and the use of a + * common implementation. Read it twice here to be + * safe. 
+ */ + bmsr = cas_phy_read(cp, MII_BMSR); + cas_mif_poll(cp, 1); + readl(cp->regs + REG_MIF_STATUS); /* avoid dups */ + reset = cas_mii_link_check(cp, bmsr); + } else { + reset = cas_pcs_link_check(cp); + } + + if (reset) + goto done; + + /* check for tx state machine confusion */ + if ((readl(cp->regs + REG_MAC_TX_STATUS) & MAC_TX_FRAME_XMIT) == 0) { + u32 val = readl(cp->regs + REG_MAC_STATE_MACHINE); + u32 wptr, rptr; + int tlm = CAS_VAL(MAC_SM_TLM, val); + + if (((tlm == 0x5) || (tlm == 0x3)) && + (CAS_VAL(MAC_SM_ENCAP_SM, val) == 0)) { + if (netif_msg_tx_err(cp)) + printk(KERN_DEBUG "%s: tx err: " + "MAC_STATE[%08x]\n", + cp->dev->name, val); + reset = 1; + goto done; + } + + val = readl(cp->regs + REG_TX_FIFO_PKT_CNT); + wptr = readl(cp->regs + REG_TX_FIFO_WRITE_PTR); + rptr = readl(cp->regs + REG_TX_FIFO_READ_PTR); + if ((val == 0) && (wptr != rptr)) { + if (netif_msg_tx_err(cp)) + printk(KERN_DEBUG "%s: tx err: " + "TX_FIFO[%08x:%08x:%08x]\n", + cp->dev->name, val, wptr, rptr); + reset = 1; + } + + if (reset) + cas_hard_reset(cp); + } + +done: + if (reset) { +#if 1 + atomic_inc(&cp->reset_task_pending); + atomic_inc(&cp->reset_task_pending_all); + schedule_work(&cp->reset_task); +#else + atomic_set(&cp->reset_task_pending, CAS_RESET_ALL); + printk(KERN_ERR "reset called in cas_link_timer\n"); + schedule_work(&cp->reset_task); +#endif + } + + if (!pending) + mod_timer(&cp->link_timer, jiffies + CAS_LINK_TIMEOUT); + cas_unlock_tx(cp); + spin_unlock_irqrestore(&cp->lock, flags); +} + +/* tiny buffers are used to avoid target abort issues with + * older cassini's + */ +static void cas_tx_tiny_free(struct cas *cp) +{ + struct pci_dev *pdev = cp->pdev; + int i; + + for (i = 0; i < N_TX_RINGS; i++) { + if (!cp->tx_tiny_bufs[i]) + continue; + + pci_free_consistent(pdev, TX_TINY_BUF_BLOCK, + cp->tx_tiny_bufs[i], + cp->tx_tiny_dvma[i]); + cp->tx_tiny_bufs[i] = NULL; + } +} + +static int cas_tx_tiny_alloc(struct cas *cp) +{ + struct pci_dev *pdev = cp->pdev; + int i; + + for (i = 0; i < N_TX_RINGS; i++) { + cp->tx_tiny_bufs[i] = + pci_alloc_consistent(pdev, TX_TINY_BUF_BLOCK, + &cp->tx_tiny_dvma[i]); + if (!cp->tx_tiny_bufs[i]) { + cas_tx_tiny_free(cp); + return -1; + } + } + return 0; +} + + +static int cas_open(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + int hw_was_up, err; + unsigned long flags; + + down(&cp->pm_sem); + + hw_was_up = cp->hw_running; + + /* The power-management semaphore protects the hw_running + * etc. state so it is safe to do this bit without cp->lock + */ + if (!cp->hw_running) { + /* Reset the chip */ + cas_lock_all_save(cp, flags); + /* We set the second arg to cas_reset to zero + * because cas_init_hw below will have its second + * argument set to non-zero, which will force + * autonegotiation to start. + */ + cas_reset(cp, 0); + cp->hw_running = 1; + cas_unlock_all_restore(cp, flags); + } + + if (cas_tx_tiny_alloc(cp) < 0) + return -ENOMEM; + + /* alloc rx descriptors */ + err = -ENOMEM; + if (cas_alloc_rxds(cp) < 0) + goto err_tx_tiny; + + /* allocate spares */ + cas_spare_init(cp); + cas_spare_recover(cp, GFP_KERNEL); + + /* We can now request the interrupt as we know it's masked + * on the controller. 
cassini+ has up to 4 interrupts + * that can be used, but you need to do explicit pci interrupt + * mapping to expose them + */ + if (request_irq(cp->pdev->irq, cas_interrupt, + SA_SHIRQ, dev->name, (void *) dev)) { + printk(KERN_ERR "%s: failed to request irq !\n", + cp->dev->name); + err = -EAGAIN; + goto err_spare; + } + + /* init hw */ + cas_lock_all_save(cp, flags); + cas_clean_rings(cp); + cas_init_hw(cp, !hw_was_up); + cp->opened = 1; + cas_unlock_all_restore(cp, flags); + + netif_start_queue(dev); + up(&cp->pm_sem); + return 0; + +err_spare: + cas_spare_free(cp); + cas_free_rxds(cp); +err_tx_tiny: + cas_tx_tiny_free(cp); + up(&cp->pm_sem); + return err; +} + +static int cas_close(struct net_device *dev) +{ + unsigned long flags; + struct cas *cp = netdev_priv(dev); + + /* Make sure we don't get distracted by suspend/resume */ + down(&cp->pm_sem); + + netif_stop_queue(dev); + + /* Stop traffic, mark us closed */ + cas_lock_all_save(cp, flags); + cp->opened = 0; + cas_reset(cp, 0); + cas_phy_init(cp); + cas_begin_auto_negotiation(cp, NULL); + cas_clean_rings(cp); + cas_unlock_all_restore(cp, flags); + + free_irq(cp->pdev->irq, (void *) dev); + cas_spare_free(cp); + cas_free_rxds(cp); + cas_tx_tiny_free(cp); + up(&cp->pm_sem); + return 0; +} + +static struct { + const char name[ETH_GSTRING_LEN]; +} ethtool_cassini_statnames[] = { + {"collisions"}, + {"rx_bytes"}, + {"rx_crc_errors"}, + {"rx_dropped"}, + {"rx_errors"}, + {"rx_fifo_errors"}, + {"rx_frame_errors"}, + {"rx_length_errors"}, + {"rx_over_errors"}, + {"rx_packets"}, + {"tx_aborted_errors"}, + {"tx_bytes"}, + {"tx_dropped"}, + {"tx_errors"}, + {"tx_fifo_errors"}, + {"tx_packets"} +}; +#define CAS_NUM_STAT_KEYS (sizeof(ethtool_cassini_statnames)/ETH_GSTRING_LEN) + +static struct { + const int offsets; /* neg. values for 2nd arg to cas_read_phy */ +} ethtool_register_table[] = { + {-MII_BMSR}, + {-MII_BMCR}, + {REG_CAWR}, + {REG_INF_BURST}, + {REG_BIM_CFG}, + {REG_RX_CFG}, + {REG_HP_CFG}, + {REG_MAC_TX_CFG}, + {REG_MAC_RX_CFG}, + {REG_MAC_CTRL_CFG}, + {REG_MAC_XIF_CFG}, + {REG_MIF_CFG}, + {REG_PCS_CFG}, + {REG_SATURN_PCFG}, + {REG_PCS_MII_STATUS}, + {REG_PCS_STATE_MACHINE}, + {REG_MAC_COLL_EXCESS}, + {REG_MAC_COLL_LATE} +}; +#define CAS_REG_LEN (sizeof(ethtool_register_table)/sizeof(int)) +#define CAS_MAX_REGS (sizeof (u32)*CAS_REG_LEN) + +static u8 *cas_get_regs(struct cas *cp) +{ + u8 *ptr = kmalloc(CAS_MAX_REGS, GFP_KERNEL); + u8 *p; + int i; + unsigned long flags; + + if (!ptr) + return NULL; + + spin_lock_irqsave(&cp->lock, flags); + for (i = 0, p = ptr; i < CAS_REG_LEN ; i ++, p += sizeof(u32)) { + u16 hval; + u32 val; + if (ethtool_register_table[i].offsets < 0) { + hval = cas_phy_read(cp, + -ethtool_register_table[i].offsets); + val = hval; + } else { + val= readl(cp->regs+ethtool_register_table[i].offsets); + } + memcpy(p, (u8 *)&val, sizeof(u32)); + } + spin_unlock_irqrestore(&cp->lock, flags); + + return ptr; +} + +static struct net_device_stats *cas_get_stats(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + struct net_device_stats *stats = cp->net_stats; + unsigned long flags; + int i; + unsigned long tmp; + + /* we collate all of the stats into net_stats[N_TX_RING] */ + if (!cp->hw_running) + return stats + N_TX_RINGS; + + /* collect outstanding stats */ + /* WTZ: the Cassini spec gives these as 16 bit counters but + * stored in 32-bit words. Added a mask of 0xffff to be safe, + * in case the chip somehow puts any garbage in the other bits. 
+ * Also, counter usage didn't seem to mach what Adrian did + * in the parts of the code that set these quantities. Made + * that consistent. + */ + spin_lock_irqsave(&cp->stat_lock[N_TX_RINGS], flags); + stats[N_TX_RINGS].rx_crc_errors += + readl(cp->regs + REG_MAC_FCS_ERR) & 0xffff; + stats[N_TX_RINGS].rx_frame_errors += + readl(cp->regs + REG_MAC_ALIGN_ERR) &0xffff; + stats[N_TX_RINGS].rx_length_errors += + readl(cp->regs + REG_MAC_LEN_ERR) & 0xffff; +#if 1 + tmp = (readl(cp->regs + REG_MAC_COLL_EXCESS) & 0xffff) + + (readl(cp->regs + REG_MAC_COLL_LATE) & 0xffff); + stats[N_TX_RINGS].tx_aborted_errors += tmp; + stats[N_TX_RINGS].collisions += + tmp + (readl(cp->regs + REG_MAC_COLL_NORMAL) & 0xffff); +#else + stats[N_TX_RINGS].tx_aborted_errors += + readl(cp->regs + REG_MAC_COLL_EXCESS); + stats[N_TX_RINGS].collisions += readl(cp->regs + REG_MAC_COLL_EXCESS) + + readl(cp->regs + REG_MAC_COLL_LATE); +#endif + cas_clear_mac_err(cp); + + /* saved bits that are unique to ring 0 */ + spin_lock(&cp->stat_lock[0]); + stats[N_TX_RINGS].collisions += stats[0].collisions; + stats[N_TX_RINGS].rx_over_errors += stats[0].rx_over_errors; + stats[N_TX_RINGS].rx_frame_errors += stats[0].rx_frame_errors; + stats[N_TX_RINGS].rx_fifo_errors += stats[0].rx_fifo_errors; + stats[N_TX_RINGS].tx_aborted_errors += stats[0].tx_aborted_errors; + stats[N_TX_RINGS].tx_fifo_errors += stats[0].tx_fifo_errors; + spin_unlock(&cp->stat_lock[0]); + + for (i = 0; i < N_TX_RINGS; i++) { + spin_lock(&cp->stat_lock[i]); + stats[N_TX_RINGS].rx_length_errors += + stats[i].rx_length_errors; + stats[N_TX_RINGS].rx_crc_errors += stats[i].rx_crc_errors; + stats[N_TX_RINGS].rx_packets += stats[i].rx_packets; + stats[N_TX_RINGS].tx_packets += stats[i].tx_packets; + stats[N_TX_RINGS].rx_bytes += stats[i].rx_bytes; + stats[N_TX_RINGS].tx_bytes += stats[i].tx_bytes; + stats[N_TX_RINGS].rx_errors += stats[i].rx_errors; + stats[N_TX_RINGS].tx_errors += stats[i].tx_errors; + stats[N_TX_RINGS].rx_dropped += stats[i].rx_dropped; + stats[N_TX_RINGS].tx_dropped += stats[i].tx_dropped; + memset(stats + i, 0, sizeof(struct net_device_stats)); + spin_unlock(&cp->stat_lock[i]); + } + spin_unlock_irqrestore(&cp->stat_lock[N_TX_RINGS], flags); + return stats + N_TX_RINGS; +} + + +static void cas_set_multicast(struct net_device *dev) +{ + struct cas *cp = netdev_priv(dev); + u32 rxcfg, rxcfg_new; + unsigned long flags; + int limit = STOP_TRIES; + + if (!cp->hw_running) + return; + + spin_lock_irqsave(&cp->lock, flags); + rxcfg = readl(cp->regs + REG_MAC_RX_CFG); + + /* disable RX MAC and wait for completion */ + writel(rxcfg & ~MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG); + while (readl(cp->regs + REG_MAC_RX_CFG) & MAC_RX_CFG_EN) { + if (!limit--) + break; + udelay(10); + } + + /* disable hash filter and wait for completion */ + limit = STOP_TRIES; + rxcfg &= ~(MAC_RX_CFG_PROMISC_EN | MAC_RX_CFG_HASH_FILTER_EN); + writel(rxcfg & ~MAC_RX_CFG_EN, cp->regs + REG_MAC_RX_CFG); + while (readl(cp->regs + REG_MAC_RX_CFG) & MAC_RX_CFG_HASH_FILTER_EN) { + if (!limit--) + break; + udelay(10); + } + + /* program hash filters */ + cp->mac_rx_cfg = rxcfg_new = cas_setup_multicast(cp); + rxcfg |= rxcfg_new; + writel(rxcfg, cp->regs + REG_MAC_RX_CFG); + spin_unlock_irqrestore(&cp->lock, flags); +} + +/* Eventually add support for changing the advertisement + * on autoneg. 
+ */ +static int cas_ethtool_ioctl(struct net_device *dev, void *ep_user) +{ + struct cas *cp = netdev_priv(dev); + u16 bmcr; + int full_duplex, speed, pause; + struct ethtool_cmd ecmd; + unsigned long flags; + enum link_state linkstate = link_up; + + if (copy_from_user(&ecmd, ep_user, sizeof(ecmd))) + return -EFAULT; + + switch(ecmd.cmd) { + case ETHTOOL_GDRVINFO: { + struct ethtool_drvinfo info = { cmd: ETHTOOL_GDRVINFO }; + + strncpy(info.driver, DRV_MODULE_NAME, + ETHTOOL_BUSINFO_LEN); + strncpy(info.version, DRV_MODULE_VERSION, + ETHTOOL_BUSINFO_LEN); + info.fw_version[0] = '\0'; + strncpy(info.bus_info, pci_name(cp->pdev), + ETHTOOL_BUSINFO_LEN); + info.regdump_len = cp->casreg_len < CAS_MAX_REGS ? + cp->casreg_len : CAS_MAX_REGS; + info.n_stats = CAS_NUM_STAT_KEYS; + if (copy_to_user(ep_user, &info, sizeof(info))) + return -EFAULT; + + return 0; + } + + case ETHTOOL_GSET: + ecmd.advertising = 0; + ecmd.supported = SUPPORTED_Autoneg; + if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { + ecmd.supported |= SUPPORTED_1000baseT_Full; + ecmd.advertising |= ADVERTISED_1000baseT_Full; + } + + /* Record PHY settings if HW is on. */ + spin_lock_irqsave(&cp->lock, flags); + bmcr = 0; + linkstate = cp->lstate; + if (CAS_PHY_MII(cp->phy_type)) { + ecmd.port = PORT_MII; + ecmd.transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ? + XCVR_INTERNAL : XCVR_EXTERNAL; + ecmd.phy_address = cp->phy_addr; + ecmd.advertising |= ADVERTISED_TP | ADVERTISED_MII | + ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full; + + ecmd.supported |= + (SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_TP | SUPPORTED_MII); + + if (cp->hw_running) { + cas_mif_poll(cp, 0); + bmcr = cas_phy_read(cp, MII_BMCR); + cas_read_mii_link_mode(cp, &full_duplex, + &speed, &pause); + cas_mif_poll(cp, 1); + } + + } else { + ecmd.port = PORT_FIBRE; + ecmd.transceiver = XCVR_INTERNAL; + ecmd.phy_address = 0; + ecmd.supported |= SUPPORTED_FIBRE; + ecmd.advertising |= ADVERTISED_FIBRE; + + if (cp->hw_running) { + /* pcs uses the same bits as mii */ + bmcr = readl(cp->regs + REG_PCS_MII_CTRL); + cas_read_pcs_link_mode(cp, &full_duplex, + &speed, &pause); + } + } + spin_unlock_irqrestore(&cp->lock, flags); + + if (bmcr & BMCR_ANENABLE) { + ecmd.advertising |= ADVERTISED_Autoneg; + ecmd.autoneg = AUTONEG_ENABLE; + ecmd.speed = ((speed == 10) ? + SPEED_10 : + ((speed == 1000) ? + SPEED_1000 : SPEED_100)); + ecmd.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; + } else { + ecmd.autoneg = AUTONEG_DISABLE; + ecmd.speed = + (bmcr & CAS_BMCR_SPEED1000) ? + SPEED_1000 : + ((bmcr & BMCR_SPEED100) ? SPEED_100: + SPEED_10); + ecmd.duplex = + (bmcr & BMCR_FULLDPLX) ? + DUPLEX_FULL : DUPLEX_HALF; + } + if (linkstate != link_up) { + /* Force these to "unknown" if the link is not up and + * autonogotiation in enabled. We can set the link + * speed to 0, but not ecmd.duplex, + * because its legal values are 0 and 1. Ethtool will + * print the value reported in parentheses after the + * word "Unknown" for unrecognized values. + * + * If in forced mode, we report the speed and duplex + * settings that we configured. + */ + if (cp->link_cntl & BMCR_ANENABLE) { + ecmd.speed = 0; + ecmd.duplex = 0xff; + } else { + ecmd.speed = SPEED_10; + if (cp->link_cntl & BMCR_SPEED100) { + ecmd.speed = SPEED_100; + } else if (cp->link_cntl & CAS_BMCR_SPEED1000) { + ecmd.speed = SPEED_1000; + } + ecmd.duplex = (cp->link_cntl & BMCR_FULLDPLX)? 
+ DUPLEX_FULL : DUPLEX_HALF; + } + } + if (copy_to_user(ep_user, &ecmd, sizeof(ecmd))) + return -EFAULT; + return 0; + + case ETHTOOL_SSET: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Verify the settings we care about. */ + if (ecmd.autoneg != AUTONEG_ENABLE && + ecmd.autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (ecmd.autoneg == AUTONEG_DISABLE && + ((ecmd.speed != SPEED_1000 && + ecmd.speed != SPEED_100 && + ecmd.speed != SPEED_10) || + (ecmd.duplex != DUPLEX_HALF && + ecmd.duplex != DUPLEX_FULL))) + return -EINVAL; + + /* Apply settings and restart link process. */ + spin_lock_irqsave(&cp->lock, flags); + cas_begin_auto_negotiation(cp, &ecmd); + spin_unlock_irqrestore(&cp->lock, flags); + return 0; + + case ETHTOOL_NWAY_RST: + if ((cp->link_cntl & BMCR_ANENABLE) == 0) + return -EINVAL; + + /* Restart link process. */ + spin_lock_irqsave(&cp->lock, flags); + cas_begin_auto_negotiation(cp, NULL); + spin_unlock_irqrestore(&cp->lock, flags); + + return 0; + + case ETHTOOL_GWOL: + case ETHTOOL_SWOL: + break; /* doesn't exist */ + + /* get link status */ + case ETHTOOL_GLINK: { + struct ethtool_value edata = { cmd: ETHTOOL_GLINK }; + + edata.data = (cp->lstate == link_up); + if (copy_to_user(ep_user, &edata, sizeof(edata))) + return -EFAULT; + return 0; + } + + /* get message-level */ + case ETHTOOL_GMSGLVL: { + struct ethtool_value edata = { cmd: ETHTOOL_GMSGLVL }; + + edata.data = cp->msg_enable; + if (copy_to_user(ep_user, &edata, sizeof(edata))) + return -EFAULT; + return 0; + } + + /* set message-level */ + case ETHTOOL_SMSGLVL: { + struct ethtool_value edata; + + if (!capable(CAP_NET_ADMIN)) { + return (-EPERM); + } + if (copy_from_user(&edata, ep_user, sizeof(edata))) + return -EFAULT; + cp->msg_enable = edata.data; + return 0; + } + + case ETHTOOL_GREGS: { + struct ethtool_regs edata; + u8 *ptr; + int len = cp->casreg_len < CAS_MAX_REGS ? + cp->casreg_len: CAS_MAX_REGS; + + if (copy_from_user(&edata, ep_user, sizeof (edata))) + return -EFAULT; + + if (edata.len > len) + edata.len = len; + edata.version = 0; + if (copy_to_user (ep_user, &edata, sizeof(edata))) + return -EFAULT; + + /* cas_get_regs handles locks (cp->lock). */ + ptr = cas_get_regs(cp); + if (ptr == NULL) + return -ENOMEM; + if (copy_to_user(ep_user + sizeof (edata), ptr, edata.len)) + return -EFAULT; + + kfree(ptr); + return (0); + } + case ETHTOOL_GSTRINGS: { + struct ethtool_gstrings edata; + int len; + + if (copy_from_user(&edata, ep_user, sizeof(edata))) + return -EFAULT; + + len = edata.len; + switch(edata.string_set) { + case ETH_SS_STATS: + edata.len = (len < CAS_NUM_STAT_KEYS) ? + len : CAS_NUM_STAT_KEYS; + if (copy_to_user(ep_user, &edata, sizeof(edata))) + return -EFAULT; + + if (copy_to_user(ep_user + sizeof(edata), + ðtool_cassini_statnames, + (edata.len * ETH_GSTRING_LEN))) + return -EFAULT; + return 0; + default: + return -EINVAL; + } + } + case ETHTOOL_GSTATS: { + int i = 0; + u64 *tmp; + struct ethtool_stats edata; + struct net_device_stats *stats; + int len; + + if (copy_from_user(&edata, ep_user, sizeof(edata))) + return -EFAULT; + + len = edata.n_stats; + stats = cas_get_stats(cp->dev); + edata.cmd = ETHTOOL_GSTATS; + edata.n_stats = (len < CAS_NUM_STAT_KEYS) ? 
+ len : CAS_NUM_STAT_KEYS; + if (copy_to_user(ep_user, &edata, sizeof (edata))) + return -EFAULT; + + tmp = kmalloc(sizeof(u64)*CAS_NUM_STAT_KEYS, GFP_KERNEL); + if (tmp) { + tmp[i++] = stats->collisions; + tmp[i++] = stats->rx_bytes; + tmp[i++] = stats->rx_crc_errors; + tmp[i++] = stats->rx_dropped; + tmp[i++] = stats->rx_errors; + tmp[i++] = stats->rx_fifo_errors; + tmp[i++] = stats->rx_frame_errors; + tmp[i++] = stats->rx_length_errors; + tmp[i++] = stats->rx_over_errors; + tmp[i++] = stats->rx_packets; + tmp[i++] = stats->tx_aborted_errors; + tmp[i++] = stats->tx_bytes; + tmp[i++] = stats->tx_dropped; + tmp[i++] = stats->tx_errors; + tmp[i++] = stats->tx_fifo_errors; + tmp[i++] = stats->tx_packets; + BUG_ON(i != CAS_NUM_STAT_KEYS); + + i = copy_to_user(ep_user + sizeof(edata), + tmp, sizeof(u64)*edata.n_stats); + kfree(tmp); + } else { + return -ENOMEM; + } + if (i) + return -EFAULT; + return 0; + } + } + + return -EOPNOTSUPP; +} + +static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct cas *cp = netdev_priv(dev); + struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data; + unsigned long flags; + int rc = -EOPNOTSUPP; + + /* Hold the PM semaphore while doing ioctl's or we may collide + * with open/close and power management and oops. + */ + down(&cp->pm_sem); + switch (cmd) { + case SIOCETHTOOL: + rc = cas_ethtool_ioctl(dev, ifr->ifr_data); + break; + + case SIOCGMIIPHY: /* Get address of MII PHY in use. */ + data->phy_id = cp->phy_addr; + /* Fallthrough... */ + + case SIOCGMIIREG: /* Read MII PHY register. */ + spin_lock_irqsave(&cp->lock, flags); + cas_mif_poll(cp, 0); + data->val_out = cas_phy_read(cp, data->reg_num & 0x1f); + cas_mif_poll(cp, 1); + spin_unlock_irqrestore(&cp->lock, flags); + rc = 0; + break; + + case SIOCSMIIREG: /* Write MII PHY register. */ + if (!capable(CAP_NET_ADMIN)) { + rc = -EPERM; + break; + } + spin_lock_irqsave(&cp->lock, flags); + cas_mif_poll(cp, 0); + rc = cas_phy_write(cp, data->reg_num & 0x1f, data->val_in); + cas_mif_poll(cp, 1); + spin_unlock_irqrestore(&cp->lock, flags); + break; + default: + break; + }; + + up(&cp->pm_sem); + return rc; +} + +static int __devinit cas_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + static int cas_version_printed = 0; + unsigned long casreg_base, casreg_len; + struct net_device *dev; + struct cas *cp; + int i, err, pci_using_dac; + u16 pci_cmd; + u8 orig_cacheline_size = 0, cas_cacheline_size = 0; + + if (cas_version_printed++ == 0) + printk(KERN_INFO "%s", version); + + err = pci_enable_device(pdev); + if (err) { + printk(KERN_ERR PFX "Cannot enable PCI device, " + "aborting.\n"); + return err; + } + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + printk(KERN_ERR PFX "Cannot find proper PCI device " + "base address, aborting.\n"); + err = -ENODEV; + goto err_out_disable_pdev; + } + + dev = alloc_etherdev(sizeof(*cp)); + if (!dev) { + printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n"); + err = -ENOMEM; + goto err_out_disable_pdev; + } + SET_MODULE_OWNER(dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + err = pci_request_regions(pdev, dev->name); + if (err) { + printk(KERN_ERR PFX "Cannot obtain PCI resources, " + "aborting.\n"); + goto err_out_free_netdev; + } + pci_set_master(pdev); + + /* we must always turn on parity response or else parity + * doesn't get generated properly. disable SERR/PERR as well. + * in addition, we want to turn MWI on. 
+ */ + pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); + pci_cmd &= ~PCI_COMMAND_SERR; + pci_cmd |= PCI_COMMAND_PARITY; + pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); + pci_set_mwi(pdev); + /* + * On some architectures, the default cache line size set + * by pci_set_mwi reduces perforamnce. We have to increase + * it for this case. To start, we'll print some configuration + * data. + */ +#if 1 + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, + &orig_cacheline_size); + if (orig_cacheline_size < CAS_PREF_CACHELINE_SIZE) { + cas_cacheline_size = + (CAS_PREF_CACHELINE_SIZE < SMP_CACHE_BYTES) ? + CAS_PREF_CACHELINE_SIZE : SMP_CACHE_BYTES; + if (pci_write_config_byte(pdev, + PCI_CACHE_LINE_SIZE, + cas_cacheline_size)) { + printk(KERN_ERR PFX "Could not set PCI cache " + "line size\n"); + goto err_write_cacheline; + } + } +#endif + + + /* Configure DMA attributes. */ + if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { + pci_using_dac = 1; + err = pci_set_consistent_dma_mask(pdev, + DMA_64BIT_MASK); + if (err < 0) { + printk(KERN_ERR PFX "Unable to obtain 64-bit DMA " + "for consistent allocations\n"); + goto err_out_free_res; + } + + } else { + err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + printk(KERN_ERR PFX "No usable DMA configuration, " + "aborting.\n"); + goto err_out_free_res; + } + pci_using_dac = 0; + } + + casreg_base = pci_resource_start(pdev, 0); + casreg_len = pci_resource_len(pdev, 0); + + cp = netdev_priv(dev); + cp->pdev = pdev; +#if 1 + /* A value of 0 indicates we never explicitly set it */ + cp->orig_cacheline_size = cas_cacheline_size ? orig_cacheline_size: 0; +#endif + cp->dev = dev; + cp->msg_enable = (cassini_debug < 0) ? CAS_DEF_MSG_ENABLE : + cassini_debug; + + cp->link_transition = LINK_TRANSITION_UNKNOWN; + cp->link_transition_jiffies_valid = 0; + + spin_lock_init(&cp->lock); + spin_lock_init(&cp->rx_inuse_lock); + spin_lock_init(&cp->rx_spare_lock); + for (i = 0; i < N_TX_RINGS; i++) { + spin_lock_init(&cp->stat_lock[i]); + spin_lock_init(&cp->tx_lock[i]); + } + spin_lock_init(&cp->stat_lock[N_TX_RINGS]); + init_MUTEX(&cp->pm_sem); + + init_timer(&cp->link_timer); + cp->link_timer.function = cas_link_timer; + cp->link_timer.data = (unsigned long) cp; + +#if 1 + /* Just in case the implementation of atomic operations + * change so that an explicit initialization is necessary. 
+ */ + atomic_set(&cp->reset_task_pending, 0); + atomic_set(&cp->reset_task_pending_all, 0); + atomic_set(&cp->reset_task_pending_spare, 0); + atomic_set(&cp->reset_task_pending_mtu, 0); +#endif + INIT_WORK(&cp->reset_task, cas_reset_task, cp); + + /* Default link parameters */ + if (link_mode >= 0 && link_mode <= 6) + cp->link_cntl = link_modes[link_mode]; + else + cp->link_cntl = BMCR_ANENABLE; + cp->lstate = link_down; + cp->link_transition = LINK_TRANSITION_LINK_DOWN; + netif_carrier_off(cp->dev); + cp->timer_ticks = 0; + + /* give us access to cassini registers */ + cp->regs = ioremap(casreg_base, casreg_len); + if (cp->regs == 0UL) { + printk(KERN_ERR PFX "Cannot map device registers, " + "aborting.\n"); + goto err_out_free_res; + } + cp->casreg_len = casreg_len; + + pci_save_state(pdev); + cas_check_pci_invariants(cp); + cas_hard_reset(cp); + cas_reset(cp, 0); + if (cas_check_invariants(cp)) + goto err_out_iounmap; + + cp->init_block = (struct cas_init_block *) + pci_alloc_consistent(pdev, sizeof(struct cas_init_block), + &cp->block_dvma); + if (!cp->init_block) { + printk(KERN_ERR PFX "Cannot allocate init block, " + "aborting.\n"); + goto err_out_iounmap; + } + + for (i = 0; i < N_TX_RINGS; i++) + cp->init_txds[i] = cp->init_block->txds[i]; + + for (i = 0; i < N_RX_DESC_RINGS; i++) + cp->init_rxds[i] = cp->init_block->rxds[i]; + + for (i = 0; i < N_RX_COMP_RINGS; i++) + cp->init_rxcs[i] = cp->init_block->rxcs[i]; + + for (i = 0; i < N_RX_FLOWS; i++) + skb_queue_head_init(&cp->rx_flows[i]); + + dev->open = cas_open; + dev->stop = cas_close; + dev->hard_start_xmit = cas_start_xmit; + dev->get_stats = cas_get_stats; + dev->set_multicast_list = cas_set_multicast; + dev->do_ioctl = cas_ioctl; + dev->tx_timeout = cas_tx_timeout; + dev->watchdog_timeo = CAS_TX_TIMEOUT; + dev->change_mtu = cas_change_mtu; +#ifdef USE_NAPI + dev->poll = cas_poll; + dev->weight = 64; +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = cas_netpoll; +#endif + dev->irq = pdev->irq; + dev->dma = 0; + + /* Cassini features. */ + if ((cp->cas_flags & CAS_FLAG_NO_HW_CSUM) == 0) + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; + + if (pci_using_dac) + dev->features |= NETIF_F_HIGHDMA; + + if (register_netdev(dev)) { + printk(KERN_ERR PFX "Cannot register net device, " + "aborting.\n"); + goto err_out_free_consistent; + } + + i = readl(cp->regs + REG_BIM_CFG); + printk(KERN_INFO "%s: Sun Cassini%s (%sbit/%sMHz PCI/%s) " + "Ethernet[%d] ", dev->name, + (cp->cas_flags & CAS_FLAG_REG_PLUS) ? "+" : "", + (i & BIM_CFG_32BIT) ? "32" : "64", + (i & BIM_CFG_66MHZ) ? "66" : "33", + (cp->phy_type == CAS_PHY_SERDES) ? "Fi" : "Cu", pdev->irq); + + for (i = 0; i < 6; i++) + printk("%2.2x%c", dev->dev_addr[i], + i == 5 ? ' ' : ':'); + printk("\n"); + + pci_set_drvdata(pdev, dev); + cp->hw_running = 1; + cas_entropy_reset(cp); + cas_phy_init(cp); + cas_begin_auto_negotiation(cp, NULL); + return 0; + +err_out_free_consistent: + pci_free_consistent(pdev, sizeof(struct cas_init_block), + cp->init_block, cp->block_dvma); + +err_out_iounmap: + down(&cp->pm_sem); + if (cp->hw_running) + cas_shutdown(cp); + up(&cp->pm_sem); + + iounmap((void *) cp->regs); + + +err_out_free_res: + pci_release_regions(pdev); + +err_write_cacheline: + /* Try to restore it in case the error occured after we + * set it. 
+ */ + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, orig_cacheline_size); + +err_out_free_netdev: + free_netdev(dev); + +err_out_disable_pdev: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return -ENODEV; +} + +static void __devexit cas_remove_one(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct cas *cp; + if (!dev) + return; + + cp = netdev_priv(dev); + unregister_netdev(dev); + + down(&cp->pm_sem); + flush_scheduled_work(); + if (cp->hw_running) + cas_shutdown(cp); + up(&cp->pm_sem); + +#if 1 + if (cp->orig_cacheline_size) { + /* Restore the cache line size if we had modified + * it. + */ + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, + cp->orig_cacheline_size); + } +#endif + pci_free_consistent(pdev, sizeof(struct cas_init_block), + cp->init_block, cp->block_dvma); + iounmap((void *) cp->regs); + free_netdev(dev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +#ifdef CONFIG_PM +static int cas_suspend(struct pci_dev *pdev, u32 state) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct cas *cp = netdev_priv(dev); + unsigned long flags; + + /* We hold the PM semaphore during entire driver + * sleep time + */ + down(&cp->pm_sem); + + /* If the driver is opened, we stop the DMA */ + if (cp->opened) { + netif_device_detach(dev); + + cas_lock_all_save(cp, flags); + + /* We can set the second arg of cas_reset to 0 + * because on resume, we'll call cas_init_hw with + * its second arg set so that autonegotiation is + * restarted. + */ + cas_reset(cp, 0); + cas_clean_rings(cp); + cas_unlock_all_restore(cp, flags); + } + + if (cp->hw_running) + cas_shutdown(cp); + + return 0; +} + +static int cas_resume(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct cas *cp = netdev_priv(dev); + + printk(KERN_INFO "%s: resuming\n", dev->name); + + cas_hard_reset(cp); + if (cp->opened) { + unsigned long flags; + cas_lock_all_save(cp, flags); + cas_reset(cp, 0); + cp->hw_running = 1; + cas_clean_rings(cp); + cas_init_hw(cp, 1); + cas_unlock_all_restore(cp, flags); + + netif_device_attach(dev); + } + up(&cp->pm_sem); + return 0; +} +#endif /* CONFIG_PM */ + +static struct pci_driver cas_driver = { + .name = DRV_MODULE_NAME, + .id_table = cas_pci_tbl, + .probe = cas_init_one, + .remove = __devexit_p(cas_remove_one), +#ifdef CONFIG_PM + .suspend = cas_suspend, + .resume = cas_resume +#endif +}; + +static int __init cas_init(void) +{ + if (linkdown_timeout > 0) + link_transition_timeout = linkdown_timeout * HZ; + else + link_transition_timeout = 0; + + return pci_module_init(&cas_driver); +} + +static void __exit cas_cleanup(void) +{ + pci_unregister_driver(&cas_driver); +} + +module_init(cas_init); +module_exit(cas_cleanup); diff --git a/drivers/net/cassini.h b/drivers/net/cassini.h new file mode 100644 index 000000000000..88063ef16cf6 --- /dev/null +++ b/drivers/net/cassini.h @@ -0,0 +1,4425 @@ +/* $Id: cassini.h,v 1.16 2004/08/17 21:15:16 zaumen Exp $ + * cassini.h: Definitions for Sun Microsystems Cassini(+) ethernet driver. + * + * Copyright (C) 2004 Sun Microsystems Inc. + * Copyright (c) 2003 Adrian Sun (asun@darksunrising.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + * + * vendor id: 0x108E (Sun Microsystems, Inc.) + * device id: 0xabba (Cassini) + * revision ids: 0x01 = Cassini + * 0x02 = Cassini rev 2 + * 0x10 = Cassini+ + * 0x11 = Cassini+ 0.2u + * + * vendor id: 0x100b (National Semiconductor) + * device id: 0x0035 (DP83065/Saturn) + * revision ids: 0x30 = Saturn B2 + * + * rings are all offset from 0. + * + * there are two clock domains: + * PCI: 33/66MHz clock + * chip: 125MHz clock + */ + +#ifndef _CASSINI_H +#define _CASSINI_H + +/* cassini register map: 2M memory mapped in 32-bit memory space accessible as + * 32-bit words. there is no i/o port access. REG_ addresses are + * shared between cassini and cassini+. REG_PLUS_ addresses only + * appear in cassini+. REG_MINUS_ addresses only appear in cassini. + */ +#define CAS_ID_REV2 0x02 +#define CAS_ID_REVPLUS 0x10 +#define CAS_ID_REVPLUS02u 0x11 +#define CAS_ID_REVSATURNB2 0x30 + +/** global resources **/ + +/* this register sets the weights for the weighted round robin arbiter. e.g., + * if rx weight == 1 and tx weight == 0, rx == 2x tx transfer credit + * for its next turn to access the pci bus. + * map: 0x0 = x1, 0x1 = x2, 0x2 = x4, 0x3 = x8 + * DEFAULT: 0x0, SIZE: 5 bits + */ +#define REG_CAWR 0x0004 /* core arbitration weight */ +#define CAWR_RX_DMA_WEIGHT_SHIFT 0 +#define CAWR_RX_DMA_WEIGHT_MASK 0x03 /* [0:1] */ +#define CAWR_TX_DMA_WEIGHT_SHIFT 2 +#define CAWR_TX_DMA_WEIGHT_MASK 0x0C /* [3:2] */ +#define CAWR_RR_DIS 0x10 /* [4] */ + +/* if enabled, BIM can send bursts across PCI bus > cacheline size. burst + * sizes determined by length of packet or descriptor transfer and the + * max length allowed by the target. + * DEFAULT: 0x0, SIZE: 1 bit + */ +#define REG_INF_BURST 0x0008 /* infinite burst enable reg */ +#define INF_BURST_EN 0x1 /* enable */ + +/* top level interrupts [0-9] are auto-cleared to 0 when the status + * register is read. second level interrupts [13 - 18] are cleared at + * the source. tx completion register 3 is replicated in [19 - 31] + * DEFAULT: 0x00000000, SIZE: 29 bits + */ +#define REG_INTR_STATUS 0x000C /* interrupt status register */ +#define INTR_TX_INTME 0x00000001 /* frame w/ INT ME desc bit set + xferred from host queue to + TX FIFO */ +#define INTR_TX_ALL 0x00000002 /* all xmit frames xferred into + TX FIFO. i.e., + TX Kick == TX complete. if + PACED_MODE set, then TX FIFO + also empty */ +#define INTR_TX_DONE 0x00000004 /* any frame xferred into tx + FIFO */ +#define INTR_TX_TAG_ERROR 0x00000008 /* TX FIFO tag framing + corrupted. FATAL ERROR */ +#define INTR_RX_DONE 0x00000010 /* at least 1 frame xferred + from RX FIFO to host mem. + RX completion reg updated. + may be delayed by recv + intr blanking. */ +#define INTR_RX_BUF_UNAVAIL 0x00000020 /* no more receive buffers. + RX Kick == RX complete */ +#define INTR_RX_TAG_ERROR 0x00000040 /* RX FIFO tag framing + corrupted. FATAL ERROR */ +#define INTR_RX_COMP_FULL 0x00000080 /* no more room in completion + ring to post descriptors. 
+ RX complete head incr to + almost reach RX complete + tail */ +#define INTR_RX_BUF_AE 0x00000100 /* less than the + programmable threshold # + of free descr avail for + hw use */ +#define INTR_RX_COMP_AF 0x00000200 /* less than the + programmable threshold # + of descr spaces for hw + use in completion descr + ring */ +#define INTR_RX_LEN_MISMATCH 0x00000400 /* len field from MAC != + len of non-reassembly pkt + from fifo during DMA or + header parser provides TCP + header and payload size > + MAC packet size. + FATAL ERROR */ +#define INTR_SUMMARY 0x00001000 /* summary interrupt bit. this + bit will be set if an interrupt + generated on the pci bus. useful + when driver is polling for + interrupts */ +#define INTR_PCS_STATUS 0x00002000 /* PCS interrupt status register */ +#define INTR_TX_MAC_STATUS 0x00004000 /* TX MAC status register has at + least 1 unmasked interrupt set */ +#define INTR_RX_MAC_STATUS 0x00008000 /* RX MAC status register has at + least 1 unmasked interrupt set */ +#define INTR_MAC_CTRL_STATUS 0x00010000 /* MAC control status register has + at least 1 unmasked interrupt + set */ +#define INTR_MIF_STATUS 0x00020000 /* MIF status register has at least + 1 unmasked interrupt set */ +#define INTR_PCI_ERROR_STATUS 0x00040000 /* PCI error status register in the + BIF has at least 1 unmasked + interrupt set */ +#define INTR_TX_COMP_3_MASK 0xFFF80000 /* mask for TX completion + 3 reg data */ +#define INTR_TX_COMP_3_SHIFT 19 +#define INTR_ERROR_MASK (INTR_MIF_STATUS | INTR_PCI_ERROR_STATUS | \ + INTR_PCS_STATUS | INTR_RX_LEN_MISMATCH | \ + INTR_TX_MAC_STATUS | INTR_RX_MAC_STATUS | \ + INTR_TX_TAG_ERROR | INTR_RX_TAG_ERROR | \ + INTR_MAC_CTRL_STATUS) + +/* determines which status events will cause an interrupt. layout same + * as REG_INTR_STATUS. + * DEFAULT: 0xFFFFFFFF, SIZE: 16 bits + */ +#define REG_INTR_MASK 0x0010 /* Interrupt mask */ + +/* top level interrupt bits that are cleared during read of REG_INTR_STATUS_ALIAS. + * useful when driver is polling for interrupts. layout same as REG_INTR_MASK. + * DEFAULT: 0x00000000, SIZE: 12 bits + */ +#define REG_ALIAS_CLEAR 0x0014 /* alias clear mask + (used w/ status alias) */ +/* same as REG_INTR_STATUS except that only bits cleared are those selected by + * REG_ALIAS_CLEAR + * DEFAULT: 0x00000000, SIZE: 29 bits + */ +#define REG_INTR_STATUS_ALIAS 0x001C /* interrupt status alias + (selective clear) */ + +/* DEFAULT: 0x0, SIZE: 3 bits */ +#define REG_PCI_ERR_STATUS 0x1000 /* PCI error status */ +#define PCI_ERR_BADACK 0x01 /* reserved in Cassini+. + set if no ACK64# during ABS64 cycle + in Cassini. */ +#define PCI_ERR_DTRTO 0x02 /* delayed xaction timeout. set if + no read retry after 2^15 clocks */ +#define PCI_ERR_OTHER 0x04 /* other PCI errors */ +#define PCI_ERR_BIM_DMA_WRITE 0x08 /* BIM received 0 count DMA write req. + unused in Cassini. */ +#define PCI_ERR_BIM_DMA_READ 0x10 /* BIM received 0 count DMA read req. + unused in Cassini. */ +#define PCI_ERR_BIM_DMA_TIMEOUT 0x20 /* BIM received 255 retries during + DMA. unused in cassini. */ + +/* mask for PCI status events that will set PCI_ERR_STATUS. if cleared, event + * causes an interrupt to be generated. + * DEFAULT: 0x7, SIZE: 3 bits + */ +#define REG_PCI_ERR_STATUS_MASK 0x1004 /* PCI Error status mask */ + +/* used to configure PCI related parameters that are not in PCI config space. 
+ * DEFAULT: 0bxx000, SIZE: 5 bits + */ +#define REG_BIM_CFG 0x1008 /* BIM Configuration */ +#define BIM_CFG_RESERVED0 0x001 /* reserved */ +#define BIM_CFG_RESERVED1 0x002 /* reserved */ +#define BIM_CFG_64BIT_DISABLE 0x004 /* disable 64-bit mode */ +#define BIM_CFG_66MHZ 0x008 /* (ro) 1 = 66MHz, 0 = < 66MHz */ +#define BIM_CFG_32BIT 0x010 /* (ro) 1 = 32-bit slot, 0 = 64-bit */ +#define BIM_CFG_DPAR_INTR_ENABLE 0x020 /* detected parity err enable */ +#define BIM_CFG_RMA_INTR_ENABLE 0x040 /* master abort intr enable */ +#define BIM_CFG_RTA_INTR_ENABLE 0x080 /* target abort intr enable */ +#define BIM_CFG_RESERVED2 0x100 /* reserved */ +#define BIM_CFG_BIM_DISABLE 0x200 /* stop BIM DMA. use before global + reset. reserved in Cassini. */ +#define BIM_CFG_BIM_STATUS 0x400 /* (ro) 1 = BIM DMA suspended. + reserved in Cassini. */ +#define BIM_CFG_PERROR_BLOCK 0x800 /* block PERR# to pci bus. def: 0. + reserved in Cassini. */ + +/* DEFAULT: 0x00000000, SIZE: 32 bits */ +#define REG_BIM_DIAG 0x100C /* BIM Diagnostic */ +#define BIM_DIAG_MSTR_SM_MASK 0x3FFFFF00 /* PCI master controller state + machine bits [21:0] */ +#define BIM_DIAG_BRST_SM_MASK 0x7F /* PCI burst controller state + machine bits [6:0] */ + +/* writing to SW_RESET_TX and SW_RESET_RX will issue a global + * reset. poll until TX and RX read back as 0's for completion. + */ +#define REG_SW_RESET 0x1010 /* Software reset */ +#define SW_RESET_TX 0x00000001 /* reset TX DMA engine. poll until + cleared to 0. */ +#define SW_RESET_RX 0x00000002 /* reset RX DMA engine. poll until + cleared to 0. */ +#define SW_RESET_RSTOUT 0x00000004 /* force RSTOUT# pin active (low). + resets PHY and anything else + connected to RSTOUT#. RSTOUT# + is also activated by local PCI + reset when hot-swap is being + done. */ +#define SW_RESET_BLOCK_PCS_SLINK 0x00000008 /* if a global reset is done with + this bit set, PCS and SLINK + modules won't be reset. + i.e., link won't drop. */ +#define SW_RESET_BREQ_SM_MASK 0x00007F00 /* breq state machine [6:0] */ +#define SW_RESET_PCIARB_SM_MASK 0x00070000 /* pci arbitration state bits: + 0b000: ARB_IDLE1 + 0b001: ARB_IDLE2 + 0b010: ARB_WB_ACK + 0b011: ARB_WB_WAT + 0b100: ARB_RB_ACK + 0b101: ARB_RB_WAT + 0b110: ARB_RB_END + 0b111: ARB_WB_END */ +#define SW_RESET_RDPCI_SM_MASK 0x00300000 /* read pci state bits: + 0b00: RD_PCI_WAT + 0b01: RD_PCI_RDY + 0b11: RD_PCI_ACK */ +#define SW_RESET_RDARB_SM_MASK 0x00C00000 /* read arbitration state bits: + 0b00: AD_IDL_RX + 0b01: AD_ACK_RX + 0b10: AD_ACK_TX + 0b11: AD_IDL_TX */ +#define SW_RESET_WRPCI_SM_MASK 0x06000000 /* write pci state bits + 0b00: WR_PCI_WAT + 0b01: WR_PCI_RDY + 0b11: WR_PCI_ACK */ +#define SW_RESET_WRARB_SM_MASK 0x38000000 /* write arbitration state bits: + 0b000: ARB_IDLE1 + 0b001: ARB_IDLE2 + 0b010: ARB_TX_ACK + 0b011: ARB_TX_WAT + 0b100: ARB_RX_ACK + 0b110: ARB_RX_WAT */ + +/* Cassini only. 64-bit register used to check PCI datapath. when read, + * value written has both lower and upper 32-bit halves rotated to the right + * one bit position. e.g., FFFFFFFF FFFFFFFF -> 7FFFFFFF 7FFFFFFF + */ +#define REG_MINUS_BIM_DATAPATH_TEST 0x1018 /* Cassini: BIM datapath test + Cassini+: reserved */ + +/* output enables are provided for each device's chip select and for the rest + * of the outputs from cassini to its local bus devices. two sw programmable + * bits are connected to general purpus control/status bits. + * DEFAULT: 0x7 + */ +#define REG_BIM_LOCAL_DEV_EN 0x1020 /* BIM local device + output EN. 
default: 0x7 */ +#define BIM_LOCAL_DEV_PAD 0x01 /* address bus, RW signal, and + OE signal output enable on the + local bus interface. these + are shared between both local + bus devices. tristate when 0. */ +#define BIM_LOCAL_DEV_PROM 0x02 /* PROM chip select */ +#define BIM_LOCAL_DEV_EXT 0x04 /* secondary local bus device chip + select output enable */ +#define BIM_LOCAL_DEV_SOFT_0 0x08 /* sw programmable ctrl bit 0 */ +#define BIM_LOCAL_DEV_SOFT_1 0x10 /* sw programmable ctrl bit 1 */ +#define BIM_LOCAL_DEV_HW_RESET 0x20 /* internal hw reset. Cassini+ only. */ + +/* access 24 entry BIM read and write buffers. put address in REG_BIM_BUFFER_ADDR + * and read/write from/to it REG_BIM_BUFFER_DATA_LOW and _DATA_HI. + * _DATA_HI should be the last access of the sequence. + * DEFAULT: undefined + */ +#define REG_BIM_BUFFER_ADDR 0x1024 /* BIM buffer address. for + purposes. */ +#define BIM_BUFFER_ADDR_MASK 0x3F /* index (0 - 23) of buffer */ +#define BIM_BUFFER_WR_SELECT 0x40 /* write buffer access = 1 + read buffer access = 0 */ +/* DEFAULT: undefined */ +#define REG_BIM_BUFFER_DATA_LOW 0x1028 /* BIM buffer data low */ +#define REG_BIM_BUFFER_DATA_HI 0x102C /* BIM buffer data high */ + +/* set BIM_RAM_BIST_START to start built-in self test for BIM read buffer. + * bit auto-clears when done with status read from _SUMMARY and _PASS bits. + */ +#define REG_BIM_RAM_BIST 0x102C /* BIM RAM (read buffer) BIST + control/status */ +#define BIM_RAM_BIST_RD_START 0x01 /* start BIST for BIM read buffer */ +#define BIM_RAM_BIST_WR_START 0x02 /* start BIST for BIM write buffer. + Cassini only. reserved in + Cassini+. */ +#define BIM_RAM_BIST_RD_PASS 0x04 /* summary BIST pass status for read + buffer. */ +#define BIM_RAM_BIST_WR_PASS 0x08 /* summary BIST pass status for write + buffer. Cassini only. reserved + in Cassini+. */ +#define BIM_RAM_BIST_RD_LOW_PASS 0x10 /* read low bank passes BIST */ +#define BIM_RAM_BIST_RD_HI_PASS 0x20 /* read high bank passes BIST */ +#define BIM_RAM_BIST_WR_LOW_PASS 0x40 /* write low bank passes BIST. + Cassini only. reserved in + Cassini+. */ +#define BIM_RAM_BIST_WR_HI_PASS 0x80 /* write high bank passes BIST. + Cassini only. reserved in + Cassini+. */ + +/* ASUN: i'm not sure what this does as it's not in the spec. + * DEFAULT: 0xFC + */ +#define REG_BIM_DIAG_MUX 0x1030 /* BIM diagnostic probe mux + select register */ + +/* enable probe monitoring mode and select data appearing on the P_A* bus. 
bit + * values for _SEL_HI_MASK and _SEL_LOW_MASK: + * 0x0: internal probe[7:0] (pci arb state, wtc empty w, wtc full w, wtc empty w, + * wtc empty r, post pci) + * 0x1: internal probe[15:8] (pci wbuf comp, pci wpkt comp, pci rbuf comp, + * pci rpkt comp, txdma wr req, txdma wr ack, + * txdma wr rdy, txdma wr xfr done) + * 0x2: internal probe[23:16] (txdma rd req, txdma rd ack, txdma rd rdy, rxdma rd, + * rd arb state, rd pci state) + * 0x3: internal probe[31:24] (rxdma req, rxdma ack, rxdma rdy, wrarb state, + * wrpci state) + * 0x4: pci io probe[7:0] 0x5: pci io probe[15:8] + * 0x6: pci io probe[23:16] 0x7: pci io probe[31:24] + * 0x8: pci io probe[39:32] 0x9: pci io probe[47:40] + * 0xa: pci io probe[55:48] 0xb: pci io probe[63:56] + * the following are not available in Cassini: + * 0xc: rx probe[7:0] 0xd: tx probe[7:0] + * 0xe: hp probe[7:0] 0xf: mac probe[7:0] + */ +#define REG_PLUS_PROBE_MUX_SELECT 0x1034 /* Cassini+: PROBE MUX SELECT */ +#define PROBE_MUX_EN 0x80000000 /* allow probe signals to be + driven on local bus P_A[15:0] + for debugging */ +#define PROBE_MUX_SUB_MUX_MASK 0x0000FF00 /* select sub module probe signals: + 0x03 = mac[1:0] + 0x0C = rx[1:0] + 0x30 = tx[1:0] + 0xC0 = hp[1:0] */ +#define PROBE_MUX_SEL_HI_MASK 0x000000F0 /* select which module to appear + on P_A[15:8]. see above for + values. */ +#define PROBE_MUX_SEL_LOW_MASK 0x0000000F /* select which module to appear + on P_A[7:0]. see above for + values. */ + +/* values mean the same thing as REG_INTR_MASK excep that it's for INTB. + DEFAULT: 0x1F */ +#define REG_PLUS_INTR_MASK_1 0x1038 /* Cassini+: interrupt mask + register 2 for INTB */ +#define REG_PLUS_INTRN_MASK(x) (REG_PLUS_INTR_MASK_1 + ((x) - 1)*16) +/* bits correspond to both _MASK and _STATUS registers. _ALT corresponds to + * all of the alternate (2-4) INTR registers while _1 corresponds to only + * _MASK_1 and _STATUS_1 registers. + * DEFAULT: 0x7 for MASK registers, 0x0 for ALIAS_CLEAR registers + */ +#define INTR_RX_DONE_ALT 0x01 +#define INTR_RX_COMP_FULL_ALT 0x02 +#define INTR_RX_COMP_AF_ALT 0x04 +#define INTR_RX_BUF_UNAVAIL_1 0x08 +#define INTR_RX_BUF_AE_1 0x10 /* almost empty */ +#define INTRN_MASK_RX_EN 0x80 +#define INTRN_MASK_CLEAR_ALL (INTR_RX_DONE_ALT | \ + INTR_RX_COMP_FULL_ALT | \ + INTR_RX_COMP_AF_ALT | \ + INTR_RX_BUF_UNAVAIL_1 | \ + INTR_RX_BUF_AE_1) +#define REG_PLUS_INTR_STATUS_1 0x103C /* Cassini+: interrupt status + register 2 for INTB. default: 0x1F */ +#define REG_PLUS_INTRN_STATUS(x) (REG_PLUS_INTR_STATUS_1 + ((x) - 1)*16) +#define INTR_STATUS_ALT_INTX_EN 0x80 /* generate INTX when one of the + flags are set. enables desc ring. */ + +#define REG_PLUS_ALIAS_CLEAR_1 0x1040 /* Cassini+: alias clear mask + register 2 for INTB */ +#define REG_PLUS_ALIASN_CLEAR(x) (REG_PLUS_ALIAS_CLEAR_1 + ((x) - 1)*16) + +#define REG_PLUS_INTR_STATUS_ALIAS_1 0x1044 /* Cassini+: interrupt status + register alias 2 for INTB */ +#define REG_PLUS_INTRN_STATUS_ALIAS(x) (REG_PLUS_INTR_STATUS_ALIAS_1 + ((x) - 1)*16) + +#define REG_SATURN_PCFG 0x106c /* pin configuration register for + integrated macphy */ + +#define SATURN_PCFG_TLA 0x00000001 /* 1 = phy actled */ +#define SATURN_PCFG_FLA 0x00000002 /* 1 = phy link10led */ +#define SATURN_PCFG_CLA 0x00000004 /* 1 = phy link100led */ +#define SATURN_PCFG_LLA 0x00000008 /* 1 = phy link1000led */ +#define SATURN_PCFG_RLA 0x00000010 /* 1 = phy duplexled */ +#define SATURN_PCFG_PDS 0x00000020 /* phy debug mode. 
+ 0 = normal */ +#define SATURN_PCFG_MTP 0x00000080 /* test point select */ +#define SATURN_PCFG_GMO 0x00000100 /* GMII observe. 1 = + GMII on SERDES pins for + monitoring. */ +#define SATURN_PCFG_FSI 0x00000200 /* 1 = freeze serdes/gmii. all + pins configed as outputs. + for power saving when using + internal phy. */ +#define SATURN_PCFG_LAD 0x00000800 /* 0 = mac core led ctrl + polarity from strapping + value. + 1 = mac core led ctrl + polarity active low. */ + + +/** transmit dma registers **/ +#define MAX_TX_RINGS_SHIFT 2 +#define MAX_TX_RINGS (1 << MAX_TX_RINGS_SHIFT) +#define MAX_TX_RINGS_MASK (MAX_TX_RINGS - 1) + +/* TX configuration. + * descr ring sizes size = 32 * (1 << n), n < 9. e.g., 0x8 = 8k. default: 0x8 + * DEFAULT: 0x3F000001 + */ +#define REG_TX_CFG 0x2004 /* TX config */ +#define TX_CFG_DMA_EN 0x00000001 /* enable TX DMA. if cleared, DMA + will stop after xfer of current + buffer has been completed. */ +#define TX_CFG_FIFO_PIO_SEL 0x00000002 /* TX DMA FIFO can be + accessed w/ FIFO addr + and data registers. + TX DMA should be + disabled. */ +#define TX_CFG_DESC_RING0_MASK 0x0000003C /* # desc entries in + ring 1. */ +#define TX_CFG_DESC_RING0_SHIFT 2 +#define TX_CFG_DESC_RINGN_MASK(a) (TX_CFG_DESC_RING0_MASK << (a)*4) +#define TX_CFG_DESC_RINGN_SHIFT(a) (TX_CFG_DESC_RING0_SHIFT + (a)*4) +#define TX_CFG_PACED_MODE 0x00100000 /* TX_ALL only set after + TX FIFO becomes empty. + if 0, TX_ALL set + if descr queue empty. */ +#define TX_CFG_DMA_RDPIPE_DIS 0x01000000 /* always set to 1 */ +#define TX_CFG_COMPWB_Q1 0x02000000 /* completion writeback happens at + the end of every packet kicked + through Q1. */ +#define TX_CFG_COMPWB_Q2 0x04000000 /* completion writeback happens at + the end of every packet kicked + through Q2. */ +#define TX_CFG_COMPWB_Q3 0x08000000 /* completion writeback happens at + the end of every packet kicked + through Q3 */ +#define TX_CFG_COMPWB_Q4 0x10000000 /* completion writeback happens at + the end of every packet kicked + through Q4 */ +#define TX_CFG_INTR_COMPWB_DIS 0x20000000 /* disable pre-interrupt completion + writeback */ +#define TX_CFG_CTX_SEL_MASK 0xC0000000 /* selects tx test port + connection + 0b00: tx mac req, + tx mac retry req, + tx ack and tx tag. + 0b01: txdma rd req, + txdma rd ack, + txdma rd rdy, + txdma rd type0 + 0b11: txdma wr req, + txdma wr ack, + txdma wr rdy, + txdma wr xfr done. */ +#define TX_CFG_CTX_SEL_SHIFT 30 + +/* 11-bit counters that point to next location in FIFO to be loaded/retrieved. + * used for diagnostics only. + */ +#define REG_TX_FIFO_WRITE_PTR 0x2014 /* TX FIFO write pointer */ +#define REG_TX_FIFO_SHADOW_WRITE_PTR 0x2018 /* TX FIFO shadow write + pointer. temp hold reg. + diagnostics only. */ +#define REG_TX_FIFO_READ_PTR 0x201C /* TX FIFO read pointer */ +#define REG_TX_FIFO_SHADOW_READ_PTR 0x2020 /* TX FIFO shadow read + pointer */ + +/* (ro) 11-bit up/down counter w/ # of frames currently in TX FIFO */ +#define REG_TX_FIFO_PKT_CNT 0x2024 /* TX FIFO packet counter */ + +/* current state of all state machines in TX */ +#define REG_TX_SM_1 0x2028 /* TX state machine reg #1 */ +#define TX_SM_1_CHAIN_MASK 0x000003FF /* chaining state machine */ +#define TX_SM_1_CSUM_MASK 0x00000C00 /* checksum state machine */ +#define TX_SM_1_FIFO_LOAD_MASK 0x0003F000 /* FIFO load state machine. + = 0x01 when TX disabled. */ +#define TX_SM_1_FIFO_UNLOAD_MASK 0x003C0000 /* FIFO unload state machine */ +#define TX_SM_1_CACHE_MASK 0x03C00000 /* desc. 
prefetch cache controller + state machine */ +#define TX_SM_1_CBQ_ARB_MASK 0xF8000000 /* CBQ arbiter state machine */ + +#define REG_TX_SM_2 0x202C /* TX state machine reg #2 */ +#define TX_SM_2_COMP_WB_MASK 0x07 /* completion writeback sm */ +#define TX_SM_2_SUB_LOAD_MASK 0x38 /* sub load state machine */ +#define TX_SM_2_KICK_MASK 0xC0 /* kick state machine */ + +/* 64-bit pointer to the transmit data buffer. only the 50 LSB are incremented + * while the upper 23 bits are taken from the TX descriptor + */ +#define REG_TX_DATA_PTR_LOW 0x2030 /* TX data pointer low */ +#define REG_TX_DATA_PTR_HI 0x2034 /* TX data pointer high */ + +/* 13 bit registers written by driver w/ descriptor value that follows + * last valid xmit descriptor. kick # and complete # values are used by + * the xmit dma engine to control tx descr fetching. if > 1 valid + * tx descr is available within the cache line being read, cassini will + * internally cache up to 4 of them. 0 on reset. _KICK = rw, _COMP = ro. + */ +#define REG_TX_KICK0 0x2038 /* TX kick reg #1 */ +#define REG_TX_KICKN(x) (REG_TX_KICK0 + (x)*4) +#define REG_TX_COMP0 0x2048 /* TX completion reg #1 */ +#define REG_TX_COMPN(x) (REG_TX_COMP0 + (x)*4) + +/* values of TX_COMPLETE_1-4 are written. each completion register + * is 2bytes in size and contiguous. 8B allocation w/ 8B alignment. + * NOTE: completion reg values are only written back prior to TX_INTME and + * TX_ALL interrupts. at all other times, the most up-to-date index values + * should be obtained from the REG_TX_COMPLETE_# registers. + * here's the layout: + * offset from base addr completion # byte + * 0 TX_COMPLETE_1_MSB + * 1 TX_COMPLETE_1_LSB + * 2 TX_COMPLETE_2_MSB + * 3 TX_COMPLETE_2_LSB + * 4 TX_COMPLETE_3_MSB + * 5 TX_COMPLETE_3_LSB + * 6 TX_COMPLETE_4_MSB + * 7 TX_COMPLETE_4_LSB + */ +#define TX_COMPWB_SIZE 8 +#define REG_TX_COMPWB_DB_LOW 0x2058 /* TX completion write back + base low */ +#define REG_TX_COMPWB_DB_HI 0x205C /* TX completion write back + base high */ +#define TX_COMPWB_MSB_MASK 0x00000000000000FFULL +#define TX_COMPWB_MSB_SHIFT 0 +#define TX_COMPWB_LSB_MASK 0x000000000000FF00ULL +#define TX_COMPWB_LSB_SHIFT 8 +#define TX_COMPWB_NEXT(x) ((x) >> 16) + +/* 53 MSB used as base address. 11 LSB assumed to be 0. TX desc pointer must + * be 2KB-aligned. */ +#define REG_TX_DB0_LOW 0x2060 /* TX descriptor base low #1 */ +#define REG_TX_DB0_HI 0x2064 /* TX descriptor base hi #1 */ +#define REG_TX_DBN_LOW(x) (REG_TX_DB0_LOW + (x)*8) +#define REG_TX_DBN_HI(x) (REG_TX_DB0_HI + (x)*8) + +/* 16-bit registers hold weights for the weighted round-robin of the + * four CBQ TX descr rings. weights correspond to # bytes xferred from + * host to TXFIFO in a round of WRR arbitration. can be set + * dynamically with new weights set upon completion of the current + * packet transfer from host memory to TXFIFO. a dummy write to any of + * these registers causes a queue1 pre-emption with all historical bw + * deficit data reset to 0 (useful when congestion requires a + * pre-emption/re-allocation of network bandwidth + */ +#define REG_TX_MAXBURST_0 0x2080 /* TX MaxBurst #1 */ +#define REG_TX_MAXBURST_1 0x2084 /* TX MaxBurst #2 */ +#define REG_TX_MAXBURST_2 0x2088 /* TX MaxBurst #3 */ +#define REG_TX_MAXBURST_3 0x208C /* TX MaxBurst #4 */ + +/* diagnostics access to any TX FIFO location. every access is 65 + * bits. _DATA_LOW = 32 LSB, _DATA_HI_T1/T0 = 32 MSB. _TAG = tag bit. + * writing _DATA_HI_T0 sets tag bit low, writing _DATA_HI_T1 sets tag + * bit high. 
TX_FIFO_PIO_SEL must be set for TX FIFO PIO access. if + * TX FIFO data integrity is desired, TX DMA should be + * disabled. _DATA_HI_Tx should be the last access of the sequence. + */ +#define REG_TX_FIFO_ADDR 0x2104 /* TX FIFO address */ +#define REG_TX_FIFO_TAG 0x2108 /* TX FIFO tag */ +#define REG_TX_FIFO_DATA_LOW 0x210C /* TX FIFO data low */ +#define REG_TX_FIFO_DATA_HI_T1 0x2110 /* TX FIFO data high t1 */ +#define REG_TX_FIFO_DATA_HI_T0 0x2114 /* TX FIFO data high t0 */ +#define REG_TX_FIFO_SIZE 0x2118 /* (ro) TX FIFO size = 0x090 = 9KB */ + +/* 9-bit register controls BIST of TX FIFO. bit set indicates that the BIST + * passed for the specified memory + */ +#define REG_TX_RAMBIST 0x211C /* TX RAMBIST control/status */ +#define TX_RAMBIST_STATE 0x01C0 /* progress state of RAMBIST + controller state machine */ +#define TX_RAMBIST_RAM33A_PASS 0x0020 /* RAM33A passed */ +#define TX_RAMBIST_RAM32A_PASS 0x0010 /* RAM32A passed */ +#define TX_RAMBIST_RAM33B_PASS 0x0008 /* RAM33B passed */ +#define TX_RAMBIST_RAM32B_PASS 0x0004 /* RAM32B passed */ +#define TX_RAMBIST_SUMMARY 0x0002 /* all RAM passed */ +#define TX_RAMBIST_START 0x0001 /* write 1 to start BIST. self + clears on completion. */ + +/** receive dma registers **/ +#define MAX_RX_DESC_RINGS 2 +#define MAX_RX_COMP_RINGS 4 + +/* receive DMA channel configuration. default: 0x80910 + * free ring size = (1 << n)*32 -> [32 - 8k] + * completion ring size = (1 << n)*128 -> [128 - 32k], n < 9 + * DEFAULT: 0x80910 + */ +#define REG_RX_CFG 0x4000 /* RX config */ +#define RX_CFG_DMA_EN 0x00000001 /* enable RX DMA. 0 stops + channel as soon as current + frame xfer has completed. + driver should disable MAC + for 200ms before disabling + RX */ +#define RX_CFG_DESC_RING_MASK 0x0000001E /* # desc entries in RX + free desc ring. + def: 0x8 = 8k */ +#define RX_CFG_DESC_RING_SHIFT 1 +#define RX_CFG_COMP_RING_MASK 0x000001E0 /* # desc entries in RX complete + ring. def: 0x8 = 32k */ +#define RX_CFG_COMP_RING_SHIFT 5 +#define RX_CFG_BATCH_DIS 0x00000200 /* disable receive desc + batching. def: 0x0 = + enabled */ +#define RX_CFG_SWIVEL_MASK 0x00001C00 /* byte offset of the 1st + data byte of the packet + w/in 8 byte boundares. + this swivels the data + DMA'ed to header + buffers, jumbo buffers + when header split is not + requested and MTU sized + buffers. def: 0x2 */ +#define RX_CFG_SWIVEL_SHIFT 10 + +/* cassini+ only */ +#define RX_CFG_DESC_RING1_MASK 0x000F0000 /* # of desc entries in + RX free desc ring 2. + def: 0x8 = 8k */ +#define RX_CFG_DESC_RING1_SHIFT 16 + + +/* the page size register allows cassini chips to do the following with + * received data: + * [--------------------------------------------------------------] page + * [off][buf1][pad][off][buf2][pad][off][buf3][pad][off][buf4][pad] + * |--------------| = PAGE_SIZE_BUFFER_STRIDE + * page = PAGE_SIZE + * offset = PAGE_SIZE_MTU_OFF + * for the above example, MTU_BUFFER_COUNT = 4. + * NOTE: as is apparent, you need to ensure that the following holds: + * MTU_BUFFER_COUNT <= PAGE_SIZE/PAGE_SIZE_BUFFER_STRIDE + * DEFAULT: 0x48002002 (8k pages) + */ +#define REG_RX_PAGE_SIZE 0x4004 /* RX page size */ +#define RX_PAGE_SIZE_MASK 0x00000003 /* size of pages pointed to + by receive descriptors. + if jumbo buffers are + supported the page size + should not be < 8k. + 0b00 = 2k, 0b01 = 4k + 0b10 = 8k, 0b11 = 16k + DEFAULT: 8k */ +#define RX_PAGE_SIZE_SHIFT 0 +#define RX_PAGE_SIZE_MTU_COUNT_MASK 0x00007800 /* # of MTU buffers the hw + packs into a page. 
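+						       (added illustration,
+						       not from the spec: with
+						       an 8k page and 4 MTU
+						       buffers, the buffer
+						       stride can be at most
+						       8k/4 = 2k, per the
+						       constraint above.)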
+ DEFAULT: 4 */ +#define RX_PAGE_SIZE_MTU_COUNT_SHIFT 11 +#define RX_PAGE_SIZE_MTU_STRIDE_MASK 0x18000000 /* # of bytes that separate + each MTU buffer + + offset from each + other. + 0b00 = 1k, 0b01 = 2k + 0b10 = 4k, 0b11 = 8k + DEFAULT: 0x1 */ +#define RX_PAGE_SIZE_MTU_STRIDE_SHIFT 27 +#define RX_PAGE_SIZE_MTU_OFF_MASK 0xC0000000 /* offset in each page that + hw writes the MTU buffer + into. + 0b00 = 0, + 0b01 = 64 bytes + 0b10 = 96, 0b11 = 128 + DEFAULT: 0x1 */ +#define RX_PAGE_SIZE_MTU_OFF_SHIFT 30 + +/* 11-bit counter points to next location in RX FIFO to be loaded/read. + * shadow write pointers enable retries in case of early receive aborts. + * DEFAULT: 0x0. generated on 64-bit boundaries. + */ +#define REG_RX_FIFO_WRITE_PTR 0x4008 /* RX FIFO write pointer */ +#define REG_RX_FIFO_READ_PTR 0x400C /* RX FIFO read pointer */ +#define REG_RX_IPP_FIFO_SHADOW_WRITE_PTR 0x4010 /* RX IPP FIFO shadow write + pointer */ +#define REG_RX_IPP_FIFO_SHADOW_READ_PTR 0x4014 /* RX IPP FIFO shadow read + pointer */ +#define REG_RX_IPP_FIFO_READ_PTR 0x400C /* RX IPP FIFO read + pointer. (8-bit counter) */ + +/* current state of RX DMA state engines + other info + * DEFAULT: 0x0 + */ +#define REG_RX_DEBUG 0x401C /* RX debug */ +#define RX_DEBUG_LOAD_STATE_MASK 0x0000000F /* load state machine w/ MAC: + 0x0 = idle, 0x1 = load_bop + 0x2 = load 1, 0x3 = load 2 + 0x4 = load 3, 0x5 = load 4 + 0x6 = last detect + 0x7 = wait req + 0x8 = wait req statuss 1st + 0x9 = load st + 0xa = bubble mac + 0xb = error */ +#define RX_DEBUG_LM_STATE_MASK 0x00000070 /* load state machine w/ HP and + RX FIFO: + 0x0 = idle, 0x1 = hp xfr + 0x2 = wait hp ready + 0x3 = wait flow code + 0x4 = fifo xfer + 0x5 = make status + 0x6 = csum ready + 0x7 = error */ +#define RX_DEBUG_FC_STATE_MASK 0x000000180 /* flow control state machine + w/ MAC: + 0x0 = idle + 0x1 = wait xoff ack + 0x2 = wait xon + 0x3 = wait xon ack */ +#define RX_DEBUG_DATA_STATE_MASK 0x000001E00 /* unload data state machine + states: + 0x0 = idle data + 0x1 = header begin + 0x2 = xfer header + 0x3 = xfer header ld + 0x4 = mtu begin + 0x5 = xfer mtu + 0x6 = xfer mtu ld + 0x7 = jumbo begin + 0x8 = xfer jumbo + 0x9 = xfer jumbo ld + 0xa = reas begin + 0xb = xfer reas + 0xc = flush tag + 0xd = xfer reas ld + 0xe = error + 0xf = bubble idle */ +#define RX_DEBUG_DESC_STATE_MASK 0x0001E000 /* unload desc state machine + states: + 0x0 = idle desc + 0x1 = wait ack + 0x9 = wait ack 2 + 0x2 = fetch desc 1 + 0xa = fetch desc 2 + 0x3 = load ptrs + 0x4 = wait dma + 0x5 = wait ack batch + 0x6 = post batch + 0x7 = xfr done */ +#define RX_DEBUG_INTR_READ_PTR_MASK 0x30000000 /* interrupt read ptr of the + interrupt queue */ +#define RX_DEBUG_INTR_WRITE_PTR_MASK 0xC0000000 /* interrupt write pointer + of the interrupt queue */ + +/* flow control frames are emmitted using two PAUSE thresholds: + * XOFF PAUSE uses pause time value pre-programmed in the Send PAUSE MAC reg + * XON PAUSE uses a pause time of 0. granularity of threshold is 64bytes. + * PAUSE thresholds defined in terms of FIFO occupancy and may be translated + * into FIFO vacancy using RX_FIFO_SIZE. setting ON will trigger XON frames + * when FIFO reaches 0. OFF threshold should not be > size of RX FIFO. max + * value is is 0x6F. 
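+ * as a worked illustration of the 64B granularity (added, not from the
+ * spec): an OFF value of 0x40 (64 quanta) corresponds to 64 * 64B = 4KB
+ * of RX FIFO occupancy before an XOFF PAUSE frame is emitted.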
+ * DEFAULT: 0x00078 + */ +#define REG_RX_PAUSE_THRESH 0x4020 /* RX pause thresholds */ +#define RX_PAUSE_THRESH_QUANTUM 64 +#define RX_PAUSE_THRESH_OFF_MASK 0x000001FF /* XOFF PAUSE emitted when + RX FIFO occupancy > + value*64B */ +#define RX_PAUSE_THRESH_OFF_SHIFT 0 +#define RX_PAUSE_THRESH_ON_MASK 0x001FF000 /* XON PAUSE emitted after + emitting XOFF PAUSE when RX + FIFO occupancy falls below + this value*64B. must be + < XOFF threshold. if = + RX_FIFO_SIZE< XON frames are + never emitted. */ +#define RX_PAUSE_THRESH_ON_SHIFT 12 + +/* 13-bit register used to control RX desc fetching and intr generation. if 4+ + * valid RX descriptors are available, Cassini will read 4 at a time. + * writing N means that all desc up to *but* excluding N are available. N must + * be a multiple of 4 (N % 4 = 0). first desc should be cache-line aligned. + * DEFAULT: 0 on reset + */ +#define REG_RX_KICK 0x4024 /* RX kick reg */ + +/* 8KB aligned 64-bit pointer to the base of the RX free/completion rings. + * lower 13 bits of the low register are hard-wired to 0. + */ +#define REG_RX_DB_LOW 0x4028 /* RX descriptor ring + base low */ +#define REG_RX_DB_HI 0x402C /* RX descriptor ring + base hi */ +#define REG_RX_CB_LOW 0x4030 /* RX completion ring + base low */ +#define REG_RX_CB_HI 0x4034 /* RX completion ring + base hi */ +/* 13-bit register indicate desc used by cassini for receive frames. used + * for diagnostic purposes. + * DEFAULT: 0 on reset + */ +#define REG_RX_COMP 0x4038 /* (ro) RX completion */ + +/* HEAD and TAIL are used to control RX desc posting and interrupt + * generation. hw moves the head register to pass ownership to sw. sw + * moves the tail register to pass ownership back to hw. to give all + * entries to hw, set TAIL = HEAD. if HEAD and TAIL indicate that no + * more entries are available, DMA will pause and an interrupt will be + * generated to indicate no more entries are available. sw can use + * this interrupt to reduce the # of times it must update the + * completion tail register. + * DEFAULT: 0 on reset + */ +#define REG_RX_COMP_HEAD 0x403C /* RX completion head */ +#define REG_RX_COMP_TAIL 0x4040 /* RX completion tail */ + +/* values used for receive interrupt blanking. loaded each time the ISR is read + * DEFAULT: 0x00000000 + */ +#define REG_RX_BLANK 0x4044 /* RX blanking register + for ISR read */ +#define RX_BLANK_INTR_PKT_MASK 0x000001FF /* RX_DONE intr asserted if + this many sets of completion + writebacks (up to 2 packets) + occur since the last time + the ISR was read. 0 = no + packet blanking */ +#define RX_BLANK_INTR_PKT_SHIFT 0 +#define RX_BLANK_INTR_TIME_MASK 0x3FFFF000 /* RX_DONE interrupt asserted + if that many clocks were + counted since last time the + ISR was read. + each count is 512 core + clocks (125MHz). 0 = no + time blanking */ +#define RX_BLANK_INTR_TIME_SHIFT 12 + +/* values used for interrupt generation based on threshold values of how + * many free desc and completion entries are available for hw use. + * DEFAULT: 0x00000000 + */ +#define REG_RX_AE_THRESH 0x4048 /* RX almost empty + thresholds */ +#define RX_AE_THRESH_FREE_MASK 0x00001FFF /* RX_BUF_AE will be + generated if # desc + avail for hw use <= + # */ +#define RX_AE_THRESH_FREE_SHIFT 0 +#define RX_AE_THRESH_COMP_MASK 0x0FFFE000 /* RX_COMP_AE will be + generated if # of + completion entries + avail for hw use <= + # */ +#define RX_AE_THRESH_COMP_SHIFT 13 + +/* probabilities for random early drop (RED) thresholds on a FIFO threshold + * basis. 
probability should increase when the FIFO level increases. control + * packets are never dropped and not counted in stats. probability programmed + * on a 12.5% granularity. e.g., 0x1 = 1/8 packets dropped. + * DEFAULT: 0x00000000 + */ +#define REG_RX_RED 0x404C /* RX random early detect enable */ +#define RX_RED_4K_6K_FIFO_MASK 0x000000FF /* 4KB < FIFO thresh < 6KB */ +#define RX_RED_6K_8K_FIFO_MASK 0x0000FF00 /* 6KB < FIFO thresh < 8KB */ +#define RX_RED_8K_10K_FIFO_MASK 0x00FF0000 /* 8KB < FIFO thresh < 10KB */ +#define RX_RED_10K_12K_FIFO_MASK 0xFF000000 /* 10KB < FIFO thresh < 12KB */ + +/* FIFO fullness levels for RX FIFO, RX control FIFO, and RX IPP FIFO. + * RX control FIFO = # of packets in RX FIFO. + * DEFAULT: 0x0 + */ +#define REG_RX_FIFO_FULLNESS 0x4050 /* (ro) RX FIFO fullness */ +#define RX_FIFO_FULLNESS_RX_FIFO_MASK 0x3FF80000 /* level w/ 8B granularity */ +#define RX_FIFO_FULLNESS_IPP_FIFO_MASK 0x0007FF00 /* level w/ 8B granularity */ +#define RX_FIFO_FULLNESS_RX_PKT_MASK 0x000000FF /* # packets in RX FIFO */ +#define REG_RX_IPP_PACKET_COUNT 0x4054 /* RX IPP packet counter */ +#define REG_RX_WORK_DMA_PTR_LOW 0x4058 /* RX working DMA ptr low */ +#define REG_RX_WORK_DMA_PTR_HI 0x405C /* RX working DMA ptr + high */ + +/* BIST testing ro RX FIFO, RX control FIFO, and RX IPP FIFO. only RX BIST + * START/COMPLETE is writeable. START will clear when the BIST has completed + * checking all 17 RAMS. + * DEFAULT: 0bxxxx xxxxx xxxx xxxx xxxx x000 0000 0000 00x0 + */ +#define REG_RX_BIST 0x4060 /* (ro) RX BIST */ +#define RX_BIST_32A_PASS 0x80000000 /* RX FIFO 32A passed */ +#define RX_BIST_33A_PASS 0x40000000 /* RX FIFO 33A passed */ +#define RX_BIST_32B_PASS 0x20000000 /* RX FIFO 32B passed */ +#define RX_BIST_33B_PASS 0x10000000 /* RX FIFO 33B passed */ +#define RX_BIST_32C_PASS 0x08000000 /* RX FIFO 32C passed */ +#define RX_BIST_33C_PASS 0x04000000 /* RX FIFO 33C passed */ +#define RX_BIST_IPP_32A_PASS 0x02000000 /* RX IPP FIFO 33B passed */ +#define RX_BIST_IPP_33A_PASS 0x01000000 /* RX IPP FIFO 33A passed */ +#define RX_BIST_IPP_32B_PASS 0x00800000 /* RX IPP FIFO 32B passed */ +#define RX_BIST_IPP_33B_PASS 0x00400000 /* RX IPP FIFO 33B passed */ +#define RX_BIST_IPP_32C_PASS 0x00200000 /* RX IPP FIFO 32C passed */ +#define RX_BIST_IPP_33C_PASS 0x00100000 /* RX IPP FIFO 33C passed */ +#define RX_BIST_CTRL_32_PASS 0x00800000 /* RX CTRL FIFO 32 passed */ +#define RX_BIST_CTRL_33_PASS 0x00400000 /* RX CTRL FIFO 33 passed */ +#define RX_BIST_REAS_26A_PASS 0x00200000 /* RX Reas 26A passed */ +#define RX_BIST_REAS_26B_PASS 0x00100000 /* RX Reas 26B passed */ +#define RX_BIST_REAS_27_PASS 0x00080000 /* RX Reas 27 passed */ +#define RX_BIST_STATE_MASK 0x00078000 /* BIST state machine */ +#define RX_BIST_SUMMARY 0x00000002 /* when BIST complete, + summary pass bit + contains AND of BIST + results of all 16 + RAMS */ +#define RX_BIST_START 0x00000001 /* write 1 to start + BIST. self clears + on completion. */ + +/* next location in RX CTRL FIFO that will be loaded w/ data from RX IPP/read + * from to retrieve packet control info. + * DEFAULT: 0 + */ +#define REG_RX_CTRL_FIFO_WRITE_PTR 0x4064 /* (ro) RX control FIFO + write ptr */ +#define REG_RX_CTRL_FIFO_READ_PTR 0x4068 /* (ro) RX control FIFO read + ptr */ + +/* receive interrupt blanking. loaded each time interrupt alias register is + * read. 
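+ * per the field description below, each time count is 512 core clocks
+ * (125MHz), i.e. ~4.1us; as an added illustration, a TIME field of 30
+ * would therefore blank RX_DONE for roughly 30 * 4.1us ~= 123us after an
+ * alias read.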
+ * DEFAULT: 0x0 + */ +#define REG_RX_BLANK_ALIAS_READ 0x406C /* RX blanking register for + alias read */ +#define RX_BAR_INTR_PACKET_MASK 0x000001FF /* assert RX_DONE if # + completion writebacks + > # since last ISR + read. 0 = no + blanking. up to 2 + packets per + completion wb. */ +#define RX_BAR_INTR_TIME_MASK 0x3FFFF000 /* assert RX_DONE if # + clocks > # since last + ISR read. each count + is 512 core clocks + (125MHz). 0 = no + blanking. */ + +/* diagnostic access to RX FIFO. 32 LSB accessed via DATA_LOW. 32 MSB accessed + * via DATA_HI_T0 or DATA_HI_T1. TAG reads the tag bit. writing HI_T0 + * will unset the tag bit while writing HI_T1 will set the tag bit. to reset + * to normal operation after diagnostics, write to address location 0x0. + * RX_DMA_EN bit must be set to 0x0 for RX FIFO PIO access. DATA_HI should + * be the last write access of a write sequence. + * DEFAULT: undefined + */ +#define REG_RX_FIFO_ADDR 0x4080 /* RX FIFO address */ +#define REG_RX_FIFO_TAG 0x4084 /* RX FIFO tag */ +#define REG_RX_FIFO_DATA_LOW 0x4088 /* RX FIFO data low */ +#define REG_RX_FIFO_DATA_HI_T0 0x408C /* RX FIFO data high T0 */ +#define REG_RX_FIFO_DATA_HI_T1 0x4090 /* RX FIFO data high T1 */ + +/* diagnostic assess to RX CTRL FIFO. 8-bit FIFO_ADDR holds address of + * 81 bit control entry and 6 bit flow id. LOW and MID are both 32-bit + * accesses. HI is 7-bits with 6-bit flow id and 1 bit control + * word. RX_DMA_EN must be 0 for RX CTRL FIFO PIO access. DATA_HI + * should be last write access of the write sequence. + * DEFAULT: undefined + */ +#define REG_RX_CTRL_FIFO_ADDR 0x4094 /* RX Control FIFO and + Batching FIFO addr */ +#define REG_RX_CTRL_FIFO_DATA_LOW 0x4098 /* RX Control FIFO data + low */ +#define REG_RX_CTRL_FIFO_DATA_MID 0x409C /* RX Control FIFO data + mid */ +#define REG_RX_CTRL_FIFO_DATA_HI 0x4100 /* RX Control FIFO data + hi and flow id */ +#define RX_CTRL_FIFO_DATA_HI_CTRL 0x0001 /* upper bit of ctrl word */ +#define RX_CTRL_FIFO_DATA_HI_FLOW_MASK 0x007E /* flow id */ + +/* diagnostic access to RX IPP FIFO. same semantics as RX_FIFO. + * DEFAULT: undefined + */ +#define REG_RX_IPP_FIFO_ADDR 0x4104 /* RX IPP FIFO address */ +#define REG_RX_IPP_FIFO_TAG 0x4108 /* RX IPP FIFO tag */ +#define REG_RX_IPP_FIFO_DATA_LOW 0x410C /* RX IPP FIFO data low */ +#define REG_RX_IPP_FIFO_DATA_HI_T0 0x4110 /* RX IPP FIFO data high + T0 */ +#define REG_RX_IPP_FIFO_DATA_HI_T1 0x4114 /* RX IPP FIFO data high + T1 */ + +/* 64-bit pointer to receive data buffer in host memory used for headers and + * small packets. MSB in high register. loaded by DMA state machine and + * increments as DMA writes receive data. only 50 LSB are incremented. top + * 13 bits taken from RX descriptor. + * DEFAULT: undefined + */ +#define REG_RX_HEADER_PAGE_PTR_LOW 0x4118 /* (ro) RX header page ptr + low */ +#define REG_RX_HEADER_PAGE_PTR_HI 0x411C /* (ro) RX header page ptr + high */ +#define REG_RX_MTU_PAGE_PTR_LOW 0x4120 /* (ro) RX MTU page pointer + low */ +#define REG_RX_MTU_PAGE_PTR_HI 0x4124 /* (ro) RX MTU page pointer + high */ + +/* PIO diagnostic access to RX reassembly DMA Table RAM. 6-bit register holds + * one of 64 79-bit locations in the RX Reassembly DMA table and the addr of + * one of the 64 byte locations in the Batching table. LOW holds 32 LSB. + * MID holds the next 32 LSB. HIGH holds the 15 MSB. RX_DMA_EN must be set + * to 0 for PIO access. DATA_HIGH should be last write of write sequence. 
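+ * a hedged sketch of a write sequence, following only the rules stated
+ * here (no ordering is mandated beyond DATA_HIGH last): clear RX_DMA_EN,
+ * write the 6-bit index to the table address register below, then write
+ * DATA_LOW, DATA_MID and finally DATA_HIGH.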
+ * layout: + * reassmbl ptr [78:15] | reassmbl index [14:1] | reassmbl entry valid [0] + * DEFAULT: undefined + */ +#define REG_RX_TABLE_ADDR 0x4128 /* RX reassembly DMA table + address */ +#define RX_TABLE_ADDR_MASK 0x0000003F /* address mask */ + +#define REG_RX_TABLE_DATA_LOW 0x412C /* RX reassembly DMA table + data low */ +#define REG_RX_TABLE_DATA_MID 0x4130 /* RX reassembly DMA table + data mid */ +#define REG_RX_TABLE_DATA_HI 0x4134 /* RX reassembly DMA table + data high */ + +/* cassini+ only */ +/* 8KB aligned 64-bit pointer to base of RX rings. lower 13 bits hardwired to + * 0. same semantics as primary desc/complete rings. + */ +#define REG_PLUS_RX_DB1_LOW 0x4200 /* RX descriptor ring + 2 base low */ +#define REG_PLUS_RX_DB1_HI 0x4204 /* RX descriptor ring + 2 base high */ +#define REG_PLUS_RX_CB1_LOW 0x4208 /* RX completion ring + 2 base low. 4 total */ +#define REG_PLUS_RX_CB1_HI 0x420C /* RX completion ring + 2 base high. 4 total */ +#define REG_PLUS_RX_CBN_LOW(x) (REG_PLUS_RX_CB1_LOW + 8*((x) - 1)) +#define REG_PLUS_RX_CBN_HI(x) (REG_PLUS_RX_CB1_HI + 8*((x) - 1)) +#define REG_PLUS_RX_KICK1 0x4220 /* RX Kick 2 register */ +#define REG_PLUS_RX_COMP1 0x4224 /* (ro) RX completion 2 + reg */ +#define REG_PLUS_RX_COMP1_HEAD 0x4228 /* (ro) RX completion 2 + head reg. 4 total. */ +#define REG_PLUS_RX_COMP1_TAIL 0x422C /* RX completion 2 + tail reg. 4 total. */ +#define REG_PLUS_RX_COMPN_HEAD(x) (REG_PLUS_RX_COMP1_HEAD + 8*((x) - 1)) +#define REG_PLUS_RX_COMPN_TAIL(x) (REG_PLUS_RX_COMP1_TAIL + 8*((x) - 1)) +#define REG_PLUS_RX_AE1_THRESH 0x4240 /* RX almost empty 2 + thresholds */ +#define RX_AE1_THRESH_FREE_MASK RX_AE_THRESH_FREE_MASK +#define RX_AE1_THRESH_FREE_SHIFT RX_AE_THRESH_FREE_SHIFT + +/** header parser registers **/ + +/* RX parser configuration register. + * DEFAULT: 0x1651004 + */ +#define REG_HP_CFG 0x4140 /* header parser + configuration reg */ +#define HP_CFG_PARSE_EN 0x00000001 /* enab header parsing */ +#define HP_CFG_NUM_CPU_MASK 0x000000FC /* # processors + 0 = 64. 0x3f = 63 */ +#define HP_CFG_NUM_CPU_SHIFT 2 +#define HP_CFG_SYN_INC_MASK 0x00000100 /* SYN bit won't increment + TCP seq # by one when + stored in FDBM */ +#define HP_CFG_TCP_THRESH_MASK 0x000FFE00 /* # bytes of TCP data + needed to be considered + for reassembly */ +#define HP_CFG_TCP_THRESH_SHIFT 9 + +/* access to RX Instruction RAM. 5-bit register/counter holds addr + * of 39 bit entry to be read/written. 32 LSB in _DATA_LOW. 7 MSB in _DATA_HI. + * RX_DMA_EN must be 0 for RX instr PIO access. DATA_HI should be last access + * of sequence. 
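+ * (cross-reference, added: the cas_hp_inst_t "header parser firmware"
+ * entries near the end of this file describe these 39-bit instructions;
+ * the _LOW/_MID/_HI field masks below give the bit packing.)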
+ * DEFAULT: undefined + */ +#define REG_HP_INSTR_RAM_ADDR 0x4144 /* HP instruction RAM + address */ +#define HP_INSTR_RAM_ADDR_MASK 0x01F /* 5-bit mask */ +#define REG_HP_INSTR_RAM_DATA_LOW 0x4148 /* HP instruction RAM + data low */ +#define HP_INSTR_RAM_LOW_OUTMASK_MASK 0x0000FFFF +#define HP_INSTR_RAM_LOW_OUTMASK_SHIFT 0 +#define HP_INSTR_RAM_LOW_OUTSHIFT_MASK 0x000F0000 +#define HP_INSTR_RAM_LOW_OUTSHIFT_SHIFT 16 +#define HP_INSTR_RAM_LOW_OUTEN_MASK 0x00300000 +#define HP_INSTR_RAM_LOW_OUTEN_SHIFT 20 +#define HP_INSTR_RAM_LOW_OUTARG_MASK 0xFFC00000 +#define HP_INSTR_RAM_LOW_OUTARG_SHIFT 22 +#define REG_HP_INSTR_RAM_DATA_MID 0x414C /* HP instruction RAM + data mid */ +#define HP_INSTR_RAM_MID_OUTARG_MASK 0x00000003 +#define HP_INSTR_RAM_MID_OUTARG_SHIFT 0 +#define HP_INSTR_RAM_MID_OUTOP_MASK 0x0000003C +#define HP_INSTR_RAM_MID_OUTOP_SHIFT 2 +#define HP_INSTR_RAM_MID_FNEXT_MASK 0x000007C0 +#define HP_INSTR_RAM_MID_FNEXT_SHIFT 6 +#define HP_INSTR_RAM_MID_FOFF_MASK 0x0003F800 +#define HP_INSTR_RAM_MID_FOFF_SHIFT 11 +#define HP_INSTR_RAM_MID_SNEXT_MASK 0x007C0000 +#define HP_INSTR_RAM_MID_SNEXT_SHIFT 18 +#define HP_INSTR_RAM_MID_SOFF_MASK 0x3F800000 +#define HP_INSTR_RAM_MID_SOFF_SHIFT 23 +#define HP_INSTR_RAM_MID_OP_MASK 0xC0000000 +#define HP_INSTR_RAM_MID_OP_SHIFT 30 +#define REG_HP_INSTR_RAM_DATA_HI 0x4150 /* HP instruction RAM + data high */ +#define HP_INSTR_RAM_HI_VAL_MASK 0x0000FFFF +#define HP_INSTR_RAM_HI_VAL_SHIFT 0 +#define HP_INSTR_RAM_HI_MASK_MASK 0xFFFF0000 +#define HP_INSTR_RAM_HI_MASK_SHIFT 16 + +/* PIO access into RX Header parser data RAM and flow database. + * 11-bit register. Data fills the LSB portion of bus if less than 32 bits. + * DATA_RAM: write RAM_FDB_DATA with index to access DATA_RAM. + * RAM bytes = 4*(x - 1) + [3:0]. e.g., 0 -> [3:0], 31 -> [123:120] + * FLOWDB: write DATA_RAM_FDB register and then read/write FDB1-12 to access + * flow database. + * RX_DMA_EN must be 0 for RX parser RAM PIO access. RX Parser RAM data reg + * should be the last write access of the write sequence. + * DEFAULT: undefined + */ +#define REG_HP_DATA_RAM_FDB_ADDR 0x4154 /* HP data and FDB + RAM address */ +#define HP_DATA_RAM_FDB_DATA_MASK 0x001F /* select 1 of 86 byte + locations in header + parser data ram to + read/write */ +#define HP_DATA_RAM_FDB_FDB_MASK 0x3F00 /* 1 of 64 353-bit locations + in the flow database */ +#define REG_HP_DATA_RAM_DATA 0x4158 /* HP data RAM data */ + +/* HP flow database registers: 1 - 12, 0x415C - 0x4188, 4 8-bit bytes + * FLOW_DB(1) = IP_SA[127:96], FLOW_DB(2) = IP_SA[95:64] + * FLOW_DB(3) = IP_SA[63:32], FLOW_DB(4) = IP_SA[31:0] + * FLOW_DB(5) = IP_DA[127:96], FLOW_DB(6) = IP_DA[95:64] + * FLOW_DB(7) = IP_DA[63:32], FLOW_DB(8) = IP_DA[31:0] + * FLOW_DB(9) = {TCP_SP[15:0],TCP_DP[15:0]} + * FLOW_DB(10) = bit 0 has value for flow valid + * FLOW_DB(11) = TCP_SEQ[63:32], FLOW_DB(12) = TCP_SEQ[31:0] + */ +#define REG_HP_FLOW_DB0 0x415C /* HP flow database 1 reg */ +#define REG_HP_FLOW_DBN(x) (REG_HP_FLOW_DB0 + (x)*4) + +/* diagnostics for RX Header Parser block. + * ASUN: the header parser state machine register is used for diagnostics + * purposes. however, the spec doesn't have any details on it. 
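+ *
+ * a hedged usage sketch follows; it is illustrative only and not part of
+ * the register description.  cas_hp_flow_db_word() is a hypothetical
+ * helper name, and `regs' is assumed to be the ioremap()ed Cassini
+ * register base.
+ */
+
+#if 0	/* illustrative sketch only, not built */
+/* read one 32-bit word of the currently selected flow database entry.
+ * assumes the FDB index was already written to the data/FDB address
+ * register above and that RX_DMA_EN is 0, as required for HP PIO access.
+ * n = 0..11 selects FLOW_DB(1)..FLOW_DB(12) as laid out above.
+ */
+static inline u32 cas_hp_flow_db_word(void __iomem *regs, int n)
+{
+	return readl(regs + REG_HP_FLOW_DBN(n));
+}
+#endif
+
+/* HP state machine and status registers used for the diagnostics
+ * described above: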
+ */ +#define REG_HP_STATE_MACHINE 0x418C /* (ro) HP state machine */ +#define REG_HP_STATUS0 0x4190 /* (ro) HP status 1 */ +#define HP_STATUS0_SAP_MASK 0xFFFF0000 /* SAP */ +#define HP_STATUS0_L3_OFF_MASK 0x0000FE00 /* L3 offset */ +#define HP_STATUS0_LB_CPUNUM_MASK 0x000001F8 /* load balancing CPU + number */ +#define HP_STATUS0_HRP_OPCODE_MASK 0x00000007 /* HRP opcode */ + +#define REG_HP_STATUS1 0x4194 /* (ro) HP status 2 */ +#define HP_STATUS1_ACCUR2_MASK 0xE0000000 /* accu R2[6:4] */ +#define HP_STATUS1_FLOWID_MASK 0x1F800000 /* flow id */ +#define HP_STATUS1_TCP_OFF_MASK 0x007F0000 /* tcp payload offset */ +#define HP_STATUS1_TCP_SIZE_MASK 0x0000FFFF /* tcp payload size */ + +#define REG_HP_STATUS2 0x4198 /* (ro) HP status 3 */ +#define HP_STATUS2_ACCUR2_MASK 0xF0000000 /* accu R2[3:0] */ +#define HP_STATUS2_CSUM_OFF_MASK 0x07F00000 /* checksum start + start offset */ +#define HP_STATUS2_ACCUR1_MASK 0x000FE000 /* accu R1 */ +#define HP_STATUS2_FORCE_DROP 0x00001000 /* force drop */ +#define HP_STATUS2_BWO_REASSM 0x00000800 /* batching w/o + reassembly */ +#define HP_STATUS2_JH_SPLIT_EN 0x00000400 /* jumbo header split + enable */ +#define HP_STATUS2_FORCE_TCP_NOCHECK 0x00000200 /* force tcp no payload + check */ +#define HP_STATUS2_DATA_MASK_ZERO 0x00000100 /* mask of data length + equal to zero */ +#define HP_STATUS2_FORCE_TCP_CHECK 0x00000080 /* force tcp payload + chk */ +#define HP_STATUS2_MASK_TCP_THRESH 0x00000040 /* mask of payload + threshold */ +#define HP_STATUS2_NO_ASSIST 0x00000020 /* no assist */ +#define HP_STATUS2_CTRL_PACKET_FLAG 0x00000010 /* control packet flag */ +#define HP_STATUS2_TCP_FLAG_CHECK 0x00000008 /* tcp flag check */ +#define HP_STATUS2_SYN_FLAG 0x00000004 /* syn flag */ +#define HP_STATUS2_TCP_CHECK 0x00000002 /* tcp payload chk */ +#define HP_STATUS2_TCP_NOCHECK 0x00000001 /* tcp no payload chk */ + +/* BIST for header parser(HP) and flow database memories (FDBM). set _START + * to start BIST. controller clears _START on completion. _START can also + * be cleared to force termination of BIST. a bit set indicates that that + * memory passed its BIST. + */ +#define REG_HP_RAM_BIST 0x419C /* HP RAM BIST reg */ +#define HP_RAM_BIST_HP_DATA_PASS 0x80000000 /* HP data ram */ +#define HP_RAM_BIST_HP_INSTR0_PASS 0x40000000 /* HP instr ram 0 */ +#define HP_RAM_BIST_HP_INSTR1_PASS 0x20000000 /* HP instr ram 1 */ +#define HP_RAM_BIST_HP_INSTR2_PASS 0x10000000 /* HP instr ram 2 */ +#define HP_RAM_BIST_FDBM_AGE0_PASS 0x08000000 /* FDBM aging RAM0 */ +#define HP_RAM_BIST_FDBM_AGE1_PASS 0x04000000 /* FDBM aging RAM1 */ +#define HP_RAM_BIST_FDBM_FLOWID00_PASS 0x02000000 /* FDBM flowid RAM0 + bank 0 */ +#define HP_RAM_BIST_FDBM_FLOWID10_PASS 0x01000000 /* FDBM flowid RAM1 + bank 0 */ +#define HP_RAM_BIST_FDBM_FLOWID20_PASS 0x00800000 /* FDBM flowid RAM2 + bank 0 */ +#define HP_RAM_BIST_FDBM_FLOWID30_PASS 0x00400000 /* FDBM flowid RAM3 + bank 0 */ +#define HP_RAM_BIST_FDBM_FLOWID01_PASS 0x00200000 /* FDBM flowid RAM0 + bank 1 */ +#define HP_RAM_BIST_FDBM_FLOWID11_PASS 0x00100000 /* FDBM flowid RAM1 + bank 2 */ +#define HP_RAM_BIST_FDBM_FLOWID21_PASS 0x00080000 /* FDBM flowid RAM2 + bank 1 */ +#define HP_RAM_BIST_FDBM_FLOWID31_PASS 0x00040000 /* FDBM flowid RAM3 + bank 1 */ +#define HP_RAM_BIST_FDBM_TCPSEQ_PASS 0x00020000 /* FDBM tcp sequence + RAM */ +#define HP_RAM_BIST_SUMMARY 0x00000002 /* all BIST tests */ +#define HP_RAM_BIST_START 0x00000001 /* start/stop BIST */ + + +/** MAC registers. 
**/ +/* reset bits are set using a PIO write and self-cleared after the command + * execution has completed. + */ +#define REG_MAC_TX_RESET 0x6000 /* TX MAC software reset + command (default: 0x0) */ +#define REG_MAC_RX_RESET 0x6004 /* RX MAC software reset + command (default: 0x0) */ +/* execute a pause flow control frame transmission + DEFAULT: 0x0XXXX */ +#define REG_MAC_SEND_PAUSE 0x6008 /* send pause command reg */ +#define MAC_SEND_PAUSE_TIME_MASK 0x0000FFFF /* value of pause time + to be sent on network + in units of slot + times */ +#define MAC_SEND_PAUSE_SEND 0x00010000 /* send pause flow ctrl + frame on network */ + +/* bit set indicates that event occurred. auto-cleared when status register + * is read and have corresponding mask bits in mask register. events will + * trigger an interrupt if the corresponding mask bit is 0. + * status register default: 0x00000000 + * mask register default = 0xFFFFFFFF on reset + */ +#define REG_MAC_TX_STATUS 0x6010 /* TX MAC status reg */ +#define MAC_TX_FRAME_XMIT 0x0001 /* successful frame + transmision */ +#define MAC_TX_UNDERRUN 0x0002 /* terminated frame + transmission due to + data starvation in the + xmit data path */ +#define MAC_TX_MAX_PACKET_ERR 0x0004 /* frame exceeds max allowed + length passed to TX MAC + by the DMA engine */ +#define MAC_TX_COLL_NORMAL 0x0008 /* rollover of the normal + collision counter */ +#define MAC_TX_COLL_EXCESS 0x0010 /* rollover of the excessive + collision counter */ +#define MAC_TX_COLL_LATE 0x0020 /* rollover of the late + collision counter */ +#define MAC_TX_COLL_FIRST 0x0040 /* rollover of the first + collision counter */ +#define MAC_TX_DEFER_TIMER 0x0080 /* rollover of the defer + timer */ +#define MAC_TX_PEAK_ATTEMPTS 0x0100 /* rollover of the peak + attempts counter */ + +#define REG_MAC_RX_STATUS 0x6014 /* RX MAC status reg */ +#define MAC_RX_FRAME_RECV 0x0001 /* successful receipt of + a frame */ +#define MAC_RX_OVERFLOW 0x0002 /* dropped frame due to + RX FIFO overflow */ +#define MAC_RX_FRAME_COUNT 0x0004 /* rollover of receive frame + counter */ +#define MAC_RX_ALIGN_ERR 0x0008 /* rollover of alignment + error counter */ +#define MAC_RX_CRC_ERR 0x0010 /* rollover of crc error + counter */ +#define MAC_RX_LEN_ERR 0x0020 /* rollover of length + error counter */ +#define MAC_RX_VIOL_ERR 0x0040 /* rollover of code + violation error */ + +/* DEFAULT: 0xXXXX0000 on reset */ +#define REG_MAC_CTRL_STATUS 0x6018 /* MAC control status reg */ +#define MAC_CTRL_PAUSE_RECEIVED 0x00000001 /* successful + reception of a + pause control + frame */ +#define MAC_CTRL_PAUSE_STATE 0x00000002 /* MAC has made a + transition from + "not paused" to + "paused" */ +#define MAC_CTRL_NOPAUSE_STATE 0x00000004 /* MAC has made a + transition from + "paused" to "not + paused" */ +#define MAC_CTRL_PAUSE_TIME_MASK 0xFFFF0000 /* value of pause time + operand that was + received in the last + pause flow control + frame */ + +/* layout identical to TX MAC[8:0] */ +#define REG_MAC_TX_MASK 0x6020 /* TX MAC mask reg */ +/* layout identical to RX MAC[6:0] */ +#define REG_MAC_RX_MASK 0x6024 /* RX MAC mask reg */ +/* layout identical to CTRL MAC[2:0] */ +#define REG_MAC_CTRL_MASK 0x6028 /* MAC control mask reg */ + +/* to ensure proper operation, CFG_EN must be cleared to 0 and a delay + * imposed before writes to other bits in the TX_MAC_CFG register or any of + * the MAC parameters is performed. delay dependent upon time required to + * transmit a maximum size frame (= MAC_FRAMESIZE_MAX*8/Mbps). 
e.g., + * the delay for a 1518-byte frame on a 100Mbps network is 125us. + * alternatively, just poll TX_CFG_EN until it reads back as 0. + * NOTE: on half-duplex 1Gbps, TX_CFG_CARRIER_EXTEND and + * RX_CFG_CARRIER_EXTEND should be set and the SLOT_TIME register should + * be 0x200 (slot time of 512 bytes) + */ +#define REG_MAC_TX_CFG 0x6030 /* TX MAC config reg */ +#define MAC_TX_CFG_EN 0x0001 /* enable TX MAC. 0 will + force TXMAC state + machine to remain in + idle state or to + transition to idle state + on completion of an + ongoing packet. */ +#define MAC_TX_CFG_IGNORE_CARRIER 0x0002 /* disable CSMA/CD deferral + process. set to 1 when + full duplex and 0 when + half duplex */ +#define MAC_TX_CFG_IGNORE_COLL 0x0004 /* disable CSMA/CD backoff + algorithm. set to 1 when + full duplex and 0 when + half duplex */ +#define MAC_TX_CFG_IPG_EN 0x0008 /* enable extension of the + Rx-to-TX IPG. after + receiving a frame, TX + MAC will reset its + deferral process to + carrier sense for the + amount of time = IPG0 + + IPG1 and commit to + transmission for time + specified in IPG2. when + 0 or when xmitting frames + back-to-pack (Tx-to-Tx + IPG), TX MAC ignores + IPG0 and will only use + IPG1 for deferral time. + IPG2 still used. */ +#define MAC_TX_CFG_NEVER_GIVE_UP_EN 0x0010 /* TX MAC will not easily + give up on frame + xmission. if backoff + algorithm reaches the + ATTEMPT_LIMIT, it will + clear attempts counter + and continue trying to + send the frame as + specified by + GIVE_UP_LIM. when 0, + TX MAC will execute + standard CSMA/CD prot. */ +#define MAC_TX_CFG_NEVER_GIVE_UP_LIM 0x0020 /* when set, TX MAC will + continue to try to xmit + until successful. when + 0, TX MAC will continue + to try xmitting until + successful or backoff + algorithm reaches + ATTEMPT_LIMIT*16 */ +#define MAC_TX_CFG_NO_BACKOFF 0x0040 /* modify CSMA/CD to disable + backoff algorithm. TX + MAC will not back off + after a xmission attempt + that resulted in a + collision. */ +#define MAC_TX_CFG_SLOW_DOWN 0x0080 /* modify CSMA/CD so that + deferral process is reset + in response to carrier + sense during the entire + duration of IPG. TX MAC + will only commit to frame + xmission after frame + xmission has actually + begun. */ +#define MAC_TX_CFG_NO_FCS 0x0100 /* TX MAC will not generate + CRC for all xmitted + packets. when clear, CRC + generation is dependent + upon NO_CRC bit in the + xmit control word from + TX DMA */ +#define MAC_TX_CFG_CARRIER_EXTEND 0x0200 /* enables xmit part of the + carrier extension + feature. this allows for + longer collision domains + by extending the carrier + and collision window + from the end of FCS until + the end of the slot time + if necessary. Required + for half-duplex at 1Gbps, + clear otherwise. */ + +/* when CRC is not stripped, reassembly packets will not contain the CRC. + * these will be stripped by HRP because it reassembles layer 4 data, and the + * CRC is layer 2. however, non-reassembly packets will still contain the CRC + * when passed to the host. to ensure proper operation, need to wait 3.2ms + * after clearing RX_CFG_EN before writing to any other RX MAC registers + * or other MAC parameters. alternatively, poll RX_CFG_EN until it clears + * to 0. similary, HASH_FILTER_EN and ADDR_FILTER_EN have the same + * restrictions as CFG_EN. + */ +#define REG_MAC_RX_CFG 0x6034 /* RX MAC config reg */ +#define MAC_RX_CFG_EN 0x0001 /* enable RX MAC */ +#define MAC_RX_CFG_STRIP_PAD 0x0002 /* always program to 0. 
+ feature not supported */ +#define MAC_RX_CFG_STRIP_FCS 0x0004 /* RX MAC will strip the + last 4 bytes of a + received frame. */ +#define MAC_RX_CFG_PROMISC_EN 0x0008 /* promiscuous mode */ +#define MAC_RX_CFG_PROMISC_GROUP_EN 0x0010 /* accept all valid + multicast frames (group + bit in DA field set) */ +#define MAC_RX_CFG_HASH_FILTER_EN 0x0020 /* use hash table to filter + multicast addresses */ +#define MAC_RX_CFG_ADDR_FILTER_EN 0x0040 /* cause RX MAC to use + address filtering regs + to filter both unicast + and multicast + addresses */ +#define MAC_RX_CFG_DISABLE_DISCARD 0x0080 /* pass errored frames to + RX DMA by setting BAD + bit but not Abort bit + in the status. CRC, + framing, and length errs + will not increment + error counters. frames + which don't match dest + addr will be passed up + w/ BAD bit set. */ +#define MAC_RX_CFG_CARRIER_EXTEND 0x0100 /* enable reception of + packet bursts generated + by carrier extension + with packet bursting + senders. only applies + to half-duplex 1Gbps */ + +/* DEFAULT: 0x0 */ +#define REG_MAC_CTRL_CFG 0x6038 /* MAC control config reg */ +#define MAC_CTRL_CFG_SEND_PAUSE_EN 0x0001 /* respond to requests for + sending pause flow ctrl + frames */ +#define MAC_CTRL_CFG_RECV_PAUSE_EN 0x0002 /* respond to received + pause flow ctrl frames */ +#define MAC_CTRL_CFG_PASS_CTRL 0x0004 /* pass valid MAC ctrl + packets to RX DMA */ + +/* to ensure proper operation, a global initialization sequence should be + * performed when a loopback config is entered or exited. if programmed after + * a hw or global sw reset, RX/TX MAC software reset and initialization + * should be done to ensure stable clocking. + * DEFAULT: 0x0 + */ +#define REG_MAC_XIF_CFG 0x603C /* XIF config reg */ +#define MAC_XIF_TX_MII_OUTPUT_EN 0x0001 /* enable output drivers + on MII xmit bus */ +#define MAC_XIF_MII_INT_LOOPBACK 0x0002 /* loopback GMII xmit data + path to GMII recv data + path. phy mode register + clock selection must be + set to GMII mode and + GMII_MODE should be set + to 1. in loopback mode, + REFCLK will drive the + entire mac core. 0 for + normal operation. */ +#define MAC_XIF_DISABLE_ECHO 0x0004 /* disables receive data + path during packet + xmission. clear to 0 + in any full duplex mode, + in any loopback mode, + or in half-duplex SERDES + or SLINK modes. set when + in half-duplex when + using external phy. */ +#define MAC_XIF_GMII_MODE 0x0008 /* MAC operates with GMII + clocks and datapath */ +#define MAC_XIF_MII_BUFFER_OUTPUT_EN 0x0010 /* MII_BUF_EN pin. enable + external tristate buffer + on the MII receive + bus. */ +#define MAC_XIF_LINK_LED 0x0020 /* LINKLED# active (low) */ +#define MAC_XIF_FDPLX_LED 0x0040 /* FDPLXLED# active (low) */ + +#define REG_MAC_IPG0 0x6040 /* inter-packet gap0 reg. + recommended: 0x00 */ +#define REG_MAC_IPG1 0x6044 /* inter-packet gap1 reg + recommended: 0x08 */ +#define REG_MAC_IPG2 0x6048 /* inter-packet gap2 reg + recommended: 0x04 */ +#define REG_MAC_SLOT_TIME 0x604C /* slot time reg + recommended: 0x40 */ +#define REG_MAC_FRAMESIZE_MIN 0x6050 /* min frame size reg + recommended: 0x40 */ + +/* FRAMESIZE_MAX holds both the max frame size as well as the max burst size. 
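+ * the burst limit occupies bits [29:16] and the frame limit bits [14:0]
+ * (see the masks below); as a worked illustration, 0x2000.05EE decodes to
+ * a max burst of 0x2000 = 8192 bytes and a max frame of 0x5EE = 1518 bytes.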
+ * recommended value: 0x2000.05EE + */ +#define REG_MAC_FRAMESIZE_MAX 0x6054 /* max frame size reg */ +#define MAC_FRAMESIZE_MAX_BURST_MASK 0x3FFF0000 /* max burst size */ +#define MAC_FRAMESIZE_MAX_BURST_SHIFT 16 +#define MAC_FRAMESIZE_MAX_FRAME_MASK 0x00007FFF /* max frame size */ +#define MAC_FRAMESIZE_MAX_FRAME_SHIFT 0 +#define REG_MAC_PA_SIZE 0x6058 /* PA size reg. number of + preamble bytes that the + TX MAC will xmit at the + beginning of each frame + value should be 2 or + greater. recommended + value: 0x07 */ +#define REG_MAC_JAM_SIZE 0x605C /* jam size reg. duration + of jam in units of media + byte time. recommended + value: 0x04 */ +#define REG_MAC_ATTEMPT_LIMIT 0x6060 /* attempt limit reg. # + of attempts TX MAC will + make to xmit a frame + before it resets its + attempts counter. after + the limit has been + reached, TX MAC may or + may not drop the frame + dependent upon value + in TX_MAC_CFG. + recommended + value: 0x10 */ +#define REG_MAC_CTRL_TYPE 0x6064 /* MAC control type reg. + type field of a MAC + ctrl frame. recommended + value: 0x8808 */ + +/* mac address registers: 0 - 44, 0x6080 - 0x6130, 4 8-bit bytes. + * register contains comparison + * 0 16 MSB of primary MAC addr [47:32] of DA field + * 1 16 middle bits "" [31:16] of DA field + * 2 16 LSB "" [15:0] of DA field + * 3*x 16MSB of alt MAC addr 1-15 [47:32] of DA field + * 4*x 16 middle bits "" [31:16] + * 5*x 16 LSB "" [15:0] + * 42 16 MSB of MAC CTRL addr [47:32] of DA. + * 43 16 middle bits "" [31:16] + * 44 16 LSB "" [15:0] + * MAC CTRL addr must be the reserved multicast addr for MAC CTRL frames. + * if there is a match, MAC will set the bit for alternative address + * filter pass [15] + + * here is the map of registers given MAC address notation: a:b:c:d:e:f + * ab cd ef + * primary addr reg 2 reg 1 reg 0 + * alt addr 1 reg 5 reg 4 reg 3 + * alt addr x reg 5*x reg 4*x reg 3*x + * ctrl addr reg 44 reg 43 reg 42 + */ +#define REG_MAC_ADDR0 0x6080 /* MAC address 0 reg */ +#define REG_MAC_ADDRN(x) (REG_MAC_ADDR0 + (x)*4) +#define REG_MAC_ADDR_FILTER0 0x614C /* address filter 0 reg + [47:32] */ +#define REG_MAC_ADDR_FILTER1 0x6150 /* address filter 1 reg + [31:16] */ +#define REG_MAC_ADDR_FILTER2 0x6154 /* address filter 2 reg + [15:0] */ +#define REG_MAC_ADDR_FILTER2_1_MASK 0x6158 /* address filter 2 and 1 + mask reg. 8-bit reg + contains nibble mask for + reg 2 and 1. */ +#define REG_MAC_ADDR_FILTER0_MASK 0x615C /* address filter 0 mask + reg */ + +/* hash table registers: 0 - 15, 0x6160 - 0x619C, 4 8-bit bytes + * 16-bit registers contain bits of the hash table. + * reg x -> [16*(15 - x) + 15 : 16*(15 - x)]. + * e.g., 15 -> [15:0], 0 -> [255:240] + */ +#define REG_MAC_HASH_TABLE0 0x6160 /* hash table 0 reg */ +#define REG_MAC_HASH_TABLEN(x) (REG_MAC_HASH_TABLE0 + (x)*4) + +/* statistics registers. these registers generate an interrupt on + * overflow. recommended initialization: 0x0000. most are 16-bits except + * for PEAK_ATTEMPTS register which is 8 bits. + */ +#define REG_MAC_COLL_NORMAL 0x61A0 /* normal collision + counter. */ +#define REG_MAC_COLL_FIRST 0x61A4 /* first attempt + successful collision + counter */ +#define REG_MAC_COLL_EXCESS 0x61A8 /* excessive collision + counter */ +#define REG_MAC_COLL_LATE 0x61AC /* late collision counter */ +#define REG_MAC_TIMER_DEFER 0x61B0 /* defer timer. 
time base + is the media byte + clock/256 */ +#define REG_MAC_ATTEMPTS_PEAK 0x61B4 /* peak attempts reg */ +#define REG_MAC_RECV_FRAME 0x61B8 /* receive frame counter */ +#define REG_MAC_LEN_ERR 0x61BC /* length error counter */ +#define REG_MAC_ALIGN_ERR 0x61C0 /* alignment error counter */ +#define REG_MAC_FCS_ERR 0x61C4 /* FCS error counter */ +#define REG_MAC_RX_CODE_ERR 0x61C8 /* RX code violation + error counter */ + +/* misc registers */ +#define REG_MAC_RANDOM_SEED 0x61CC /* random number seed reg. + 10-bit register used as a + seed for the random number + generator for the CSMA/CD + backoff algorithm. only + programmed after power-on + reset and should be a + random value which has a + high likelihood of being + unique for each MAC + attached to a network + segment (e.g., 10 LSB of + MAC address) */ + +/* ASUN: there's a PAUSE_TIMER (ro) described, but it's not in the address + * map + */ + +/* 27-bit register has the current state for key state machines in the MAC */ +#define REG_MAC_STATE_MACHINE 0x61D0 /* (ro) state machine reg */ +#define MAC_SM_RLM_MASK 0x07800000 +#define MAC_SM_RLM_SHIFT 23 +#define MAC_SM_RX_FC_MASK 0x00700000 +#define MAC_SM_RX_FC_SHIFT 20 +#define MAC_SM_TLM_MASK 0x000F0000 +#define MAC_SM_TLM_SHIFT 16 +#define MAC_SM_ENCAP_SM_MASK 0x0000F000 +#define MAC_SM_ENCAP_SM_SHIFT 12 +#define MAC_SM_TX_REQ_MASK 0x00000C00 +#define MAC_SM_TX_REQ_SHIFT 10 +#define MAC_SM_TX_FC_MASK 0x000003C0 +#define MAC_SM_TX_FC_SHIFT 6 +#define MAC_SM_FIFO_WRITE_SEL_MASK 0x00000038 +#define MAC_SM_FIFO_WRITE_SEL_SHIFT 3 +#define MAC_SM_TX_FIFO_EMPTY_MASK 0x00000007 +#define MAC_SM_TX_FIFO_EMPTY_SHIFT 0 + +/** MIF registers. the MIF can be programmed in either bit-bang or + * frame mode. + **/ +#define REG_MIF_BIT_BANG_CLOCK 0x6200 /* MIF bit-bang clock. + 1 -> 0 will generate a + rising edge. 0 -> 1 will + generate a falling edge. */ +#define REG_MIF_BIT_BANG_DATA 0x6204 /* MIF bit-bang data. 1-bit + register generates data */ +#define REG_MIF_BIT_BANG_OUTPUT_EN 0x6208 /* MIF bit-bang output + enable. enable when + xmitting data from MIF to + transceiver. */ + +/* 32-bit register serves as an instruction register when the MIF is + * programmed in frame mode. load this register w/ a valid instruction + * (as per IEEE 802.3u MII spec). poll this register to check for instruction + * execution completion. during a read operation, this register will also + * contain the 16-bit data returned by the tranceiver. unless specified + * otherwise, fields are considered "don't care" when polling for + * completion. + */ +#define REG_MIF_FRAME 0x620C /* MIF frame/output reg */ +#define MIF_FRAME_START_MASK 0xC0000000 /* start of frame. + load w/ 01 when + issuing an instr */ +#define MIF_FRAME_ST 0x40000000 /* STart of frame */ +#define MIF_FRAME_OPCODE_MASK 0x30000000 /* opcode. 01 for a + write. 10 for a + read */ +#define MIF_FRAME_OP_READ 0x20000000 /* read OPcode */ +#define MIF_FRAME_OP_WRITE 0x10000000 /* write OPcode */ +#define MIF_FRAME_PHY_ADDR_MASK 0x0F800000 /* phy address. when + issuing an instr, + this field should be + loaded w/ the XCVR + addr */ +#define MIF_FRAME_PHY_ADDR_SHIFT 23 +#define MIF_FRAME_REG_ADDR_MASK 0x007C0000 /* register address. + when issuing an instr, + addr of register + to be read/written */ +#define MIF_FRAME_REG_ADDR_SHIFT 18 +#define MIF_FRAME_TURN_AROUND_MSB 0x00020000 /* turn around, MSB. + when issuing an instr, + set this bit to 1 */ +#define MIF_FRAME_TURN_AROUND_LSB 0x00010000 /* turn around, LSB. + when issuing an instr, + set this bit to 0. 
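+						   (added illustration: a
+						   complete read instruction
+						   is thus ST=01, OP=10, the
+						   5-bit phy addr, the 5-bit
+						   reg addr and TA MSB=1,
+						   LSB=0, i.e. 0x60020000 |
+						   (phy << 23) | (reg << 18).)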
+ when polling for + completion, 1 means + that instr execution + has been completed */ +#define MIF_FRAME_DATA_MASK 0x0000FFFF /* instruction payload + load with 16-bit data + to be written in + transceiver reg for a + write. doesn't matter + in a read. when + polling for + completion, field is + "don't care" for write + and 16-bit data + returned by the + transceiver for a + read (if valid bit + is set) */ +#define REG_MIF_CFG 0x6210 /* MIF config reg */ +#define MIF_CFG_PHY_SELECT 0x0001 /* 1 -> select MDIO_1 + 0 -> select MDIO_0 */ +#define MIF_CFG_POLL_EN 0x0002 /* enable polling + mechanism. if set, + BB_MODE should be 0 */ +#define MIF_CFG_BB_MODE 0x0004 /* 1 -> bit-bang mode + 0 -> frame mode */ +#define MIF_CFG_POLL_REG_MASK 0x00F8 /* register address to be + used by polling mode. + only meaningful if POLL_EN + is set to 1 */ +#define MIF_CFG_POLL_REG_SHIFT 3 +#define MIF_CFG_MDIO_0 0x0100 /* (ro) dual purpose. + when MDIO_0 is idle, + 1 -> tranceiver is + connected to MDIO_0. + when MIF is communicating + w/ MDIO_0 in bit-bang + mode, this bit indicates + the incoming bit stream + during a read op */ +#define MIF_CFG_MDIO_1 0x0200 /* (ro) dual purpose. + when MDIO_1 is idle, + 1 -> transceiver is + connected to MDIO_1. + when MIF is communicating + w/ MDIO_1 in bit-bang + mode, this bit indicates + the incoming bit stream + during a read op */ +#define MIF_CFG_POLL_PHY_MASK 0x7C00 /* tranceiver address to + be polled */ +#define MIF_CFG_POLL_PHY_SHIFT 10 + +/* 16-bit register used to determine which bits in the POLL_STATUS portion of + * the MIF_STATUS register will cause an interrupt. if a mask bit is 0, + * corresponding bit of the POLL_STATUS will generate a MIF interrupt when + * set. DEFAULT: 0xFFFF + */ +#define REG_MIF_MASK 0x6214 /* MIF mask reg */ + +/* 32-bit register used when in poll mode. auto-cleared after being read */ +#define REG_MIF_STATUS 0x6218 /* MIF status reg */ +#define MIF_STATUS_POLL_DATA_MASK 0xFFFF0000 /* poll data contains + the "latest image" + update of the XCVR + reg being read */ +#define MIF_STATUS_POLL_DATA_SHIFT 16 +#define MIF_STATUS_POLL_STATUS_MASK 0x0000FFFF /* poll status indicates + which bits in the + POLL_DATA field have + changed since the + MIF_STATUS reg was + last read */ +#define MIF_STATUS_POLL_STATUS_SHIFT 0 + +/* 7-bit register has current state for all state machines in the MIF */ +#define REG_MIF_STATE_MACHINE 0x621C /* MIF state machine reg */ +#define MIF_SM_CONTROL_MASK 0x07 /* control state machine + state */ +#define MIF_SM_EXECUTION_MASK 0x60 /* execution state machine + state */ + +/** PCS/Serialink. the following registers are equivalent to the standard + * MII management registers except that they're directly mapped in + * Cassini's register space. + **/ + +/* the auto-negotiation enable bit should be programmed the same at + * the link partner as in the local device to enable auto-negotiation to + * complete. when that bit is reprogrammed, auto-neg/manual config is + * restarted automatically. + * DEFAULT: 0x1040 + */ +#define REG_PCS_MII_CTRL 0x9000 /* PCS MII control reg */ +#define PCS_MII_CTRL_1000_SEL 0x0040 /* reads 1. ignored on + writes */ +#define PCS_MII_CTRL_COLLISION_TEST 0x0080 /* COL signal at the PCS + to MAC interface is + activated regardless + of activity */ +#define PCS_MII_CTRL_DUPLEX 0x0100 /* forced 0x0. PCS + behaviour same for + half and full dplx */ +#define PCS_MII_RESTART_AUTONEG 0x0200 /* self clearing. + restart auto- + negotiation */ +#define PCS_MII_ISOLATE 0x0400 /* read as 0. 
ignored + on writes */ +#define PCS_MII_POWER_DOWN 0x0800 /* read as 0. ignored + on writes */ +#define PCS_MII_AUTONEG_EN 0x1000 /* default 1. PCS goes + through automatic + link config before it + can be used. when 0, + link can be used + w/out any link config + phase */ +#define PCS_MII_10_100_SEL 0x2000 /* read as 0. ignored on + writes */ +#define PCS_MII_RESET 0x8000 /* reset PCS. self-clears + when done */ + +/* DEFAULT: 0x0108 */ +#define REG_PCS_MII_STATUS 0x9004 /* PCS MII status reg */ +#define PCS_MII_STATUS_EXTEND_CAP 0x0001 /* reads 0 */ +#define PCS_MII_STATUS_JABBER_DETECT 0x0002 /* reads 0 */ +#define PCS_MII_STATUS_LINK_STATUS 0x0004 /* 1 -> link up. + 0 -> link down. 0 is + latched so that 0 is + kept until read. read + 2x to determine if the + link has gone up again */ +#define PCS_MII_STATUS_AUTONEG_ABLE 0x0008 /* reads 1 (able to perform + auto-neg) */ +#define PCS_MII_STATUS_REMOTE_FAULT 0x0010 /* 1 -> remote fault detected + from received link code + word. only valid after + auto-neg completed */ +#define PCS_MII_STATUS_AUTONEG_COMP 0x0020 /* 1 -> auto-negotiation + completed + 0 -> auto-negotiation not + completed */ +#define PCS_MII_STATUS_EXTEND_STATUS 0x0100 /* reads as 1. used as an + indication that this is + a 1000 Base-X PHY. writes + to it are ignored */ + +/* used during auto-negotiation. + * DEFAULT: 0x00E0 + */ +#define REG_PCS_MII_ADVERT 0x9008 /* PCS MII advertisement + reg */ +#define PCS_MII_ADVERT_FD 0x0020 /* advertise full duplex + 1000 Base-X */ +#define PCS_MII_ADVERT_HD 0x0040 /* advertise half-duplex + 1000 Base-X */ +#define PCS_MII_ADVERT_SYM_PAUSE 0x0080 /* advertise PAUSE + symmetric capability */ +#define PCS_MII_ADVERT_ASYM_PAUSE 0x0100 /* advertises PAUSE + asymmetric capability */ +#define PCS_MII_ADVERT_RF_MASK 0x3000 /* remote fault. write bit13 + to optionally indicate to + link partner that chip is + going off-line. bit12 will + get set when signal + detect == FAIL and will + remain set until + successful negotiation */ +#define PCS_MII_ADVERT_ACK 0x4000 /* (ro) */ +#define PCS_MII_ADVERT_NEXT_PAGE 0x8000 /* (ro) forced 0x0 */ + +/* contents updated as a result of autonegotiation. layout and definitions + * identical to PCS_MII_ADVERT + */ +#define REG_PCS_MII_LPA 0x900C /* PCS MII link partner + ability reg */ +#define PCS_MII_LPA_FD PCS_MII_ADVERT_FD +#define PCS_MII_LPA_HD PCS_MII_ADVERT_HD +#define PCS_MII_LPA_SYM_PAUSE PCS_MII_ADVERT_SYM_PAUSE +#define PCS_MII_LPA_ASYM_PAUSE PCS_MII_ADVERT_ASYM_PAUSE +#define PCS_MII_LPA_RF_MASK PCS_MII_ADVERT_RF_MASK +#define PCS_MII_LPA_ACK PCS_MII_ADVERT_ACK +#define PCS_MII_LPA_NEXT_PAGE PCS_MII_ADVERT_NEXT_PAGE + +/* DEFAULT: 0x0 */ +#define REG_PCS_CFG 0x9010 /* PCS config reg */ +#define PCS_CFG_EN 0x01 /* enable PCS. must be + 0 when modifying + PCS_MII_ADVERT */ +#define PCS_CFG_SD_OVERRIDE 0x02 /* sets signal detect to + OK. bit is + non-resettable */ +#define PCS_CFG_SD_ACTIVE_LOW 0x04 /* changes interpretation + of optical signal to make + signal detect okay when + signal is low */ +#define PCS_CFG_JITTER_STUDY_MASK 0x18 /* used to make jitter + measurements. a single + code group is xmitted + regularly. + 0x0 = normal operation + 0x1 = high freq test + pattern, D21.5 + 0x2 = low freq test + pattern, K28.7 + 0x3 = reserved */ +#define PCS_CFG_10MS_TIMER_OVERRIDE 0x20 /* shortens 10-20ms auto- + negotiation timer to + a few cycles for test + purposes */ + +/* used for diagnostic purposes. 
bits 20-22 autoclear on read */ +#define REG_PCS_STATE_MACHINE 0x9014 /* (ro) PCS state machine + and diagnostic reg */ +#define PCS_SM_TX_STATE_MASK 0x0000000F /* 0 and 1 indicate + xmission of idle. + otherwise, xmission of + a packet */ +#define PCS_SM_RX_STATE_MASK 0x000000F0 /* 0 indicates reception + of idle. otherwise, + reception of packet */ +#define PCS_SM_WORD_SYNC_STATE_MASK 0x00000700 /* 0 indicates loss of + sync */ +#define PCS_SM_SEQ_DETECT_STATE_MASK 0x00001800 /* cycling through 0-3 + indicates reception of + Config codes. cycling + through 0-1 indicates + reception of idles */ +#define PCS_SM_LINK_STATE_MASK 0x0001E000 +#define SM_LINK_STATE_UP 0x00016000 /* link state is up */ + +#define PCS_SM_LOSS_LINK_C 0x00100000 /* loss of link due to + recept of Config + codes */ +#define PCS_SM_LOSS_LINK_SYNC 0x00200000 /* loss of link due to + loss of sync */ +#define PCS_SM_LOSS_SIGNAL_DETECT 0x00400000 /* signal detect goes + from OK to FAIL. bit29 + will also be set if + this is set */ +#define PCS_SM_NO_LINK_BREAKLINK 0x01000000 /* link not up due to + receipt of breaklink + C codes from partner. + C codes w/ 0 content + received triggering + start/restart of + autonegotiation. + should be sent for + no longer than 20ms */ +#define PCS_SM_NO_LINK_SERDES 0x02000000 /* serdes being + initialized. see serdes + state reg */ +#define PCS_SM_NO_LINK_C 0x04000000 /* C codes not stable or + not received */ +#define PCS_SM_NO_LINK_SYNC 0x08000000 /* word sync not + achieved */ +#define PCS_SM_NO_LINK_WAIT_C 0x10000000 /* waiting for C codes + w/ ack bit set */ +#define PCS_SM_NO_LINK_NO_IDLE 0x20000000 /* link partner continues + to send C codes + instead of idle + symbols or pkt data */ + +/* this register indicates interrupt changes in specific PCS MII status bits. + * PCS_INT may be masked at the ISR level. only a single bit is implemented + * for link status change. + */ +#define REG_PCS_INTR_STATUS 0x9018 /* PCS interrupt status */ +#define PCS_INTR_STATUS_LINK_CHANGE 0x04 /* link status has changed + since last read */ + +/* control which network interface is used. no more than one bit should + * be set. + * DEFAULT: none + */ +#define REG_PCS_DATAPATH_MODE 0x9050 /* datapath mode reg */ +#define PCS_DATAPATH_MODE_MII 0x00 /* PCS is not used and + MII/GMII is selected. + selection between MII and + GMII is controlled by + XIF_CFG */ +#define PCS_DATAPATH_MODE_SERDES 0x02 /* PCS is used via the + 10-bit interface */ + +/* input to serdes chip or serialink block */ +#define REG_PCS_SERDES_CTRL 0x9054 /* serdes control reg */ +#define PCS_SERDES_CTRL_LOOPBACK 0x01 /* enable loopback on + serdes interface */ +#define PCS_SERDES_CTRL_SYNCD_EN 0x02 /* enable sync carrier + detection. should be + 0x0 for normal + operation */ +#define PCS_SERDES_CTRL_LOCKREF 0x04 /* frequency-lock RBC[0:1] + to REFCLK when set. + when clear, receiver + clock locks to incoming + serial data */ + +/* multiplex test outputs into the PROM address (PA_3 through PA_0) pins. + * should be 0x0 for normal operations. + * 0b000 normal operation, PROM address[3:0] selected + * 0b001 rxdma req, rxdma ack, rxdma ready, rxdma read + * 0b010 rxmac req, rx ack, rx tag, rx clk shared + * 0b011 txmac req, tx ack, tx tag, tx retry req + * 0b100 tx tp3, tx tp2, tx tp1, tx tp0 + * 0b101 R period RX, R period TX, R period HP, R period BIM + * DEFAULT: 0x0 + */ +#define REG_PCS_SHARED_OUTPUT_SEL 0x9058 /* shared output select */ +#define PCS_SOS_PROM_ADDR_MASK 0x0007 + +/* used for diagnostics. 
this register indicates progress of the SERDES + * boot up. + * 0b00 undergoing reset + * 0b01 waiting 500us while lockrefn is asserted + * 0b10 waiting for comma detect + * 0b11 receive data is synchronized + * DEFAULT: 0x0 + */ +#define REG_PCS_SERDES_STATE 0x905C /* (ro) serdes state */ +#define PCS_SERDES_STATE_MASK 0x03 + +/* used for diagnostics. indicates number of packets transmitted or received. + * counters rollover w/out generating an interrupt. + * DEFAULT: 0x0 + */ +#define REG_PCS_PACKET_COUNT 0x9060 /* (ro) PCS packet counter */ +#define PCS_PACKET_COUNT_TX 0x000007FF /* pkts xmitted by PCS */ +#define PCS_PACKET_COUNT_RX 0x07FF0000 /* pkts recvd by PCS + whether they + encountered an error + or not */ + +/** LocalBus Devices. the following provides run-time access to the + * Cassini's PROM + ***/ +#define REG_EXPANSION_ROM_RUN_START 0x100000 /* expansion rom run time + access */ +#define REG_EXPANSION_ROM_RUN_END 0x17FFFF + +#define REG_SECOND_LOCALBUS_START 0x180000 /* secondary local bus + device */ +#define REG_SECOND_LOCALBUS_END 0x1FFFFF + +/* entropy device */ +#define REG_ENTROPY_START REG_SECOND_LOCALBUS_START +#define REG_ENTROPY_DATA (REG_ENTROPY_START + 0x00) +#define REG_ENTROPY_STATUS (REG_ENTROPY_START + 0x04) +#define ENTROPY_STATUS_DRDY 0x01 +#define ENTROPY_STATUS_BUSY 0x02 +#define ENTROPY_STATUS_CIPHER 0x04 +#define ENTROPY_STATUS_BYPASS_MASK 0x18 +#define REG_ENTROPY_MODE (REG_ENTROPY_START + 0x05) +#define ENTROPY_MODE_KEY_MASK 0x07 +#define ENTROPY_MODE_ENCRYPT 0x40 +#define REG_ENTROPY_RAND_REG (REG_ENTROPY_START + 0x06) +#define REG_ENTROPY_RESET (REG_ENTROPY_START + 0x07) +#define ENTROPY_RESET_DES_IO 0x01 +#define ENTROPY_RESET_STC_MODE 0x02 +#define ENTROPY_RESET_KEY_CACHE 0x04 +#define ENTROPY_RESET_IV 0x08 +#define REG_ENTROPY_IV (REG_ENTROPY_START + 0x08) +#define REG_ENTROPY_KEY0 (REG_ENTROPY_START + 0x10) +#define REG_ENTROPY_KEYN(x) (REG_ENTROPY_KEY0 + 4*(x)) + +/* phys of interest w/ their special mii registers */ +#define PHY_LUCENT_B0 0x00437421 +#define LUCENT_MII_REG 0x1F + +#define PHY_NS_DP83065 0x20005c78 +#define DP83065_MII_MEM 0x16 +#define DP83065_MII_REGD 0x1D +#define DP83065_MII_REGE 0x1E + +#define PHY_BROADCOM_5411 0x00206071 +#define PHY_BROADCOM_B0 0x00206050 +#define BROADCOM_MII_REG4 0x14 +#define BROADCOM_MII_REG5 0x15 +#define BROADCOM_MII_REG7 0x17 +#define BROADCOM_MII_REG8 0x18 + +#define CAS_MII_ANNPTR 0x07 +#define CAS_MII_ANNPRR 0x08 +#define CAS_MII_1000_CTRL 0x09 +#define CAS_MII_1000_STATUS 0x0A +#define CAS_MII_1000_EXTEND 0x0F + +#define CAS_BMSR_1000_EXTEND 0x0100 /* supports 1000Base-T extended status */ +/* + * if autoneg is disabled, here's the table: + * BMCR_SPEED100 = 100Mbps + * BMCR_SPEED1000 = 1000Mbps + * ~(BMCR_SPEED100 | BMCR_SPEED1000) = 10Mbps + */ +#define CAS_BMCR_SPEED1000 0x0040 /* Select 1000Mbps */ + +#define CAS_ADVERTISE_1000HALF 0x0100 +#define CAS_ADVERTISE_1000FULL 0x0200 +#define CAS_ADVERTISE_PAUSE 0x0400 +#define CAS_ADVERTISE_ASYM_PAUSE 0x0800 + +/* regular lpa register */ +#define CAS_LPA_PAUSE CAS_ADVERTISE_PAUSE +#define CAS_LPA_ASYM_PAUSE CAS_ADVERTISE_ASYM_PAUSE + +/* 1000_STATUS register */ +#define CAS_LPA_1000HALF 0x0400 +#define CAS_LPA_1000FULL 0x0800 + +#define CAS_EXTEND_1000XFULL 0x8000 +#define CAS_EXTEND_1000XHALF 0x4000 +#define CAS_EXTEND_1000TFULL 0x2000 +#define CAS_EXTEND_1000THALF 0x1000 + +/* cassini header parser firmware */ +typedef struct cas_hp_inst { + const char *note; + + u16 mask, val; + + u8 op; + u8 soff, snext; /* if match succeeds, new offset 
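
Returning to the diagnostic packet counters defined a few lines up: REG_PCS_PACKET_COUNT packs two 11-bit, free-running fields into one register. A sketch of splitting them apart, under the same assumptions as the earlier examples (the function name is made up for illustration):

static void pcs_read_packet_counters(void __iomem *regs, u16 *tx, u16 *rx)
{
	u32 cnt = readl(regs + REG_PCS_PACKET_COUNT);

	/* low 11 bits: packets transmitted by the PCS */
	*tx = cnt & PCS_PACKET_COUNT_TX;
	/* bits 16-26: packets received, with or without errors */
	*rx = (cnt & PCS_PACKET_COUNT_RX) >> 16;
	/* both fields simply roll over; no interrupt is generated */
}
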
and match */ + u8 foff, fnext; /* if match fails, new offset and match */ + /* output info */ + u8 outop; /* output opcode */ + + u16 outarg; /* output argument */ + u8 outenab; /* output enable: 0 = not, 1 = if match + 2 = if !match, 3 = always */ + u8 outshift; /* barrel shift right, 4 bits */ + u16 outmask; +} cas_hp_inst_t; + +/* comparison */ +#define OP_EQ 0 /* packet == value */ +#define OP_LT 1 /* packet < value */ +#define OP_GT 2 /* packet > value */ +#define OP_NP 3 /* new packet */ + +/* output opcodes */ +#define CL_REG 0 +#define LD_FID 1 +#define LD_SEQ 2 +#define LD_CTL 3 +#define LD_SAP 4 +#define LD_R1 5 +#define LD_L3 6 +#define LD_SUM 7 +#define LD_HDR 8 +#define IM_FID 9 +#define IM_SEQ 10 +#define IM_SAP 11 +#define IM_R1 12 +#define IM_CTL 13 +#define LD_LEN 14 +#define ST_FLG 15 + +/* match setp #s for IP4TCP4 */ +#define S1_PCKT 0 +#define S1_VLAN 1 +#define S1_CFI 2 +#define S1_8023 3 +#define S1_LLC 4 +#define S1_LLCc 5 +#define S1_IPV4 6 +#define S1_IPV4c 7 +#define S1_IPV4F 8 +#define S1_TCP44 9 +#define S1_IPV6 10 +#define S1_IPV6L 11 +#define S1_IPV6c 12 +#define S1_TCP64 13 +#define S1_TCPSQ 14 +#define S1_TCPFG 15 +#define S1_TCPHL 16 +#define S1_TCPHc 17 +#define S1_CLNP 18 +#define S1_CLNP2 19 +#define S1_DROP 20 +#define S2_HTTP 21 +#define S1_ESP4 22 +#define S1_AH4 23 +#define S1_ESP6 24 +#define S1_AH6 25 + +#define CAS_PROG_IP46TCP4_PREAMBLE \ +{ "packet arrival?", 0xffff, 0x0000, OP_NP, 6, S1_VLAN, 0, S1_PCKT, \ + CL_REG, 0x3ff, 1, 0x0, 0x0000}, \ +{ "VLAN?", 0xffff, 0x8100, OP_EQ, 1, S1_CFI, 0, S1_8023, \ + IM_CTL, 0x00a, 3, 0x0, 0xffff}, \ +{ "CFI?", 0x1000, 0x1000, OP_EQ, 0, S1_DROP, 1, S1_8023, \ + CL_REG, 0x000, 0, 0x0, 0x0000}, \ +{ "8023?", 0xffff, 0x0600, OP_LT, 1, S1_LLC, 0, S1_IPV4, \ + CL_REG, 0x000, 0, 0x0, 0x0000}, \ +{ "LLC?", 0xffff, 0xaaaa, OP_EQ, 1, S1_LLCc, 0, S1_CLNP, \ + CL_REG, 0x000, 0, 0x0, 0x0000}, \ +{ "LLCc?", 0xff00, 0x0300, OP_EQ, 2, S1_IPV4, 0, S1_CLNP, \ + CL_REG, 0x000, 0, 0x0, 0x0000}, \ +{ "IPV4?", 0xffff, 0x0800, OP_EQ, 1, S1_IPV4c, 0, S1_IPV6, \ + LD_SAP, 0x100, 3, 0x0, 0xffff}, \ +{ "IPV4 cont?", 0xff00, 0x4500, OP_EQ, 3, S1_IPV4F, 0, S1_CLNP, \ + LD_SUM, 0x00a, 1, 0x0, 0x0000}, \ +{ "IPV4 frag?", 0x3fff, 0x0000, OP_EQ, 1, S1_TCP44, 0, S1_CLNP, \ + LD_LEN, 0x03e, 1, 0x0, 0xffff}, \ +{ "TCP44?", 0x00ff, 0x0006, OP_EQ, 7, S1_TCPSQ, 0, S1_CLNP, \ + LD_FID, 0x182, 1, 0x0, 0xffff}, /* FID IP4&TCP src+dst */ \ +{ "IPV6?", 0xffff, 0x86dd, OP_EQ, 1, S1_IPV6L, 0, S1_CLNP, \ + LD_SUM, 0x015, 1, 0x0, 0x0000}, \ +{ "IPV6 len", 0xf000, 0x6000, OP_EQ, 0, S1_IPV6c, 0, S1_CLNP, \ + IM_R1, 0x128, 1, 0x0, 0xffff}, \ +{ "IPV6 cont?", 0x0000, 0x0000, OP_EQ, 3, S1_TCP64, 0, S1_CLNP, \ + LD_FID, 0x484, 1, 0x0, 0xffff}, /* FID IP6&TCP src+dst */ \ +{ "TCP64?", 0xff00, 0x0600, OP_EQ, 18, S1_TCPSQ, 0, S1_CLNP, \ + LD_LEN, 0x03f, 1, 0x0, 0xffff} + +#ifdef USE_HP_IP46TCP4 +static cas_hp_inst_t cas_prog_ip46tcp4tab[] = { + CAS_PROG_IP46TCP4_PREAMBLE, + { "TCP seq", /* DADDR should point to dest port */ + 0x0000, 0x0000, OP_EQ, 0, S1_TCPFG, 4, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHL, 0, + S1_TCPHL, ST_FLG, 0x045, 3, 0x0, 0x002f}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, + S1_TCPHc, LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP2, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { "Cleanup 
2", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x000, 0, 0x0, 0x0000}, + { "Drop packet", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x080, 3, 0x0, 0xffff}, + { NULL }, +}; +#ifdef HP_IP46TCP4_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_ip46tcp4tab +#endif +#endif + +/* + * Alternate table load which excludes HTTP server traffic from reassembly. + * It is substantially similar to the basic table, with one extra state + * and a few extra compares. */ +#ifdef USE_HP_IP46TCP4NOHTTP +static cas_hp_inst_t cas_prog_ip46tcp4nohttptab[] = { + CAS_PROG_IP46TCP4_PREAMBLE, + { "TCP seq", /* DADDR should point to dest port */ + 0xFFFF, 0x0080, OP_EQ, 0, S2_HTTP, 0, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff} , /* Load TCP seq # */ + { "TCP control flags", 0xFFFF, 0x8080, OP_EQ, 0, S2_HTTP, 0, + S1_TCPHL, ST_FLG, 0x145, 2, 0x0, 0x002f, }, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, S1_TCPHc, + LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP2, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { "Cleanup 2", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + CL_REG, 0x002, 3, 0x0, 0x0000}, + { "Drop packet", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x080, 3, 0x0, 0xffff}, + { "No HTTP", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x044, 3, 0x0, 0xffff}, + { NULL }, +}; +#ifdef HP_IP46TCP4NOHTTP_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_ip46tcp4nohttptab +#endif +#endif + +/* match step #s for IP4FRAG */ +#define S3_IPV6c 11 +#define S3_TCP64 12 +#define S3_TCPSQ 13 +#define S3_TCPFG 14 +#define S3_TCPHL 15 +#define S3_TCPHc 16 +#define S3_FRAG 17 +#define S3_FOFF 18 +#define S3_CLNP 19 + +#ifdef USE_HP_IP4FRAG +static cas_hp_inst_t cas_prog_ip4fragtab[] = { + { "packet arrival?", 0xffff, 0x0000, OP_NP, 6, S1_VLAN, 0, S1_PCKT, + CL_REG, 0x3ff, 1, 0x0, 0x0000}, + { "VLAN?", 0xffff, 0x8100, OP_EQ, 1, S1_CFI, 0, S1_8023, + IM_CTL, 0x00a, 3, 0x0, 0xffff}, + { "CFI?", 0x1000, 0x1000, OP_EQ, 0, S3_CLNP, 1, S1_8023, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "8023?", 0xffff, 0x0600, OP_LT, 1, S1_LLC, 0, S1_IPV4, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLC?", 0xffff, 0xaaaa, OP_EQ, 1, S1_LLCc, 0, S3_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLCc?",0xff00, 0x0300, OP_EQ, 2, S1_IPV4, 0, S3_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "IPV4?", 0xffff, 0x0800, OP_EQ, 1, S1_IPV4c, 0, S1_IPV6, + LD_SAP, 0x100, 3, 0x0, 0xffff}, + { "IPV4 cont?", 0xff00, 0x4500, OP_EQ, 3, S1_IPV4F, 0, S3_CLNP, + LD_SUM, 0x00a, 1, 0x0, 0x0000}, + { "IPV4 frag?", 0x3fff, 0x0000, OP_EQ, 1, S1_TCP44, 0, S3_FRAG, + LD_LEN, 0x03e, 3, 0x0, 0xffff}, + { "TCP44?", 0x00ff, 0x0006, OP_EQ, 7, S3_TCPSQ, 0, S3_CLNP, + LD_FID, 0x182, 3, 0x0, 0xffff}, /* FID IP4&TCP src+dst */ + { "IPV6?", 0xffff, 0x86dd, OP_EQ, 1, S3_IPV6c, 0, S3_CLNP, + LD_SUM, 0x015, 1, 0x0, 0x0000}, + { "IPV6 cont?", 0xf000, 0x6000, OP_EQ, 3, S3_TCP64, 0, S3_CLNP, + LD_FID, 0x484, 1, 0x0, 0xffff}, /* FID IP6&TCP src+dst */ + { "TCP64?", 0xff00, 0x0600, OP_EQ, 18, S3_TCPSQ, 0, S3_CLNP, + LD_LEN, 0x03f, 1, 0x0, 0xffff}, + { "TCP seq", /* DADDR should point to dest port */ + 0x0000, 0x0000, OP_EQ, 0, S3_TCPFG, 4, S3_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0x0000, 0x0000, OP_EQ, 0, S3_TCPHL, 0, + S3_TCPHL, ST_FLG, 0x045, 3, 0x0, 0x002f}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S3_TCPHc, 0, S3_TCPHc, + 
LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "IP4 Fragment", 0x0000, 0x0000, OP_EQ, 0, S3_FOFF, 0, S3_FOFF, + LD_FID, 0x103, 3, 0x0, 0xffff}, /* FID IP4 src+dst */ + { "IP4 frag offset", 0x0000, 0x0000, OP_EQ, 0, S3_FOFF, 0, S3_FOFF, + LD_SEQ, 0x040, 1, 0xD, 0xfff8}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { NULL }, +}; +#ifdef HP_IP4FRAG_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_ip4fragtab +#endif +#endif + +/* + * Alternate table which does batching without reassembly + */ +#ifdef USE_HP_IP46TCP4BATCH +static cas_hp_inst_t cas_prog_ip46tcp4batchtab[] = { + CAS_PROG_IP46TCP4_PREAMBLE, + { "TCP seq", /* DADDR should point to dest port */ + 0x0000, 0x0000, OP_EQ, 0, S1_TCPFG, 0, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHL, 0, + S1_TCPHL, ST_FLG, 0x000, 3, 0x0, 0x0000}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, + S1_TCPHc, LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, IM_CTL, 0x040, 3, 0x0, 0xffff}, /* set batch bit */ + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { "Drop packet", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, IM_CTL, 0x080, 3, 0x0, 0xffff}, + { NULL }, +}; +#ifdef HP_IP46TCP4BATCH_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_ip46tcp4batchtab +#endif +#endif + +/* Workaround for Cassini rev2 descriptor corruption problem. + * Does batching without reassembly, and sets the SAP to a known + * data pattern for all packets. + */ +#ifdef USE_HP_WORKAROUND +static cas_hp_inst_t cas_prog_workaroundtab[] = { + { "packet arrival?", 0xffff, 0x0000, OP_NP, 6, S1_VLAN, 0, + S1_PCKT, CL_REG, 0x3ff, 1, 0x0, 0x0000} , + { "VLAN?", 0xffff, 0x8100, OP_EQ, 1, S1_CFI, 0, S1_8023, + IM_CTL, 0x04a, 3, 0x0, 0xffff}, + { "CFI?", 0x1000, 0x1000, OP_EQ, 0, S1_CLNP, 1, S1_8023, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "8023?", 0xffff, 0x0600, OP_LT, 1, S1_LLC, 0, S1_IPV4, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLC?", 0xffff, 0xaaaa, OP_EQ, 1, S1_LLCc, 0, S1_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLCc?", 0xff00, 0x0300, OP_EQ, 2, S1_IPV4, 0, S1_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "IPV4?", 0xffff, 0x0800, OP_EQ, 1, S1_IPV4c, 0, S1_IPV6, + IM_SAP, 0x6AE, 3, 0x0, 0xffff}, + { "IPV4 cont?", 0xff00, 0x4500, OP_EQ, 3, S1_IPV4F, 0, S1_CLNP, + LD_SUM, 0x00a, 1, 0x0, 0x0000}, + { "IPV4 frag?", 0x3fff, 0x0000, OP_EQ, 1, S1_TCP44, 0, S1_CLNP, + LD_LEN, 0x03e, 1, 0x0, 0xffff}, + { "TCP44?", 0x00ff, 0x0006, OP_EQ, 7, S1_TCPSQ, 0, S1_CLNP, + LD_FID, 0x182, 3, 0x0, 0xffff}, /* FID IP4&TCP src+dst */ + { "IPV6?", 0xffff, 0x86dd, OP_EQ, 1, S1_IPV6L, 0, S1_CLNP, + LD_SUM, 0x015, 1, 0x0, 0x0000}, + { "IPV6 len", 0xf000, 0x6000, OP_EQ, 0, S1_IPV6c, 0, S1_CLNP, + IM_R1, 0x128, 1, 0x0, 0xffff}, + { "IPV6 cont?", 0x0000, 0x0000, OP_EQ, 3, S1_TCP64, 0, S1_CLNP, + LD_FID, 0x484, 1, 0x0, 0xffff}, /* FID IP6&TCP src+dst */ + { "TCP64?", 0xff00, 0x0600, OP_EQ, 18, S1_TCPSQ, 0, S1_CLNP, + LD_LEN, 0x03f, 1, 0x0, 0xffff}, + { "TCP seq", /* DADDR should point to dest port */ + 0x0000, 0x0000, OP_EQ, 0, S1_TCPFG, 4, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHL, 0, + S1_TCPHL, ST_FLG, 0x045, 3, 0x0, 0x002f}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, 
S1_TCPHc, 0, S1_TCPHc, + LD_R1, 0x205, 3, 0xB, 0xf000}, + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP2, + IM_SAP, 0x6AE, 3, 0x0, 0xffff} , + { "Cleanup 2", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { NULL }, +}; +#ifdef HP_WORKAROUND_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_workaroundtab +#endif +#endif + +#ifdef USE_HP_ENCRYPT +static cas_hp_inst_t cas_prog_encryptiontab[] = { + { "packet arrival?", 0xffff, 0x0000, OP_NP, 6, S1_VLAN, 0, + S1_PCKT, CL_REG, 0x3ff, 1, 0x0, 0x0000}, + { "VLAN?", 0xffff, 0x8100, OP_EQ, 1, S1_CFI, 0, S1_8023, + IM_CTL, 0x00a, 3, 0x0, 0xffff}, +#if 0 +//"CFI?", /* 02 FIND CFI and If FIND go to S1_DROP */ +//0x1000, 0x1000, OP_EQ, 0, S1_DROP, 1, S1_8023, CL_REG, 0x000, 0, 0x0, 0x00 + 00, +#endif + { "CFI?", /* FIND CFI and If FIND go to CleanUP1 (ignore and send to host) */ + 0x1000, 0x1000, OP_EQ, 0, S1_CLNP, 1, S1_8023, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "8023?", 0xffff, 0x0600, OP_LT, 1, S1_LLC, 0, S1_IPV4, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLC?", 0xffff, 0xaaaa, OP_EQ, 1, S1_LLCc, 0, S1_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "LLCc?", 0xff00, 0x0300, OP_EQ, 2, S1_IPV4, 0, S1_CLNP, + CL_REG, 0x000, 0, 0x0, 0x0000}, + { "IPV4?", 0xffff, 0x0800, OP_EQ, 1, S1_IPV4c, 0, S1_IPV6, + LD_SAP, 0x100, 3, 0x0, 0xffff}, + { "IPV4 cont?", 0xff00, 0x4500, OP_EQ, 3, S1_IPV4F, 0, S1_CLNP, + LD_SUM, 0x00a, 1, 0x0, 0x0000}, + { "IPV4 frag?", 0x3fff, 0x0000, OP_EQ, 1, S1_TCP44, 0, S1_CLNP, + LD_LEN, 0x03e, 1, 0x0, 0xffff}, + { "TCP44?", 0x00ff, 0x0006, OP_EQ, 7, S1_TCPSQ, 0, S1_ESP4, + LD_FID, 0x182, 1, 0x0, 0xffff}, /* FID IP4&TCP src+dst */ + { "IPV6?", 0xffff, 0x86dd, OP_EQ, 1, S1_IPV6L, 0, S1_CLNP, + LD_SUM, 0x015, 1, 0x0, 0x0000}, + { "IPV6 len", 0xf000, 0x6000, OP_EQ, 0, S1_IPV6c, 0, S1_CLNP, + IM_R1, 0x128, 1, 0x0, 0xffff}, + { "IPV6 cont?", 0x0000, 0x0000, OP_EQ, 3, S1_TCP64, 0, S1_CLNP, + LD_FID, 0x484, 1, 0x0, 0xffff}, /* FID IP6&TCP src+dst */ + { "TCP64?", +#if 0 +//@@@0xff00, 0x0600, OP_EQ, 18, S1_TCPSQ, 0, S1_ESP6, LD_LEN, 0x03f, 1, 0x0, 0xffff, +#endif + 0xff00, 0x0600, OP_EQ, 12, S1_TCPSQ, 0, S1_ESP6, LD_LEN, + 0x03f, 1, 0x0, 0xffff}, + { "TCP seq", /* 14:DADDR should point to dest port */ + 0xFFFF, 0x0080, OP_EQ, 0, S2_HTTP, 0, S1_TCPFG, LD_SEQ, + 0x081, 3, 0x0, 0xffff}, /* Load TCP seq # */ + { "TCP control flags", 0xFFFF, 0x8080, OP_EQ, 0, S2_HTTP, 0, + S1_TCPHL, ST_FLG, 0x145, 2, 0x0, 0x002f}, /* Load TCP flags */ + { "TCP length", 0x0000, 0x0000, OP_EQ, 0, S1_TCPHc, 0, S1_TCPHc, + LD_R1, 0x205, 3, 0xB, 0xf000} , + { "TCP length cont", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, + S1_PCKT, LD_HDR, 0x0ff, 3, 0x0, 0xffff}, + { "Cleanup", 0x0000, 0x0000, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP2, + IM_CTL, 0x001, 3, 0x0, 0x0001}, + { "Cleanup 2", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + CL_REG, 0x002, 3, 0x0, 0x0000}, + { "Drop packet", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x080, 3, 0x0, 0xffff}, + { "No HTTP", 0x0000, 0x0000, OP_EQ, 0, S1_PCKT, 0, S1_PCKT, + IM_CTL, 0x044, 3, 0x0, 0xffff}, + { "IPV4 ESP encrypted?", /* S1_ESP4 */ + 0x00ff, 0x0032, OP_EQ, 0, S1_CLNP2, 0, S1_AH4, IM_CTL, + 0x021, 1, 0x0, 0xffff}, + { "IPV4 AH encrypted?", /* S1_AH4 */ + 0x00ff, 0x0033, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP, IM_CTL, + 0x021, 1, 0x0, 0xffff}, + { "IPV6 ESP encrypted?", /* S1_ESP6 */ +#if 0 +//@@@0x00ff, 0x0032, OP_EQ, 0, S1_CLNP2, 0, S1_AH6, IM_CTL, 0x021, 1, 0x0, 0xffff, +#endif + 0xff00, 
0x3200, OP_EQ, 0, S1_CLNP2, 0, S1_AH6, IM_CTL, + 0x021, 1, 0x0, 0xffff}, + { "IPV6 AH encrypted?", /* S1_AH6 */ +#if 0 +//@@@0x00ff, 0x0033, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP, IM_CTL, 0x021, 1, 0x0, 0xffff, +#endif + 0xff00, 0x3300, OP_EQ, 0, S1_CLNP2, 0, S1_CLNP, IM_CTL, + 0x021, 1, 0x0, 0xffff}, + { NULL }, +}; +#ifdef HP_ENCRYPT_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_encryptiontab +#endif +#endif + +static cas_hp_inst_t cas_prog_null[] = { {NULL} }; +#ifdef HP_NULL_DEFAULT +#define CAS_HP_FIRMWARE cas_prog_null +#endif + +/* firmware patch for NS_DP83065 */ +typedef struct cas_saturn_patch { + u16 addr; + u16 val; +} cas_saturn_patch_t; + +#if 1 +cas_saturn_patch_t cas_saturn_patch[] = { +{0x8200, 0x007e}, {0x8201, 0x0082}, {0x8202, 0x0009}, +{0x8203, 0x0000}, {0x8204, 0x0000}, {0x8205, 0x0000}, +{0x8206, 0x0000}, {0x8207, 0x0000}, {0x8208, 0x0000}, +{0x8209, 0x008e}, {0x820a, 0x008e}, {0x820b, 0x00ff}, +{0x820c, 0x00ce}, {0x820d, 0x0082}, {0x820e, 0x0025}, +{0x820f, 0x00ff}, {0x8210, 0x0001}, {0x8211, 0x000f}, +{0x8212, 0x00ce}, {0x8213, 0x0084}, {0x8214, 0x0026}, +{0x8215, 0x00ff}, {0x8216, 0x0001}, {0x8217, 0x0011}, +{0x8218, 0x00ce}, {0x8219, 0x0085}, {0x821a, 0x003d}, +{0x821b, 0x00df}, {0x821c, 0x00e5}, {0x821d, 0x0086}, +{0x821e, 0x0039}, {0x821f, 0x00b7}, {0x8220, 0x008f}, +{0x8221, 0x00f8}, {0x8222, 0x007e}, {0x8223, 0x00c3}, +{0x8224, 0x00c2}, {0x8225, 0x0096}, {0x8226, 0x0047}, +{0x8227, 0x0084}, {0x8228, 0x00f3}, {0x8229, 0x008a}, +{0x822a, 0x0000}, {0x822b, 0x0097}, {0x822c, 0x0047}, +{0x822d, 0x00ce}, {0x822e, 0x0082}, {0x822f, 0x0033}, +{0x8230, 0x00ff}, {0x8231, 0x0001}, {0x8232, 0x000f}, +{0x8233, 0x0096}, {0x8234, 0x0046}, {0x8235, 0x0084}, +{0x8236, 0x000c}, {0x8237, 0x0081}, {0x8238, 0x0004}, +{0x8239, 0x0027}, {0x823a, 0x000b}, {0x823b, 0x0096}, +{0x823c, 0x0046}, {0x823d, 0x0084}, {0x823e, 0x000c}, +{0x823f, 0x0081}, {0x8240, 0x0008}, {0x8241, 0x0027}, +{0x8242, 0x0057}, {0x8243, 0x007e}, {0x8244, 0x0084}, +{0x8245, 0x0025}, {0x8246, 0x0096}, {0x8247, 0x0047}, +{0x8248, 0x0084}, {0x8249, 0x00f3}, {0x824a, 0x008a}, +{0x824b, 0x0004}, {0x824c, 0x0097}, {0x824d, 0x0047}, +{0x824e, 0x00ce}, {0x824f, 0x0082}, {0x8250, 0x0054}, +{0x8251, 0x00ff}, {0x8252, 0x0001}, {0x8253, 0x000f}, +{0x8254, 0x0096}, {0x8255, 0x0046}, {0x8256, 0x0084}, +{0x8257, 0x000c}, {0x8258, 0x0081}, {0x8259, 0x0004}, +{0x825a, 0x0026}, {0x825b, 0x0038}, {0x825c, 0x00b6}, +{0x825d, 0x0012}, {0x825e, 0x0020}, {0x825f, 0x0084}, +{0x8260, 0x0020}, {0x8261, 0x0026}, {0x8262, 0x0003}, +{0x8263, 0x007e}, {0x8264, 0x0084}, {0x8265, 0x0025}, +{0x8266, 0x0096}, {0x8267, 0x007b}, {0x8268, 0x00d6}, +{0x8269, 0x007c}, {0x826a, 0x00fe}, {0x826b, 0x008f}, +{0x826c, 0x0056}, {0x826d, 0x00bd}, {0x826e, 0x00f7}, +{0x826f, 0x00b6}, {0x8270, 0x00fe}, {0x8271, 0x008f}, +{0x8272, 0x004e}, {0x8273, 0x00bd}, {0x8274, 0x00ec}, +{0x8275, 0x008e}, {0x8276, 0x00bd}, {0x8277, 0x00fa}, +{0x8278, 0x00f7}, {0x8279, 0x00bd}, {0x827a, 0x00f7}, +{0x827b, 0x0028}, {0x827c, 0x00ce}, {0x827d, 0x0082}, +{0x827e, 0x0082}, {0x827f, 0x00ff}, {0x8280, 0x0001}, +{0x8281, 0x000f}, {0x8282, 0x0096}, {0x8283, 0x0046}, +{0x8284, 0x0084}, {0x8285, 0x000c}, {0x8286, 0x0081}, +{0x8287, 0x0004}, {0x8288, 0x0026}, {0x8289, 0x000a}, +{0x828a, 0x00b6}, {0x828b, 0x0012}, {0x828c, 0x0020}, +{0x828d, 0x0084}, {0x828e, 0x0020}, {0x828f, 0x0027}, +{0x8290, 0x00b5}, {0x8291, 0x007e}, {0x8292, 0x0084}, +{0x8293, 0x0025}, {0x8294, 0x00bd}, {0x8295, 0x00f7}, +{0x8296, 0x001f}, {0x8297, 0x007e}, {0x8298, 0x0084}, +{0x8299, 0x001f}, {0x829a, 0x0096}, {0x829b, 
0x0047}, +{0x829c, 0x0084}, {0x829d, 0x00f3}, {0x829e, 0x008a}, +{0x829f, 0x0008}, {0x82a0, 0x0097}, {0x82a1, 0x0047}, +{0x82a2, 0x00de}, {0x82a3, 0x00e1}, {0x82a4, 0x00ad}, +{0x82a5, 0x0000}, {0x82a6, 0x00ce}, {0x82a7, 0x0082}, +{0x82a8, 0x00af}, {0x82a9, 0x00ff}, {0x82aa, 0x0001}, +{0x82ab, 0x000f}, {0x82ac, 0x007e}, {0x82ad, 0x0084}, +{0x82ae, 0x0025}, {0x82af, 0x0096}, {0x82b0, 0x0041}, +{0x82b1, 0x0085}, {0x82b2, 0x0010}, {0x82b3, 0x0026}, +{0x82b4, 0x0006}, {0x82b5, 0x0096}, {0x82b6, 0x0023}, +{0x82b7, 0x0085}, {0x82b8, 0x0040}, {0x82b9, 0x0027}, +{0x82ba, 0x0006}, {0x82bb, 0x00bd}, {0x82bc, 0x00ed}, +{0x82bd, 0x0000}, {0x82be, 0x007e}, {0x82bf, 0x0083}, +{0x82c0, 0x00a2}, {0x82c1, 0x00de}, {0x82c2, 0x0042}, +{0x82c3, 0x00bd}, {0x82c4, 0x00eb}, {0x82c5, 0x008e}, +{0x82c6, 0x0096}, {0x82c7, 0x0024}, {0x82c8, 0x0084}, +{0x82c9, 0x0008}, {0x82ca, 0x0027}, {0x82cb, 0x0003}, +{0x82cc, 0x007e}, {0x82cd, 0x0083}, {0x82ce, 0x00df}, +{0x82cf, 0x0096}, {0x82d0, 0x007b}, {0x82d1, 0x00d6}, +{0x82d2, 0x007c}, {0x82d3, 0x00fe}, {0x82d4, 0x008f}, +{0x82d5, 0x0056}, {0x82d6, 0x00bd}, {0x82d7, 0x00f7}, +{0x82d8, 0x00b6}, {0x82d9, 0x00fe}, {0x82da, 0x008f}, +{0x82db, 0x0050}, {0x82dc, 0x00bd}, {0x82dd, 0x00ec}, +{0x82de, 0x008e}, {0x82df, 0x00bd}, {0x82e0, 0x00fa}, +{0x82e1, 0x00f7}, {0x82e2, 0x0086}, {0x82e3, 0x0011}, +{0x82e4, 0x00c6}, {0x82e5, 0x0049}, {0x82e6, 0x00bd}, +{0x82e7, 0x00e4}, {0x82e8, 0x0012}, {0x82e9, 0x00ce}, +{0x82ea, 0x0082}, {0x82eb, 0x00ef}, {0x82ec, 0x00ff}, +{0x82ed, 0x0001}, {0x82ee, 0x000f}, {0x82ef, 0x0096}, +{0x82f0, 0x0046}, {0x82f1, 0x0084}, {0x82f2, 0x000c}, +{0x82f3, 0x0081}, {0x82f4, 0x0000}, {0x82f5, 0x0027}, +{0x82f6, 0x0017}, {0x82f7, 0x00c6}, {0x82f8, 0x0049}, +{0x82f9, 0x00bd}, {0x82fa, 0x00e4}, {0x82fb, 0x0091}, +{0x82fc, 0x0024}, {0x82fd, 0x000d}, {0x82fe, 0x00b6}, +{0x82ff, 0x0012}, {0x8300, 0x0020}, {0x8301, 0x0085}, +{0x8302, 0x0020}, {0x8303, 0x0026}, {0x8304, 0x000c}, +{0x8305, 0x00ce}, {0x8306, 0x0082}, {0x8307, 0x00c1}, +{0x8308, 0x00ff}, {0x8309, 0x0001}, {0x830a, 0x000f}, +{0x830b, 0x007e}, {0x830c, 0x0084}, {0x830d, 0x0025}, +{0x830e, 0x007e}, {0x830f, 0x0084}, {0x8310, 0x0016}, +{0x8311, 0x00fe}, {0x8312, 0x008f}, {0x8313, 0x0052}, +{0x8314, 0x00bd}, {0x8315, 0x00ec}, {0x8316, 0x008e}, +{0x8317, 0x00bd}, {0x8318, 0x00fa}, {0x8319, 0x00f7}, +{0x831a, 0x0086}, {0x831b, 0x006a}, {0x831c, 0x00c6}, +{0x831d, 0x0049}, {0x831e, 0x00bd}, {0x831f, 0x00e4}, +{0x8320, 0x0012}, {0x8321, 0x00ce}, {0x8322, 0x0083}, +{0x8323, 0x0027}, {0x8324, 0x00ff}, {0x8325, 0x0001}, +{0x8326, 0x000f}, {0x8327, 0x0096}, {0x8328, 0x0046}, +{0x8329, 0x0084}, {0x832a, 0x000c}, {0x832b, 0x0081}, +{0x832c, 0x0000}, {0x832d, 0x0027}, {0x832e, 0x000a}, +{0x832f, 0x00c6}, {0x8330, 0x0049}, {0x8331, 0x00bd}, +{0x8332, 0x00e4}, {0x8333, 0x0091}, {0x8334, 0x0025}, +{0x8335, 0x0006}, {0x8336, 0x007e}, {0x8337, 0x0084}, +{0x8338, 0x0025}, {0x8339, 0x007e}, {0x833a, 0x0084}, +{0x833b, 0x0016}, {0x833c, 0x00b6}, {0x833d, 0x0018}, +{0x833e, 0x0070}, {0x833f, 0x00bb}, {0x8340, 0x0019}, +{0x8341, 0x0070}, {0x8342, 0x002a}, {0x8343, 0x0004}, +{0x8344, 0x0081}, {0x8345, 0x00af}, {0x8346, 0x002e}, +{0x8347, 0x0019}, {0x8348, 0x0096}, {0x8349, 0x007b}, +{0x834a, 0x00f6}, {0x834b, 0x0020}, {0x834c, 0x0007}, +{0x834d, 0x00fa}, {0x834e, 0x0020}, {0x834f, 0x0027}, +{0x8350, 0x00c4}, {0x8351, 0x0038}, {0x8352, 0x0081}, +{0x8353, 0x0038}, {0x8354, 0x0027}, {0x8355, 0x000b}, +{0x8356, 0x00f6}, {0x8357, 0x0020}, {0x8358, 0x0007}, +{0x8359, 0x00fa}, {0x835a, 0x0020}, {0x835b, 0x0027}, +{0x835c, 0x00cb}, 
{0x835d, 0x0008}, {0x835e, 0x007e}, +{0x835f, 0x0082}, {0x8360, 0x00d3}, {0x8361, 0x00bd}, +{0x8362, 0x00f7}, {0x8363, 0x0066}, {0x8364, 0x0086}, +{0x8365, 0x0074}, {0x8366, 0x00c6}, {0x8367, 0x0049}, +{0x8368, 0x00bd}, {0x8369, 0x00e4}, {0x836a, 0x0012}, +{0x836b, 0x00ce}, {0x836c, 0x0083}, {0x836d, 0x0071}, +{0x836e, 0x00ff}, {0x836f, 0x0001}, {0x8370, 0x000f}, +{0x8371, 0x0096}, {0x8372, 0x0046}, {0x8373, 0x0084}, +{0x8374, 0x000c}, {0x8375, 0x0081}, {0x8376, 0x0008}, +{0x8377, 0x0026}, {0x8378, 0x000a}, {0x8379, 0x00c6}, +{0x837a, 0x0049}, {0x837b, 0x00bd}, {0x837c, 0x00e4}, +{0x837d, 0x0091}, {0x837e, 0x0025}, {0x837f, 0x0006}, +{0x8380, 0x007e}, {0x8381, 0x0084}, {0x8382, 0x0025}, +{0x8383, 0x007e}, {0x8384, 0x0084}, {0x8385, 0x0016}, +{0x8386, 0x00bd}, {0x8387, 0x00f7}, {0x8388, 0x003e}, +{0x8389, 0x0026}, {0x838a, 0x000e}, {0x838b, 0x00bd}, +{0x838c, 0x00e5}, {0x838d, 0x0009}, {0x838e, 0x0026}, +{0x838f, 0x0006}, {0x8390, 0x00ce}, {0x8391, 0x0082}, +{0x8392, 0x00c1}, {0x8393, 0x00ff}, {0x8394, 0x0001}, +{0x8395, 0x000f}, {0x8396, 0x007e}, {0x8397, 0x0084}, +{0x8398, 0x0025}, {0x8399, 0x00fe}, {0x839a, 0x008f}, +{0x839b, 0x0054}, {0x839c, 0x00bd}, {0x839d, 0x00ec}, +{0x839e, 0x008e}, {0x839f, 0x00bd}, {0x83a0, 0x00fa}, +{0x83a1, 0x00f7}, {0x83a2, 0x00bd}, {0x83a3, 0x00f7}, +{0x83a4, 0x0033}, {0x83a5, 0x0086}, {0x83a6, 0x000f}, +{0x83a7, 0x00c6}, {0x83a8, 0x0051}, {0x83a9, 0x00bd}, +{0x83aa, 0x00e4}, {0x83ab, 0x0012}, {0x83ac, 0x00ce}, +{0x83ad, 0x0083}, {0x83ae, 0x00b2}, {0x83af, 0x00ff}, +{0x83b0, 0x0001}, {0x83b1, 0x000f}, {0x83b2, 0x0096}, +{0x83b3, 0x0046}, {0x83b4, 0x0084}, {0x83b5, 0x000c}, +{0x83b6, 0x0081}, {0x83b7, 0x0008}, {0x83b8, 0x0026}, +{0x83b9, 0x005c}, {0x83ba, 0x00b6}, {0x83bb, 0x0012}, +{0x83bc, 0x0020}, {0x83bd, 0x0084}, {0x83be, 0x003f}, +{0x83bf, 0x0081}, {0x83c0, 0x003a}, {0x83c1, 0x0027}, +{0x83c2, 0x001c}, {0x83c3, 0x0096}, {0x83c4, 0x0023}, +{0x83c5, 0x0085}, {0x83c6, 0x0040}, {0x83c7, 0x0027}, +{0x83c8, 0x0003}, {0x83c9, 0x007e}, {0x83ca, 0x0084}, +{0x83cb, 0x0025}, {0x83cc, 0x00c6}, {0x83cd, 0x0051}, +{0x83ce, 0x00bd}, {0x83cf, 0x00e4}, {0x83d0, 0x0091}, +{0x83d1, 0x0025}, {0x83d2, 0x0003}, {0x83d3, 0x007e}, +{0x83d4, 0x0084}, {0x83d5, 0x0025}, {0x83d6, 0x00ce}, +{0x83d7, 0x0082}, {0x83d8, 0x00c1}, {0x83d9, 0x00ff}, +{0x83da, 0x0001}, {0x83db, 0x000f}, {0x83dc, 0x007e}, +{0x83dd, 0x0084}, {0x83de, 0x0025}, {0x83df, 0x00bd}, +{0x83e0, 0x00f8}, {0x83e1, 0x0037}, {0x83e2, 0x007c}, +{0x83e3, 0x0000}, {0x83e4, 0x007a}, {0x83e5, 0x00ce}, +{0x83e6, 0x0083}, {0x83e7, 0x00ee}, {0x83e8, 0x00ff}, +{0x83e9, 0x0001}, {0x83ea, 0x000f}, {0x83eb, 0x007e}, +{0x83ec, 0x0084}, {0x83ed, 0x0025}, {0x83ee, 0x0096}, +{0x83ef, 0x0046}, {0x83f0, 0x0084}, {0x83f1, 0x000c}, +{0x83f2, 0x0081}, {0x83f3, 0x0008}, {0x83f4, 0x0026}, +{0x83f5, 0x0020}, {0x83f6, 0x0096}, {0x83f7, 0x0024}, +{0x83f8, 0x0084}, {0x83f9, 0x0008}, {0x83fa, 0x0026}, +{0x83fb, 0x0029}, {0x83fc, 0x00b6}, {0x83fd, 0x0018}, +{0x83fe, 0x0082}, {0x83ff, 0x00bb}, {0x8400, 0x0019}, +{0x8401, 0x0082}, {0x8402, 0x00b1}, {0x8403, 0x0001}, +{0x8404, 0x003b}, {0x8405, 0x0022}, {0x8406, 0x0009}, +{0x8407, 0x00b6}, {0x8408, 0x0012}, {0x8409, 0x0020}, +{0x840a, 0x0084}, {0x840b, 0x0037}, {0x840c, 0x0081}, +{0x840d, 0x0032}, {0x840e, 0x0027}, {0x840f, 0x0015}, +{0x8410, 0x00bd}, {0x8411, 0x00f8}, {0x8412, 0x0044}, +{0x8413, 0x007e}, {0x8414, 0x0082}, {0x8415, 0x00c1}, +{0x8416, 0x00bd}, {0x8417, 0x00f7}, {0x8418, 0x001f}, +{0x8419, 0x00bd}, {0x841a, 0x00f8}, {0x841b, 0x0044}, +{0x841c, 0x00bd}, {0x841d, 0x00fc}, {0x841e, 
0x0029}, +{0x841f, 0x00ce}, {0x8420, 0x0082}, {0x8421, 0x0025}, +{0x8422, 0x00ff}, {0x8423, 0x0001}, {0x8424, 0x000f}, +{0x8425, 0x0039}, {0x8426, 0x0096}, {0x8427, 0x0047}, +{0x8428, 0x0084}, {0x8429, 0x00fc}, {0x842a, 0x008a}, +{0x842b, 0x0000}, {0x842c, 0x0097}, {0x842d, 0x0047}, +{0x842e, 0x00ce}, {0x842f, 0x0084}, {0x8430, 0x0034}, +{0x8431, 0x00ff}, {0x8432, 0x0001}, {0x8433, 0x0011}, +{0x8434, 0x0096}, {0x8435, 0x0046}, {0x8436, 0x0084}, +{0x8437, 0x0003}, {0x8438, 0x0081}, {0x8439, 0x0002}, +{0x843a, 0x0027}, {0x843b, 0x0003}, {0x843c, 0x007e}, +{0x843d, 0x0085}, {0x843e, 0x001e}, {0x843f, 0x0096}, +{0x8440, 0x0047}, {0x8441, 0x0084}, {0x8442, 0x00fc}, +{0x8443, 0x008a}, {0x8444, 0x0002}, {0x8445, 0x0097}, +{0x8446, 0x0047}, {0x8447, 0x00de}, {0x8448, 0x00e1}, +{0x8449, 0x00ad}, {0x844a, 0x0000}, {0x844b, 0x0086}, +{0x844c, 0x0001}, {0x844d, 0x00b7}, {0x844e, 0x0012}, +{0x844f, 0x0051}, {0x8450, 0x00bd}, {0x8451, 0x00f7}, +{0x8452, 0x0014}, {0x8453, 0x00b6}, {0x8454, 0x0010}, +{0x8455, 0x0031}, {0x8456, 0x0084}, {0x8457, 0x00fd}, +{0x8458, 0x00b7}, {0x8459, 0x0010}, {0x845a, 0x0031}, +{0x845b, 0x00bd}, {0x845c, 0x00f8}, {0x845d, 0x001e}, +{0x845e, 0x0096}, {0x845f, 0x0081}, {0x8460, 0x00d6}, +{0x8461, 0x0082}, {0x8462, 0x00fe}, {0x8463, 0x008f}, +{0x8464, 0x005a}, {0x8465, 0x00bd}, {0x8466, 0x00f7}, +{0x8467, 0x00b6}, {0x8468, 0x00fe}, {0x8469, 0x008f}, +{0x846a, 0x005c}, {0x846b, 0x00bd}, {0x846c, 0x00ec}, +{0x846d, 0x008e}, {0x846e, 0x00bd}, {0x846f, 0x00fa}, +{0x8470, 0x00f7}, {0x8471, 0x0086}, {0x8472, 0x0008}, +{0x8473, 0x00d6}, {0x8474, 0x0000}, {0x8475, 0x00c5}, +{0x8476, 0x0010}, {0x8477, 0x0026}, {0x8478, 0x0002}, +{0x8479, 0x008b}, {0x847a, 0x0020}, {0x847b, 0x00c6}, +{0x847c, 0x0051}, {0x847d, 0x00bd}, {0x847e, 0x00e4}, +{0x847f, 0x0012}, {0x8480, 0x00ce}, {0x8481, 0x0084}, +{0x8482, 0x0086}, {0x8483, 0x00ff}, {0x8484, 0x0001}, +{0x8485, 0x0011}, {0x8486, 0x0096}, {0x8487, 0x0046}, +{0x8488, 0x0084}, {0x8489, 0x0003}, {0x848a, 0x0081}, +{0x848b, 0x0002}, {0x848c, 0x0027}, {0x848d, 0x0003}, +{0x848e, 0x007e}, {0x848f, 0x0085}, {0x8490, 0x000f}, +{0x8491, 0x00c6}, {0x8492, 0x0051}, {0x8493, 0x00bd}, +{0x8494, 0x00e4}, {0x8495, 0x0091}, {0x8496, 0x0025}, +{0x8497, 0x0003}, {0x8498, 0x007e}, {0x8499, 0x0085}, +{0x849a, 0x001e}, {0x849b, 0x0096}, {0x849c, 0x0044}, +{0x849d, 0x0085}, {0x849e, 0x0010}, {0x849f, 0x0026}, +{0x84a0, 0x000a}, {0x84a1, 0x00b6}, {0x84a2, 0x0012}, +{0x84a3, 0x0050}, {0x84a4, 0x00ba}, {0x84a5, 0x0001}, +{0x84a6, 0x003c}, {0x84a7, 0x0085}, {0x84a8, 0x0010}, +{0x84a9, 0x0027}, {0x84aa, 0x00a8}, {0x84ab, 0x00bd}, +{0x84ac, 0x00f7}, {0x84ad, 0x0066}, {0x84ae, 0x00ce}, +{0x84af, 0x0084}, {0x84b0, 0x00b7}, {0x84b1, 0x00ff}, +{0x84b2, 0x0001}, {0x84b3, 0x0011}, {0x84b4, 0x007e}, +{0x84b5, 0x0085}, {0x84b6, 0x001e}, {0x84b7, 0x0096}, +{0x84b8, 0x0046}, {0x84b9, 0x0084}, {0x84ba, 0x0003}, +{0x84bb, 0x0081}, {0x84bc, 0x0002}, {0x84bd, 0x0026}, +{0x84be, 0x0050}, {0x84bf, 0x00b6}, {0x84c0, 0x0012}, +{0x84c1, 0x0030}, {0x84c2, 0x0084}, {0x84c3, 0x0003}, +{0x84c4, 0x0081}, {0x84c5, 0x0001}, {0x84c6, 0x0027}, +{0x84c7, 0x0003}, {0x84c8, 0x007e}, {0x84c9, 0x0085}, +{0x84ca, 0x001e}, {0x84cb, 0x0096}, {0x84cc, 0x0044}, +{0x84cd, 0x0085}, {0x84ce, 0x0010}, {0x84cf, 0x0026}, +{0x84d0, 0x0013}, {0x84d1, 0x00b6}, {0x84d2, 0x0012}, +{0x84d3, 0x0050}, {0x84d4, 0x00ba}, {0x84d5, 0x0001}, +{0x84d6, 0x003c}, {0x84d7, 0x0085}, {0x84d8, 0x0010}, +{0x84d9, 0x0026}, {0x84da, 0x0009}, {0x84db, 0x00ce}, +{0x84dc, 0x0084}, {0x84dd, 0x0053}, {0x84de, 0x00ff}, +{0x84df, 0x0001}, 
{0x84e0, 0x0011}, {0x84e1, 0x007e}, +{0x84e2, 0x0085}, {0x84e3, 0x001e}, {0x84e4, 0x00b6}, +{0x84e5, 0x0010}, {0x84e6, 0x0031}, {0x84e7, 0x008a}, +{0x84e8, 0x0002}, {0x84e9, 0x00b7}, {0x84ea, 0x0010}, +{0x84eb, 0x0031}, {0x84ec, 0x00bd}, {0x84ed, 0x0085}, +{0x84ee, 0x001f}, {0x84ef, 0x00bd}, {0x84f0, 0x00f8}, +{0x84f1, 0x0037}, {0x84f2, 0x007c}, {0x84f3, 0x0000}, +{0x84f4, 0x0080}, {0x84f5, 0x00ce}, {0x84f6, 0x0084}, +{0x84f7, 0x00fe}, {0x84f8, 0x00ff}, {0x84f9, 0x0001}, +{0x84fa, 0x0011}, {0x84fb, 0x007e}, {0x84fc, 0x0085}, +{0x84fd, 0x001e}, {0x84fe, 0x0096}, {0x84ff, 0x0046}, +{0x8500, 0x0084}, {0x8501, 0x0003}, {0x8502, 0x0081}, +{0x8503, 0x0002}, {0x8504, 0x0026}, {0x8505, 0x0009}, +{0x8506, 0x00b6}, {0x8507, 0x0012}, {0x8508, 0x0030}, +{0x8509, 0x0084}, {0x850a, 0x0003}, {0x850b, 0x0081}, +{0x850c, 0x0001}, {0x850d, 0x0027}, {0x850e, 0x000f}, +{0x850f, 0x00bd}, {0x8510, 0x00f8}, {0x8511, 0x0044}, +{0x8512, 0x00bd}, {0x8513, 0x00f7}, {0x8514, 0x000b}, +{0x8515, 0x00bd}, {0x8516, 0x00fc}, {0x8517, 0x0029}, +{0x8518, 0x00ce}, {0x8519, 0x0084}, {0x851a, 0x0026}, +{0x851b, 0x00ff}, {0x851c, 0x0001}, {0x851d, 0x0011}, +{0x851e, 0x0039}, {0x851f, 0x00d6}, {0x8520, 0x0022}, +{0x8521, 0x00c4}, {0x8522, 0x000f}, {0x8523, 0x00b6}, +{0x8524, 0x0012}, {0x8525, 0x0030}, {0x8526, 0x00ba}, +{0x8527, 0x0012}, {0x8528, 0x0032}, {0x8529, 0x0084}, +{0x852a, 0x0004}, {0x852b, 0x0027}, {0x852c, 0x000d}, +{0x852d, 0x0096}, {0x852e, 0x0022}, {0x852f, 0x0085}, +{0x8530, 0x0004}, {0x8531, 0x0027}, {0x8532, 0x0005}, +{0x8533, 0x00ca}, {0x8534, 0x0010}, {0x8535, 0x007e}, +{0x8536, 0x0085}, {0x8537, 0x003a}, {0x8538, 0x00ca}, +{0x8539, 0x0020}, {0x853a, 0x00d7}, {0x853b, 0x0022}, +{0x853c, 0x0039}, {0x853d, 0x0086}, {0x853e, 0x0000}, +{0x853f, 0x0097}, {0x8540, 0x0083}, {0x8541, 0x0018}, +{0x8542, 0x00ce}, {0x8543, 0x001c}, {0x8544, 0x0000}, +{0x8545, 0x00bd}, {0x8546, 0x00eb}, {0x8547, 0x0046}, +{0x8548, 0x0096}, {0x8549, 0x0057}, {0x854a, 0x0085}, +{0x854b, 0x0001}, {0x854c, 0x0027}, {0x854d, 0x0002}, +{0x854e, 0x004f}, {0x854f, 0x0039}, {0x8550, 0x0085}, +{0x8551, 0x0002}, {0x8552, 0x0027}, {0x8553, 0x0001}, +{0x8554, 0x0039}, {0x8555, 0x007f}, {0x8556, 0x008f}, +{0x8557, 0x007d}, {0x8558, 0x0086}, {0x8559, 0x0004}, +{0x855a, 0x00b7}, {0x855b, 0x0012}, {0x855c, 0x0004}, +{0x855d, 0x0086}, {0x855e, 0x0008}, {0x855f, 0x00b7}, +{0x8560, 0x0012}, {0x8561, 0x0007}, {0x8562, 0x0086}, +{0x8563, 0x0010}, {0x8564, 0x00b7}, {0x8565, 0x0012}, +{0x8566, 0x000c}, {0x8567, 0x0086}, {0x8568, 0x0007}, +{0x8569, 0x00b7}, {0x856a, 0x0012}, {0x856b, 0x0006}, +{0x856c, 0x00b6}, {0x856d, 0x008f}, {0x856e, 0x007d}, +{0x856f, 0x00b7}, {0x8570, 0x0012}, {0x8571, 0x0070}, +{0x8572, 0x0086}, {0x8573, 0x0001}, {0x8574, 0x00ba}, +{0x8575, 0x0012}, {0x8576, 0x0004}, {0x8577, 0x00b7}, +{0x8578, 0x0012}, {0x8579, 0x0004}, {0x857a, 0x0001}, +{0x857b, 0x0001}, {0x857c, 0x0001}, {0x857d, 0x0001}, +{0x857e, 0x0001}, {0x857f, 0x0001}, {0x8580, 0x00b6}, +{0x8581, 0x0012}, {0x8582, 0x0004}, {0x8583, 0x0084}, +{0x8584, 0x00fe}, {0x8585, 0x008a}, {0x8586, 0x0002}, +{0x8587, 0x00b7}, {0x8588, 0x0012}, {0x8589, 0x0004}, +{0x858a, 0x0001}, {0x858b, 0x0001}, {0x858c, 0x0001}, +{0x858d, 0x0001}, {0x858e, 0x0001}, {0x858f, 0x0001}, +{0x8590, 0x0086}, {0x8591, 0x00fd}, {0x8592, 0x00b4}, +{0x8593, 0x0012}, {0x8594, 0x0004}, {0x8595, 0x00b7}, +{0x8596, 0x0012}, {0x8597, 0x0004}, {0x8598, 0x00b6}, +{0x8599, 0x0012}, {0x859a, 0x0000}, {0x859b, 0x0084}, +{0x859c, 0x0008}, {0x859d, 0x0081}, {0x859e, 0x0008}, +{0x859f, 0x0027}, {0x85a0, 0x0016}, {0x85a1, 
0x00b6}, +{0x85a2, 0x008f}, {0x85a3, 0x007d}, {0x85a4, 0x0081}, +{0x85a5, 0x000c}, {0x85a6, 0x0027}, {0x85a7, 0x0008}, +{0x85a8, 0x008b}, {0x85a9, 0x0004}, {0x85aa, 0x00b7}, +{0x85ab, 0x008f}, {0x85ac, 0x007d}, {0x85ad, 0x007e}, +{0x85ae, 0x0085}, {0x85af, 0x006c}, {0x85b0, 0x0086}, +{0x85b1, 0x0003}, {0x85b2, 0x0097}, {0x85b3, 0x0040}, +{0x85b4, 0x007e}, {0x85b5, 0x0089}, {0x85b6, 0x006e}, +{0x85b7, 0x0086}, {0x85b8, 0x0007}, {0x85b9, 0x00b7}, +{0x85ba, 0x0012}, {0x85bb, 0x0006}, {0x85bc, 0x005f}, +{0x85bd, 0x00f7}, {0x85be, 0x008f}, {0x85bf, 0x0082}, +{0x85c0, 0x005f}, {0x85c1, 0x00f7}, {0x85c2, 0x008f}, +{0x85c3, 0x007f}, {0x85c4, 0x00f7}, {0x85c5, 0x008f}, +{0x85c6, 0x0070}, {0x85c7, 0x00f7}, {0x85c8, 0x008f}, +{0x85c9, 0x0071}, {0x85ca, 0x00f7}, {0x85cb, 0x008f}, +{0x85cc, 0x0072}, {0x85cd, 0x00f7}, {0x85ce, 0x008f}, +{0x85cf, 0x0073}, {0x85d0, 0x00f7}, {0x85d1, 0x008f}, +{0x85d2, 0x0074}, {0x85d3, 0x00f7}, {0x85d4, 0x008f}, +{0x85d5, 0x0075}, {0x85d6, 0x00f7}, {0x85d7, 0x008f}, +{0x85d8, 0x0076}, {0x85d9, 0x00f7}, {0x85da, 0x008f}, +{0x85db, 0x0077}, {0x85dc, 0x00f7}, {0x85dd, 0x008f}, +{0x85de, 0x0078}, {0x85df, 0x00f7}, {0x85e0, 0x008f}, +{0x85e1, 0x0079}, {0x85e2, 0x00f7}, {0x85e3, 0x008f}, +{0x85e4, 0x007a}, {0x85e5, 0x00f7}, {0x85e6, 0x008f}, +{0x85e7, 0x007b}, {0x85e8, 0x00b6}, {0x85e9, 0x0012}, +{0x85ea, 0x0004}, {0x85eb, 0x008a}, {0x85ec, 0x0010}, +{0x85ed, 0x00b7}, {0x85ee, 0x0012}, {0x85ef, 0x0004}, +{0x85f0, 0x0086}, {0x85f1, 0x00e4}, {0x85f2, 0x00b7}, +{0x85f3, 0x0012}, {0x85f4, 0x0070}, {0x85f5, 0x00b7}, +{0x85f6, 0x0012}, {0x85f7, 0x0007}, {0x85f8, 0x00f7}, +{0x85f9, 0x0012}, {0x85fa, 0x0005}, {0x85fb, 0x00f7}, +{0x85fc, 0x0012}, {0x85fd, 0x0009}, {0x85fe, 0x0086}, +{0x85ff, 0x0008}, {0x8600, 0x00ba}, {0x8601, 0x0012}, +{0x8602, 0x0004}, {0x8603, 0x00b7}, {0x8604, 0x0012}, +{0x8605, 0x0004}, {0x8606, 0x0086}, {0x8607, 0x00f7}, +{0x8608, 0x00b4}, {0x8609, 0x0012}, {0x860a, 0x0004}, +{0x860b, 0x00b7}, {0x860c, 0x0012}, {0x860d, 0x0004}, +{0x860e, 0x0001}, {0x860f, 0x0001}, {0x8610, 0x0001}, +{0x8611, 0x0001}, {0x8612, 0x0001}, {0x8613, 0x0001}, +{0x8614, 0x00b6}, {0x8615, 0x0012}, {0x8616, 0x0008}, +{0x8617, 0x0027}, {0x8618, 0x007f}, {0x8619, 0x0081}, +{0x861a, 0x0080}, {0x861b, 0x0026}, {0x861c, 0x000b}, +{0x861d, 0x0086}, {0x861e, 0x0008}, {0x861f, 0x00ce}, +{0x8620, 0x008f}, {0x8621, 0x0079}, {0x8622, 0x00bd}, +{0x8623, 0x0089}, {0x8624, 0x007b}, {0x8625, 0x007e}, +{0x8626, 0x0086}, {0x8627, 0x008e}, {0x8628, 0x0081}, +{0x8629, 0x0040}, {0x862a, 0x0026}, {0x862b, 0x000b}, +{0x862c, 0x0086}, {0x862d, 0x0004}, {0x862e, 0x00ce}, +{0x862f, 0x008f}, {0x8630, 0x0076}, {0x8631, 0x00bd}, +{0x8632, 0x0089}, {0x8633, 0x007b}, {0x8634, 0x007e}, +{0x8635, 0x0086}, {0x8636, 0x008e}, {0x8637, 0x0081}, +{0x8638, 0x0020}, {0x8639, 0x0026}, {0x863a, 0x000b}, +{0x863b, 0x0086}, {0x863c, 0x0002}, {0x863d, 0x00ce}, +{0x863e, 0x008f}, {0x863f, 0x0073}, {0x8640, 0x00bd}, +{0x8641, 0x0089}, {0x8642, 0x007b}, {0x8643, 0x007e}, +{0x8644, 0x0086}, {0x8645, 0x008e}, {0x8646, 0x0081}, +{0x8647, 0x0010}, {0x8648, 0x0026}, {0x8649, 0x000b}, +{0x864a, 0x0086}, {0x864b, 0x0001}, {0x864c, 0x00ce}, +{0x864d, 0x008f}, {0x864e, 0x0070}, {0x864f, 0x00bd}, +{0x8650, 0x0089}, {0x8651, 0x007b}, {0x8652, 0x007e}, +{0x8653, 0x0086}, {0x8654, 0x008e}, {0x8655, 0x0081}, +{0x8656, 0x0008}, {0x8657, 0x0026}, {0x8658, 0x000b}, +{0x8659, 0x0086}, {0x865a, 0x0008}, {0x865b, 0x00ce}, +{0x865c, 0x008f}, {0x865d, 0x0079}, {0x865e, 0x00bd}, +{0x865f, 0x0089}, {0x8660, 0x007f}, {0x8661, 0x007e}, +{0x8662, 0x0086}, 
{0x8663, 0x008e}, {0x8664, 0x0081}, +{0x8665, 0x0004}, {0x8666, 0x0026}, {0x8667, 0x000b}, +{0x8668, 0x0086}, {0x8669, 0x0004}, {0x866a, 0x00ce}, +{0x866b, 0x008f}, {0x866c, 0x0076}, {0x866d, 0x00bd}, +{0x866e, 0x0089}, {0x866f, 0x007f}, {0x8670, 0x007e}, +{0x8671, 0x0086}, {0x8672, 0x008e}, {0x8673, 0x0081}, +{0x8674, 0x0002}, {0x8675, 0x0026}, {0x8676, 0x000b}, +{0x8677, 0x008a}, {0x8678, 0x0002}, {0x8679, 0x00ce}, +{0x867a, 0x008f}, {0x867b, 0x0073}, {0x867c, 0x00bd}, +{0x867d, 0x0089}, {0x867e, 0x007f}, {0x867f, 0x007e}, +{0x8680, 0x0086}, {0x8681, 0x008e}, {0x8682, 0x0081}, +{0x8683, 0x0001}, {0x8684, 0x0026}, {0x8685, 0x0008}, +{0x8686, 0x0086}, {0x8687, 0x0001}, {0x8688, 0x00ce}, +{0x8689, 0x008f}, {0x868a, 0x0070}, {0x868b, 0x00bd}, +{0x868c, 0x0089}, {0x868d, 0x007f}, {0x868e, 0x00b6}, +{0x868f, 0x008f}, {0x8690, 0x007f}, {0x8691, 0x0081}, +{0x8692, 0x000f}, {0x8693, 0x0026}, {0x8694, 0x0003}, +{0x8695, 0x007e}, {0x8696, 0x0087}, {0x8697, 0x0047}, +{0x8698, 0x00b6}, {0x8699, 0x0012}, {0x869a, 0x0009}, +{0x869b, 0x0084}, {0x869c, 0x0003}, {0x869d, 0x0081}, +{0x869e, 0x0003}, {0x869f, 0x0027}, {0x86a0, 0x0006}, +{0x86a1, 0x007c}, {0x86a2, 0x0012}, {0x86a3, 0x0009}, +{0x86a4, 0x007e}, {0x86a5, 0x0085}, {0x86a6, 0x00fe}, +{0x86a7, 0x00b6}, {0x86a8, 0x0012}, {0x86a9, 0x0006}, +{0x86aa, 0x0084}, {0x86ab, 0x0007}, {0x86ac, 0x0081}, +{0x86ad, 0x0007}, {0x86ae, 0x0027}, {0x86af, 0x0008}, +{0x86b0, 0x008b}, {0x86b1, 0x0001}, {0x86b2, 0x00b7}, +{0x86b3, 0x0012}, {0x86b4, 0x0006}, {0x86b5, 0x007e}, +{0x86b6, 0x0086}, {0x86b7, 0x00d5}, {0x86b8, 0x00b6}, +{0x86b9, 0x008f}, {0x86ba, 0x0082}, {0x86bb, 0x0026}, +{0x86bc, 0x000a}, {0x86bd, 0x007c}, {0x86be, 0x008f}, +{0x86bf, 0x0082}, {0x86c0, 0x004f}, {0x86c1, 0x00b7}, +{0x86c2, 0x0012}, {0x86c3, 0x0006}, {0x86c4, 0x007e}, +{0x86c5, 0x0085}, {0x86c6, 0x00c0}, {0x86c7, 0x00b6}, +{0x86c8, 0x0012}, {0x86c9, 0x0006}, {0x86ca, 0x0084}, +{0x86cb, 0x003f}, {0x86cc, 0x0081}, {0x86cd, 0x003f}, +{0x86ce, 0x0027}, {0x86cf, 0x0010}, {0x86d0, 0x008b}, +{0x86d1, 0x0008}, {0x86d2, 0x00b7}, {0x86d3, 0x0012}, +{0x86d4, 0x0006}, {0x86d5, 0x00b6}, {0x86d6, 0x0012}, +{0x86d7, 0x0009}, {0x86d8, 0x0084}, {0x86d9, 0x00fc}, +{0x86da, 0x00b7}, {0x86db, 0x0012}, {0x86dc, 0x0009}, +{0x86dd, 0x007e}, {0x86de, 0x0085}, {0x86df, 0x00fe}, +{0x86e0, 0x00ce}, {0x86e1, 0x008f}, {0x86e2, 0x0070}, +{0x86e3, 0x0018}, {0x86e4, 0x00ce}, {0x86e5, 0x008f}, +{0x86e6, 0x0084}, {0x86e7, 0x00c6}, {0x86e8, 0x000c}, +{0x86e9, 0x00bd}, {0x86ea, 0x0089}, {0x86eb, 0x006f}, +{0x86ec, 0x00ce}, {0x86ed, 0x008f}, {0x86ee, 0x0084}, +{0x86ef, 0x0018}, {0x86f0, 0x00ce}, {0x86f1, 0x008f}, +{0x86f2, 0x0070}, {0x86f3, 0x00c6}, {0x86f4, 0x000c}, +{0x86f5, 0x00bd}, {0x86f6, 0x0089}, {0x86f7, 0x006f}, +{0x86f8, 0x00d6}, {0x86f9, 0x0083}, {0x86fa, 0x00c1}, +{0x86fb, 0x004f}, {0x86fc, 0x002d}, {0x86fd, 0x0003}, +{0x86fe, 0x007e}, {0x86ff, 0x0087}, {0x8700, 0x0040}, +{0x8701, 0x00b6}, {0x8702, 0x008f}, {0x8703, 0x007f}, +{0x8704, 0x0081}, {0x8705, 0x0007}, {0x8706, 0x0027}, +{0x8707, 0x000f}, {0x8708, 0x0081}, {0x8709, 0x000b}, +{0x870a, 0x0027}, {0x870b, 0x0015}, {0x870c, 0x0081}, +{0x870d, 0x000d}, {0x870e, 0x0027}, {0x870f, 0x001b}, +{0x8710, 0x0081}, {0x8711, 0x000e}, {0x8712, 0x0027}, +{0x8713, 0x0021}, {0x8714, 0x007e}, {0x8715, 0x0087}, +{0x8716, 0x0040}, {0x8717, 0x00f7}, {0x8718, 0x008f}, +{0x8719, 0x007b}, {0x871a, 0x0086}, {0x871b, 0x0002}, +{0x871c, 0x00b7}, {0x871d, 0x008f}, {0x871e, 0x007a}, +{0x871f, 0x0020}, {0x8720, 0x001c}, {0x8721, 0x00f7}, +{0x8722, 0x008f}, {0x8723, 0x0078}, {0x8724, 
0x0086}, +{0x8725, 0x0002}, {0x8726, 0x00b7}, {0x8727, 0x008f}, +{0x8728, 0x0077}, {0x8729, 0x0020}, {0x872a, 0x0012}, +{0x872b, 0x00f7}, {0x872c, 0x008f}, {0x872d, 0x0075}, +{0x872e, 0x0086}, {0x872f, 0x0002}, {0x8730, 0x00b7}, +{0x8731, 0x008f}, {0x8732, 0x0074}, {0x8733, 0x0020}, +{0x8734, 0x0008}, {0x8735, 0x00f7}, {0x8736, 0x008f}, +{0x8737, 0x0072}, {0x8738, 0x0086}, {0x8739, 0x0002}, +{0x873a, 0x00b7}, {0x873b, 0x008f}, {0x873c, 0x0071}, +{0x873d, 0x007e}, {0x873e, 0x0087}, {0x873f, 0x0047}, +{0x8740, 0x0086}, {0x8741, 0x0004}, {0x8742, 0x0097}, +{0x8743, 0x0040}, {0x8744, 0x007e}, {0x8745, 0x0089}, +{0x8746, 0x006e}, {0x8747, 0x00ce}, {0x8748, 0x008f}, +{0x8749, 0x0072}, {0x874a, 0x00bd}, {0x874b, 0x0089}, +{0x874c, 0x00f7}, {0x874d, 0x00ce}, {0x874e, 0x008f}, +{0x874f, 0x0075}, {0x8750, 0x00bd}, {0x8751, 0x0089}, +{0x8752, 0x00f7}, {0x8753, 0x00ce}, {0x8754, 0x008f}, +{0x8755, 0x0078}, {0x8756, 0x00bd}, {0x8757, 0x0089}, +{0x8758, 0x00f7}, {0x8759, 0x00ce}, {0x875a, 0x008f}, +{0x875b, 0x007b}, {0x875c, 0x00bd}, {0x875d, 0x0089}, +{0x875e, 0x00f7}, {0x875f, 0x004f}, {0x8760, 0x00b7}, +{0x8761, 0x008f}, {0x8762, 0x007d}, {0x8763, 0x00b7}, +{0x8764, 0x008f}, {0x8765, 0x0081}, {0x8766, 0x00b6}, +{0x8767, 0x008f}, {0x8768, 0x0072}, {0x8769, 0x0027}, +{0x876a, 0x0047}, {0x876b, 0x007c}, {0x876c, 0x008f}, +{0x876d, 0x007d}, {0x876e, 0x00b6}, {0x876f, 0x008f}, +{0x8770, 0x0075}, {0x8771, 0x0027}, {0x8772, 0x003f}, +{0x8773, 0x007c}, {0x8774, 0x008f}, {0x8775, 0x007d}, +{0x8776, 0x00b6}, {0x8777, 0x008f}, {0x8778, 0x0078}, +{0x8779, 0x0027}, {0x877a, 0x0037}, {0x877b, 0x007c}, +{0x877c, 0x008f}, {0x877d, 0x007d}, {0x877e, 0x00b6}, +{0x877f, 0x008f}, {0x8780, 0x007b}, {0x8781, 0x0027}, +{0x8782, 0x002f}, {0x8783, 0x007f}, {0x8784, 0x008f}, +{0x8785, 0x007d}, {0x8786, 0x007c}, {0x8787, 0x008f}, +{0x8788, 0x0081}, {0x8789, 0x007a}, {0x878a, 0x008f}, +{0x878b, 0x0072}, {0x878c, 0x0027}, {0x878d, 0x001b}, +{0x878e, 0x007c}, {0x878f, 0x008f}, {0x8790, 0x007d}, +{0x8791, 0x007a}, {0x8792, 0x008f}, {0x8793, 0x0075}, +{0x8794, 0x0027}, {0x8795, 0x0016}, {0x8796, 0x007c}, +{0x8797, 0x008f}, {0x8798, 0x007d}, {0x8799, 0x007a}, +{0x879a, 0x008f}, {0x879b, 0x0078}, {0x879c, 0x0027}, +{0x879d, 0x0011}, {0x879e, 0x007c}, {0x879f, 0x008f}, +{0x87a0, 0x007d}, {0x87a1, 0x007a}, {0x87a2, 0x008f}, +{0x87a3, 0x007b}, {0x87a4, 0x0027}, {0x87a5, 0x000c}, +{0x87a6, 0x007e}, {0x87a7, 0x0087}, {0x87a8, 0x0083}, +{0x87a9, 0x007a}, {0x87aa, 0x008f}, {0x87ab, 0x0075}, +{0x87ac, 0x007a}, {0x87ad, 0x008f}, {0x87ae, 0x0078}, +{0x87af, 0x007a}, {0x87b0, 0x008f}, {0x87b1, 0x007b}, +{0x87b2, 0x00ce}, {0x87b3, 0x00c1}, {0x87b4, 0x00fc}, +{0x87b5, 0x00f6}, {0x87b6, 0x008f}, {0x87b7, 0x007d}, +{0x87b8, 0x003a}, {0x87b9, 0x00a6}, {0x87ba, 0x0000}, +{0x87bb, 0x00b7}, {0x87bc, 0x0012}, {0x87bd, 0x0070}, +{0x87be, 0x00b6}, {0x87bf, 0x008f}, {0x87c0, 0x0072}, +{0x87c1, 0x0026}, {0x87c2, 0x0003}, {0x87c3, 0x007e}, +{0x87c4, 0x0087}, {0x87c5, 0x00fa}, {0x87c6, 0x00b6}, +{0x87c7, 0x008f}, {0x87c8, 0x0075}, {0x87c9, 0x0026}, +{0x87ca, 0x000a}, {0x87cb, 0x0018}, {0x87cc, 0x00ce}, +{0x87cd, 0x008f}, {0x87ce, 0x0073}, {0x87cf, 0x00bd}, +{0x87d0, 0x0089}, {0x87d1, 0x00d5}, {0x87d2, 0x007e}, +{0x87d3, 0x0087}, {0x87d4, 0x00fa}, {0x87d5, 0x00b6}, +{0x87d6, 0x008f}, {0x87d7, 0x0078}, {0x87d8, 0x0026}, +{0x87d9, 0x000a}, {0x87da, 0x0018}, {0x87db, 0x00ce}, +{0x87dc, 0x008f}, {0x87dd, 0x0076}, {0x87de, 0x00bd}, +{0x87df, 0x0089}, {0x87e0, 0x00d5}, {0x87e1, 0x007e}, +{0x87e2, 0x0087}, {0x87e3, 0x00fa}, {0x87e4, 0x00b6}, +{0x87e5, 0x008f}, 
{0x87e6, 0x007b}, {0x87e7, 0x0026}, +{0x87e8, 0x000a}, {0x87e9, 0x0018}, {0x87ea, 0x00ce}, +{0x87eb, 0x008f}, {0x87ec, 0x0079}, {0x87ed, 0x00bd}, +{0x87ee, 0x0089}, {0x87ef, 0x00d5}, {0x87f0, 0x007e}, +{0x87f1, 0x0087}, {0x87f2, 0x00fa}, {0x87f3, 0x0086}, +{0x87f4, 0x0005}, {0x87f5, 0x0097}, {0x87f6, 0x0040}, +{0x87f7, 0x007e}, {0x87f8, 0x0089}, {0x87f9, 0x0000}, +{0x87fa, 0x00b6}, {0x87fb, 0x008f}, {0x87fc, 0x0075}, +{0x87fd, 0x0081}, {0x87fe, 0x0007}, {0x87ff, 0x002e}, +{0x8800, 0x00f2}, {0x8801, 0x00f6}, {0x8802, 0x0012}, +{0x8803, 0x0006}, {0x8804, 0x00c4}, {0x8805, 0x00f8}, +{0x8806, 0x001b}, {0x8807, 0x00b7}, {0x8808, 0x0012}, +{0x8809, 0x0006}, {0x880a, 0x00b6}, {0x880b, 0x008f}, +{0x880c, 0x0078}, {0x880d, 0x0081}, {0x880e, 0x0007}, +{0x880f, 0x002e}, {0x8810, 0x00e2}, {0x8811, 0x0048}, +{0x8812, 0x0048}, {0x8813, 0x0048}, {0x8814, 0x00f6}, +{0x8815, 0x0012}, {0x8816, 0x0006}, {0x8817, 0x00c4}, +{0x8818, 0x00c7}, {0x8819, 0x001b}, {0x881a, 0x00b7}, +{0x881b, 0x0012}, {0x881c, 0x0006}, {0x881d, 0x00b6}, +{0x881e, 0x008f}, {0x881f, 0x007b}, {0x8820, 0x0081}, +{0x8821, 0x0007}, {0x8822, 0x002e}, {0x8823, 0x00cf}, +{0x8824, 0x00f6}, {0x8825, 0x0012}, {0x8826, 0x0005}, +{0x8827, 0x00c4}, {0x8828, 0x00f8}, {0x8829, 0x001b}, +{0x882a, 0x00b7}, {0x882b, 0x0012}, {0x882c, 0x0005}, +{0x882d, 0x0086}, {0x882e, 0x0000}, {0x882f, 0x00f6}, +{0x8830, 0x008f}, {0x8831, 0x0071}, {0x8832, 0x00bd}, +{0x8833, 0x0089}, {0x8834, 0x0094}, {0x8835, 0x0086}, +{0x8836, 0x0001}, {0x8837, 0x00f6}, {0x8838, 0x008f}, +{0x8839, 0x0074}, {0x883a, 0x00bd}, {0x883b, 0x0089}, +{0x883c, 0x0094}, {0x883d, 0x0086}, {0x883e, 0x0002}, +{0x883f, 0x00f6}, {0x8840, 0x008f}, {0x8841, 0x0077}, +{0x8842, 0x00bd}, {0x8843, 0x0089}, {0x8844, 0x0094}, +{0x8845, 0x0086}, {0x8846, 0x0003}, {0x8847, 0x00f6}, +{0x8848, 0x008f}, {0x8849, 0x007a}, {0x884a, 0x00bd}, +{0x884b, 0x0089}, {0x884c, 0x0094}, {0x884d, 0x00ce}, +{0x884e, 0x008f}, {0x884f, 0x0070}, {0x8850, 0x00a6}, +{0x8851, 0x0001}, {0x8852, 0x0081}, {0x8853, 0x0001}, +{0x8854, 0x0027}, {0x8855, 0x0007}, {0x8856, 0x0081}, +{0x8857, 0x0003}, {0x8858, 0x0027}, {0x8859, 0x0003}, +{0x885a, 0x007e}, {0x885b, 0x0088}, {0x885c, 0x0066}, +{0x885d, 0x00a6}, {0x885e, 0x0000}, {0x885f, 0x00b8}, +{0x8860, 0x008f}, {0x8861, 0x0081}, {0x8862, 0x0084}, +{0x8863, 0x0001}, {0x8864, 0x0026}, {0x8865, 0x000b}, +{0x8866, 0x008c}, {0x8867, 0x008f}, {0x8868, 0x0079}, +{0x8869, 0x002c}, {0x886a, 0x000e}, {0x886b, 0x0008}, +{0x886c, 0x0008}, {0x886d, 0x0008}, {0x886e, 0x007e}, +{0x886f, 0x0088}, {0x8870, 0x0050}, {0x8871, 0x00b6}, +{0x8872, 0x0012}, {0x8873, 0x0004}, {0x8874, 0x008a}, +{0x8875, 0x0040}, {0x8876, 0x00b7}, {0x8877, 0x0012}, +{0x8878, 0x0004}, {0x8879, 0x00b6}, {0x887a, 0x0012}, +{0x887b, 0x0004}, {0x887c, 0x0084}, {0x887d, 0x00fb}, +{0x887e, 0x0084}, {0x887f, 0x00ef}, {0x8880, 0x00b7}, +{0x8881, 0x0012}, {0x8882, 0x0004}, {0x8883, 0x00b6}, +{0x8884, 0x0012}, {0x8885, 0x0007}, {0x8886, 0x0036}, +{0x8887, 0x00b6}, {0x8888, 0x008f}, {0x8889, 0x007c}, +{0x888a, 0x0048}, {0x888b, 0x0048}, {0x888c, 0x00b7}, +{0x888d, 0x0012}, {0x888e, 0x0007}, {0x888f, 0x0086}, +{0x8890, 0x0001}, {0x8891, 0x00ba}, {0x8892, 0x0012}, +{0x8893, 0x0004}, {0x8894, 0x00b7}, {0x8895, 0x0012}, +{0x8896, 0x0004}, {0x8897, 0x0001}, {0x8898, 0x0001}, +{0x8899, 0x0001}, {0x889a, 0x0001}, {0x889b, 0x0001}, +{0x889c, 0x0001}, {0x889d, 0x0086}, {0x889e, 0x00fe}, +{0x889f, 0x00b4}, {0x88a0, 0x0012}, {0x88a1, 0x0004}, +{0x88a2, 0x00b7}, {0x88a3, 0x0012}, {0x88a4, 0x0004}, +{0x88a5, 0x0086}, {0x88a6, 0x0002}, {0x88a7, 
0x00ba}, +{0x88a8, 0x0012}, {0x88a9, 0x0004}, {0x88aa, 0x00b7}, +{0x88ab, 0x0012}, {0x88ac, 0x0004}, {0x88ad, 0x0086}, +{0x88ae, 0x00fd}, {0x88af, 0x00b4}, {0x88b0, 0x0012}, +{0x88b1, 0x0004}, {0x88b2, 0x00b7}, {0x88b3, 0x0012}, +{0x88b4, 0x0004}, {0x88b5, 0x0032}, {0x88b6, 0x00b7}, +{0x88b7, 0x0012}, {0x88b8, 0x0007}, {0x88b9, 0x00b6}, +{0x88ba, 0x0012}, {0x88bb, 0x0000}, {0x88bc, 0x0084}, +{0x88bd, 0x0008}, {0x88be, 0x0081}, {0x88bf, 0x0008}, +{0x88c0, 0x0027}, {0x88c1, 0x000f}, {0x88c2, 0x007c}, +{0x88c3, 0x0082}, {0x88c4, 0x0008}, {0x88c5, 0x0026}, +{0x88c6, 0x0007}, {0x88c7, 0x0086}, {0x88c8, 0x0076}, +{0x88c9, 0x0097}, {0x88ca, 0x0040}, {0x88cb, 0x007e}, +{0x88cc, 0x0089}, {0x88cd, 0x006e}, {0x88ce, 0x007e}, +{0x88cf, 0x0086}, {0x88d0, 0x00ec}, {0x88d1, 0x00b6}, +{0x88d2, 0x008f}, {0x88d3, 0x007f}, {0x88d4, 0x0081}, +{0x88d5, 0x000f}, {0x88d6, 0x0027}, {0x88d7, 0x003c}, +{0x88d8, 0x00bd}, {0x88d9, 0x00e6}, {0x88da, 0x00c7}, +{0x88db, 0x00b7}, {0x88dc, 0x0012}, {0x88dd, 0x000d}, +{0x88de, 0x00bd}, {0x88df, 0x00e6}, {0x88e0, 0x00cb}, +{0x88e1, 0x00b6}, {0x88e2, 0x0012}, {0x88e3, 0x0004}, +{0x88e4, 0x008a}, {0x88e5, 0x0020}, {0x88e6, 0x00b7}, +{0x88e7, 0x0012}, {0x88e8, 0x0004}, {0x88e9, 0x00ce}, +{0x88ea, 0x00ff}, {0x88eb, 0x00ff}, {0x88ec, 0x00b6}, +{0x88ed, 0x0012}, {0x88ee, 0x0000}, {0x88ef, 0x0081}, +{0x88f0, 0x000c}, {0x88f1, 0x0026}, {0x88f2, 0x0005}, +{0x88f3, 0x0009}, {0x88f4, 0x0026}, {0x88f5, 0x00f6}, +{0x88f6, 0x0027}, {0x88f7, 0x001c}, {0x88f8, 0x00b6}, +{0x88f9, 0x0012}, {0x88fa, 0x0004}, {0x88fb, 0x0084}, +{0x88fc, 0x00df}, {0x88fd, 0x00b7}, {0x88fe, 0x0012}, +{0x88ff, 0x0004}, {0x8900, 0x0096}, {0x8901, 0x0083}, +{0x8902, 0x0081}, {0x8903, 0x0007}, {0x8904, 0x002c}, +{0x8905, 0x0005}, {0x8906, 0x007c}, {0x8907, 0x0000}, +{0x8908, 0x0083}, {0x8909, 0x0020}, {0x890a, 0x0006}, +{0x890b, 0x0096}, {0x890c, 0x0083}, {0x890d, 0x008b}, +{0x890e, 0x0008}, {0x890f, 0x0097}, {0x8910, 0x0083}, +{0x8911, 0x007e}, {0x8912, 0x0085}, {0x8913, 0x0041}, +{0x8914, 0x007f}, {0x8915, 0x008f}, {0x8916, 0x007e}, +{0x8917, 0x0086}, {0x8918, 0x0080}, {0x8919, 0x00b7}, +{0x891a, 0x0012}, {0x891b, 0x000c}, {0x891c, 0x0086}, +{0x891d, 0x0001}, {0x891e, 0x00b7}, {0x891f, 0x008f}, +{0x8920, 0x007d}, {0x8921, 0x00b6}, {0x8922, 0x0012}, +{0x8923, 0x000c}, {0x8924, 0x0084}, {0x8925, 0x007f}, +{0x8926, 0x00b7}, {0x8927, 0x0012}, {0x8928, 0x000c}, +{0x8929, 0x008a}, {0x892a, 0x0080}, {0x892b, 0x00b7}, +{0x892c, 0x0012}, {0x892d, 0x000c}, {0x892e, 0x0086}, +{0x892f, 0x000a}, {0x8930, 0x00bd}, {0x8931, 0x008a}, +{0x8932, 0x0006}, {0x8933, 0x00b6}, {0x8934, 0x0012}, +{0x8935, 0x000a}, {0x8936, 0x002a}, {0x8937, 0x0009}, +{0x8938, 0x00b6}, {0x8939, 0x0012}, {0x893a, 0x000c}, +{0x893b, 0x00ba}, {0x893c, 0x008f}, {0x893d, 0x007d}, +{0x893e, 0x00b7}, {0x893f, 0x0012}, {0x8940, 0x000c}, +{0x8941, 0x00b6}, {0x8942, 0x008f}, {0x8943, 0x007e}, +{0x8944, 0x0081}, {0x8945, 0x0060}, {0x8946, 0x0027}, +{0x8947, 0x001a}, {0x8948, 0x008b}, {0x8949, 0x0020}, +{0x894a, 0x00b7}, {0x894b, 0x008f}, {0x894c, 0x007e}, +{0x894d, 0x00b6}, {0x894e, 0x0012}, {0x894f, 0x000c}, +{0x8950, 0x0084}, {0x8951, 0x009f}, {0x8952, 0x00ba}, +{0x8953, 0x008f}, {0x8954, 0x007e}, {0x8955, 0x00b7}, +{0x8956, 0x0012}, {0x8957, 0x000c}, {0x8958, 0x00b6}, +{0x8959, 0x008f}, {0x895a, 0x007d}, {0x895b, 0x0048}, +{0x895c, 0x00b7}, {0x895d, 0x008f}, {0x895e, 0x007d}, +{0x895f, 0x007e}, {0x8960, 0x0089}, {0x8961, 0x0021}, +{0x8962, 0x00b6}, {0x8963, 0x0012}, {0x8964, 0x0004}, +{0x8965, 0x008a}, {0x8966, 0x0020}, {0x8967, 0x00b7}, +{0x8968, 0x0012}, 
{0x8969, 0x0004}, {0x896a, 0x00bd}, +{0x896b, 0x008a}, {0x896c, 0x000a}, {0x896d, 0x004f}, +{0x896e, 0x0039}, {0x896f, 0x00a6}, {0x8970, 0x0000}, +{0x8971, 0x0018}, {0x8972, 0x00a7}, {0x8973, 0x0000}, +{0x8974, 0x0008}, {0x8975, 0x0018}, {0x8976, 0x0008}, +{0x8977, 0x005a}, {0x8978, 0x0026}, {0x8979, 0x00f5}, +{0x897a, 0x0039}, {0x897b, 0x0036}, {0x897c, 0x006c}, +{0x897d, 0x0000}, {0x897e, 0x0032}, {0x897f, 0x00ba}, +{0x8980, 0x008f}, {0x8981, 0x007f}, {0x8982, 0x00b7}, +{0x8983, 0x008f}, {0x8984, 0x007f}, {0x8985, 0x00b6}, +{0x8986, 0x0012}, {0x8987, 0x0009}, {0x8988, 0x0084}, +{0x8989, 0x0003}, {0x898a, 0x00a7}, {0x898b, 0x0001}, +{0x898c, 0x00b6}, {0x898d, 0x0012}, {0x898e, 0x0006}, +{0x898f, 0x0084}, {0x8990, 0x003f}, {0x8991, 0x00a7}, +{0x8992, 0x0002}, {0x8993, 0x0039}, {0x8994, 0x0036}, +{0x8995, 0x0086}, {0x8996, 0x0003}, {0x8997, 0x00b7}, +{0x8998, 0x008f}, {0x8999, 0x0080}, {0x899a, 0x0032}, +{0x899b, 0x00c1}, {0x899c, 0x0000}, {0x899d, 0x0026}, +{0x899e, 0x0006}, {0x899f, 0x00b7}, {0x89a0, 0x008f}, +{0x89a1, 0x007c}, {0x89a2, 0x007e}, {0x89a3, 0x0089}, +{0x89a4, 0x00c9}, {0x89a5, 0x00c1}, {0x89a6, 0x0001}, +{0x89a7, 0x0027}, {0x89a8, 0x0018}, {0x89a9, 0x00c1}, +{0x89aa, 0x0002}, {0x89ab, 0x0027}, {0x89ac, 0x000c}, +{0x89ad, 0x00c1}, {0x89ae, 0x0003}, {0x89af, 0x0027}, +{0x89b0, 0x0000}, {0x89b1, 0x00f6}, {0x89b2, 0x008f}, +{0x89b3, 0x0080}, {0x89b4, 0x0005}, {0x89b5, 0x0005}, +{0x89b6, 0x00f7}, {0x89b7, 0x008f}, {0x89b8, 0x0080}, +{0x89b9, 0x00f6}, {0x89ba, 0x008f}, {0x89bb, 0x0080}, +{0x89bc, 0x0005}, {0x89bd, 0x0005}, {0x89be, 0x00f7}, +{0x89bf, 0x008f}, {0x89c0, 0x0080}, {0x89c1, 0x00f6}, +{0x89c2, 0x008f}, {0x89c3, 0x0080}, {0x89c4, 0x0005}, +{0x89c5, 0x0005}, {0x89c6, 0x00f7}, {0x89c7, 0x008f}, +{0x89c8, 0x0080}, {0x89c9, 0x00f6}, {0x89ca, 0x008f}, +{0x89cb, 0x0080}, {0x89cc, 0x0053}, {0x89cd, 0x00f4}, +{0x89ce, 0x0012}, {0x89cf, 0x0007}, {0x89d0, 0x001b}, +{0x89d1, 0x00b7}, {0x89d2, 0x0012}, {0x89d3, 0x0007}, +{0x89d4, 0x0039}, {0x89d5, 0x00ce}, {0x89d6, 0x008f}, +{0x89d7, 0x0070}, {0x89d8, 0x00a6}, {0x89d9, 0x0000}, +{0x89da, 0x0018}, {0x89db, 0x00e6}, {0x89dc, 0x0000}, +{0x89dd, 0x0018}, {0x89de, 0x00a7}, {0x89df, 0x0000}, +{0x89e0, 0x00e7}, {0x89e1, 0x0000}, {0x89e2, 0x00a6}, +{0x89e3, 0x0001}, {0x89e4, 0x0018}, {0x89e5, 0x00e6}, +{0x89e6, 0x0001}, {0x89e7, 0x0018}, {0x89e8, 0x00a7}, +{0x89e9, 0x0001}, {0x89ea, 0x00e7}, {0x89eb, 0x0001}, +{0x89ec, 0x00a6}, {0x89ed, 0x0002}, {0x89ee, 0x0018}, +{0x89ef, 0x00e6}, {0x89f0, 0x0002}, {0x89f1, 0x0018}, +{0x89f2, 0x00a7}, {0x89f3, 0x0002}, {0x89f4, 0x00e7}, +{0x89f5, 0x0002}, {0x89f6, 0x0039}, {0x89f7, 0x00a6}, +{0x89f8, 0x0000}, {0x89f9, 0x0084}, {0x89fa, 0x0007}, +{0x89fb, 0x00e6}, {0x89fc, 0x0000}, {0x89fd, 0x00c4}, +{0x89fe, 0x0038}, {0x89ff, 0x0054}, {0x8a00, 0x0054}, +{0x8a01, 0x0054}, {0x8a02, 0x001b}, {0x8a03, 0x00a7}, +{0x8a04, 0x0000}, {0x8a05, 0x0039}, {0x8a06, 0x004a}, +{0x8a07, 0x0026}, {0x8a08, 0x00fd}, {0x8a09, 0x0039}, +{0x8a0a, 0x0096}, {0x8a0b, 0x0022}, {0x8a0c, 0x0084}, +{0x8a0d, 0x000f}, {0x8a0e, 0x0097}, {0x8a0f, 0x0022}, +{0x8a10, 0x0086}, {0x8a11, 0x0001}, {0x8a12, 0x00b7}, +{0x8a13, 0x008f}, {0x8a14, 0x0070}, {0x8a15, 0x00b6}, +{0x8a16, 0x0012}, {0x8a17, 0x0007}, {0x8a18, 0x00b7}, +{0x8a19, 0x008f}, {0x8a1a, 0x0071}, {0x8a1b, 0x00f6}, +{0x8a1c, 0x0012}, {0x8a1d, 0x000c}, {0x8a1e, 0x00c4}, +{0x8a1f, 0x000f}, {0x8a20, 0x00c8}, {0x8a21, 0x000f}, +{0x8a22, 0x00f7}, {0x8a23, 0x008f}, {0x8a24, 0x0072}, +{0x8a25, 0x00f6}, {0x8a26, 0x008f}, {0x8a27, 0x0072}, +{0x8a28, 0x00b6}, {0x8a29, 0x008f}, {0x8a2a, 
0x0071}, +{0x8a2b, 0x0084}, {0x8a2c, 0x0003}, {0x8a2d, 0x0027}, +{0x8a2e, 0x0014}, {0x8a2f, 0x0081}, {0x8a30, 0x0001}, +{0x8a31, 0x0027}, {0x8a32, 0x001c}, {0x8a33, 0x0081}, +{0x8a34, 0x0002}, {0x8a35, 0x0027}, {0x8a36, 0x0024}, +{0x8a37, 0x00f4}, {0x8a38, 0x008f}, {0x8a39, 0x0070}, +{0x8a3a, 0x0027}, {0x8a3b, 0x002a}, {0x8a3c, 0x0096}, +{0x8a3d, 0x0022}, {0x8a3e, 0x008a}, {0x8a3f, 0x0080}, +{0x8a40, 0x007e}, {0x8a41, 0x008a}, {0x8a42, 0x0064}, +{0x8a43, 0x00f4}, {0x8a44, 0x008f}, {0x8a45, 0x0070}, +{0x8a46, 0x0027}, {0x8a47, 0x001e}, {0x8a48, 0x0096}, +{0x8a49, 0x0022}, {0x8a4a, 0x008a}, {0x8a4b, 0x0010}, +{0x8a4c, 0x007e}, {0x8a4d, 0x008a}, {0x8a4e, 0x0064}, +{0x8a4f, 0x00f4}, {0x8a50, 0x008f}, {0x8a51, 0x0070}, +{0x8a52, 0x0027}, {0x8a53, 0x0012}, {0x8a54, 0x0096}, +{0x8a55, 0x0022}, {0x8a56, 0x008a}, {0x8a57, 0x0020}, +{0x8a58, 0x007e}, {0x8a59, 0x008a}, {0x8a5a, 0x0064}, +{0x8a5b, 0x00f4}, {0x8a5c, 0x008f}, {0x8a5d, 0x0070}, +{0x8a5e, 0x0027}, {0x8a5f, 0x0006}, {0x8a60, 0x0096}, +{0x8a61, 0x0022}, {0x8a62, 0x008a}, {0x8a63, 0x0040}, +{0x8a64, 0x0097}, {0x8a65, 0x0022}, {0x8a66, 0x0074}, +{0x8a67, 0x008f}, {0x8a68, 0x0071}, {0x8a69, 0x0074}, +{0x8a6a, 0x008f}, {0x8a6b, 0x0071}, {0x8a6c, 0x0078}, +{0x8a6d, 0x008f}, {0x8a6e, 0x0070}, {0x8a6f, 0x00b6}, +{0x8a70, 0x008f}, {0x8a71, 0x0070}, {0x8a72, 0x0085}, +{0x8a73, 0x0010}, {0x8a74, 0x0027}, {0x8a75, 0x00af}, +{0x8a76, 0x00d6}, {0x8a77, 0x0022}, {0x8a78, 0x00c4}, +{0x8a79, 0x0010}, {0x8a7a, 0x0058}, {0x8a7b, 0x00b6}, +{0x8a7c, 0x0012}, {0x8a7d, 0x0070}, {0x8a7e, 0x0081}, +{0x8a7f, 0x00e4}, {0x8a80, 0x0027}, {0x8a81, 0x0036}, +{0x8a82, 0x0081}, {0x8a83, 0x00e1}, {0x8a84, 0x0026}, +{0x8a85, 0x000c}, {0x8a86, 0x0096}, {0x8a87, 0x0022}, +{0x8a88, 0x0084}, {0x8a89, 0x0020}, {0x8a8a, 0x0044}, +{0x8a8b, 0x001b}, {0x8a8c, 0x00d6}, {0x8a8d, 0x0022}, +{0x8a8e, 0x00c4}, {0x8a8f, 0x00cf}, {0x8a90, 0x0020}, +{0x8a91, 0x0023}, {0x8a92, 0x0058}, {0x8a93, 0x0081}, +{0x8a94, 0x00c6}, {0x8a95, 0x0026}, {0x8a96, 0x000d}, +{0x8a97, 0x0096}, {0x8a98, 0x0022}, {0x8a99, 0x0084}, +{0x8a9a, 0x0040}, {0x8a9b, 0x0044}, {0x8a9c, 0x0044}, +{0x8a9d, 0x001b}, {0x8a9e, 0x00d6}, {0x8a9f, 0x0022}, +{0x8aa0, 0x00c4}, {0x8aa1, 0x00af}, {0x8aa2, 0x0020}, +{0x8aa3, 0x0011}, {0x8aa4, 0x0058}, {0x8aa5, 0x0081}, +{0x8aa6, 0x0027}, {0x8aa7, 0x0026}, {0x8aa8, 0x000f}, +{0x8aa9, 0x0096}, {0x8aaa, 0x0022}, {0x8aab, 0x0084}, +{0x8aac, 0x0080}, {0x8aad, 0x0044}, {0x8aae, 0x0044}, +{0x8aaf, 0x0044}, {0x8ab0, 0x001b}, {0x8ab1, 0x00d6}, +{0x8ab2, 0x0022}, {0x8ab3, 0x00c4}, {0x8ab4, 0x006f}, +{0x8ab5, 0x001b}, {0x8ab6, 0x0097}, {0x8ab7, 0x0022}, +{0x8ab8, 0x0039}, {0x8ab9, 0x0027}, {0x8aba, 0x000c}, +{0x8abb, 0x007c}, {0x8abc, 0x0082}, {0x8abd, 0x0006}, +{0x8abe, 0x00bd}, {0x8abf, 0x00d9}, {0x8ac0, 0x00ed}, +{0x8ac1, 0x00b6}, {0x8ac2, 0x0082}, {0x8ac3, 0x0007}, +{0x8ac4, 0x007e}, {0x8ac5, 0x008a}, {0x8ac6, 0x00b9}, +{0x8ac7, 0x007f}, {0x8ac8, 0x0082}, {0x8ac9, 0x0006}, +{0x8aca, 0x0039}, { 0x0, 0x0 } +}; +#else +cas_saturn_patch_t cas_saturn_patch[] = { +{0x8200, 0x007e}, {0x8201, 0x0082}, {0x8202, 0x0009}, +{0x8203, 0x0000}, {0x8204, 0x0000}, {0x8205, 0x0000}, +{0x8206, 0x0000}, {0x8207, 0x0000}, {0x8208, 0x0000}, +{0x8209, 0x008e}, {0x820a, 0x008e}, {0x820b, 0x00ff}, +{0x820c, 0x00ce}, {0x820d, 0x0082}, {0x820e, 0x0025}, +{0x820f, 0x00ff}, {0x8210, 0x0001}, {0x8211, 0x000f}, +{0x8212, 0x00ce}, {0x8213, 0x0084}, {0x8214, 0x0026}, +{0x8215, 0x00ff}, {0x8216, 0x0001}, {0x8217, 0x0011}, +{0x8218, 0x00ce}, {0x8219, 0x0085}, {0x821a, 0x003d}, +{0x821b, 0x00df}, {0x821c, 0x00e5}, {0x821d, 
0x0086}, +{0x821e, 0x0039}, {0x821f, 0x00b7}, {0x8220, 0x008f}, +{0x8221, 0x00f8}, {0x8222, 0x007e}, {0x8223, 0x00c3}, +{0x8224, 0x00c2}, {0x8225, 0x0096}, {0x8226, 0x0047}, +{0x8227, 0x0084}, {0x8228, 0x00f3}, {0x8229, 0x008a}, +{0x822a, 0x0000}, {0x822b, 0x0097}, {0x822c, 0x0047}, +{0x822d, 0x00ce}, {0x822e, 0x0082}, {0x822f, 0x0033}, +{0x8230, 0x00ff}, {0x8231, 0x0001}, {0x8232, 0x000f}, +{0x8233, 0x0096}, {0x8234, 0x0046}, {0x8235, 0x0084}, +{0x8236, 0x000c}, {0x8237, 0x0081}, {0x8238, 0x0004}, +{0x8239, 0x0027}, {0x823a, 0x000b}, {0x823b, 0x0096}, +{0x823c, 0x0046}, {0x823d, 0x0084}, {0x823e, 0x000c}, +{0x823f, 0x0081}, {0x8240, 0x0008}, {0x8241, 0x0027}, +{0x8242, 0x0057}, {0x8243, 0x007e}, {0x8244, 0x0084}, +{0x8245, 0x0025}, {0x8246, 0x0096}, {0x8247, 0x0047}, +{0x8248, 0x0084}, {0x8249, 0x00f3}, {0x824a, 0x008a}, +{0x824b, 0x0004}, {0x824c, 0x0097}, {0x824d, 0x0047}, +{0x824e, 0x00ce}, {0x824f, 0x0082}, {0x8250, 0x0054}, +{0x8251, 0x00ff}, {0x8252, 0x0001}, {0x8253, 0x000f}, +{0x8254, 0x0096}, {0x8255, 0x0046}, {0x8256, 0x0084}, +{0x8257, 0x000c}, {0x8258, 0x0081}, {0x8259, 0x0004}, +{0x825a, 0x0026}, {0x825b, 0x0038}, {0x825c, 0x00b6}, +{0x825d, 0x0012}, {0x825e, 0x0020}, {0x825f, 0x0084}, +{0x8260, 0x0020}, {0x8261, 0x0026}, {0x8262, 0x0003}, +{0x8263, 0x007e}, {0x8264, 0x0084}, {0x8265, 0x0025}, +{0x8266, 0x0096}, {0x8267, 0x007b}, {0x8268, 0x00d6}, +{0x8269, 0x007c}, {0x826a, 0x00fe}, {0x826b, 0x008f}, +{0x826c, 0x0056}, {0x826d, 0x00bd}, {0x826e, 0x00f7}, +{0x826f, 0x00b6}, {0x8270, 0x00fe}, {0x8271, 0x008f}, +{0x8272, 0x004e}, {0x8273, 0x00bd}, {0x8274, 0x00ec}, +{0x8275, 0x008e}, {0x8276, 0x00bd}, {0x8277, 0x00fa}, +{0x8278, 0x00f7}, {0x8279, 0x00bd}, {0x827a, 0x00f7}, +{0x827b, 0x0028}, {0x827c, 0x00ce}, {0x827d, 0x0082}, +{0x827e, 0x0082}, {0x827f, 0x00ff}, {0x8280, 0x0001}, +{0x8281, 0x000f}, {0x8282, 0x0096}, {0x8283, 0x0046}, +{0x8284, 0x0084}, {0x8285, 0x000c}, {0x8286, 0x0081}, +{0x8287, 0x0004}, {0x8288, 0x0026}, {0x8289, 0x000a}, +{0x828a, 0x00b6}, {0x828b, 0x0012}, {0x828c, 0x0020}, +{0x828d, 0x0084}, {0x828e, 0x0020}, {0x828f, 0x0027}, +{0x8290, 0x00b5}, {0x8291, 0x007e}, {0x8292, 0x0084}, +{0x8293, 0x0025}, {0x8294, 0x00bd}, {0x8295, 0x00f7}, +{0x8296, 0x001f}, {0x8297, 0x007e}, {0x8298, 0x0084}, +{0x8299, 0x001f}, {0x829a, 0x0096}, {0x829b, 0x0047}, +{0x829c, 0x0084}, {0x829d, 0x00f3}, {0x829e, 0x008a}, +{0x829f, 0x0008}, {0x82a0, 0x0097}, {0x82a1, 0x0047}, +{0x82a2, 0x00de}, {0x82a3, 0x00e1}, {0x82a4, 0x00ad}, +{0x82a5, 0x0000}, {0x82a6, 0x00ce}, {0x82a7, 0x0082}, +{0x82a8, 0x00af}, {0x82a9, 0x00ff}, {0x82aa, 0x0001}, +{0x82ab, 0x000f}, {0x82ac, 0x007e}, {0x82ad, 0x0084}, +{0x82ae, 0x0025}, {0x82af, 0x0096}, {0x82b0, 0x0041}, +{0x82b1, 0x0085}, {0x82b2, 0x0010}, {0x82b3, 0x0026}, +{0x82b4, 0x0006}, {0x82b5, 0x0096}, {0x82b6, 0x0023}, +{0x82b7, 0x0085}, {0x82b8, 0x0040}, {0x82b9, 0x0027}, +{0x82ba, 0x0006}, {0x82bb, 0x00bd}, {0x82bc, 0x00ed}, +{0x82bd, 0x0000}, {0x82be, 0x007e}, {0x82bf, 0x0083}, +{0x82c0, 0x00a2}, {0x82c1, 0x00de}, {0x82c2, 0x0042}, +{0x82c3, 0x00bd}, {0x82c4, 0x00eb}, {0x82c5, 0x008e}, +{0x82c6, 0x0096}, {0x82c7, 0x0024}, {0x82c8, 0x0084}, +{0x82c9, 0x0008}, {0x82ca, 0x0027}, {0x82cb, 0x0003}, +{0x82cc, 0x007e}, {0x82cd, 0x0083}, {0x82ce, 0x00df}, +{0x82cf, 0x0096}, {0x82d0, 0x007b}, {0x82d1, 0x00d6}, +{0x82d2, 0x007c}, {0x82d3, 0x00fe}, {0x82d4, 0x008f}, +{0x82d5, 0x0056}, {0x82d6, 0x00bd}, {0x82d7, 0x00f7}, +{0x82d8, 0x00b6}, {0x82d9, 0x00fe}, {0x82da, 0x008f}, +{0x82db, 0x0050}, {0x82dc, 0x00bd}, {0x82dd, 0x00ec}, +{0x82de, 0x008e}, 
{0x82df, 0x00bd}, {0x82e0, 0x00fa}, +{0x82e1, 0x00f7}, {0x82e2, 0x0086}, {0x82e3, 0x0011}, +{0x82e4, 0x00c6}, {0x82e5, 0x0049}, {0x82e6, 0x00bd}, +{0x82e7, 0x00e4}, {0x82e8, 0x0012}, {0x82e9, 0x00ce}, +{0x82ea, 0x0082}, {0x82eb, 0x00ef}, {0x82ec, 0x00ff}, +{0x82ed, 0x0001}, {0x82ee, 0x000f}, {0x82ef, 0x0096}, +{0x82f0, 0x0046}, {0x82f1, 0x0084}, {0x82f2, 0x000c}, +{0x82f3, 0x0081}, {0x82f4, 0x0000}, {0x82f5, 0x0027}, +{0x82f6, 0x0017}, {0x82f7, 0x00c6}, {0x82f8, 0x0049}, +{0x82f9, 0x00bd}, {0x82fa, 0x00e4}, {0x82fb, 0x0091}, +{0x82fc, 0x0024}, {0x82fd, 0x000d}, {0x82fe, 0x00b6}, +{0x82ff, 0x0012}, {0x8300, 0x0020}, {0x8301, 0x0085}, +{0x8302, 0x0020}, {0x8303, 0x0026}, {0x8304, 0x000c}, +{0x8305, 0x00ce}, {0x8306, 0x0082}, {0x8307, 0x00c1}, +{0x8308, 0x00ff}, {0x8309, 0x0001}, {0x830a, 0x000f}, +{0x830b, 0x007e}, {0x830c, 0x0084}, {0x830d, 0x0025}, +{0x830e, 0x007e}, {0x830f, 0x0084}, {0x8310, 0x0016}, +{0x8311, 0x00fe}, {0x8312, 0x008f}, {0x8313, 0x0052}, +{0x8314, 0x00bd}, {0x8315, 0x00ec}, {0x8316, 0x008e}, +{0x8317, 0x00bd}, {0x8318, 0x00fa}, {0x8319, 0x00f7}, +{0x831a, 0x0086}, {0x831b, 0x006a}, {0x831c, 0x00c6}, +{0x831d, 0x0049}, {0x831e, 0x00bd}, {0x831f, 0x00e4}, +{0x8320, 0x0012}, {0x8321, 0x00ce}, {0x8322, 0x0083}, +{0x8323, 0x0027}, {0x8324, 0x00ff}, {0x8325, 0x0001}, +{0x8326, 0x000f}, {0x8327, 0x0096}, {0x8328, 0x0046}, +{0x8329, 0x0084}, {0x832a, 0x000c}, {0x832b, 0x0081}, +{0x832c, 0x0000}, {0x832d, 0x0027}, {0x832e, 0x000a}, +{0x832f, 0x00c6}, {0x8330, 0x0049}, {0x8331, 0x00bd}, +{0x8332, 0x00e4}, {0x8333, 0x0091}, {0x8334, 0x0025}, +{0x8335, 0x0006}, {0x8336, 0x007e}, {0x8337, 0x0084}, +{0x8338, 0x0025}, {0x8339, 0x007e}, {0x833a, 0x0084}, +{0x833b, 0x0016}, {0x833c, 0x00b6}, {0x833d, 0x0018}, +{0x833e, 0x0070}, {0x833f, 0x00bb}, {0x8340, 0x0019}, +{0x8341, 0x0070}, {0x8342, 0x002a}, {0x8343, 0x0004}, +{0x8344, 0x0081}, {0x8345, 0x00af}, {0x8346, 0x002e}, +{0x8347, 0x0019}, {0x8348, 0x0096}, {0x8349, 0x007b}, +{0x834a, 0x00f6}, {0x834b, 0x0020}, {0x834c, 0x0007}, +{0x834d, 0x00fa}, {0x834e, 0x0020}, {0x834f, 0x0027}, +{0x8350, 0x00c4}, {0x8351, 0x0038}, {0x8352, 0x0081}, +{0x8353, 0x0038}, {0x8354, 0x0027}, {0x8355, 0x000b}, +{0x8356, 0x00f6}, {0x8357, 0x0020}, {0x8358, 0x0007}, +{0x8359, 0x00fa}, {0x835a, 0x0020}, {0x835b, 0x0027}, +{0x835c, 0x00cb}, {0x835d, 0x0008}, {0x835e, 0x007e}, +{0x835f, 0x0082}, {0x8360, 0x00d3}, {0x8361, 0x00bd}, +{0x8362, 0x00f7}, {0x8363, 0x0066}, {0x8364, 0x0086}, +{0x8365, 0x0074}, {0x8366, 0x00c6}, {0x8367, 0x0049}, +{0x8368, 0x00bd}, {0x8369, 0x00e4}, {0x836a, 0x0012}, +{0x836b, 0x00ce}, {0x836c, 0x0083}, {0x836d, 0x0071}, +{0x836e, 0x00ff}, {0x836f, 0x0001}, {0x8370, 0x000f}, +{0x8371, 0x0096}, {0x8372, 0x0046}, {0x8373, 0x0084}, +{0x8374, 0x000c}, {0x8375, 0x0081}, {0x8376, 0x0008}, +{0x8377, 0x0026}, {0x8378, 0x000a}, {0x8379, 0x00c6}, +{0x837a, 0x0049}, {0x837b, 0x00bd}, {0x837c, 0x00e4}, +{0x837d, 0x0091}, {0x837e, 0x0025}, {0x837f, 0x0006}, +{0x8380, 0x007e}, {0x8381, 0x0084}, {0x8382, 0x0025}, +{0x8383, 0x007e}, {0x8384, 0x0084}, {0x8385, 0x0016}, +{0x8386, 0x00bd}, {0x8387, 0x00f7}, {0x8388, 0x003e}, +{0x8389, 0x0026}, {0x838a, 0x000e}, {0x838b, 0x00bd}, +{0x838c, 0x00e5}, {0x838d, 0x0009}, {0x838e, 0x0026}, +{0x838f, 0x0006}, {0x8390, 0x00ce}, {0x8391, 0x0082}, +{0x8392, 0x00c1}, {0x8393, 0x00ff}, {0x8394, 0x0001}, +{0x8395, 0x000f}, {0x8396, 0x007e}, {0x8397, 0x0084}, +{0x8398, 0x0025}, {0x8399, 0x00fe}, {0x839a, 0x008f}, +{0x839b, 0x0054}, {0x839c, 0x00bd}, {0x839d, 0x00ec}, +{0x839e, 0x008e}, {0x839f, 0x00bd}, {0x83a0, 
0x00fa}, +{0x83a1, 0x00f7}, {0x83a2, 0x00bd}, {0x83a3, 0x00f7}, +{0x83a4, 0x0033}, {0x83a5, 0x0086}, {0x83a6, 0x000f}, +{0x83a7, 0x00c6}, {0x83a8, 0x0051}, {0x83a9, 0x00bd}, +{0x83aa, 0x00e4}, {0x83ab, 0x0012}, {0x83ac, 0x00ce}, +{0x83ad, 0x0083}, {0x83ae, 0x00b2}, {0x83af, 0x00ff}, +{0x83b0, 0x0001}, {0x83b1, 0x000f}, {0x83b2, 0x0096}, +{0x83b3, 0x0046}, {0x83b4, 0x0084}, {0x83b5, 0x000c}, +{0x83b6, 0x0081}, {0x83b7, 0x0008}, {0x83b8, 0x0026}, +{0x83b9, 0x005c}, {0x83ba, 0x00b6}, {0x83bb, 0x0012}, +{0x83bc, 0x0020}, {0x83bd, 0x0084}, {0x83be, 0x003f}, +{0x83bf, 0x0081}, {0x83c0, 0x003a}, {0x83c1, 0x0027}, +{0x83c2, 0x001c}, {0x83c3, 0x0096}, {0x83c4, 0x0023}, +{0x83c5, 0x0085}, {0x83c6, 0x0040}, {0x83c7, 0x0027}, +{0x83c8, 0x0003}, {0x83c9, 0x007e}, {0x83ca, 0x0084}, +{0x83cb, 0x0025}, {0x83cc, 0x00c6}, {0x83cd, 0x0051}, +{0x83ce, 0x00bd}, {0x83cf, 0x00e4}, {0x83d0, 0x0091}, +{0x83d1, 0x0025}, {0x83d2, 0x0003}, {0x83d3, 0x007e}, +{0x83d4, 0x0084}, {0x83d5, 0x0025}, {0x83d6, 0x00ce}, +{0x83d7, 0x0082}, {0x83d8, 0x00c1}, {0x83d9, 0x00ff}, +{0x83da, 0x0001}, {0x83db, 0x000f}, {0x83dc, 0x007e}, +{0x83dd, 0x0084}, {0x83de, 0x0025}, {0x83df, 0x00bd}, +{0x83e0, 0x00f8}, {0x83e1, 0x0037}, {0x83e2, 0x007c}, +{0x83e3, 0x0000}, {0x83e4, 0x007a}, {0x83e5, 0x00ce}, +{0x83e6, 0x0083}, {0x83e7, 0x00ee}, {0x83e8, 0x00ff}, +{0x83e9, 0x0001}, {0x83ea, 0x000f}, {0x83eb, 0x007e}, +{0x83ec, 0x0084}, {0x83ed, 0x0025}, {0x83ee, 0x0096}, +{0x83ef, 0x0046}, {0x83f0, 0x0084}, {0x83f1, 0x000c}, +{0x83f2, 0x0081}, {0x83f3, 0x0008}, {0x83f4, 0x0026}, +{0x83f5, 0x0020}, {0x83f6, 0x0096}, {0x83f7, 0x0024}, +{0x83f8, 0x0084}, {0x83f9, 0x0008}, {0x83fa, 0x0026}, +{0x83fb, 0x0029}, {0x83fc, 0x00b6}, {0x83fd, 0x0018}, +{0x83fe, 0x0082}, {0x83ff, 0x00bb}, {0x8400, 0x0019}, +{0x8401, 0x0082}, {0x8402, 0x00b1}, {0x8403, 0x0001}, +{0x8404, 0x003b}, {0x8405, 0x0022}, {0x8406, 0x0009}, +{0x8407, 0x00b6}, {0x8408, 0x0012}, {0x8409, 0x0020}, +{0x840a, 0x0084}, {0x840b, 0x0037}, {0x840c, 0x0081}, +{0x840d, 0x0032}, {0x840e, 0x0027}, {0x840f, 0x0015}, +{0x8410, 0x00bd}, {0x8411, 0x00f8}, {0x8412, 0x0044}, +{0x8413, 0x007e}, {0x8414, 0x0082}, {0x8415, 0x00c1}, +{0x8416, 0x00bd}, {0x8417, 0x00f7}, {0x8418, 0x001f}, +{0x8419, 0x00bd}, {0x841a, 0x00f8}, {0x841b, 0x0044}, +{0x841c, 0x00bd}, {0x841d, 0x00fc}, {0x841e, 0x0029}, +{0x841f, 0x00ce}, {0x8420, 0x0082}, {0x8421, 0x0025}, +{0x8422, 0x00ff}, {0x8423, 0x0001}, {0x8424, 0x000f}, +{0x8425, 0x0039}, {0x8426, 0x0096}, {0x8427, 0x0047}, +{0x8428, 0x0084}, {0x8429, 0x00fc}, {0x842a, 0x008a}, +{0x842b, 0x0000}, {0x842c, 0x0097}, {0x842d, 0x0047}, +{0x842e, 0x00ce}, {0x842f, 0x0084}, {0x8430, 0x0034}, +{0x8431, 0x00ff}, {0x8432, 0x0001}, {0x8433, 0x0011}, +{0x8434, 0x0096}, {0x8435, 0x0046}, {0x8436, 0x0084}, +{0x8437, 0x0003}, {0x8438, 0x0081}, {0x8439, 0x0002}, +{0x843a, 0x0027}, {0x843b, 0x0003}, {0x843c, 0x007e}, +{0x843d, 0x0085}, {0x843e, 0x001e}, {0x843f, 0x0096}, +{0x8440, 0x0047}, {0x8441, 0x0084}, {0x8442, 0x00fc}, +{0x8443, 0x008a}, {0x8444, 0x0002}, {0x8445, 0x0097}, +{0x8446, 0x0047}, {0x8447, 0x00de}, {0x8448, 0x00e1}, +{0x8449, 0x00ad}, {0x844a, 0x0000}, {0x844b, 0x0086}, +{0x844c, 0x0001}, {0x844d, 0x00b7}, {0x844e, 0x0012}, +{0x844f, 0x0051}, {0x8450, 0x00bd}, {0x8451, 0x00f7}, +{0x8452, 0x0014}, {0x8453, 0x00b6}, {0x8454, 0x0010}, +{0x8455, 0x0031}, {0x8456, 0x0084}, {0x8457, 0x00fd}, +{0x8458, 0x00b7}, {0x8459, 0x0010}, {0x845a, 0x0031}, +{0x845b, 0x00bd}, {0x845c, 0x00f8}, {0x845d, 0x001e}, +{0x845e, 0x0096}, {0x845f, 0x0081}, {0x8460, 0x00d6}, +{0x8461, 0x0082}, 
{0x8462, 0x00fe}, {0x8463, 0x008f}, +{0x8464, 0x005a}, {0x8465, 0x00bd}, {0x8466, 0x00f7}, +{0x8467, 0x00b6}, {0x8468, 0x00fe}, {0x8469, 0x008f}, +{0x846a, 0x005c}, {0x846b, 0x00bd}, {0x846c, 0x00ec}, +{0x846d, 0x008e}, {0x846e, 0x00bd}, {0x846f, 0x00fa}, +{0x8470, 0x00f7}, {0x8471, 0x0086}, {0x8472, 0x0008}, +{0x8473, 0x00d6}, {0x8474, 0x0000}, {0x8475, 0x00c5}, +{0x8476, 0x0010}, {0x8477, 0x0026}, {0x8478, 0x0002}, +{0x8479, 0x008b}, {0x847a, 0x0020}, {0x847b, 0x00c6}, +{0x847c, 0x0051}, {0x847d, 0x00bd}, {0x847e, 0x00e4}, +{0x847f, 0x0012}, {0x8480, 0x00ce}, {0x8481, 0x0084}, +{0x8482, 0x0086}, {0x8483, 0x00ff}, {0x8484, 0x0001}, +{0x8485, 0x0011}, {0x8486, 0x0096}, {0x8487, 0x0046}, +{0x8488, 0x0084}, {0x8489, 0x0003}, {0x848a, 0x0081}, +{0x848b, 0x0002}, {0x848c, 0x0027}, {0x848d, 0x0003}, +{0x848e, 0x007e}, {0x848f, 0x0085}, {0x8490, 0x000f}, +{0x8491, 0x00c6}, {0x8492, 0x0051}, {0x8493, 0x00bd}, +{0x8494, 0x00e4}, {0x8495, 0x0091}, {0x8496, 0x0025}, +{0x8497, 0x0003}, {0x8498, 0x007e}, {0x8499, 0x0085}, +{0x849a, 0x001e}, {0x849b, 0x0096}, {0x849c, 0x0044}, +{0x849d, 0x0085}, {0x849e, 0x0010}, {0x849f, 0x0026}, +{0x84a0, 0x000a}, {0x84a1, 0x00b6}, {0x84a2, 0x0012}, +{0x84a3, 0x0050}, {0x84a4, 0x00ba}, {0x84a5, 0x0001}, +{0x84a6, 0x003c}, {0x84a7, 0x0085}, {0x84a8, 0x0010}, +{0x84a9, 0x0027}, {0x84aa, 0x00a8}, {0x84ab, 0x00bd}, +{0x84ac, 0x00f7}, {0x84ad, 0x0066}, {0x84ae, 0x00ce}, +{0x84af, 0x0084}, {0x84b0, 0x00b7}, {0x84b1, 0x00ff}, +{0x84b2, 0x0001}, {0x84b3, 0x0011}, {0x84b4, 0x007e}, +{0x84b5, 0x0085}, {0x84b6, 0x001e}, {0x84b7, 0x0096}, +{0x84b8, 0x0046}, {0x84b9, 0x0084}, {0x84ba, 0x0003}, +{0x84bb, 0x0081}, {0x84bc, 0x0002}, {0x84bd, 0x0026}, +{0x84be, 0x0050}, {0x84bf, 0x00b6}, {0x84c0, 0x0012}, +{0x84c1, 0x0030}, {0x84c2, 0x0084}, {0x84c3, 0x0003}, +{0x84c4, 0x0081}, {0x84c5, 0x0001}, {0x84c6, 0x0027}, +{0x84c7, 0x0003}, {0x84c8, 0x007e}, {0x84c9, 0x0085}, +{0x84ca, 0x001e}, {0x84cb, 0x0096}, {0x84cc, 0x0044}, +{0x84cd, 0x0085}, {0x84ce, 0x0010}, {0x84cf, 0x0026}, +{0x84d0, 0x0013}, {0x84d1, 0x00b6}, {0x84d2, 0x0012}, +{0x84d3, 0x0050}, {0x84d4, 0x00ba}, {0x84d5, 0x0001}, +{0x84d6, 0x003c}, {0x84d7, 0x0085}, {0x84d8, 0x0010}, +{0x84d9, 0x0026}, {0x84da, 0x0009}, {0x84db, 0x00ce}, +{0x84dc, 0x0084}, {0x84dd, 0x0053}, {0x84de, 0x00ff}, +{0x84df, 0x0001}, {0x84e0, 0x0011}, {0x84e1, 0x007e}, +{0x84e2, 0x0085}, {0x84e3, 0x001e}, {0x84e4, 0x00b6}, +{0x84e5, 0x0010}, {0x84e6, 0x0031}, {0x84e7, 0x008a}, +{0x84e8, 0x0002}, {0x84e9, 0x00b7}, {0x84ea, 0x0010}, +{0x84eb, 0x0031}, {0x84ec, 0x00bd}, {0x84ed, 0x0085}, +{0x84ee, 0x001f}, {0x84ef, 0x00bd}, {0x84f0, 0x00f8}, +{0x84f1, 0x0037}, {0x84f2, 0x007c}, {0x84f3, 0x0000}, +{0x84f4, 0x0080}, {0x84f5, 0x00ce}, {0x84f6, 0x0084}, +{0x84f7, 0x00fe}, {0x84f8, 0x00ff}, {0x84f9, 0x0001}, +{0x84fa, 0x0011}, {0x84fb, 0x007e}, {0x84fc, 0x0085}, +{0x84fd, 0x001e}, {0x84fe, 0x0096}, {0x84ff, 0x0046}, +{0x8500, 0x0084}, {0x8501, 0x0003}, {0x8502, 0x0081}, +{0x8503, 0x0002}, {0x8504, 0x0026}, {0x8505, 0x0009}, +{0x8506, 0x00b6}, {0x8507, 0x0012}, {0x8508, 0x0030}, +{0x8509, 0x0084}, {0x850a, 0x0003}, {0x850b, 0x0081}, +{0x850c, 0x0001}, {0x850d, 0x0027}, {0x850e, 0x000f}, +{0x850f, 0x00bd}, {0x8510, 0x00f8}, {0x8511, 0x0044}, +{0x8512, 0x00bd}, {0x8513, 0x00f7}, {0x8514, 0x000b}, +{0x8515, 0x00bd}, {0x8516, 0x00fc}, {0x8517, 0x0029}, +{0x8518, 0x00ce}, {0x8519, 0x0084}, {0x851a, 0x0026}, +{0x851b, 0x00ff}, {0x851c, 0x0001}, {0x851d, 0x0011}, +{0x851e, 0x0039}, {0x851f, 0x00d6}, {0x8520, 0x0022}, +{0x8521, 0x00c4}, {0x8522, 0x000f}, {0x8523, 
0x00b6}, +{0x8524, 0x0012}, {0x8525, 0x0030}, {0x8526, 0x00ba}, +{0x8527, 0x0012}, {0x8528, 0x0032}, {0x8529, 0x0084}, +{0x852a, 0x0004}, {0x852b, 0x0027}, {0x852c, 0x000d}, +{0x852d, 0x0096}, {0x852e, 0x0022}, {0x852f, 0x0085}, +{0x8530, 0x0004}, {0x8531, 0x0027}, {0x8532, 0x0005}, +{0x8533, 0x00ca}, {0x8534, 0x0010}, {0x8535, 0x007e}, +{0x8536, 0x0085}, {0x8537, 0x003a}, {0x8538, 0x00ca}, +{0x8539, 0x0020}, {0x853a, 0x00d7}, {0x853b, 0x0022}, +{0x853c, 0x0039}, {0x853d, 0x0086}, {0x853e, 0x0000}, +{0x853f, 0x0097}, {0x8540, 0x0083}, {0x8541, 0x0018}, +{0x8542, 0x00ce}, {0x8543, 0x001c}, {0x8544, 0x0000}, +{0x8545, 0x00bd}, {0x8546, 0x00eb}, {0x8547, 0x0046}, +{0x8548, 0x0096}, {0x8549, 0x0057}, {0x854a, 0x0085}, +{0x854b, 0x0001}, {0x854c, 0x0027}, {0x854d, 0x0002}, +{0x854e, 0x004f}, {0x854f, 0x0039}, {0x8550, 0x0085}, +{0x8551, 0x0002}, {0x8552, 0x0027}, {0x8553, 0x0001}, +{0x8554, 0x0039}, {0x8555, 0x007f}, {0x8556, 0x008f}, +{0x8557, 0x007d}, {0x8558, 0x0086}, {0x8559, 0x0004}, +{0x855a, 0x00b7}, {0x855b, 0x0012}, {0x855c, 0x0004}, +{0x855d, 0x0086}, {0x855e, 0x0008}, {0x855f, 0x00b7}, +{0x8560, 0x0012}, {0x8561, 0x0007}, {0x8562, 0x0086}, +{0x8563, 0x0010}, {0x8564, 0x00b7}, {0x8565, 0x0012}, +{0x8566, 0x000c}, {0x8567, 0x0086}, {0x8568, 0x0007}, +{0x8569, 0x00b7}, {0x856a, 0x0012}, {0x856b, 0x0006}, +{0x856c, 0x00b6}, {0x856d, 0x008f}, {0x856e, 0x007d}, +{0x856f, 0x00b7}, {0x8570, 0x0012}, {0x8571, 0x0070}, +{0x8572, 0x0086}, {0x8573, 0x0001}, {0x8574, 0x00ba}, +{0x8575, 0x0012}, {0x8576, 0x0004}, {0x8577, 0x00b7}, +{0x8578, 0x0012}, {0x8579, 0x0004}, {0x857a, 0x0001}, +{0x857b, 0x0001}, {0x857c, 0x0001}, {0x857d, 0x0001}, +{0x857e, 0x0001}, {0x857f, 0x0001}, {0x8580, 0x00b6}, +{0x8581, 0x0012}, {0x8582, 0x0004}, {0x8583, 0x0084}, +{0x8584, 0x00fe}, {0x8585, 0x008a}, {0x8586, 0x0002}, +{0x8587, 0x00b7}, {0x8588, 0x0012}, {0x8589, 0x0004}, +{0x858a, 0x0001}, {0x858b, 0x0001}, {0x858c, 0x0001}, +{0x858d, 0x0001}, {0x858e, 0x0001}, {0x858f, 0x0001}, +{0x8590, 0x0086}, {0x8591, 0x00fd}, {0x8592, 0x00b4}, +{0x8593, 0x0012}, {0x8594, 0x0004}, {0x8595, 0x00b7}, +{0x8596, 0x0012}, {0x8597, 0x0004}, {0x8598, 0x00b6}, +{0x8599, 0x0012}, {0x859a, 0x0000}, {0x859b, 0x0084}, +{0x859c, 0x0008}, {0x859d, 0x0081}, {0x859e, 0x0008}, +{0x859f, 0x0027}, {0x85a0, 0x0016}, {0x85a1, 0x00b6}, +{0x85a2, 0x008f}, {0x85a3, 0x007d}, {0x85a4, 0x0081}, +{0x85a5, 0x000c}, {0x85a6, 0x0027}, {0x85a7, 0x0008}, +{0x85a8, 0x008b}, {0x85a9, 0x0004}, {0x85aa, 0x00b7}, +{0x85ab, 0x008f}, {0x85ac, 0x007d}, {0x85ad, 0x007e}, +{0x85ae, 0x0085}, {0x85af, 0x006c}, {0x85b0, 0x0086}, +{0x85b1, 0x0003}, {0x85b2, 0x0097}, {0x85b3, 0x0040}, +{0x85b4, 0x007e}, {0x85b5, 0x0089}, {0x85b6, 0x006e}, +{0x85b7, 0x0086}, {0x85b8, 0x0007}, {0x85b9, 0x00b7}, +{0x85ba, 0x0012}, {0x85bb, 0x0006}, {0x85bc, 0x005f}, +{0x85bd, 0x00f7}, {0x85be, 0x008f}, {0x85bf, 0x0082}, +{0x85c0, 0x005f}, {0x85c1, 0x00f7}, {0x85c2, 0x008f}, +{0x85c3, 0x007f}, {0x85c4, 0x00f7}, {0x85c5, 0x008f}, +{0x85c6, 0x0070}, {0x85c7, 0x00f7}, {0x85c8, 0x008f}, +{0x85c9, 0x0071}, {0x85ca, 0x00f7}, {0x85cb, 0x008f}, +{0x85cc, 0x0072}, {0x85cd, 0x00f7}, {0x85ce, 0x008f}, +{0x85cf, 0x0073}, {0x85d0, 0x00f7}, {0x85d1, 0x008f}, +{0x85d2, 0x0074}, {0x85d3, 0x00f7}, {0x85d4, 0x008f}, +{0x85d5, 0x0075}, {0x85d6, 0x00f7}, {0x85d7, 0x008f}, +{0x85d8, 0x0076}, {0x85d9, 0x00f7}, {0x85da, 0x008f}, +{0x85db, 0x0077}, {0x85dc, 0x00f7}, {0x85dd, 0x008f}, +{0x85de, 0x0078}, {0x85df, 0x00f7}, {0x85e0, 0x008f}, +{0x85e1, 0x0079}, {0x85e2, 0x00f7}, {0x85e3, 0x008f}, +{0x85e4, 0x007a}, 
{0x85e5, 0x00f7}, {0x85e6, 0x008f}, +{0x85e7, 0x007b}, {0x85e8, 0x00b6}, {0x85e9, 0x0012}, +{0x85ea, 0x0004}, {0x85eb, 0x008a}, {0x85ec, 0x0010}, +{0x85ed, 0x00b7}, {0x85ee, 0x0012}, {0x85ef, 0x0004}, +{0x85f0, 0x0086}, {0x85f1, 0x00e4}, {0x85f2, 0x00b7}, +{0x85f3, 0x0012}, {0x85f4, 0x0070}, {0x85f5, 0x00b7}, +{0x85f6, 0x0012}, {0x85f7, 0x0007}, {0x85f8, 0x00f7}, +{0x85f9, 0x0012}, {0x85fa, 0x0005}, {0x85fb, 0x00f7}, +{0x85fc, 0x0012}, {0x85fd, 0x0009}, {0x85fe, 0x0086}, +{0x85ff, 0x0008}, {0x8600, 0x00ba}, {0x8601, 0x0012}, +{0x8602, 0x0004}, {0x8603, 0x00b7}, {0x8604, 0x0012}, +{0x8605, 0x0004}, {0x8606, 0x0086}, {0x8607, 0x00f7}, +{0x8608, 0x00b4}, {0x8609, 0x0012}, {0x860a, 0x0004}, +{0x860b, 0x00b7}, {0x860c, 0x0012}, {0x860d, 0x0004}, +{0x860e, 0x0001}, {0x860f, 0x0001}, {0x8610, 0x0001}, +{0x8611, 0x0001}, {0x8612, 0x0001}, {0x8613, 0x0001}, +{0x8614, 0x00b6}, {0x8615, 0x0012}, {0x8616, 0x0008}, +{0x8617, 0x0027}, {0x8618, 0x007f}, {0x8619, 0x0081}, +{0x861a, 0x0080}, {0x861b, 0x0026}, {0x861c, 0x000b}, +{0x861d, 0x0086}, {0x861e, 0x0008}, {0x861f, 0x00ce}, +{0x8620, 0x008f}, {0x8621, 0x0079}, {0x8622, 0x00bd}, +{0x8623, 0x0089}, {0x8624, 0x007b}, {0x8625, 0x007e}, +{0x8626, 0x0086}, {0x8627, 0x008e}, {0x8628, 0x0081}, +{0x8629, 0x0040}, {0x862a, 0x0026}, {0x862b, 0x000b}, +{0x862c, 0x0086}, {0x862d, 0x0004}, {0x862e, 0x00ce}, +{0x862f, 0x008f}, {0x8630, 0x0076}, {0x8631, 0x00bd}, +{0x8632, 0x0089}, {0x8633, 0x007b}, {0x8634, 0x007e}, +{0x8635, 0x0086}, {0x8636, 0x008e}, {0x8637, 0x0081}, +{0x8638, 0x0020}, {0x8639, 0x0026}, {0x863a, 0x000b}, +{0x863b, 0x0086}, {0x863c, 0x0002}, {0x863d, 0x00ce}, +{0x863e, 0x008f}, {0x863f, 0x0073}, {0x8640, 0x00bd}, +{0x8641, 0x0089}, {0x8642, 0x007b}, {0x8643, 0x007e}, +{0x8644, 0x0086}, {0x8645, 0x008e}, {0x8646, 0x0081}, +{0x8647, 0x0010}, {0x8648, 0x0026}, {0x8649, 0x000b}, +{0x864a, 0x0086}, {0x864b, 0x0001}, {0x864c, 0x00ce}, +{0x864d, 0x008f}, {0x864e, 0x0070}, {0x864f, 0x00bd}, +{0x8650, 0x0089}, {0x8651, 0x007b}, {0x8652, 0x007e}, +{0x8653, 0x0086}, {0x8654, 0x008e}, {0x8655, 0x0081}, +{0x8656, 0x0008}, {0x8657, 0x0026}, {0x8658, 0x000b}, +{0x8659, 0x0086}, {0x865a, 0x0008}, {0x865b, 0x00ce}, +{0x865c, 0x008f}, {0x865d, 0x0079}, {0x865e, 0x00bd}, +{0x865f, 0x0089}, {0x8660, 0x007f}, {0x8661, 0x007e}, +{0x8662, 0x0086}, {0x8663, 0x008e}, {0x8664, 0x0081}, +{0x8665, 0x0004}, {0x8666, 0x0026}, {0x8667, 0x000b}, +{0x8668, 0x0086}, {0x8669, 0x0004}, {0x866a, 0x00ce}, +{0x866b, 0x008f}, {0x866c, 0x0076}, {0x866d, 0x00bd}, +{0x866e, 0x0089}, {0x866f, 0x007f}, {0x8670, 0x007e}, +{0x8671, 0x0086}, {0x8672, 0x008e}, {0x8673, 0x0081}, +{0x8674, 0x0002}, {0x8675, 0x0026}, {0x8676, 0x000b}, +{0x8677, 0x008a}, {0x8678, 0x0002}, {0x8679, 0x00ce}, +{0x867a, 0x008f}, {0x867b, 0x0073}, {0x867c, 0x00bd}, +{0x867d, 0x0089}, {0x867e, 0x007f}, {0x867f, 0x007e}, +{0x8680, 0x0086}, {0x8681, 0x008e}, {0x8682, 0x0081}, +{0x8683, 0x0001}, {0x8684, 0x0026}, {0x8685, 0x0008}, +{0x8686, 0x0086}, {0x8687, 0x0001}, {0x8688, 0x00ce}, +{0x8689, 0x008f}, {0x868a, 0x0070}, {0x868b, 0x00bd}, +{0x868c, 0x0089}, {0x868d, 0x007f}, {0x868e, 0x00b6}, +{0x868f, 0x008f}, {0x8690, 0x007f}, {0x8691, 0x0081}, +{0x8692, 0x000f}, {0x8693, 0x0026}, {0x8694, 0x0003}, +{0x8695, 0x007e}, {0x8696, 0x0087}, {0x8697, 0x0047}, +{0x8698, 0x00b6}, {0x8699, 0x0012}, {0x869a, 0x0009}, +{0x869b, 0x0084}, {0x869c, 0x0003}, {0x869d, 0x0081}, +{0x869e, 0x0003}, {0x869f, 0x0027}, {0x86a0, 0x0006}, +{0x86a1, 0x007c}, {0x86a2, 0x0012}, {0x86a3, 0x0009}, +{0x86a4, 0x007e}, {0x86a5, 0x0085}, {0x86a6, 
0x00fe}, +{0x86a7, 0x00b6}, {0x86a8, 0x0012}, {0x86a9, 0x0006}, +{0x86aa, 0x0084}, {0x86ab, 0x0007}, {0x86ac, 0x0081}, +{0x86ad, 0x0007}, {0x86ae, 0x0027}, {0x86af, 0x0008}, +{0x86b0, 0x008b}, {0x86b1, 0x0001}, {0x86b2, 0x00b7}, +{0x86b3, 0x0012}, {0x86b4, 0x0006}, {0x86b5, 0x007e}, +{0x86b6, 0x0086}, {0x86b7, 0x00d5}, {0x86b8, 0x00b6}, +{0x86b9, 0x008f}, {0x86ba, 0x0082}, {0x86bb, 0x0026}, +{0x86bc, 0x000a}, {0x86bd, 0x007c}, {0x86be, 0x008f}, +{0x86bf, 0x0082}, {0x86c0, 0x004f}, {0x86c1, 0x00b7}, +{0x86c2, 0x0012}, {0x86c3, 0x0006}, {0x86c4, 0x007e}, +{0x86c5, 0x0085}, {0x86c6, 0x00c0}, {0x86c7, 0x00b6}, +{0x86c8, 0x0012}, {0x86c9, 0x0006}, {0x86ca, 0x0084}, +{0x86cb, 0x003f}, {0x86cc, 0x0081}, {0x86cd, 0x003f}, +{0x86ce, 0x0027}, {0x86cf, 0x0010}, {0x86d0, 0x008b}, +{0x86d1, 0x0008}, {0x86d2, 0x00b7}, {0x86d3, 0x0012}, +{0x86d4, 0x0006}, {0x86d5, 0x00b6}, {0x86d6, 0x0012}, +{0x86d7, 0x0009}, {0x86d8, 0x0084}, {0x86d9, 0x00fc}, +{0x86da, 0x00b7}, {0x86db, 0x0012}, {0x86dc, 0x0009}, +{0x86dd, 0x007e}, {0x86de, 0x0085}, {0x86df, 0x00fe}, +{0x86e0, 0x00ce}, {0x86e1, 0x008f}, {0x86e2, 0x0070}, +{0x86e3, 0x0018}, {0x86e4, 0x00ce}, {0x86e5, 0x008f}, +{0x86e6, 0x0084}, {0x86e7, 0x00c6}, {0x86e8, 0x000c}, +{0x86e9, 0x00bd}, {0x86ea, 0x0089}, {0x86eb, 0x006f}, +{0x86ec, 0x00ce}, {0x86ed, 0x008f}, {0x86ee, 0x0084}, +{0x86ef, 0x0018}, {0x86f0, 0x00ce}, {0x86f1, 0x008f}, +{0x86f2, 0x0070}, {0x86f3, 0x00c6}, {0x86f4, 0x000c}, +{0x86f5, 0x00bd}, {0x86f6, 0x0089}, {0x86f7, 0x006f}, +{0x86f8, 0x00d6}, {0x86f9, 0x0083}, {0x86fa, 0x00c1}, +{0x86fb, 0x004f}, {0x86fc, 0x002d}, {0x86fd, 0x0003}, +{0x86fe, 0x007e}, {0x86ff, 0x0087}, {0x8700, 0x0040}, +{0x8701, 0x00b6}, {0x8702, 0x008f}, {0x8703, 0x007f}, +{0x8704, 0x0081}, {0x8705, 0x0007}, {0x8706, 0x0027}, +{0x8707, 0x000f}, {0x8708, 0x0081}, {0x8709, 0x000b}, +{0x870a, 0x0027}, {0x870b, 0x0015}, {0x870c, 0x0081}, +{0x870d, 0x000d}, {0x870e, 0x0027}, {0x870f, 0x001b}, +{0x8710, 0x0081}, {0x8711, 0x000e}, {0x8712, 0x0027}, +{0x8713, 0x0021}, {0x8714, 0x007e}, {0x8715, 0x0087}, +{0x8716, 0x0040}, {0x8717, 0x00f7}, {0x8718, 0x008f}, +{0x8719, 0x007b}, {0x871a, 0x0086}, {0x871b, 0x0002}, +{0x871c, 0x00b7}, {0x871d, 0x008f}, {0x871e, 0x007a}, +{0x871f, 0x0020}, {0x8720, 0x001c}, {0x8721, 0x00f7}, +{0x8722, 0x008f}, {0x8723, 0x0078}, {0x8724, 0x0086}, +{0x8725, 0x0002}, {0x8726, 0x00b7}, {0x8727, 0x008f}, +{0x8728, 0x0077}, {0x8729, 0x0020}, {0x872a, 0x0012}, +{0x872b, 0x00f7}, {0x872c, 0x008f}, {0x872d, 0x0075}, +{0x872e, 0x0086}, {0x872f, 0x0002}, {0x8730, 0x00b7}, +{0x8731, 0x008f}, {0x8732, 0x0074}, {0x8733, 0x0020}, +{0x8734, 0x0008}, {0x8735, 0x00f7}, {0x8736, 0x008f}, +{0x8737, 0x0072}, {0x8738, 0x0086}, {0x8739, 0x0002}, +{0x873a, 0x00b7}, {0x873b, 0x008f}, {0x873c, 0x0071}, +{0x873d, 0x007e}, {0x873e, 0x0087}, {0x873f, 0x0047}, +{0x8740, 0x0086}, {0x8741, 0x0004}, {0x8742, 0x0097}, +{0x8743, 0x0040}, {0x8744, 0x007e}, {0x8745, 0x0089}, +{0x8746, 0x006e}, {0x8747, 0x00ce}, {0x8748, 0x008f}, +{0x8749, 0x0072}, {0x874a, 0x00bd}, {0x874b, 0x0089}, +{0x874c, 0x00f7}, {0x874d, 0x00ce}, {0x874e, 0x008f}, +{0x874f, 0x0075}, {0x8750, 0x00bd}, {0x8751, 0x0089}, +{0x8752, 0x00f7}, {0x8753, 0x00ce}, {0x8754, 0x008f}, +{0x8755, 0x0078}, {0x8756, 0x00bd}, {0x8757, 0x0089}, +{0x8758, 0x00f7}, {0x8759, 0x00ce}, {0x875a, 0x008f}, +{0x875b, 0x007b}, {0x875c, 0x00bd}, {0x875d, 0x0089}, +{0x875e, 0x00f7}, {0x875f, 0x004f}, {0x8760, 0x00b7}, +{0x8761, 0x008f}, {0x8762, 0x007d}, {0x8763, 0x00b7}, +{0x8764, 0x008f}, {0x8765, 0x0081}, {0x8766, 0x00b6}, +{0x8767, 0x008f}, 
{0x8768, 0x0072}, {0x8769, 0x0027}, +{0x876a, 0x0047}, {0x876b, 0x007c}, {0x876c, 0x008f}, +{0x876d, 0x007d}, {0x876e, 0x00b6}, {0x876f, 0x008f}, +{0x8770, 0x0075}, {0x8771, 0x0027}, {0x8772, 0x003f}, +{0x8773, 0x007c}, {0x8774, 0x008f}, {0x8775, 0x007d}, +{0x8776, 0x00b6}, {0x8777, 0x008f}, {0x8778, 0x0078}, +{0x8779, 0x0027}, {0x877a, 0x0037}, {0x877b, 0x007c}, +{0x877c, 0x008f}, {0x877d, 0x007d}, {0x877e, 0x00b6}, +{0x877f, 0x008f}, {0x8780, 0x007b}, {0x8781, 0x0027}, +{0x8782, 0x002f}, {0x8783, 0x007f}, {0x8784, 0x008f}, +{0x8785, 0x007d}, {0x8786, 0x007c}, {0x8787, 0x008f}, +{0x8788, 0x0081}, {0x8789, 0x007a}, {0x878a, 0x008f}, +{0x878b, 0x0072}, {0x878c, 0x0027}, {0x878d, 0x001b}, +{0x878e, 0x007c}, {0x878f, 0x008f}, {0x8790, 0x007d}, +{0x8791, 0x007a}, {0x8792, 0x008f}, {0x8793, 0x0075}, +{0x8794, 0x0027}, {0x8795, 0x0016}, {0x8796, 0x007c}, +{0x8797, 0x008f}, {0x8798, 0x007d}, {0x8799, 0x007a}, +{0x879a, 0x008f}, {0x879b, 0x0078}, {0x879c, 0x0027}, +{0x879d, 0x0011}, {0x879e, 0x007c}, {0x879f, 0x008f}, +{0x87a0, 0x007d}, {0x87a1, 0x007a}, {0x87a2, 0x008f}, +{0x87a3, 0x007b}, {0x87a4, 0x0027}, {0x87a5, 0x000c}, +{0x87a6, 0x007e}, {0x87a7, 0x0087}, {0x87a8, 0x0083}, +{0x87a9, 0x007a}, {0x87aa, 0x008f}, {0x87ab, 0x0075}, +{0x87ac, 0x007a}, {0x87ad, 0x008f}, {0x87ae, 0x0078}, +{0x87af, 0x007a}, {0x87b0, 0x008f}, {0x87b1, 0x007b}, +{0x87b2, 0x00ce}, {0x87b3, 0x00c1}, {0x87b4, 0x00fc}, +{0x87b5, 0x00f6}, {0x87b6, 0x008f}, {0x87b7, 0x007d}, +{0x87b8, 0x003a}, {0x87b9, 0x00a6}, {0x87ba, 0x0000}, +{0x87bb, 0x00b7}, {0x87bc, 0x0012}, {0x87bd, 0x0070}, +{0x87be, 0x00b6}, {0x87bf, 0x008f}, {0x87c0, 0x0072}, +{0x87c1, 0x0026}, {0x87c2, 0x0003}, {0x87c3, 0x007e}, +{0x87c4, 0x0087}, {0x87c5, 0x00fa}, {0x87c6, 0x00b6}, +{0x87c7, 0x008f}, {0x87c8, 0x0075}, {0x87c9, 0x0026}, +{0x87ca, 0x000a}, {0x87cb, 0x0018}, {0x87cc, 0x00ce}, +{0x87cd, 0x008f}, {0x87ce, 0x0073}, {0x87cf, 0x00bd}, +{0x87d0, 0x0089}, {0x87d1, 0x00d5}, {0x87d2, 0x007e}, +{0x87d3, 0x0087}, {0x87d4, 0x00fa}, {0x87d5, 0x00b6}, +{0x87d6, 0x008f}, {0x87d7, 0x0078}, {0x87d8, 0x0026}, +{0x87d9, 0x000a}, {0x87da, 0x0018}, {0x87db, 0x00ce}, +{0x87dc, 0x008f}, {0x87dd, 0x0076}, {0x87de, 0x00bd}, +{0x87df, 0x0089}, {0x87e0, 0x00d5}, {0x87e1, 0x007e}, +{0x87e2, 0x0087}, {0x87e3, 0x00fa}, {0x87e4, 0x00b6}, +{0x87e5, 0x008f}, {0x87e6, 0x007b}, {0x87e7, 0x0026}, +{0x87e8, 0x000a}, {0x87e9, 0x0018}, {0x87ea, 0x00ce}, +{0x87eb, 0x008f}, {0x87ec, 0x0079}, {0x87ed, 0x00bd}, +{0x87ee, 0x0089}, {0x87ef, 0x00d5}, {0x87f0, 0x007e}, +{0x87f1, 0x0087}, {0x87f2, 0x00fa}, {0x87f3, 0x0086}, +{0x87f4, 0x0005}, {0x87f5, 0x0097}, {0x87f6, 0x0040}, +{0x87f7, 0x007e}, {0x87f8, 0x0089}, {0x87f9, 0x006e}, +{0x87fa, 0x00b6}, {0x87fb, 0x008f}, {0x87fc, 0x0075}, +{0x87fd, 0x0081}, {0x87fe, 0x0007}, {0x87ff, 0x002e}, +{0x8800, 0x00f2}, {0x8801, 0x00f6}, {0x8802, 0x0012}, +{0x8803, 0x0006}, {0x8804, 0x00c4}, {0x8805, 0x00f8}, +{0x8806, 0x001b}, {0x8807, 0x00b7}, {0x8808, 0x0012}, +{0x8809, 0x0006}, {0x880a, 0x00b6}, {0x880b, 0x008f}, +{0x880c, 0x0078}, {0x880d, 0x0081}, {0x880e, 0x0007}, +{0x880f, 0x002e}, {0x8810, 0x00e2}, {0x8811, 0x0048}, +{0x8812, 0x0048}, {0x8813, 0x0048}, {0x8814, 0x00f6}, +{0x8815, 0x0012}, {0x8816, 0x0006}, {0x8817, 0x00c4}, +{0x8818, 0x00c7}, {0x8819, 0x001b}, {0x881a, 0x00b7}, +{0x881b, 0x0012}, {0x881c, 0x0006}, {0x881d, 0x00b6}, +{0x881e, 0x008f}, {0x881f, 0x007b}, {0x8820, 0x0081}, +{0x8821, 0x0007}, {0x8822, 0x002e}, {0x8823, 0x00cf}, +{0x8824, 0x00f6}, {0x8825, 0x0012}, {0x8826, 0x0005}, +{0x8827, 0x00c4}, {0x8828, 0x00f8}, {0x8829, 
0x001b}, +{0x882a, 0x00b7}, {0x882b, 0x0012}, {0x882c, 0x0005}, +{0x882d, 0x0086}, {0x882e, 0x0000}, {0x882f, 0x00f6}, +{0x8830, 0x008f}, {0x8831, 0x0071}, {0x8832, 0x00bd}, +{0x8833, 0x0089}, {0x8834, 0x0094}, {0x8835, 0x0086}, +{0x8836, 0x0001}, {0x8837, 0x00f6}, {0x8838, 0x008f}, +{0x8839, 0x0074}, {0x883a, 0x00bd}, {0x883b, 0x0089}, +{0x883c, 0x0094}, {0x883d, 0x0086}, {0x883e, 0x0002}, +{0x883f, 0x00f6}, {0x8840, 0x008f}, {0x8841, 0x0077}, +{0x8842, 0x00bd}, {0x8843, 0x0089}, {0x8844, 0x0094}, +{0x8845, 0x0086}, {0x8846, 0x0003}, {0x8847, 0x00f6}, +{0x8848, 0x008f}, {0x8849, 0x007a}, {0x884a, 0x00bd}, +{0x884b, 0x0089}, {0x884c, 0x0094}, {0x884d, 0x00ce}, +{0x884e, 0x008f}, {0x884f, 0x0070}, {0x8850, 0x00a6}, +{0x8851, 0x0001}, {0x8852, 0x0081}, {0x8853, 0x0001}, +{0x8854, 0x0027}, {0x8855, 0x0007}, {0x8856, 0x0081}, +{0x8857, 0x0003}, {0x8858, 0x0027}, {0x8859, 0x0003}, +{0x885a, 0x007e}, {0x885b, 0x0088}, {0x885c, 0x0066}, +{0x885d, 0x00a6}, {0x885e, 0x0000}, {0x885f, 0x00b8}, +{0x8860, 0x008f}, {0x8861, 0x0081}, {0x8862, 0x0084}, +{0x8863, 0x0001}, {0x8864, 0x0026}, {0x8865, 0x000b}, +{0x8866, 0x008c}, {0x8867, 0x008f}, {0x8868, 0x0079}, +{0x8869, 0x002c}, {0x886a, 0x000e}, {0x886b, 0x0008}, +{0x886c, 0x0008}, {0x886d, 0x0008}, {0x886e, 0x007e}, +{0x886f, 0x0088}, {0x8870, 0x0050}, {0x8871, 0x00b6}, +{0x8872, 0x0012}, {0x8873, 0x0004}, {0x8874, 0x008a}, +{0x8875, 0x0040}, {0x8876, 0x00b7}, {0x8877, 0x0012}, +{0x8878, 0x0004}, {0x8879, 0x00b6}, {0x887a, 0x0012}, +{0x887b, 0x0004}, {0x887c, 0x0084}, {0x887d, 0x00fb}, +{0x887e, 0x0084}, {0x887f, 0x00ef}, {0x8880, 0x00b7}, +{0x8881, 0x0012}, {0x8882, 0x0004}, {0x8883, 0x00b6}, +{0x8884, 0x0012}, {0x8885, 0x0007}, {0x8886, 0x0036}, +{0x8887, 0x00b6}, {0x8888, 0x008f}, {0x8889, 0x007c}, +{0x888a, 0x0048}, {0x888b, 0x0048}, {0x888c, 0x00b7}, +{0x888d, 0x0012}, {0x888e, 0x0007}, {0x888f, 0x0086}, +{0x8890, 0x0001}, {0x8891, 0x00ba}, {0x8892, 0x0012}, +{0x8893, 0x0004}, {0x8894, 0x00b7}, {0x8895, 0x0012}, +{0x8896, 0x0004}, {0x8897, 0x0001}, {0x8898, 0x0001}, +{0x8899, 0x0001}, {0x889a, 0x0001}, {0x889b, 0x0001}, +{0x889c, 0x0001}, {0x889d, 0x0086}, {0x889e, 0x00fe}, +{0x889f, 0x00b4}, {0x88a0, 0x0012}, {0x88a1, 0x0004}, +{0x88a2, 0x00b7}, {0x88a3, 0x0012}, {0x88a4, 0x0004}, +{0x88a5, 0x0086}, {0x88a6, 0x0002}, {0x88a7, 0x00ba}, +{0x88a8, 0x0012}, {0x88a9, 0x0004}, {0x88aa, 0x00b7}, +{0x88ab, 0x0012}, {0x88ac, 0x0004}, {0x88ad, 0x0086}, +{0x88ae, 0x00fd}, {0x88af, 0x00b4}, {0x88b0, 0x0012}, +{0x88b1, 0x0004}, {0x88b2, 0x00b7}, {0x88b3, 0x0012}, +{0x88b4, 0x0004}, {0x88b5, 0x0032}, {0x88b6, 0x00b7}, +{0x88b7, 0x0012}, {0x88b8, 0x0007}, {0x88b9, 0x00b6}, +{0x88ba, 0x0012}, {0x88bb, 0x0000}, {0x88bc, 0x0084}, +{0x88bd, 0x0008}, {0x88be, 0x0081}, {0x88bf, 0x0008}, +{0x88c0, 0x0027}, {0x88c1, 0x000f}, {0x88c2, 0x007c}, +{0x88c3, 0x0082}, {0x88c4, 0x0008}, {0x88c5, 0x0026}, +{0x88c6, 0x0007}, {0x88c7, 0x0086}, {0x88c8, 0x0076}, +{0x88c9, 0x0097}, {0x88ca, 0x0040}, {0x88cb, 0x007e}, +{0x88cc, 0x0089}, {0x88cd, 0x006e}, {0x88ce, 0x007e}, +{0x88cf, 0x0086}, {0x88d0, 0x00ec}, {0x88d1, 0x00b6}, +{0x88d2, 0x008f}, {0x88d3, 0x007f}, {0x88d4, 0x0081}, +{0x88d5, 0x000f}, {0x88d6, 0x0027}, {0x88d7, 0x003c}, +{0x88d8, 0x00bd}, {0x88d9, 0x00e6}, {0x88da, 0x00c7}, +{0x88db, 0x00b7}, {0x88dc, 0x0012}, {0x88dd, 0x000d}, +{0x88de, 0x00bd}, {0x88df, 0x00e6}, {0x88e0, 0x00cb}, +{0x88e1, 0x00b6}, {0x88e2, 0x0012}, {0x88e3, 0x0004}, +{0x88e4, 0x008a}, {0x88e5, 0x0020}, {0x88e6, 0x00b7}, +{0x88e7, 0x0012}, {0x88e8, 0x0004}, {0x88e9, 0x00ce}, +{0x88ea, 0x00ff}, 
{0x88eb, 0x00ff}, {0x88ec, 0x00b6}, +{0x88ed, 0x0012}, {0x88ee, 0x0000}, {0x88ef, 0x0081}, +{0x88f0, 0x000c}, {0x88f1, 0x0026}, {0x88f2, 0x0005}, +{0x88f3, 0x0009}, {0x88f4, 0x0026}, {0x88f5, 0x00f6}, +{0x88f6, 0x0027}, {0x88f7, 0x001c}, {0x88f8, 0x00b6}, +{0x88f9, 0x0012}, {0x88fa, 0x0004}, {0x88fb, 0x0084}, +{0x88fc, 0x00df}, {0x88fd, 0x00b7}, {0x88fe, 0x0012}, +{0x88ff, 0x0004}, {0x8900, 0x0096}, {0x8901, 0x0083}, +{0x8902, 0x0081}, {0x8903, 0x0007}, {0x8904, 0x002c}, +{0x8905, 0x0005}, {0x8906, 0x007c}, {0x8907, 0x0000}, +{0x8908, 0x0083}, {0x8909, 0x0020}, {0x890a, 0x0006}, +{0x890b, 0x0096}, {0x890c, 0x0083}, {0x890d, 0x008b}, +{0x890e, 0x0008}, {0x890f, 0x0097}, {0x8910, 0x0083}, +{0x8911, 0x007e}, {0x8912, 0x0085}, {0x8913, 0x0041}, +{0x8914, 0x007f}, {0x8915, 0x008f}, {0x8916, 0x007e}, +{0x8917, 0x0086}, {0x8918, 0x0080}, {0x8919, 0x00b7}, +{0x891a, 0x0012}, {0x891b, 0x000c}, {0x891c, 0x0086}, +{0x891d, 0x0001}, {0x891e, 0x00b7}, {0x891f, 0x008f}, +{0x8920, 0x007d}, {0x8921, 0x00b6}, {0x8922, 0x0012}, +{0x8923, 0x000c}, {0x8924, 0x0084}, {0x8925, 0x007f}, +{0x8926, 0x00b7}, {0x8927, 0x0012}, {0x8928, 0x000c}, +{0x8929, 0x008a}, {0x892a, 0x0080}, {0x892b, 0x00b7}, +{0x892c, 0x0012}, {0x892d, 0x000c}, {0x892e, 0x0086}, +{0x892f, 0x000a}, {0x8930, 0x00bd}, {0x8931, 0x008a}, +{0x8932, 0x0006}, {0x8933, 0x00b6}, {0x8934, 0x0012}, +{0x8935, 0x000a}, {0x8936, 0x002a}, {0x8937, 0x0009}, +{0x8938, 0x00b6}, {0x8939, 0x0012}, {0x893a, 0x000c}, +{0x893b, 0x00ba}, {0x893c, 0x008f}, {0x893d, 0x007d}, +{0x893e, 0x00b7}, {0x893f, 0x0012}, {0x8940, 0x000c}, +{0x8941, 0x00b6}, {0x8942, 0x008f}, {0x8943, 0x007e}, +{0x8944, 0x0081}, {0x8945, 0x0060}, {0x8946, 0x0027}, +{0x8947, 0x001a}, {0x8948, 0x008b}, {0x8949, 0x0020}, +{0x894a, 0x00b7}, {0x894b, 0x008f}, {0x894c, 0x007e}, +{0x894d, 0x00b6}, {0x894e, 0x0012}, {0x894f, 0x000c}, +{0x8950, 0x0084}, {0x8951, 0x009f}, {0x8952, 0x00ba}, +{0x8953, 0x008f}, {0x8954, 0x007e}, {0x8955, 0x00b7}, +{0x8956, 0x0012}, {0x8957, 0x000c}, {0x8958, 0x00b6}, +{0x8959, 0x008f}, {0x895a, 0x007d}, {0x895b, 0x0048}, +{0x895c, 0x00b7}, {0x895d, 0x008f}, {0x895e, 0x007d}, +{0x895f, 0x007e}, {0x8960, 0x0089}, {0x8961, 0x0021}, +{0x8962, 0x00b6}, {0x8963, 0x0012}, {0x8964, 0x0004}, +{0x8965, 0x008a}, {0x8966, 0x0020}, {0x8967, 0x00b7}, +{0x8968, 0x0012}, {0x8969, 0x0004}, {0x896a, 0x00bd}, +{0x896b, 0x008a}, {0x896c, 0x000a}, {0x896d, 0x004f}, +{0x896e, 0x0039}, {0x896f, 0x00a6}, {0x8970, 0x0000}, +{0x8971, 0x0018}, {0x8972, 0x00a7}, {0x8973, 0x0000}, +{0x8974, 0x0008}, {0x8975, 0x0018}, {0x8976, 0x0008}, +{0x8977, 0x005a}, {0x8978, 0x0026}, {0x8979, 0x00f5}, +{0x897a, 0x0039}, {0x897b, 0x0036}, {0x897c, 0x006c}, +{0x897d, 0x0000}, {0x897e, 0x0032}, {0x897f, 0x00ba}, +{0x8980, 0x008f}, {0x8981, 0x007f}, {0x8982, 0x00b7}, +{0x8983, 0x008f}, {0x8984, 0x007f}, {0x8985, 0x00b6}, +{0x8986, 0x0012}, {0x8987, 0x0009}, {0x8988, 0x0084}, +{0x8989, 0x0003}, {0x898a, 0x00a7}, {0x898b, 0x0001}, +{0x898c, 0x00b6}, {0x898d, 0x0012}, {0x898e, 0x0006}, +{0x898f, 0x0084}, {0x8990, 0x003f}, {0x8991, 0x00a7}, +{0x8992, 0x0002}, {0x8993, 0x0039}, {0x8994, 0x0036}, +{0x8995, 0x0086}, {0x8996, 0x0003}, {0x8997, 0x00b7}, +{0x8998, 0x008f}, {0x8999, 0x0080}, {0x899a, 0x0032}, +{0x899b, 0x00c1}, {0x899c, 0x0000}, {0x899d, 0x0026}, +{0x899e, 0x0006}, {0x899f, 0x00b7}, {0x89a0, 0x008f}, +{0x89a1, 0x007c}, {0x89a2, 0x007e}, {0x89a3, 0x0089}, +{0x89a4, 0x00c9}, {0x89a5, 0x00c1}, {0x89a6, 0x0001}, +{0x89a7, 0x0027}, {0x89a8, 0x0018}, {0x89a9, 0x00c1}, +{0x89aa, 0x0002}, {0x89ab, 0x0027}, {0x89ac, 
0x000c}, +{0x89ad, 0x00c1}, {0x89ae, 0x0003}, {0x89af, 0x0027}, +{0x89b0, 0x0000}, {0x89b1, 0x00f6}, {0x89b2, 0x008f}, +{0x89b3, 0x0080}, {0x89b4, 0x0005}, {0x89b5, 0x0005}, +{0x89b6, 0x00f7}, {0x89b7, 0x008f}, {0x89b8, 0x0080}, +{0x89b9, 0x00f6}, {0x89ba, 0x008f}, {0x89bb, 0x0080}, +{0x89bc, 0x0005}, {0x89bd, 0x0005}, {0x89be, 0x00f7}, +{0x89bf, 0x008f}, {0x89c0, 0x0080}, {0x89c1, 0x00f6}, +{0x89c2, 0x008f}, {0x89c3, 0x0080}, {0x89c4, 0x0005}, +{0x89c5, 0x0005}, {0x89c6, 0x00f7}, {0x89c7, 0x008f}, +{0x89c8, 0x0080}, {0x89c9, 0x00f6}, {0x89ca, 0x008f}, +{0x89cb, 0x0080}, {0x89cc, 0x0053}, {0x89cd, 0x00f4}, +{0x89ce, 0x0012}, {0x89cf, 0x0007}, {0x89d0, 0x001b}, +{0x89d1, 0x00b7}, {0x89d2, 0x0012}, {0x89d3, 0x0007}, +{0x89d4, 0x0039}, {0x89d5, 0x00ce}, {0x89d6, 0x008f}, +{0x89d7, 0x0070}, {0x89d8, 0x00a6}, {0x89d9, 0x0000}, +{0x89da, 0x0018}, {0x89db, 0x00e6}, {0x89dc, 0x0000}, +{0x89dd, 0x0018}, {0x89de, 0x00a7}, {0x89df, 0x0000}, +{0x89e0, 0x00e7}, {0x89e1, 0x0000}, {0x89e2, 0x00a6}, +{0x89e3, 0x0001}, {0x89e4, 0x0018}, {0x89e5, 0x00e6}, +{0x89e6, 0x0001}, {0x89e7, 0x0018}, {0x89e8, 0x00a7}, +{0x89e9, 0x0001}, {0x89ea, 0x00e7}, {0x89eb, 0x0001}, +{0x89ec, 0x00a6}, {0x89ed, 0x0002}, {0x89ee, 0x0018}, +{0x89ef, 0x00e6}, {0x89f0, 0x0002}, {0x89f1, 0x0018}, +{0x89f2, 0x00a7}, {0x89f3, 0x0002}, {0x89f4, 0x00e7}, +{0x89f5, 0x0002}, {0x89f6, 0x0039}, {0x89f7, 0x00a6}, +{0x89f8, 0x0000}, {0x89f9, 0x0084}, {0x89fa, 0x0007}, +{0x89fb, 0x00e6}, {0x89fc, 0x0000}, {0x89fd, 0x00c4}, +{0x89fe, 0x0038}, {0x89ff, 0x0054}, {0x8a00, 0x0054}, +{0x8a01, 0x0054}, {0x8a02, 0x001b}, {0x8a03, 0x00a7}, +{0x8a04, 0x0000}, {0x8a05, 0x0039}, {0x8a06, 0x004a}, +{0x8a07, 0x0026}, {0x8a08, 0x00fd}, {0x8a09, 0x0039}, +{0x8a0a, 0x0096}, {0x8a0b, 0x0022}, {0x8a0c, 0x0084}, +{0x8a0d, 0x000f}, {0x8a0e, 0x0097}, {0x8a0f, 0x0022}, +{0x8a10, 0x0086}, {0x8a11, 0x0001}, {0x8a12, 0x00b7}, +{0x8a13, 0x008f}, {0x8a14, 0x0070}, {0x8a15, 0x00b6}, +{0x8a16, 0x0012}, {0x8a17, 0x0007}, {0x8a18, 0x00b7}, +{0x8a19, 0x008f}, {0x8a1a, 0x0071}, {0x8a1b, 0x00f6}, +{0x8a1c, 0x0012}, {0x8a1d, 0x000c}, {0x8a1e, 0x00c4}, +{0x8a1f, 0x000f}, {0x8a20, 0x00c8}, {0x8a21, 0x000f}, +{0x8a22, 0x00f7}, {0x8a23, 0x008f}, {0x8a24, 0x0072}, +{0x8a25, 0x00f6}, {0x8a26, 0x008f}, {0x8a27, 0x0072}, +{0x8a28, 0x00b6}, {0x8a29, 0x008f}, {0x8a2a, 0x0071}, +{0x8a2b, 0x0084}, {0x8a2c, 0x0003}, {0x8a2d, 0x0027}, +{0x8a2e, 0x0014}, {0x8a2f, 0x0081}, {0x8a30, 0x0001}, +{0x8a31, 0x0027}, {0x8a32, 0x001c}, {0x8a33, 0x0081}, +{0x8a34, 0x0002}, {0x8a35, 0x0027}, {0x8a36, 0x0024}, +{0x8a37, 0x00f4}, {0x8a38, 0x008f}, {0x8a39, 0x0070}, +{0x8a3a, 0x0027}, {0x8a3b, 0x002a}, {0x8a3c, 0x0096}, +{0x8a3d, 0x0022}, {0x8a3e, 0x008a}, {0x8a3f, 0x0080}, +{0x8a40, 0x007e}, {0x8a41, 0x008a}, {0x8a42, 0x0064}, +{0x8a43, 0x00f4}, {0x8a44, 0x008f}, {0x8a45, 0x0070}, +{0x8a46, 0x0027}, {0x8a47, 0x001e}, {0x8a48, 0x0096}, +{0x8a49, 0x0022}, {0x8a4a, 0x008a}, {0x8a4b, 0x0010}, +{0x8a4c, 0x007e}, {0x8a4d, 0x008a}, {0x8a4e, 0x0064}, +{0x8a4f, 0x00f4}, {0x8a50, 0x008f}, {0x8a51, 0x0070}, +{0x8a52, 0x0027}, {0x8a53, 0x0012}, {0x8a54, 0x0096}, +{0x8a55, 0x0022}, {0x8a56, 0x008a}, {0x8a57, 0x0020}, +{0x8a58, 0x007e}, {0x8a59, 0x008a}, {0x8a5a, 0x0064}, +{0x8a5b, 0x00f4}, {0x8a5c, 0x008f}, {0x8a5d, 0x0070}, +{0x8a5e, 0x0027}, {0x8a5f, 0x0006}, {0x8a60, 0x0096}, +{0x8a61, 0x0022}, {0x8a62, 0x008a}, {0x8a63, 0x0040}, +{0x8a64, 0x0097}, {0x8a65, 0x0022}, {0x8a66, 0x0074}, +{0x8a67, 0x008f}, {0x8a68, 0x0071}, {0x8a69, 0x0074}, +{0x8a6a, 0x008f}, {0x8a6b, 0x0071}, {0x8a6c, 0x0078}, +{0x8a6d, 0x008f}, 
{0x8a6e, 0x0070}, {0x8a6f, 0x00b6}, +{0x8a70, 0x008f}, {0x8a71, 0x0070}, {0x8a72, 0x0085}, +{0x8a73, 0x0010}, {0x8a74, 0x0027}, {0x8a75, 0x00af}, +{0x8a76, 0x00d6}, {0x8a77, 0x0022}, {0x8a78, 0x00c4}, +{0x8a79, 0x0010}, {0x8a7a, 0x0058}, {0x8a7b, 0x00b6}, +{0x8a7c, 0x0012}, {0x8a7d, 0x0070}, {0x8a7e, 0x0081}, +{0x8a7f, 0x00e4}, {0x8a80, 0x0027}, {0x8a81, 0x0036}, +{0x8a82, 0x0081}, {0x8a83, 0x00e1}, {0x8a84, 0x0026}, +{0x8a85, 0x000c}, {0x8a86, 0x0096}, {0x8a87, 0x0022}, +{0x8a88, 0x0084}, {0x8a89, 0x0020}, {0x8a8a, 0x0044}, +{0x8a8b, 0x001b}, {0x8a8c, 0x00d6}, {0x8a8d, 0x0022}, +{0x8a8e, 0x00c4}, {0x8a8f, 0x00cf}, {0x8a90, 0x0020}, +{0x8a91, 0x0023}, {0x8a92, 0x0058}, {0x8a93, 0x0081}, +{0x8a94, 0x00c6}, {0x8a95, 0x0026}, {0x8a96, 0x000d}, +{0x8a97, 0x0096}, {0x8a98, 0x0022}, {0x8a99, 0x0084}, +{0x8a9a, 0x0040}, {0x8a9b, 0x0044}, {0x8a9c, 0x0044}, +{0x8a9d, 0x001b}, {0x8a9e, 0x00d6}, {0x8a9f, 0x0022}, +{0x8aa0, 0x00c4}, {0x8aa1, 0x00af}, {0x8aa2, 0x0020}, +{0x8aa3, 0x0011}, {0x8aa4, 0x0058}, {0x8aa5, 0x0081}, +{0x8aa6, 0x0027}, {0x8aa7, 0x0026}, {0x8aa8, 0x000f}, +{0x8aa9, 0x0096}, {0x8aaa, 0x0022}, {0x8aab, 0x0084}, +{0x8aac, 0x0080}, {0x8aad, 0x0044}, {0x8aae, 0x0044}, +{0x8aaf, 0x0044}, {0x8ab0, 0x001b}, {0x8ab1, 0x00d6}, +{0x8ab2, 0x0022}, {0x8ab3, 0x00c4}, {0x8ab4, 0x006f}, +{0x8ab5, 0x001b}, {0x8ab6, 0x0097}, {0x8ab7, 0x0022}, +{0x8ab8, 0x0039}, {0x8ab9, 0x0027}, {0x8aba, 0x000c}, +{0x8abb, 0x007c}, {0x8abc, 0x0082}, {0x8abd, 0x0006}, +{0x8abe, 0x00bd}, {0x8abf, 0x00d9}, {0x8ac0, 0x00ed}, +{0x8ac1, 0x00b6}, {0x8ac2, 0x0082}, {0x8ac3, 0x0007}, +{0x8ac4, 0x007e}, {0x8ac5, 0x008a}, {0x8ac6, 0x00b9}, +{0x8ac7, 0x007f}, {0x8ac8, 0x0082}, {0x8ac9, 0x0006}, +{0x8aca, 0x0039}, { 0x0, 0x0 } +}; +#endif + + +/* phy types */ +#define CAS_PHY_UNKNOWN 0x00 +#define CAS_PHY_SERDES 0x01 +#define CAS_PHY_MII_MDIO0 0x02 +#define CAS_PHY_MII_MDIO1 0x04 +#define CAS_PHY_MII(x) ((x) & (CAS_PHY_MII_MDIO0 | CAS_PHY_MII_MDIO1)) + +/* _RING_INDEX is the index for the ring sizes to be used. _RING_SIZE + * is the actual size. the default index for the various rings is + * 8. NOTE: there a bunch of alignment constraints for the rings. to + * deal with that, i just allocate rings to create the desired + * alignment. here are the constraints: + * RX DESC and COMP rings must be 8KB aligned + * TX DESC must be 2KB aligned. + * if you change the numbers, be cognizant of how the alignment will change + * in INIT_BLOCK as well. + */ + +#define DESC_RING_I_TO_S(x) (32*(1 << (x))) +#define COMP_RING_I_TO_S(x) (128*(1 << (x))) +#define TX_DESC_RING_INDEX 4 /* 512 = 8k */ +#define RX_DESC_RING_INDEX 4 /* 512 = 8k */ +#define RX_COMP_RING_INDEX 4 /* 2048 = 64k: should be 4x rx ring size */ + +#if (TX_DESC_RING_INDEX > 8) || (TX_DESC_RING_INDEX < 0) +#error TX_DESC_RING_INDEX must be between 0 and 8 +#endif + +#if (RX_DESC_RING_INDEX > 8) || (RX_DESC_RING_INDEX < 0) +#error RX_DESC_RING_INDEX must be between 0 and 8 +#endif + +#if (RX_COMP_RING_INDEX > 8) || (RX_COMP_RING_INDEX < 0) +#error RX_COMP_RING_INDEX must be between 0 and 8 +#endif + +#define N_TX_RINGS MAX_TX_RINGS /* for QoS */ +#define N_TX_RINGS_MASK MAX_TX_RINGS_MASK +#define N_RX_DESC_RINGS MAX_RX_DESC_RINGS /* 1 for ipsec */ +#define N_RX_COMP_RINGS 0x1 /* for mult. 
PCI interrupts */ + +/* number of flows that can go through re-assembly */ +#define N_RX_FLOWS 64 + +#define TX_DESC_RING_SIZE DESC_RING_I_TO_S(TX_DESC_RING_INDEX) +#define RX_DESC_RING_SIZE DESC_RING_I_TO_S(RX_DESC_RING_INDEX) +#define RX_COMP_RING_SIZE COMP_RING_I_TO_S(RX_COMP_RING_INDEX) +#define TX_DESC_RINGN_INDEX(x) TX_DESC_RING_INDEX +#define RX_DESC_RINGN_INDEX(x) RX_DESC_RING_INDEX +#define RX_COMP_RINGN_INDEX(x) RX_COMP_RING_INDEX +#define TX_DESC_RINGN_SIZE(x) TX_DESC_RING_SIZE +#define RX_DESC_RINGN_SIZE(x) RX_DESC_RING_SIZE +#define RX_COMP_RINGN_SIZE(x) RX_COMP_RING_SIZE + +/* convert values */ +#define CAS_BASE(x, y) (((y) << (x ## _SHIFT)) & (x ## _MASK)) +#define CAS_VAL(x, y) (((y) & (x ## _MASK)) >> (x ## _SHIFT)) +#define CAS_TX_RINGN_BASE(y) ((TX_DESC_RINGN_INDEX(y) << \ + TX_CFG_DESC_RINGN_SHIFT(y)) & \ + TX_CFG_DESC_RINGN_MASK(y)) + +/* min is 2k, but we can't do jumbo frames unless it's at least 8k */ +#define CAS_MIN_PAGE_SHIFT 11 /* 2048 */ +#define CAS_JUMBO_PAGE_SHIFT 13 /* 8192 */ +#define CAS_MAX_PAGE_SHIFT 14 /* 16384 */ + +#define TX_DESC_BUFLEN_MASK 0x0000000000003FFFULL /* buffer length in + bytes. 0 - 9256 */ +#define TX_DESC_BUFLEN_SHIFT 0 +#define TX_DESC_CSUM_START_MASK 0x00000000001F8000ULL /* checksum start. # + of bytes to be + skipped before + csum calc begins. + value must be + even */ +#define TX_DESC_CSUM_START_SHIFT 15 +#define TX_DESC_CSUM_STUFF_MASK 0x000000001FE00000ULL /* checksum stuff. + byte offset w/in + the pkt for the + 1st csum byte. + must be > 8 */ +#define TX_DESC_CSUM_STUFF_SHIFT 21 +#define TX_DESC_CSUM_EN 0x0000000020000000ULL /* enable checksum */ +#define TX_DESC_EOF 0x0000000040000000ULL /* end of frame */ +#define TX_DESC_SOF 0x0000000080000000ULL /* start of frame */ +#define TX_DESC_INTME 0x0000000100000000ULL /* interrupt me */ +#define TX_DESC_NO_CRC 0x0000000200000000ULL /* debugging only. + CRC will not be + inserted into + outgoing frame. */ +struct cas_tx_desc { + u64 control; + u64 buffer; +}; + +/* descriptor ring for free buffers contains page-sized buffers. the index + * value is not used by the hw in any way. it's just stored and returned in + * the completion ring. + */ +struct cas_rx_desc { + u64 index; + u64 buffer; +}; + +/* received packets are put on the completion ring. 
*/ +/* word 1 */ +#define RX_COMP1_DATA_SIZE_MASK 0x0000000007FFE000ULL +#define RX_COMP1_DATA_SIZE_SHIFT 13 +#define RX_COMP1_DATA_OFF_MASK 0x000001FFF8000000ULL +#define RX_COMP1_DATA_OFF_SHIFT 27 +#define RX_COMP1_DATA_INDEX_MASK 0x007FFE0000000000ULL +#define RX_COMP1_DATA_INDEX_SHIFT 41 +#define RX_COMP1_SKIP_MASK 0x0180000000000000ULL +#define RX_COMP1_SKIP_SHIFT 55 +#define RX_COMP1_RELEASE_NEXT 0x0200000000000000ULL +#define RX_COMP1_SPLIT_PKT 0x0400000000000000ULL +#define RX_COMP1_RELEASE_FLOW 0x0800000000000000ULL +#define RX_COMP1_RELEASE_DATA 0x1000000000000000ULL +#define RX_COMP1_RELEASE_HDR 0x2000000000000000ULL +#define RX_COMP1_TYPE_MASK 0xC000000000000000ULL +#define RX_COMP1_TYPE_SHIFT 62 + +/* word 2 */ +#define RX_COMP2_NEXT_INDEX_MASK 0x00000007FFE00000ULL +#define RX_COMP2_NEXT_INDEX_SHIFT 21 +#define RX_COMP2_HDR_SIZE_MASK 0x00000FF800000000ULL +#define RX_COMP2_HDR_SIZE_SHIFT 35 +#define RX_COMP2_HDR_OFF_MASK 0x0003F00000000000ULL +#define RX_COMP2_HDR_OFF_SHIFT 44 +#define RX_COMP2_HDR_INDEX_MASK 0xFFFC000000000000ULL +#define RX_COMP2_HDR_INDEX_SHIFT 50 + +/* word 3 */ +#define RX_COMP3_SMALL_PKT 0x0000000000000001ULL +#define RX_COMP3_JUMBO_PKT 0x0000000000000002ULL +#define RX_COMP3_JUMBO_HDR_SPLIT_EN 0x0000000000000004ULL +#define RX_COMP3_CSUM_START_MASK 0x000000000007F000ULL +#define RX_COMP3_CSUM_START_SHIFT 12 +#define RX_COMP3_FLOWID_MASK 0x0000000001F80000ULL +#define RX_COMP3_FLOWID_SHIFT 19 +#define RX_COMP3_OPCODE_MASK 0x000000000E000000ULL +#define RX_COMP3_OPCODE_SHIFT 25 +#define RX_COMP3_FORCE_FLAG 0x0000000010000000ULL +#define RX_COMP3_NO_ASSIST 0x0000000020000000ULL +#define RX_COMP3_LOAD_BAL_MASK 0x000001F800000000ULL +#define RX_COMP3_LOAD_BAL_SHIFT 35 +#define RX_PLUS_COMP3_ENC_PKT 0x0000020000000000ULL /* cas+ */ +#define RX_COMP3_L3_HEAD_OFF_MASK 0x0000FE0000000000ULL /* cas */ +#define RX_COMP3_L3_HEAD_OFF_SHIFT 41 +#define RX_PLUS_COMP_L3_HEAD_OFF_MASK 0x0000FC0000000000ULL /* cas+ */ +#define RX_PLUS_COMP_L3_HEAD_OFF_SHIFT 42 +#define RX_COMP3_SAP_MASK 0xFFFF000000000000ULL +#define RX_COMP3_SAP_SHIFT 48 + +/* word 4 */ +#define RX_COMP4_TCP_CSUM_MASK 0x000000000000FFFFULL +#define RX_COMP4_TCP_CSUM_SHIFT 0 +#define RX_COMP4_PKT_LEN_MASK 0x000000003FFF0000ULL +#define RX_COMP4_PKT_LEN_SHIFT 16 +#define RX_COMP4_PERFECT_MATCH_MASK 0x00000003C0000000ULL +#define RX_COMP4_PERFECT_MATCH_SHIFT 30 +#define RX_COMP4_ZERO 0x0000080000000000ULL +#define RX_COMP4_HASH_VAL_MASK 0x0FFFF00000000000ULL +#define RX_COMP4_HASH_VAL_SHIFT 44 +#define RX_COMP4_HASH_PASS 0x1000000000000000ULL +#define RX_COMP4_BAD 0x4000000000000000ULL +#define RX_COMP4_LEN_MISMATCH 0x8000000000000000ULL + +/* we encode the following: ring/index/release. only 14 bits + * are usable. + * NOTE: the encoding is dependent upon RX_DESC_RING_SIZE and + * MAX_RX_DESC_RINGS. 
*/ +#define RX_INDEX_NUM_MASK 0x0000000000000FFFULL +#define RX_INDEX_NUM_SHIFT 0 +#define RX_INDEX_RING_MASK 0x0000000000001000ULL +#define RX_INDEX_RING_SHIFT 12 +#define RX_INDEX_RELEASE 0x0000000000002000ULL + +struct cas_rx_comp { + u64 word1; + u64 word2; + u64 word3; + u64 word4; +}; + +enum link_state { + link_down = 0, /* No link, will retry */ + link_aneg, /* Autoneg in progress */ + link_force_try, /* Try Forced link speed */ + link_force_ret, /* Forced mode worked, retrying autoneg */ + link_force_ok, /* Stay in forced mode */ + link_up /* Link is up */ +}; + +typedef struct cas_page { + struct list_head list; + struct page *buffer; + dma_addr_t dma_addr; + int used; +} cas_page_t; + + +/* some alignment constraints: + * TX DESC, RX DESC, and RX COMP must each be 8K aligned. + * TX COMPWB must be 8-byte aligned. + * to accomplish this, here's what we do: + * + * INIT_BLOCK_RX_COMP = 64k (already aligned) + * INIT_BLOCK_RX_DESC = 8k + * INIT_BLOCK_TX = 8k + * INIT_BLOCK_RX1_DESC = 8k + * TX COMPWB + */ +#define INIT_BLOCK_TX (TX_DESC_RING_SIZE) +#define INIT_BLOCK_RX_DESC (RX_DESC_RING_SIZE) +#define INIT_BLOCK_RX_COMP (RX_COMP_RING_SIZE) + +struct cas_init_block { + struct cas_rx_comp rxcs[N_RX_COMP_RINGS][INIT_BLOCK_RX_COMP]; + struct cas_rx_desc rxds[N_RX_DESC_RINGS][INIT_BLOCK_RX_DESC]; + struct cas_tx_desc txds[N_TX_RINGS][INIT_BLOCK_TX]; + u64 tx_compwb; +}; + +/* tiny buffers to deal with target abort issue. we allocate a bit + * over so that we don't have target abort issues with these buffers + * as well. + */ +#define TX_TINY_BUF_LEN 0x100 +#define TX_TINY_BUF_BLOCK ((INIT_BLOCK_TX + 1)*TX_TINY_BUF_LEN) + +struct cas_tiny_count { + int nbufs; + int used; +}; + +struct cas { + spinlock_t lock; /* for most bits */ + spinlock_t tx_lock[N_TX_RINGS]; /* tx bits */ + spinlock_t stat_lock[N_TX_RINGS + 1]; /* for stat gathering */ + spinlock_t rx_inuse_lock; /* rx inuse list */ + spinlock_t rx_spare_lock; /* rx spare list */ + + void __iomem *regs; + int tx_new[N_TX_RINGS], tx_old[N_TX_RINGS]; + int rx_old[N_RX_DESC_RINGS]; + int rx_cur[N_RX_COMP_RINGS], rx_new[N_RX_COMP_RINGS]; + int rx_last[N_RX_DESC_RINGS]; + + /* Set when chip is actually in operational state + * (ie. not power managed) */ + int hw_running; + int opened; + struct semaphore pm_sem; /* open/close/suspend/resume */ + + struct cas_init_block *init_block; + struct cas_tx_desc *init_txds[MAX_TX_RINGS]; + struct cas_rx_desc *init_rxds[MAX_RX_DESC_RINGS]; + struct cas_rx_comp *init_rxcs[MAX_RX_COMP_RINGS]; + + /* we use sk_buffs for tx and pages for rx. the rx skbuffs + * are there for flow re-assembly. 
*/ + struct sk_buff *tx_skbs[N_TX_RINGS][TX_DESC_RING_SIZE]; + struct sk_buff_head rx_flows[N_RX_FLOWS]; + cas_page_t *rx_pages[N_RX_DESC_RINGS][RX_DESC_RING_SIZE]; + struct list_head rx_spare_list, rx_inuse_list; + int rx_spares_needed; + + /* for small packets when copying would be quicker than + mapping */ + struct cas_tiny_count tx_tiny_use[N_TX_RINGS][TX_DESC_RING_SIZE]; + u8 *tx_tiny_bufs[N_TX_RINGS]; + + u32 msg_enable; + + /* N_TX_RINGS must be >= N_RX_DESC_RINGS */ + struct net_device_stats net_stats[N_TX_RINGS + 1]; + + u32 pci_cfg[64 >> 2]; + u8 pci_revision; + + int phy_type; + int phy_addr; + u32 phy_id; +#define CAS_FLAG_1000MB_CAP 0x00000001 +#define CAS_FLAG_REG_PLUS 0x00000002 +#define CAS_FLAG_TARGET_ABORT 0x00000004 +#define CAS_FLAG_SATURN 0x00000008 +#define CAS_FLAG_RXD_POST_MASK 0x000000F0 +#define CAS_FLAG_RXD_POST_SHIFT 4 +#define CAS_FLAG_RXD_POST(x) ((1 << (CAS_FLAG_RXD_POST_SHIFT + (x))) & \ + CAS_FLAG_RXD_POST_MASK) +#define CAS_FLAG_ENTROPY_DEV 0x00000100 +#define CAS_FLAG_NO_HW_CSUM 0x00000200 + u32 cas_flags; + int packet_min; /* minimum packet size */ + int tx_fifo_size; + int rx_fifo_size; + int rx_pause_off; + int rx_pause_on; + int crc_size; /* 4 if half-duplex */ + + int pci_irq_INTC; + int min_frame_size; /* for tx fifo workaround */ + + /* page size allocation */ + int page_size; + int page_order; + int mtu_stride; + + u32 mac_rx_cfg; + + /* Autoneg & PHY control */ + int link_cntl; + int link_fcntl; + enum link_state lstate; + struct timer_list link_timer; + int timer_ticks; + struct work_struct reset_task; +#if 0 + atomic_t reset_task_pending; +#else + atomic_t reset_task_pending; + atomic_t reset_task_pending_mtu; + atomic_t reset_task_pending_spare; + atomic_t reset_task_pending_all; +#endif + +#ifdef CONFIG_CASSINI_QGE_DEBUG + atomic_t interrupt_seen; /* 1 if any interrupts are getting through */ +#endif + + /* Link-down problem workaround */ +#define LINK_TRANSITION_UNKNOWN 0 +#define LINK_TRANSITION_ON_FAILURE 1 +#define LINK_TRANSITION_STILL_FAILED 2 +#define LINK_TRANSITION_LINK_UP 3 +#define LINK_TRANSITION_LINK_CONFIG 4 +#define LINK_TRANSITION_LINK_DOWN 5 +#define LINK_TRANSITION_REQUESTED_RESET 6 + int link_transition; + int link_transition_jiffies_valid; + unsigned long link_transition_jiffies; + + /* Tuning */ + u8 orig_cacheline_size; /* value when loaded */ +#define CAS_PREF_CACHELINE_SIZE 0x20 /* Minimum desired */ + + /* Diagnostic counters and state. */ + int casreg_len; /* reg-space size for dumping */ + u64 pause_entered; + u16 pause_last_time_recvd; + + dma_addr_t block_dvma, tx_tiny_dvma[N_TX_RINGS]; + struct pci_dev *pdev; + struct net_device *dev; +}; + +#define TX_DESC_NEXT(r, x) (((x) + 1) & (TX_DESC_RINGN_SIZE(r) - 1)) +#define RX_DESC_ENTRY(r, x) ((x) & (RX_DESC_RINGN_SIZE(r) - 1)) +#define RX_COMP_ENTRY(r, x) ((x) & (RX_COMP_RINGN_SIZE(r) - 1)) + +#define TX_BUFF_COUNT(r, x, y) ((x) <= (y) ? ((y) - (x)) : \ + (TX_DESC_RINGN_SIZE(r) - (x) + (y))) + +#define TX_BUFFS_AVAIL(cp, i) ((cp)->tx_old[(i)] <= (cp)->tx_new[(i)] ? 
\ + (cp)->tx_old[(i)] + (TX_DESC_RINGN_SIZE(i) - 1) - (cp)->tx_new[(i)] : \ + (cp)->tx_old[(i)] - (cp)->tx_new[(i)] - 1) + +#define CAS_ALIGN(addr, align) \ + (((unsigned long) (addr) + ((align) - 1UL)) & ~((align) - 1)) + +#define RX_FIFO_SIZE 16384 +#define EXPANSION_ROM_SIZE 65536 + +#define CAS_MC_EXACT_MATCH_SIZE 15 +#define CAS_MC_HASH_SIZE 256 +#define CAS_MC_HASH_MAX (CAS_MC_EXACT_MATCH_SIZE + \ + CAS_MC_HASH_SIZE) + +#define TX_TARGET_ABORT_LEN 0x20 +#define RX_SWIVEL_OFF_VAL 0x2 +#define RX_AE_FREEN_VAL(x) (RX_DESC_RINGN_SIZE(x) >> 1) +#define RX_AE_COMP_VAL (RX_COMP_RING_SIZE >> 1) +#define RX_BLANK_INTR_PKT_VAL 0x05 +#define RX_BLANK_INTR_TIME_VAL 0x0F +#define HP_TCP_THRESH_VAL 1530 /* reduce to enable reassembly */ + +#define RX_SPARE_COUNT (RX_DESC_RING_SIZE >> 1) +#define RX_SPARE_RECOVER_VAL (RX_SPARE_COUNT >> 2) + +#endif /* _CASSINI_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 68f11ac1a314..eb36fd293b41 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -392,6 +392,7 @@ #define PCI_DEVICE_ID_NS_87560_USB 0x0012 #define PCI_DEVICE_ID_NS_83815 0x0020 #define PCI_DEVICE_ID_NS_83820 0x0022 +#define PCI_DEVICE_ID_NS_SATURN 0x0035 #define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500 #define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501 #define PCI_DEVICE_ID_NS_SCx200_IDE 0x0502 @@ -983,6 +984,7 @@ #define PCI_DEVICE_ID_SUN_SABRE 0xa000 #define PCI_DEVICE_ID_SUN_HUMMINGBIRD 0xa001 #define PCI_DEVICE_ID_SUN_TOMATILLO 0xa801 +#define PCI_DEVICE_ID_SUN_CASSINI 0xabba #define PCI_VENDOR_ID_CMD 0x1095 #define PCI_DEVICE_ID_CMD_640 0x0640 -- cgit v1.2.3 From 2fab35d78f32fc107e1af4b1ec23f557fa20d911 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 27 Sep 2005 15:59:43 -0700 Subject: [NET]: Fix GCC4 compile error: sysctl in linux/if_ether.h The following is generated when compiling a recent (2.6.14-rc2-git5) kernel configured for ARM, with GCC4. CC init/main.o In file included from include/linux/netdevice.h:29, from include/net/sock.h:48, from init/main.c:50: include/linux/if_ether.h:114: error: array type has incomplete element type It seems that if CONFIG_SYSCTL is not set, then the compiler will throw an error due to the definition of the ether_table[] array Attached is a solution to the problem Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index fc2d4c8225aa..d21c305c6c64 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -111,7 +111,9 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb->mac.raw; } +#ifdef CONFIG_SYSCTL extern struct ctl_table ether_table[]; #endif +#endif #endif /* _LINUX_IF_ETHER_H */ -- cgit v1.2.3 From f16af555cc46a724507da371fbac94b430c93315 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Sep 2005 22:37:08 -0700 Subject: [SPARC64]: Add defines for 32MB/256MB PTE page size on Ultra-IV+. Signed-off-by: David S. 
Miller --- include/asm-sparc64/pgtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 43cbb089cde2..53d612aba8d5 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -98,7 +98,9 @@ #define _PAGE_NFO _AC(0x1000000000000000,UL) /* No Fault Only */ #define _PAGE_IE _AC(0x0800000000000000,UL) /* Invert Endianness */ #define _PAGE_SOFT2 _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ -#define _PAGE_RES1 _AC(0x0003000000000000,UL) /* Reserved */ +#define _PAGE_RES1 _AC(0x0002000000000000,UL) /* Reserved */ +#define _PAGE_SZ32MB _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ +#define _PAGE_SZ256MB _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ #define _PAGE_SN _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */ #define _PAGE_RES2 _AC(0x0000780000000000,UL) /* Reserved */ #define _PAGE_PADDR_SF _AC(0x000001FFFFFFE000,UL) /* (Spitfire) paddr[40:13]*/ -- cgit v1.2.3 From d2212bc7db13268bef0799d9ff4b2e511c284885 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Sep 2005 22:50:06 -0700 Subject: [SPARC64]: Add missing IDs for newer cpus. Also, the us3_cpufreq driver can work on Ultra-IV and IV+. They use the SAFARI bus register to control the clock divider just like Ultra-III and III+ do. Signed-off-by: David S. Miller --- arch/sparc64/kernel/cpu.c | 4 ++++ arch/sparc64/kernel/us3_cpufreq.c | 5 ++++- include/asm-sparc64/head.h | 9 ++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c index 48756958116b..77ef5df4e5a7 100644 --- a/arch/sparc64/kernel/cpu.c +++ b/arch/sparc64/kernel/cpu.c @@ -39,6 +39,8 @@ struct cpu_fp_info linux_sparc_fpu[] = { { 0x3e, 0x15, 0, "UltraSparc III+ integrated FPU"}, { 0x3e, 0x16, 0, "UltraSparc IIIi integrated FPU"}, { 0x3e, 0x18, 0, "UltraSparc IV integrated FPU"}, + { 0x3e, 0x19, 0, "UltraSparc IV+ integrated FPU"}, + { 0x3e, 0x22, 0, "UltraSparc IIIi+ integrated FPU"}, }; #define NSPARCFPU (sizeof(linux_sparc_fpu)/sizeof(struct cpu_fp_info)) @@ -53,6 +55,8 @@ struct cpu_iu_info linux_sparc_chips[] = { { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)"}, { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)"}, { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)"}, + { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)"}, + { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)"}, }; #define NSPARCCHIPS (sizeof(linux_sparc_chips)/sizeof(struct cpu_iu_info)) diff --git a/arch/sparc64/kernel/us3_cpufreq.c b/arch/sparc64/kernel/us3_cpufreq.c index 9080e7cd4bb0..0340041f6143 100644 --- a/arch/sparc64/kernel/us3_cpufreq.c +++ b/arch/sparc64/kernel/us3_cpufreq.c @@ -208,7 +208,10 @@ static int __init us3_freq_init(void) impl = ((ver >> 32) & 0xffff); if (manuf == CHEETAH_MANUF && - (impl == CHEETAH_IMPL || impl == CHEETAH_PLUS_IMPL)) { + (impl == CHEETAH_IMPL || + impl == CHEETAH_PLUS_IMPL || + impl == JAGUAR_IMPL || + impl == PANTHER_IMPL)) { struct cpufreq_driver *driver; ret = -ENOMEM; diff --git a/include/asm-sparc64/head.h b/include/asm-sparc64/head.h index b63a33cf4971..0abd3a674e8f 100644 --- a/include/asm-sparc64/head.h +++ b/include/asm-sparc64/head.h @@ -12,9 +12,12 @@ #define __JALAPENO_ID 0x003e0016 #define CHEETAH_MANUF 0x003e -#define CHEETAH_IMPL 0x0014 -#define CHEETAH_PLUS_IMPL 0x0015 -#define JALAPENO_IMPL 0x0016 +#define CHEETAH_IMPL 0x0014 /* Ultra-III */ +#define CHEETAH_PLUS_IMPL 0x0015 /* Ultra-III+ */ +#define JALAPENO_IMPL 0x0016 /* 
Ultra-IIIi */ +#define JAGUAR_IMPL 0x0018 /* Ultra-IV */ +#define PANTHER_IMPL 0x0019 /* Ultra-IV+ */ +#define SERRANO_IMPL 0x0022 /* Ultra-IIIi+ */ #define BRANCH_IF_CHEETAH_BASE(tmp1,tmp2,label) \ rdpr %ver, %tmp1; \ -- cgit v1.2.3 From 8b1f3124618b54cf125dea3a074b9cf469117723 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 27 Sep 2005 21:45:18 -0700 Subject: [PATCH] mm: move_pte to remap ZERO_PAGE Move the ZERO_PAGE remapping complexity to the move_pte macro in asm-generic, have it conditionally depend on __HAVE_ARCH_MULTIPLE_ZERO_PAGE, which gets defined for MIPS. For architectures without __HAVE_ARCH_MULTIPLE_ZERO_PAGE, move_pte becomes a noop. From: Hugh Dickins Fix nasty little bug we've missed in Nick's mremap move ZERO_PAGE patch. The "pte" at that point may be a swap entry or a pte_file entry: we must check pte_present before perhaps corrupting such an entry. Patch below against 2.6.14-rc2-mm1, but the same bug is in 2.6.14-rc2's mm/mremap.c, and more dangerous there since it's affecting all arches: I think the safest course is to send Nick's patch and Yoichi's build fix and this fix (build tested) on to Linus - so only MIPS can be affected. Signed-off-by: Nick Piggin Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 13 +++++++++++++ include/asm-mips/pgtable.h | 2 ++ mm/mremap.c | 6 +++--- 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f86c1e549466..ff28c8b31f58 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -158,6 +158,19 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define lazy_mmu_prot_update(pte) do { } while (0) #endif +#ifndef __HAVE_ARCH_MULTIPLE_ZERO_PAGE +#define move_pte(pte, prot, old_addr, new_addr) (pte) +#else +#define move_pte(pte, prot, old_addr, new_addr) \ +({ \ + pte_t newpte = (pte); \ + if (pte_present(pte) && pfn_valid(pte_pfn(pte)) && \ + pte_page(pte) == ZERO_PAGE(old_addr)) \ + newpte = mk_pte(ZERO_PAGE(new_addr), (prot)); \ + newpte; \ +}) +#endif + /* * When walking page tables, get the address of the next boundary, * or the end address of the range if that comes earlier. 
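
[Editor's sketch, not part of the patch: how a caller is expected to use the move_pte() fallback shown above. On most architectures it is an identity macro; only when an architecture defines __HAVE_ARCH_MULTIPLE_ZERO_PAGE does it rewrite a pte that maps ZERO_PAGE(old_addr) so that it maps ZERO_PAGE(new_addr) instead. The wrapper name below is made up for illustration.]

	static inline pte_t relocate_zero_page(struct vm_area_struct *new_vma,
					       pte_t pte,
					       unsigned long old_addr,
					       unsigned long new_addr)
	{
		/* identity on most arches; on MIPS a pte mapping
		 * ZERO_PAGE(old_addr) is re-pointed at the zero page for
		 * the new virtual address, using the protections passed in */
		return move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
	}
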
Although no diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h index cbd1672c94cb..eaf5d9b3a0e1 100644 --- a/include/asm-mips/pgtable.h +++ b/include/asm-mips/pgtable.h @@ -68,6 +68,8 @@ extern unsigned long zero_page_mask; #define ZERO_PAGE(vaddr) \ (virt_to_page(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))) +#define __HAVE_ARCH_MULTIPLE_ZERO_PAGE + extern void paging_init(void); /* diff --git a/mm/mremap.c b/mm/mremap.c index a32fed454bd7..f343fc73a8bd 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -141,10 +141,10 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr, if (dst) { pte_t pte; pte = ptep_clear_flush(vma, old_addr, src); + /* ZERO_PAGE can be dependant on virtual addr */ - if (pfn_valid(pte_pfn(pte)) && - pte_page(pte) == ZERO_PAGE(old_addr)) - pte = pte_wrprotect(mk_pte(ZERO_PAGE(new_addr), new_vma->vm_page_prot)); + pte = move_pte(pte, new_vma->vm_page_prot, + old_addr, new_addr); set_pte_at(mm, new_addr, dst, pte); } else error = -ENOMEM; -- cgit v1.2.3 From 0f9578b70a9f112bfb541e1d5ab486a376e64503 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 27 Sep 2005 21:45:45 -0700 Subject: [PATCH] ppc64: More hugepage fixes My previous patch fixing invalidation of huge PTEs wasn't good enough, we still had an issue if a PTE invalidation batch contained both small and large pages. This patch fixes this by making sure the batch is flushed if the page size fed to it changes. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/mm/hash_native.c | 5 ++--- arch/ppc64/mm/tlb.c | 4 +++- include/asm-ppc64/tlbflush.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index eb1bbb5b6c16..bfd385b7713c 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -343,7 +343,7 @@ static void native_flush_hash_range(unsigned long context, hpte_t *hptep; unsigned long hpte_v; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - unsigned long large; + unsigned long large = batch->large; local_irq_save(flags); @@ -356,7 +356,6 @@ static void native_flush_hash_range(unsigned long context, va = (vsid << 28) | (batch->addr[i] & 0x0fffffff); batch->vaddr[j] = va; - large = pte_huge(batch->pte[i]); if (large) vpn = va >> HPAGE_SHIFT; else @@ -406,7 +405,7 @@ static void native_flush_hash_range(unsigned long context, asm volatile("ptesync":::"memory"); for (i = 0; i < j; i++) - __tlbie(batch->vaddr[i], 0); + __tlbie(batch->vaddr[i], large); asm volatile("eieio; tlbsync; ptesync":::"memory"); diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c index d8a6593a13f0..21fbffb23a43 100644 --- a/arch/ppc64/mm/tlb.c +++ b/arch/ppc64/mm/tlb.c @@ -143,7 +143,8 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, * up scanning and resetting referenced bits then our batch context * will change mid stream. 
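
[Editor's note, not part of the patch: the rule enforced below is that a TLB flush batch may only accumulate entries that share both a context and a page size. A sketch of that compatibility check, using the batch->large field this patch adds:]

	static inline int batch_can_take(struct ppc64_tlb_batch *batch,
					 unsigned long context, pte_t pte)
	{
		/* batch->large is initialised from pte_huge() when the
		 * batch starts; any mismatch forces flush_tlb_pending()
		 * before the new entry is queued */
		return batch->context == context &&
		       batch->large == pte_huge(pte);
	}
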
*/ - if (unlikely(i != 0 && context != batch->context)) { + if (i != 0 && (context != batch->context || + batch->large != pte_huge(pte))) { flush_tlb_pending(); i = 0; } @@ -151,6 +152,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, if (i == 0) { batch->context = context; batch->mm = mm; + batch->large = pte_huge(pte); } batch->pte[i] = __pte(pte); batch->addr[i] = addr; diff --git a/include/asm-ppc64/tlbflush.h b/include/asm-ppc64/tlbflush.h index 45411a67e082..74271d7c1d16 100644 --- a/include/asm-ppc64/tlbflush.h +++ b/include/asm-ppc64/tlbflush.h @@ -25,6 +25,7 @@ struct ppc64_tlb_batch { pte_t pte[PPC64_TLB_BATCH_NR]; unsigned long addr[PPC64_TLB_BATCH_NR]; unsigned long vaddr[PPC64_TLB_BATCH_NR]; + unsigned int large; }; DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); -- cgit v1.2.3 From 664cceb0093b755739e56572b836a99104ee8a75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Sep 2005 17:03:15 +0100 Subject: [PATCH] Keys: Add possessor permissions to keys [try #3] The attached patch adds extra permission grants to keys for the possessor of a key in addition to the owner, group and other permissions bits. This makes SUID binaries easier to support without going as far as labelling keys and key targets using the LSM facilities. This patch adds a second "pointer type" to key structures (struct key_ref *) that can have the bottom bit of the address set to indicate the possession of a key. This is propagated through searches from the keyring to the discovered key. It has been made a separate type so that the compiler can spot attempts to dereference a potentially incorrect pointer. The "possession" attribute can't be attached to a key structure directly as it's not an intrinsic property of a key. Pointers to keys have been replaced with struct key_ref *'s wherever possession information needs to be passed through. This does assume that the bottom bit of the pointer will always be zero on return from kmem_cache_alloc(). The key reference type has been made into a typedef so that at least it can be located in the sources, even though it's basically a pointer to an undefined type. I've also renamed the accessor functions to be more useful, and all reference variables should now end in "_ref". Signed-Off-By: David Howells Signed-off-by: Linus Torvalds --- Documentation/keys.txt | 74 +++++++--- include/linux/key-ui.h | 28 ++-- include/linux/key.h | 78 ++++++++-- security/keys/internal.h | 26 ++-- security/keys/key.c | 81 ++++++----- security/keys/keyctl.c | 301 ++++++++++++++++++++------------------- security/keys/keyring.c | 86 ++++++----- security/keys/proc.c | 2 +- security/keys/process_keys.c | 164 +++++++++++++-------- security/keys/request_key.c | 36 ++++- security/keys/request_key_auth.c | 2 +- 11 files changed, 528 insertions(+), 350 deletions(-) (limited to 'include') diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 0321ded4b9ae..b22e7c8d059a 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -195,8 +195,8 @@ KEY ACCESS PERMISSIONS ====================== Keys have an owner user ID, a group access ID, and a permissions mask. The mask -has up to eight bits each for user, group and other access. Only five of each -set of eight bits are defined. These permissions granted are: +has up to eight bits each for possessor, user, group and other access. Only +five of each set of eight bits are defined. 
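
[Editor's illustration, not part of the document text: each permission class now occupies its own byte of the mask, possessor in the most significant byte, so a mask composes as shown below. The values come from the KEY_POS_*/KEY_USR_* definitions added to include/linux/key.h later in this patch.]

	/* possessor: 0x1f000000   user:  0x001f0000
	 * group:     0x00001f00   other: 0x0000001f  (5 of the 8 bits defined) */
	key_perm_t perm = KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_SEARCH;
						/* == 0x1f090000 */
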
These permissions granted are: (*) View @@ -241,16 +241,16 @@ about the status of the key service: type, description and permissions. The payload of the key is not available this way: - SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY - 00000001 I----- 39 perm 1f0000 0 0 keyring _uid_ses.0: 1/4 - 00000002 I----- 2 perm 1f0000 0 0 keyring _uid.0: empty - 00000007 I----- 1 perm 1f0000 0 0 keyring _pid.1: empty - 0000018d I----- 1 perm 1f0000 0 0 keyring _pid.412: empty - 000004d2 I--Q-- 1 perm 1f0000 32 -1 keyring _uid.32: 1/4 - 000004d3 I--Q-- 3 perm 1f0000 32 -1 keyring _uid_ses.32: empty - 00000892 I--QU- 1 perm 1f0000 0 0 user metal:copper: 0 - 00000893 I--Q-N 1 35s 1f0000 0 0 user metal:silver: 0 - 00000894 I--Q-- 1 10h 1f0000 0 0 user metal:gold: 0 + SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY + 00000001 I----- 39 perm 1f1f0000 0 0 keyring _uid_ses.0: 1/4 + 00000002 I----- 2 perm 1f1f0000 0 0 keyring _uid.0: empty + 00000007 I----- 1 perm 1f1f0000 0 0 keyring _pid.1: empty + 0000018d I----- 1 perm 1f1f0000 0 0 keyring _pid.412: empty + 000004d2 I--Q-- 1 perm 1f1f0000 32 -1 keyring _uid.32: 1/4 + 000004d3 I--Q-- 3 perm 1f1f0000 32 -1 keyring _uid_ses.32: empty + 00000892 I--QU- 1 perm 1f000000 0 0 user metal:copper: 0 + 00000893 I--Q-N 1 35s 1f1f0000 0 0 user metal:silver: 0 + 00000894 I--Q-- 1 10h 001f0000 0 0 user metal:gold: 0 The flags are: @@ -637,6 +637,34 @@ call, and the key released upon close. How to deal with conflicting keys due to two different users opening the same file is left to the filesystem author to solve. +Note that there are two different types of pointers to keys that may be +encountered: + + (*) struct key * + + This simply points to the key structure itself. Key structures will be at + least four-byte aligned. + + (*) key_ref_t + + This is equivalent to a struct key *, but the least significant bit is set + if the caller "possesses" the key. By "possession" it is meant that the + calling processes has a searchable link to the key from one of its + keyrings. There are three functions for dealing with these: + + key_ref_t make_key_ref(const struct key *key, + unsigned long possession); + + struct key *key_ref_to_ptr(const key_ref_t key_ref); + + unsigned long is_key_possessed(const key_ref_t key_ref); + + The first function constructs a key reference from a key pointer and + possession information (which must be 0 or 1 and not any other value). + + The second function retrieves the key pointer from a reference and the + third retrieves the possession flag. + When accessing a key's payload contents, certain precautions must be taken to prevent access vs modification races. See the section "Notes on accessing payload contents" for more information. @@ -665,7 +693,11 @@ payload contents" for more information. void key_put(struct key *key); - This can be called from interrupt context. If CONFIG_KEYS is not set then + Or: + + void key_ref_put(key_ref_t key_ref); + + These can be called from interrupt context. If CONFIG_KEYS is not set then the argument will not be parsed. @@ -689,13 +721,17 @@ payload contents" for more information. (*) If a keyring was found in the search, this can be further searched by: - struct key *keyring_search(struct key *keyring, - const struct key_type *type, - const char *description) + key_ref_t keyring_search(key_ref_t keyring_ref, + const struct key_type *type, + const char *description) This searches the keyring tree specified for a matching key. Error ENOKEY - is returned upon failure. 
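
[Editor's sketch, not part of the document text: a minimal in-kernel calling sequence for the key_ref_t-based keyring_search(). It assumes a "user"-type key named "metal:gold" as in the examples above, that key_type_user is visible to the caller, and that errors follow the IS_ERR/PTR_ERR convention described here; use_key() is a placeholder.]

	key_ref_t kref;

	kref = keyring_search(keyring_ref, &key_type_user, "metal:gold");
	if (IS_ERR(kref))
		return PTR_ERR(kref);		/* e.g. -ENOKEY or -EAGAIN */

	/* possession is propagated from keyring_ref into kref */
	use_key(key_ref_to_ptr(kref));
	key_ref_put(kref);
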
If successful, the returned key will need to be - released. + is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful, + the returned key will need to be released. + + The possession attribute from the keyring reference is used to control + access through the permissions mask and is propagated to the returned key + reference pointer if successful. (*) To check the validity of a key, this function can be called: @@ -732,7 +768,7 @@ More complex payload contents must be allocated and a pointer to them set in key->payload.data. One of the following ways must be selected to access the data: - (1) Unmodifyable key type. + (1) Unmodifiable key type. If the key type does not have a modify method, then the key's payload can be accessed without any form of locking, provided that it's known to be diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h index cc326174a808..918c34a8347e 100644 --- a/include/linux/key-ui.h +++ b/include/linux/key-ui.h @@ -42,11 +42,14 @@ struct keyring_list { /* * check to see whether permission is granted to use a key in the desired way */ -static inline int key_permission(const struct key *key, key_perm_t perm) +static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == current->fsuid) + if (is_key_possessed(key_ref)) + kperm = key->perm >> 24; + else if (key->uid == current->fsuid) kperm = key->perm >> 16; else if (key->gid != -1 && key->perm & KEY_GRP_ALL && @@ -65,11 +68,14 @@ static inline int key_permission(const struct key *key, key_perm_t perm) * check to see whether permission is granted to use a key in at least one of * the desired ways */ -static inline int key_any_permission(const struct key *key, key_perm_t perm) +static inline int key_any_permission(const key_ref_t key_ref, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == current->fsuid) + if (is_key_possessed(key_ref)) + kperm = key->perm >> 24; + else if (key->uid == current->fsuid) kperm = key->perm >> 16; else if (key->gid != -1 && key->perm & KEY_GRP_ALL && @@ -94,13 +100,17 @@ static inline int key_task_groups_search(struct task_struct *tsk, gid_t gid) return ret; } -static inline int key_task_permission(const struct key *key, +static inline int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == context->fsuid) { + if (is_key_possessed(key_ref)) { + kperm = key->perm >> 24; + } + else if (key->uid == context->fsuid) { kperm = key->perm >> 16; } else if (key->gid != -1 && @@ -121,9 +131,9 @@ static inline int key_task_permission(const struct key *key, } -extern struct key *lookup_user_key(struct task_struct *context, - key_serial_t id, int create, int partial, - key_perm_t perm); +extern key_ref_t lookup_user_key(struct task_struct *context, + key_serial_t id, int create, int partial, + key_perm_t perm); extern long join_session_keyring(const char *name); diff --git a/include/linux/key.h b/include/linux/key.h index 970bbd916cf4..f1efa016dbf3 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -35,11 +35,18 @@ struct key; #undef KEY_DEBUGGING -#define KEY_USR_VIEW 0x00010000 /* user can view a key's attributes */ -#define KEY_USR_READ 0x00020000 /* user can read key payload / view keyring */ -#define KEY_USR_WRITE 0x00040000 /* user can update key payload / add link to keyring */ -#define 
KEY_USR_SEARCH 0x00080000 /* user can find a key in search / search a keyring */ -#define KEY_USR_LINK 0x00100000 /* user can create a link to a key/keyring */ +#define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */ +#define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */ +#define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */ +#define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */ +#define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */ +#define KEY_POS_ALL 0x1f000000 + +#define KEY_USR_VIEW 0x00010000 /* user permissions... */ +#define KEY_USR_READ 0x00020000 +#define KEY_USR_WRITE 0x00040000 +#define KEY_USR_SEARCH 0x00080000 +#define KEY_USR_LINK 0x00100000 #define KEY_USR_ALL 0x001f0000 #define KEY_GRP_VIEW 0x00000100 /* group permissions... */ @@ -65,6 +72,38 @@ struct key_owner; struct keyring_list; struct keyring_name; +/*****************************************************************************/ +/* + * key reference with possession attribute handling + * + * NOTE! key_ref_t is a typedef'd pointer to a type that is not actually + * defined. This is because we abuse the bottom bit of the reference to carry a + * flag to indicate whether the calling process possesses that key in one of + * its keyrings. + * + * the key_ref_t has been made a separate type so that the compiler can reject + * attempts to dereference it without proper conversion. + * + * the three functions are used to assemble and disassemble references + */ +typedef struct __key_reference_with_attributes *key_ref_t; + +static inline key_ref_t make_key_ref(const struct key *key, + unsigned long possession) +{ + return (key_ref_t) ((unsigned long) key | possession); +} + +static inline struct key *key_ref_to_ptr(const key_ref_t key_ref) +{ + return (struct key *) ((unsigned long) key_ref & ~1UL); +} + +static inline unsigned long is_key_possessed(const key_ref_t key_ref) +{ + return (unsigned long) key_ref & 1UL; +} + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -215,20 +254,25 @@ static inline struct key *key_get(struct key *key) return key; } +static inline void key_ref_put(key_ref_t key_ref) +{ + key_put(key_ref_to_ptr(key_ref)); +} + extern struct key *request_key(struct key_type *type, const char *description, const char *callout_info); extern int key_validate(struct key *key); -extern struct key *key_create_or_update(struct key *keyring, - const char *type, - const char *description, - const void *payload, - size_t plen, - int not_in_quota); +extern key_ref_t key_create_or_update(key_ref_t keyring, + const char *type, + const char *description, + const void *payload, + size_t plen, + int not_in_quota); -extern int key_update(struct key *key, +extern int key_update(key_ref_t key, const void *payload, size_t plen); @@ -243,9 +287,9 @@ extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, extern int keyring_clear(struct key *keyring); -extern struct key *keyring_search(struct key *keyring, - struct key_type *type, - const char *description); +extern key_ref_t keyring_search(key_ref_t keyring, + struct key_type *type, + const char *description); extern int keyring_add_key(struct key *keyring, struct key *key); @@ -285,6 +329,10 @@ extern void key_init(void); #define key_serial(k) 0 #define key_get(k) ({ NULL; }) #define key_put(k) do { } while(0) 
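
[Editor's sketch, not part of the header: how the three helpers defined above round-trip a key pointer and its possession bit. It relies only on struct key being at least four-byte aligned, as the documentation added by this patch notes; the function name is made up.]

	static void key_ref_roundtrip(struct key *key)
	{
		key_ref_t ref = make_key_ref(key, 1);	/* mark as possessed */

		/* the bottom bit carries possession; masking it off again
		 * recovers the original pointer */
		BUG_ON(key_ref_to_ptr(ref) != key);
		BUG_ON(is_key_possessed(ref) != 1);
	}
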
+#define key_ref_put(k) do { } while(0) +#define make_key_ref(k) ({ NULL; }) +#define key_ref_to_ptr(k) ({ NULL; }) +#define is_key_possessed(k) 0 #define alloc_uid_keyring(u) 0 #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(t, k) ({ NULL; }) diff --git a/security/keys/internal.h b/security/keys/internal.h index 46c8602661c9..db99ed434f3a 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -71,26 +71,26 @@ extern void keyring_publish_name(struct key *keyring); extern int __key_link(struct key *keyring, struct key *key); -extern struct key *__keyring_search_one(struct key *keyring, - const struct key_type *type, - const char *description, - key_perm_t perm); +extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, + const struct key_type *type, + const char *description, + key_perm_t perm); extern struct key *keyring_search_instkey(struct key *keyring, key_serial_t target_id); typedef int (*key_match_func_t)(const struct key *, const void *); -extern struct key *keyring_search_aux(struct key *keyring, - struct task_struct *tsk, - struct key_type *type, - const void *description, - key_match_func_t match); +extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, + struct task_struct *tsk, + struct key_type *type, + const void *description, + key_match_func_t match); -extern struct key *search_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - struct task_struct *tsk); +extern key_ref_t search_process_keyrings(struct key_type *type, + const void *description, + key_match_func_t match, + struct task_struct *tsk); extern struct key *find_keyring_by_name(const char *name, key_serial_t bound); diff --git a/security/keys/key.c b/security/keys/key.c index fb89f9844465..2182be9e9309 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -693,14 +693,15 @@ void key_type_put(struct key_type *ktype) * - the key has an incremented refcount * - we need to put the key if we get an error */ -static inline struct key *__key_update(struct key *key, const void *payload, - size_t plen) +static inline key_ref_t __key_update(key_ref_t key_ref, + const void *payload, size_t plen) { + struct key *key = key_ref_to_ptr(key_ref); int ret; /* need write permission on the key to update it */ ret = -EACCES; - if (!key_permission(key, KEY_WRITE)) + if (!key_permission(key_ref, KEY_WRITE)) goto error; ret = -EEXIST; @@ -719,12 +720,12 @@ static inline struct key *__key_update(struct key *key, const void *payload, if (ret < 0) goto error; - out: - return key; +out: + return key_ref; - error: +error: key_put(key); - key = ERR_PTR(ret); + key_ref = ERR_PTR(ret); goto out; } /* end __key_update() */ @@ -734,52 +735,56 @@ static inline struct key *__key_update(struct key *key, const void *payload, * search the specified keyring for a key of the same description; if one is * found, update it, otherwise add a new one */ -struct key *key_create_or_update(struct key *keyring, - const char *type, - const char *description, - const void *payload, - size_t plen, - int not_in_quota) +key_ref_t key_create_or_update(key_ref_t keyring_ref, + const char *type, + const char *description, + const void *payload, + size_t plen, + int not_in_quota) { struct key_type *ktype; - struct key *key = NULL; + struct key *keyring, *key = NULL; key_perm_t perm; + key_ref_t key_ref; int ret; - key_check(keyring); - /* look up the key type to see if it's one of the registered kernel * types */ ktype = key_type_lookup(type); if (IS_ERR(ktype)) { 
- key = ERR_PTR(-ENODEV); + key_ref = ERR_PTR(-ENODEV); goto error; } - ret = -EINVAL; + key_ref = ERR_PTR(-EINVAL); if (!ktype->match || !ktype->instantiate) goto error_2; + keyring = key_ref_to_ptr(keyring_ref); + + key_check(keyring); + + down_write(&keyring->sem); + + /* if we're going to allocate a new key, we're going to have + * to modify the keyring */ + key_ref = ERR_PTR(-EACCES); + if (!key_permission(keyring_ref, KEY_WRITE)) + goto error_3; + /* search for an existing key of the same type and description in the * destination keyring */ - down_write(&keyring->sem); - - key = __keyring_search_one(keyring, ktype, description, 0); - if (!IS_ERR(key)) + key_ref = __keyring_search_one(keyring_ref, ktype, description, 0); + if (!IS_ERR(key_ref)) goto found_matching_key; - /* if we're going to allocate a new key, we're going to have to modify - * the keyring */ - ret = -EACCES; - if (!key_permission(keyring, KEY_WRITE)) - goto error_3; - /* decide on the permissions we want */ - perm = KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK; + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK; + perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK; if (ktype->read) - perm |= KEY_USR_READ; + perm |= KEY_POS_READ | KEY_USR_READ; if (ktype == &key_type_keyring || ktype->update) perm |= KEY_USR_WRITE; @@ -788,7 +793,7 @@ struct key *key_create_or_update(struct key *keyring, key = key_alloc(ktype, description, current->fsuid, current->fsgid, perm, not_in_quota); if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = ERR_PTR(PTR_ERR(key)); goto error_3; } @@ -796,15 +801,18 @@ struct key *key_create_or_update(struct key *keyring, ret = __key_instantiate_and_link(key, payload, plen, keyring, NULL); if (ret < 0) { key_put(key); - key = ERR_PTR(ret); + key_ref = ERR_PTR(ret); + goto error_3; } + key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); + error_3: up_write(&keyring->sem); error_2: key_type_put(ktype); error: - return key; + return key_ref; found_matching_key: /* we found a matching key, so we're going to try to update it @@ -813,7 +821,7 @@ struct key *key_create_or_update(struct key *keyring, up_write(&keyring->sem); key_type_put(ktype); - key = __key_update(key, payload, plen); + key_ref = __key_update(key_ref, payload, plen); goto error; } /* end key_create_or_update() */ @@ -824,15 +832,16 @@ EXPORT_SYMBOL(key_create_or_update); /* * update a key */ -int key_update(struct key *key, const void *payload, size_t plen) +int key_update(key_ref_t key_ref, const void *payload, size_t plen) { + struct key *key = key_ref_to_ptr(key_ref); int ret; key_check(key); /* the key must be writable */ ret = -EACCES; - if (!key_permission(key, KEY_WRITE)) + if (!key_permission(key_ref, KEY_WRITE)) goto error; /* attempt to update it if supported */ diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index a6516a64b297..4c670ee6acf9 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -34,7 +34,7 @@ asmlinkage long sys_add_key(const char __user *_type, size_t plen, key_serial_t ringid) { - struct key *keyring, *key; + key_ref_t keyring_ref, key_ref; char type[32], *description; void *payload; long dlen, ret; @@ -86,25 +86,25 @@ asmlinkage long sys_add_key(const char __user *_type, } /* find the target keyring (which must be writable) */ - keyring = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error3; } 
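
[Editor's sketch, not part of the patch: the calling pattern sys_add_key() follows after this conversion, shown as a standalone helper. The type and description strings are placeholders.]

	static long add_key_example(key_ref_t keyring_ref,
				    const void *payload, size_t plen)
	{
		key_ref_t key_ref;
		long ret;

		key_ref = key_create_or_update(keyring_ref, "user",
					       "example:desc",
					       payload, plen, 0);
		if (IS_ERR(key_ref))
			return PTR_ERR(key_ref);

		ret = key_ref_to_ptr(key_ref)->serial;	/* new key's serial */
		key_ref_put(key_ref);
		return ret;
	}
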
/* create or update the requested key and add it to the target * keyring */ - key = key_create_or_update(keyring, type, description, - payload, plen, 0); - if (!IS_ERR(key)) { - ret = key->serial; - key_put(key); + key_ref = key_create_or_update(keyring_ref, type, description, + payload, plen, 0); + if (!IS_ERR(key_ref)) { + ret = key_ref_to_ptr(key_ref)->serial; + key_ref_put(key_ref); } else { - ret = PTR_ERR(key); + ret = PTR_ERR(key_ref); } - key_put(keyring); + key_ref_put(keyring_ref); error3: kfree(payload); error2: @@ -131,7 +131,8 @@ asmlinkage long sys_request_key(const char __user *_type, key_serial_t destringid) { struct key_type *ktype; - struct key *key, *dest; + struct key *key; + key_ref_t dest_ref; char type[32], *description, *callout_info; long dlen, ret; @@ -187,11 +188,11 @@ asmlinkage long sys_request_key(const char __user *_type, } /* get the destination keyring if specified */ - dest = NULL; + dest_ref = NULL; if (destringid) { - dest = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); - if (IS_ERR(dest)) { - ret = PTR_ERR(dest); + dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); + if (IS_ERR(dest_ref)) { + ret = PTR_ERR(dest_ref); goto error3; } } @@ -204,7 +205,8 @@ asmlinkage long sys_request_key(const char __user *_type, } /* do the search */ - key = request_key_and_link(ktype, description, callout_info, dest); + key = request_key_and_link(ktype, description, callout_info, + key_ref_to_ptr(dest_ref)); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error5; @@ -216,7 +218,7 @@ asmlinkage long sys_request_key(const char __user *_type, error5: key_type_put(ktype); error4: - key_put(dest); + key_ref_put(dest_ref); error3: kfree(callout_info); error2: @@ -234,17 +236,17 @@ asmlinkage long sys_request_key(const char __user *_type, */ long keyctl_get_keyring_ID(key_serial_t id, int create) { - struct key *key; + key_ref_t key_ref; long ret; - key = lookup_user_key(NULL, id, create, 0, KEY_SEARCH); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, create, 0, KEY_SEARCH); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error; } - ret = key->serial; - key_put(key); + ret = key_ref_to_ptr(key_ref)->serial; + key_ref_put(key_ref); error: return ret; @@ -302,7 +304,7 @@ long keyctl_update_key(key_serial_t id, const void __user *_payload, size_t plen) { - struct key *key; + key_ref_t key_ref; void *payload; long ret; @@ -324,16 +326,16 @@ long keyctl_update_key(key_serial_t id, } /* find the target key (which must be writable) */ - key = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error2; } /* update the key */ - ret = key_update(key, payload, plen); + ret = key_update(key_ref, payload, plen); - key_put(key); + key_ref_put(key_ref); error2: kfree(payload); error: @@ -349,19 +351,19 @@ long keyctl_update_key(key_serial_t id, */ long keyctl_revoke_key(key_serial_t id) { - struct key *key; + key_ref_t key_ref; long ret; - key = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error; } - key_revoke(key); + key_revoke(key_ref_to_ptr(key_ref)); ret = 0; - key_put(key); + key_ref_put(key_ref); error: return ret; @@ -375,18 +377,18 @@ long keyctl_revoke_key(key_serial_t id) */ long keyctl_keyring_clear(key_serial_t ringid) { - struct 
key *keyring; + key_ref_t keyring_ref; long ret; - keyring = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error; } - ret = keyring_clear(keyring); + ret = keyring_clear(key_ref_to_ptr(keyring_ref)); - key_put(keyring); + key_ref_put(keyring_ref); error: return ret; @@ -401,26 +403,26 @@ long keyctl_keyring_clear(key_serial_t ringid) */ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid) { - struct key *keyring, *key; + key_ref_t keyring_ref, key_ref; long ret; - keyring = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error; } - key = lookup_user_key(NULL, id, 1, 0, KEY_LINK); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 1, 0, KEY_LINK); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error2; } - ret = key_link(keyring, key); + ret = key_link(key_ref_to_ptr(keyring_ref), key_ref_to_ptr(key_ref)); - key_put(key); + key_ref_put(key_ref); error2: - key_put(keyring); + key_ref_put(keyring_ref); error: return ret; @@ -435,26 +437,26 @@ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid) */ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid) { - struct key *keyring, *key; + key_ref_t keyring_ref, key_ref; long ret; - keyring = lookup_user_key(NULL, ringid, 0, 0, KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error; } - key = lookup_user_key(NULL, id, 0, 0, 0); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 0, 0, 0); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error2; } - ret = key_unlink(keyring, key); + ret = key_unlink(key_ref_to_ptr(keyring_ref), key_ref_to_ptr(key_ref)); - key_put(key); + key_ref_put(key_ref); error2: - key_put(keyring); + key_ref_put(keyring_ref); error: return ret; @@ -476,24 +478,26 @@ long keyctl_describe_key(key_serial_t keyid, size_t buflen) { struct key *key, *instkey; + key_ref_t key_ref; char *tmpbuf; long ret; - key = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); - if (IS_ERR(key)) { + key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); + if (IS_ERR(key_ref)) { /* viewing a key under construction is permitted if we have the * authorisation token handy */ - if (PTR_ERR(key) == -EACCES) { + if (PTR_ERR(key_ref) == -EACCES) { instkey = key_get_instantiation_authkey(keyid); if (!IS_ERR(instkey)) { key_put(instkey); - key = lookup_user_key(NULL, keyid, 0, 1, 0); - if (!IS_ERR(key)) + key_ref = lookup_user_key(NULL, keyid, + 0, 1, 0); + if (!IS_ERR(key_ref)) goto okay; } } - ret = PTR_ERR(key); + ret = PTR_ERR(key_ref); goto error; } @@ -504,13 +508,16 @@ okay: if (!tmpbuf) goto error2; + key = key_ref_to_ptr(key_ref); + ret = snprintf(tmpbuf, PAGE_SIZE - 1, - "%s;%d;%d;%06x;%s", - key->type->name, - key->uid, - key->gid, - key->perm, - key->description ? key->description :"" + "%s;%d;%d;%08x;%s", + key_ref_to_ptr(key_ref)->type->name, + key_ref_to_ptr(key_ref)->uid, + key_ref_to_ptr(key_ref)->gid, + key_ref_to_ptr(key_ref)->perm, + key_ref_to_ptr(key_ref)->description ? 
+ key_ref_to_ptr(key_ref)->description : "" ); /* include a NUL char at the end of the data */ @@ -530,7 +537,7 @@ okay: kfree(tmpbuf); error2: - key_put(key); + key_ref_put(key_ref); error: return ret; @@ -552,7 +559,7 @@ long keyctl_keyring_search(key_serial_t ringid, key_serial_t destringid) { struct key_type *ktype; - struct key *keyring, *key, *dest; + key_ref_t keyring_ref, key_ref, dest_ref; char type[32], *description; long dlen, ret; @@ -581,18 +588,18 @@ long keyctl_keyring_search(key_serial_t ringid, goto error2; /* get the keyring at which to begin the search */ - keyring = lookup_user_key(NULL, ringid, 0, 0, KEY_SEARCH); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_SEARCH); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error2; } /* get the destination keyring if specified */ - dest = NULL; + dest_ref = NULL; if (destringid) { - dest = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); - if (IS_ERR(dest)) { - ret = PTR_ERR(dest); + dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); + if (IS_ERR(dest_ref)) { + ret = PTR_ERR(dest_ref); goto error3; } } @@ -605,9 +612,9 @@ long keyctl_keyring_search(key_serial_t ringid, } /* do the search */ - key = keyring_search(keyring, ktype, description); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = keyring_search(keyring_ref, ktype, description); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); /* treat lack or presence of a negative key the same */ if (ret == -EAGAIN) @@ -616,26 +623,26 @@ long keyctl_keyring_search(key_serial_t ringid, } /* link the resulting key to the destination keyring if we can */ - if (dest) { + if (dest_ref) { ret = -EACCES; - if (!key_permission(key, KEY_LINK)) + if (!key_permission(key_ref, KEY_LINK)) goto error6; - ret = key_link(dest, key); + ret = key_link(key_ref_to_ptr(dest_ref), key_ref_to_ptr(key_ref)); if (ret < 0) goto error6; } - ret = key->serial; + ret = key_ref_to_ptr(key_ref)->serial; error6: - key_put(key); + key_ref_put(key_ref); error5: key_type_put(ktype); error4: - key_put(dest); + key_ref_put(dest_ref); error3: - key_put(keyring); + key_ref_put(keyring_ref); error2: kfree(description); error: @@ -643,16 +650,6 @@ long keyctl_keyring_search(key_serial_t ringid, } /* end keyctl_keyring_search() */ -/*****************************************************************************/ -/* - * see if the key we're looking at is the target key - */ -static int keyctl_read_key_same(const struct key *key, const void *target) -{ - return key == target; - -} /* end keyctl_read_key_same() */ - /*****************************************************************************/ /* * read a user key's payload @@ -665,38 +662,33 @@ static int keyctl_read_key_same(const struct key *key, const void *target) */ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) { - struct key *key, *skey; + struct key *key; + key_ref_t key_ref; long ret; /* find the key first */ - key = lookup_user_key(NULL, keyid, 0, 0, 0); - if (!IS_ERR(key)) { - /* see if we can read it directly */ - if (key_permission(key, KEY_READ)) - goto can_read_key; - - /* we can't; see if it's searchable from this process's - * keyrings - * - we automatically take account of the fact that it may be - * dangling off an instantiation key - */ - skey = search_process_keyrings(key->type, key, - keyctl_read_key_same, current); - if (!IS_ERR(skey)) - goto can_read_key2; - - ret = PTR_ERR(skey); - if (ret == -EAGAIN) - ret = -EACCES; - goto 
error2; + key_ref = lookup_user_key(NULL, keyid, 0, 0, 0); + if (IS_ERR(key_ref)) { + ret = -ENOKEY; + goto error; } - ret = -ENOKEY; - goto error; + key = key_ref_to_ptr(key_ref); + + /* see if we can read it directly */ + if (key_permission(key_ref, KEY_READ)) + goto can_read_key; + + /* we can't; see if it's searchable from this process's keyrings + * - we automatically take account of the fact that it may be + * dangling off an instantiation key + */ + if (!is_key_possessed(key_ref)) { + ret = -EACCES; + goto error2; + } /* the key is probably readable - now try to read it */ - can_read_key2: - key_put(skey); can_read_key: ret = key_validate(key); if (ret == 0) { @@ -727,18 +719,21 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) { struct key *key; + key_ref_t key_ref; long ret; ret = 0; if (uid == (uid_t) -1 && gid == (gid_t) -1) goto error; - key = lookup_user_key(NULL, id, 1, 1, 0); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 1, 1, 0); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error; } + key = key_ref_to_ptr(key_ref); + /* make the changes with the locks held to prevent chown/chown races */ ret = -EACCES; down_write(&key->sem); @@ -784,18 +779,21 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) long keyctl_setperm_key(key_serial_t id, key_perm_t perm) { struct key *key; + key_ref_t key_ref; long ret; ret = -EINVAL; - if (perm & ~(KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) + if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) goto error; - key = lookup_user_key(NULL, id, 1, 1, 0); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + key_ref = lookup_user_key(NULL, id, 1, 1, 0); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); goto error; } + key = key_ref_to_ptr(key_ref); + /* make the changes with the locks held to prevent chown/chmod races */ ret = -EACCES; down_write(&key->sem); @@ -824,7 +822,8 @@ long keyctl_instantiate_key(key_serial_t id, key_serial_t ringid) { struct request_key_auth *rka; - struct key *instkey, *keyring; + struct key *instkey; + key_ref_t keyring_ref; void *payload; long ret; @@ -857,21 +856,21 @@ long keyctl_instantiate_key(key_serial_t id, /* find the destination keyring amongst those belonging to the * requesting task */ - keyring = NULL; + keyring_ref = NULL; if (ringid) { - keyring = lookup_user_key(rka->context, ringid, 1, 0, - KEY_WRITE); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(rka->context, ringid, 1, 0, + KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error3; } } /* instantiate the key and link it into a keyring */ ret = key_instantiate_and_link(rka->target_key, payload, plen, - keyring, instkey); + key_ref_to_ptr(keyring_ref), instkey); - key_put(keyring); + key_ref_put(keyring_ref); error3: key_put(instkey); error2: @@ -889,7 +888,8 @@ long keyctl_instantiate_key(key_serial_t id, long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { struct request_key_auth *rka; - struct key *instkey, *keyring; + struct key *instkey; + key_ref_t keyring_ref; long ret; /* find the instantiation authorisation key */ @@ -903,19 +903,20 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* find the destination keyring if present (which must also be * writable) */ - keyring = NULL; + keyring_ref = NULL; if (ringid) { - keyring = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); 
- if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); + keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + if (IS_ERR(keyring_ref)) { + ret = PTR_ERR(keyring_ref); goto error2; } } /* instantiate the key and link it into a keyring */ - ret = key_negate_and_link(rka->target_key, timeout, keyring, instkey); + ret = key_negate_and_link(rka->target_key, timeout, + key_ref_to_ptr(keyring_ref), instkey); - key_put(keyring); + key_ref_put(keyring_ref); error2: key_put(instkey); error: diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 9c208c756df8..0639396dd441 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -309,7 +309,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, KEY_USR_ALL, not_in_quota); + uid, gid, KEY_POS_ALL | KEY_USR_ALL, not_in_quota); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); @@ -333,12 +333,13 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, * - we rely on RCU to prevent the keyring lists from disappearing on us * - we return -EAGAIN if we didn't find any matching key * - we return -ENOKEY if we only found negative matching keys + * - we propagate the possession attribute from the keyring ref to the key ref */ -struct key *keyring_search_aux(struct key *keyring, - struct task_struct *context, - struct key_type *type, - const void *description, - key_match_func_t match) +key_ref_t keyring_search_aux(key_ref_t keyring_ref, + struct task_struct *context, + struct key_type *type, + const void *description, + key_match_func_t match) { struct { struct keyring_list *keylist; @@ -347,29 +348,33 @@ struct key *keyring_search_aux(struct key *keyring, struct keyring_list *keylist; struct timespec now; - struct key *key; + unsigned long possessed; + struct key *keyring, *key; + key_ref_t key_ref; long err; int sp, kix; + keyring = key_ref_to_ptr(keyring_ref); + possessed = is_key_possessed(keyring_ref); key_check(keyring); - rcu_read_lock(); - /* top keyring must have search permission to begin the search */ - key = ERR_PTR(-EACCES); - if (!key_task_permission(keyring, context, KEY_SEARCH)) + key_ref = ERR_PTR(-EACCES); + if (!key_task_permission(keyring_ref, context, KEY_SEARCH)) goto error; - key = ERR_PTR(-ENOTDIR); + key_ref = ERR_PTR(-ENOTDIR); if (keyring->type != &key_type_keyring) goto error; + rcu_read_lock(); + now = current_kernel_time(); err = -EAGAIN; sp = 0; /* start processing a new keyring */ - descend: +descend: if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) goto not_this_keyring; @@ -397,7 +402,8 @@ struct key *keyring_search_aux(struct key *keyring, continue; /* key must have search permissions */ - if (!key_task_permission(key, context, KEY_SEARCH)) + if (!key_task_permission(make_key_ref(key, possessed), + context, KEY_SEARCH)) continue; /* we set a different error code if we find a negative key */ @@ -411,7 +417,7 @@ struct key *keyring_search_aux(struct key *keyring, /* search through the keyrings nested in this one */ kix = 0; - ascend: +ascend: for (; kix < keylist->nkeys; kix++) { key = keylist->keys[kix]; if (key->type != &key_type_keyring) @@ -423,7 +429,8 @@ struct key *keyring_search_aux(struct key *keyring, if (sp >= KEYRING_SEARCH_MAX_DEPTH) continue; - if (!key_task_permission(key, context, KEY_SEARCH)) + if (!key_task_permission(make_key_ref(key, possessed), + context, KEY_SEARCH)) continue; /* stack the current position */ @@ -438,7 
+445,7 @@ struct key *keyring_search_aux(struct key *keyring, /* the keyring we're looking at was disqualified or didn't contain a * matching key */ - not_this_keyring: +not_this_keyring: if (sp > 0) { /* resume the processing of a keyring higher up in the tree */ sp--; @@ -447,16 +454,18 @@ struct key *keyring_search_aux(struct key *keyring, goto ascend; } - key = ERR_PTR(err); - goto error; + key_ref = ERR_PTR(err); + goto error_2; /* we found a viable match */ - found: +found: atomic_inc(&key->usage); key_check(key); - error: + key_ref = make_key_ref(key, possessed); +error_2: rcu_read_unlock(); - return key; +error: + return key_ref; } /* end keyring_search_aux() */ @@ -469,9 +478,9 @@ struct key *keyring_search_aux(struct key *keyring, * - we return -EAGAIN if we didn't find any matching key * - we return -ENOKEY if we only found negative matching keys */ -struct key *keyring_search(struct key *keyring, - struct key_type *type, - const char *description) +key_ref_t keyring_search(key_ref_t keyring, + struct key_type *type, + const char *description) { if (!type->match) return ERR_PTR(-ENOKEY); @@ -488,15 +497,19 @@ EXPORT_SYMBOL(keyring_search); * search the given keyring only (no recursion) * - keyring must be locked by caller */ -struct key *__keyring_search_one(struct key *keyring, - const struct key_type *ktype, - const char *description, - key_perm_t perm) +key_ref_t __keyring_search_one(key_ref_t keyring_ref, + const struct key_type *ktype, + const char *description, + key_perm_t perm) { struct keyring_list *klist; - struct key *key; + unsigned long possessed; + struct key *keyring, *key; int loop; + keyring = key_ref_to_ptr(keyring_ref); + possessed = is_key_possessed(keyring_ref); + rcu_read_lock(); klist = rcu_dereference(keyring->payload.subscriptions); @@ -507,21 +520,21 @@ struct key *__keyring_search_one(struct key *keyring, if (key->type == ktype && (!key->type->match || key->type->match(key, description)) && - key_permission(key, perm) && + key_permission(make_key_ref(key, possessed), + perm) && !test_bit(KEY_FLAG_REVOKED, &key->flags) ) goto found; } } - key = ERR_PTR(-ENOKEY); - goto error; + rcu_read_unlock(); + return ERR_PTR(-ENOKEY); found: atomic_inc(&key->usage); - error: rcu_read_unlock(); - return key; + return make_key_ref(key, possessed); } /* end __keyring_search_one() */ @@ -603,7 +616,8 @@ struct key *find_keyring_by_name(const char *name, key_serial_t bound) if (strcmp(keyring->description, name) != 0) continue; - if (!key_permission(keyring, KEY_SEARCH)) + if (!key_permission(make_key_ref(keyring, 0), + KEY_SEARCH)) continue; /* found a potential candidate, but we still need to diff --git a/security/keys/proc.c b/security/keys/proc.c index c55cf1fd0826..12b750e51fbf 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -167,7 +167,7 @@ static int proc_keys_show(struct seq_file *m, void *v) #define showflag(KEY, LETTER, FLAG) \ (test_bit(FLAG, &(KEY)->flags) ? 
LETTER : '-') - seq_printf(m, "%08x %c%c%c%c%c%c %5d %4s %06x %5d %5d %-9.9s ", + seq_printf(m, "%08x %c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, showflag(key, 'I', KEY_FLAG_INSTANTIATED), showflag(key, 'R', KEY_FLAG_REVOKED), diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index c089f78fb94e..d42d2158ce13 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -39,7 +39,7 @@ struct key root_user_keyring = { .type = &key_type_keyring, .user = &root_key_user, .sem = __RWSEM_INITIALIZER(root_user_keyring.sem), - .perm = KEY_USR_ALL, + .perm = KEY_POS_ALL | KEY_USR_ALL, .flags = 1 << KEY_FLAG_INSTANTIATED, .description = "_uid.0", #ifdef KEY_DEBUGGING @@ -54,7 +54,7 @@ struct key root_session_keyring = { .type = &key_type_keyring, .user = &root_key_user, .sem = __RWSEM_INITIALIZER(root_session_keyring.sem), - .perm = KEY_USR_ALL, + .perm = KEY_POS_ALL | KEY_USR_ALL, .flags = 1 << KEY_FLAG_INSTANTIATED, .description = "_uid_ses.0", #ifdef KEY_DEBUGGING @@ -98,7 +98,7 @@ int alloc_uid_keyring(struct user_struct *user) user->session_keyring = session_keyring; ret = 0; - error: +error: return ret; } /* end alloc_uid_keyring() */ @@ -156,7 +156,7 @@ int install_thread_keyring(struct task_struct *tsk) ret = 0; key_put(old); - error: +error: return ret; } /* end install_thread_keyring() */ @@ -193,7 +193,7 @@ int install_process_keyring(struct task_struct *tsk) } ret = 0; - error: +error: return ret; } /* end install_process_keyring() */ @@ -236,7 +236,7 @@ static int install_session_keyring(struct task_struct *tsk, /* we're using RCU on the pointer */ synchronize_rcu(); key_put(old); - error: +error: return ret; } /* end install_session_keyring() */ @@ -376,13 +376,13 @@ void key_fsgid_changed(struct task_struct *tsk) * - we return -EAGAIN if we didn't find any matching key * - we return -ENOKEY if we found only negative matching keys */ -struct key *search_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - struct task_struct *context) +key_ref_t search_process_keyrings(struct key_type *type, + const void *description, + key_match_func_t match, + struct task_struct *context) { struct request_key_auth *rka; - struct key *key, *ret, *err, *instkey; + key_ref_t key_ref, ret, err, instkey_ref; /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; @@ -391,46 +391,48 @@ struct key *search_process_keyrings(struct key_type *type, * * in terms of priority: success > -ENOKEY > -EAGAIN > other error */ - key = NULL; + key_ref = NULL; ret = NULL; err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ if (context->thread_keyring) { - key = keyring_search_aux(context->thread_keyring, - context, type, description, match); - if (!IS_ERR(key)) + key_ref = keyring_search_aux( + make_key_ref(context->thread_keyring, 1), + context, type, description, match); + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } } /* search the process keyring second */ if (context->signal->process_keyring) { - key = keyring_search_aux(context->signal->process_keyring, - context, type, description, match); - if (!IS_ERR(key)) + key_ref = keyring_search_aux( + make_key_ref(context->signal->process_keyring, 1), + context, type, description, match); 
+ if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } } @@ -438,23 +440,25 @@ struct key *search_process_keyrings(struct key_type *type, /* search the session keyring */ if (context->signal->session_keyring) { rcu_read_lock(); - key = keyring_search_aux( - rcu_dereference(context->signal->session_keyring), + key_ref = keyring_search_aux( + make_key_ref(rcu_dereference( + context->signal->session_keyring), + 1), context, type, description, match); rcu_read_unlock(); - if (!IS_ERR(key)) + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } @@ -465,51 +469,54 @@ struct key *search_process_keyrings(struct key_type *type, goto no_key; rcu_read_lock(); - instkey = __keyring_search_one( - rcu_dereference(context->signal->session_keyring), + instkey_ref = __keyring_search_one( + make_key_ref(rcu_dereference( + context->signal->session_keyring), + 1), &key_type_request_key_auth, NULL, 0); rcu_read_unlock(); - if (IS_ERR(instkey)) + if (IS_ERR(instkey_ref)) goto no_key; - rka = instkey->payload.data; + rka = key_ref_to_ptr(instkey_ref)->payload.data; - key = search_process_keyrings(type, description, match, - rka->context); - key_put(instkey); + key_ref = search_process_keyrings(type, description, match, + rka->context); + key_ref_put(instkey_ref); - if (!IS_ERR(key)) + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } } /* or search the user-session keyring */ else { - key = keyring_search_aux(context->user->session_keyring, - context, type, description, match); - if (!IS_ERR(key)) + key_ref = keyring_search_aux( + make_key_ref(context->user->session_keyring, 1), + context, type, description, match); + if (!IS_ERR(key_ref)) goto found; - switch (PTR_ERR(key)) { + switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ if (ret) break; case -ENOKEY: /* negative key */ - ret = key; + ret = key_ref; break; default: - err = key; + err = key_ref; break; } } @@ -517,29 +524,40 @@ struct key *search_process_keyrings(struct key_type *type, no_key: /* no key - decide on the error we're going to go for */ - key = ret ? ret : err; + key_ref = ret ? 
ret : err; found: - return key; + return key_ref; } /* end search_process_keyrings() */ +/*****************************************************************************/ +/* + * see if the key we're looking at is the target key + */ +static int lookup_user_key_possessed(const struct key *key, const void *target) +{ + return key == target; + +} /* end lookup_user_key_possessed() */ + /*****************************************************************************/ /* * lookup a key given a key ID from userspace with a given permissions mask * - don't create special keyrings unless so requested * - partially constructed keys aren't found unless requested */ -struct key *lookup_user_key(struct task_struct *context, key_serial_t id, - int create, int partial, key_perm_t perm) +key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, + int create, int partial, key_perm_t perm) { + key_ref_t key_ref, skey_ref; struct key *key; int ret; if (!context) context = current; - key = ERR_PTR(-ENOKEY); + key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: @@ -556,6 +574,7 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, key = context->thread_keyring; atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_PROCESS_KEYRING: @@ -572,6 +591,7 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, key = context->signal->process_keyring; atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: @@ -579,7 +599,7 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_session_keyring( - context, context->user->session_keyring); + context, context->user->session_keyring); if (ret < 0) goto error; } @@ -588,16 +608,19 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, key = rcu_dereference(context->signal->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_KEYRING: key = context->user->uid_keyring; atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: key = context->user->session_keyring; atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); break; case KEY_SPEC_GROUP_KEYRING: @@ -606,13 +629,28 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, goto error; default: - key = ERR_PTR(-EINVAL); + key_ref = ERR_PTR(-EINVAL); if (id < 1) goto error; key = key_lookup(id); - if (IS_ERR(key)) + if (IS_ERR(key)) { + key_ref = ERR_PTR(PTR_ERR(key)); goto error; + } + + key_ref = make_key_ref(key, 0); + + /* check to see if we possess the key */ + skey_ref = search_process_keyrings(key->type, key, + lookup_user_key_possessed, + current); + + if (!IS_ERR(skey_ref)) { + key_put(key); + key_ref = skey_ref; + } + break; } @@ -630,15 +668,15 @@ struct key *lookup_user_key(struct task_struct *context, key_serial_t id, /* check the permissions */ ret = -EACCES; - if (!key_task_permission(key, context, perm)) + if (!key_task_permission(key_ref, context, perm)) goto invalid_key; - error: - return key; +error: + return key_ref; - invalid_key: - key_put(key); - key = ERR_PTR(ret); +invalid_key: + key_ref_put(key_ref); + key_ref = ERR_PTR(ret); goto error; } /* end lookup_user_key() */ @@ -694,9 +732,9 @@ long join_session_keyring(const char *name) ret = keyring->serial; key_put(keyring); - error2: +error2: 
up(&key_session_sem); - error: +error: return ret; } /* end join_session_keyring() */ diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 90c1506d007c..e6dd366d43a3 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -129,7 +129,7 @@ static struct key *__request_key_construction(struct key_type *type, /* create a key and add it to the queue */ key = key_alloc(type, description, - current->fsuid, current->fsgid, KEY_USR_ALL, 0); + current->fsuid, current->fsgid, KEY_POS_ALL, 0); if (IS_ERR(key)) goto alloc_failed; @@ -365,14 +365,24 @@ struct key *request_key_and_link(struct key_type *type, { struct key_user *user; struct key *key; + key_ref_t key_ref; kenter("%s,%s,%s,%p", type->name, description, callout_info, dest_keyring); /* search all the process keyrings for a key */ - key = search_process_keyrings(type, description, type->match, current); + key_ref = search_process_keyrings(type, description, type->match, + current); - if (PTR_ERR(key) == -EAGAIN) { + kdebug("search 1: %p", key_ref); + + if (!IS_ERR(key_ref)) { + key = key_ref_to_ptr(key_ref); + } + else if (PTR_ERR(key_ref) != -EAGAIN) { + key = ERR_PTR(PTR_ERR(key_ref)); + } + else { /* the search failed, but the keyrings were searchable, so we * should consult userspace if we can */ key = ERR_PTR(-ENOKEY); @@ -384,7 +394,7 @@ struct key *request_key_and_link(struct key_type *type, if (!user) goto nomem; - do { + for (;;) { if (signal_pending(current)) goto interrupted; @@ -397,10 +407,22 @@ struct key *request_key_and_link(struct key_type *type, /* someone else made the key we want, so we need to * search again as it might now be available to us */ - key = search_process_keyrings(type, description, - type->match, current); + key_ref = search_process_keyrings(type, description, + type->match, + current); + + kdebug("search 2: %p", key_ref); - } while (PTR_ERR(key) == -EAGAIN); + if (!IS_ERR(key_ref)) { + key = key_ref_to_ptr(key_ref); + break; + } + + if (PTR_ERR(key_ref) != -EAGAIN) { + key = ERR_PTR(PTR_ERR(key_ref)); + break; + } + } key_user_put(user); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index f22264632229..1ecd3d3fa9f8 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -126,7 +126,7 @@ struct key *request_key_auth_new(struct key *target, struct key **_rkakey) rkakey = key_alloc(&key_type_request_key_auth, desc, current->fsuid, current->fsgid, - KEY_USR_VIEW, 1); + KEY_POS_VIEW | KEY_USR_VIEW, 1); if (IS_ERR(rkakey)) { key_put(keyring); kleave("= %ld", PTR_ERR(rkakey)); -- cgit v1.2.3 From 8cf14af0a740fb7e9f94a203b5a989beb875d58f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2005 20:21:11 -0700 Subject: [SPARC64]: Convert to use generic exception table support. The funny "range" exception table entries we had were only used by the compat layer socketcall assembly, and it wasn't even needed there. For free we now get proper exception table sorting and fast binary searching. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/sys32.S | 145 ++++++++++++++++++++++------------------ arch/sparc64/kernel/traps.c | 24 +++---- arch/sparc64/kernel/unaligned.c | 9 ++- arch/sparc64/mm/Makefile | 2 +- arch/sparc64/mm/extable.c | 80 ---------------------- arch/sparc64/mm/fault.c | 10 +-- include/asm-sparc64/uaccess.h | 17 +---- 7 files changed, 101 insertions(+), 186 deletions(-) delete mode 100644 arch/sparc64/mm/extable.c (limited to 'include') diff --git a/arch/sparc64/kernel/sys32.S b/arch/sparc64/kernel/sys32.S index 5f9e4fae612e..4fb99e0bc7c3 100644 --- a/arch/sparc64/kernel/sys32.S +++ b/arch/sparc64/kernel/sys32.S @@ -158,163 +158,163 @@ sys32_socketcall: /* %o0=call, %o1=args */ jmpl %g2 + %o0, %g0 nop - /* Each entry is exactly 32 bytes. */ .align 32 __socketcall_table_begin: + + /* Each entry is exactly 32 bytes. */ do_sys_socket: /* sys_socket(int, int, int) */ - ldswa [%o1 + 0x0] %asi, %o0 +1: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_socket), %g1 - ldswa [%o1 + 0x8] %asi, %o2 +2: ldswa [%o1 + 0x8] %asi, %o2 jmpl %g1 + %lo(sys_socket), %g0 - ldswa [%o1 + 0x4] %asi, %o1 +3: ldswa [%o1 + 0x4] %asi, %o1 nop nop nop do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */ - ldswa [%o1 + 0x0] %asi, %o0 +4: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_bind), %g1 - ldswa [%o1 + 0x8] %asi, %o2 +5: ldswa [%o1 + 0x8] %asi, %o2 jmpl %g1 + %lo(sys_bind), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +6: lduwa [%o1 + 0x4] %asi, %o1 nop nop nop do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */ - ldswa [%o1 + 0x0] %asi, %o0 +7: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_connect), %g1 - ldswa [%o1 + 0x8] %asi, %o2 +8: ldswa [%o1 + 0x8] %asi, %o2 jmpl %g1 + %lo(sys_connect), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +9: lduwa [%o1 + 0x4] %asi, %o1 nop nop nop do_sys_listen: /* sys_listen(int, int) */ - ldswa [%o1 + 0x0] %asi, %o0 +10: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_listen), %g1 jmpl %g1 + %lo(sys_listen), %g0 - ldswa [%o1 + 0x4] %asi, %o1 +11: ldswa [%o1 + 0x4] %asi, %o1 nop nop nop nop do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */ - ldswa [%o1 + 0x0] %asi, %o0 +12: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_accept), %g1 - lduwa [%o1 + 0x8] %asi, %o2 +13: lduwa [%o1 + 0x8] %asi, %o2 jmpl %g1 + %lo(sys_accept), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +14: lduwa [%o1 + 0x4] %asi, %o1 nop nop nop do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */ - ldswa [%o1 + 0x0] %asi, %o0 +15: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_getsockname), %g1 - lduwa [%o1 + 0x8] %asi, %o2 +16: lduwa [%o1 + 0x8] %asi, %o2 jmpl %g1 + %lo(sys_getsockname), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +17: lduwa [%o1 + 0x4] %asi, %o1 nop nop nop do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */ - ldswa [%o1 + 0x0] %asi, %o0 +18: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_getpeername), %g1 - lduwa [%o1 + 0x8] %asi, %o2 +19: lduwa [%o1 + 0x8] %asi, %o2 jmpl %g1 + %lo(sys_getpeername), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +20: lduwa [%o1 + 0x4] %asi, %o1 nop nop nop do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */ - ldswa [%o1 + 0x0] %asi, %o0 +21: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_socketpair), %g1 - ldswa [%o1 + 0x8] %asi, %o2 - lduwa [%o1 + 0xc] %asi, %o3 +22: ldswa [%o1 + 0x8] %asi, %o2 +23: lduwa [%o1 + 0xc] %asi, %o3 jmpl %g1 + %lo(sys_socketpair), %g0 - ldswa [%o1 + 0x4] %asi, %o1 +24: ldswa [%o1 + 0x4] %asi, %o1 nop nop do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */ - ldswa [%o1 + 0x0] %asi, %o0 +25: ldswa [%o1 + 0x0] %asi, %o0 sethi 
%hi(sys_send), %g1 - lduwa [%o1 + 0x8] %asi, %o2 - lduwa [%o1 + 0xc] %asi, %o3 +26: lduwa [%o1 + 0x8] %asi, %o2 +27: lduwa [%o1 + 0xc] %asi, %o3 jmpl %g1 + %lo(sys_send), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +28: lduwa [%o1 + 0x4] %asi, %o1 nop nop do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */ - ldswa [%o1 + 0x0] %asi, %o0 +29: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_recv), %g1 - lduwa [%o1 + 0x8] %asi, %o2 - lduwa [%o1 + 0xc] %asi, %o3 +30: lduwa [%o1 + 0x8] %asi, %o2 +31: lduwa [%o1 + 0xc] %asi, %o3 jmpl %g1 + %lo(sys_recv), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +32: lduwa [%o1 + 0x4] %asi, %o1 nop nop do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */ - ldswa [%o1 + 0x0] %asi, %o0 +33: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_sendto), %g1 - lduwa [%o1 + 0x8] %asi, %o2 - lduwa [%o1 + 0xc] %asi, %o3 - lduwa [%o1 + 0x10] %asi, %o4 - ldswa [%o1 + 0x14] %asi, %o5 +34: lduwa [%o1 + 0x8] %asi, %o2 +35: lduwa [%o1 + 0xc] %asi, %o3 +36: lduwa [%o1 + 0x10] %asi, %o4 +37: ldswa [%o1 + 0x14] %asi, %o5 jmpl %g1 + %lo(sys_sendto), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +38: lduwa [%o1 + 0x4] %asi, %o1 do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */ - ldswa [%o1 + 0x0] %asi, %o0 +39: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_recvfrom), %g1 - lduwa [%o1 + 0x8] %asi, %o2 - lduwa [%o1 + 0xc] %asi, %o3 - lduwa [%o1 + 0x10] %asi, %o4 - lduwa [%o1 + 0x14] %asi, %o5 +40: lduwa [%o1 + 0x8] %asi, %o2 +41: lduwa [%o1 + 0xc] %asi, %o3 +42: lduwa [%o1 + 0x10] %asi, %o4 +43: lduwa [%o1 + 0x14] %asi, %o5 jmpl %g1 + %lo(sys_recvfrom), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +44: lduwa [%o1 + 0x4] %asi, %o1 do_sys_shutdown: /* sys_shutdown(int, int) */ - ldswa [%o1 + 0x0] %asi, %o0 +45: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(sys_shutdown), %g1 jmpl %g1 + %lo(sys_shutdown), %g0 - ldswa [%o1 + 0x4] %asi, %o1 +46: ldswa [%o1 + 0x4] %asi, %o1 nop nop nop nop do_sys_setsockopt: /* compat_sys_setsockopt(int, int, int, char *, int) */ - ldswa [%o1 + 0x0] %asi, %o0 +47: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(compat_sys_setsockopt), %g1 - ldswa [%o1 + 0x8] %asi, %o2 - lduwa [%o1 + 0xc] %asi, %o3 - ldswa [%o1 + 0x10] %asi, %o4 +48: ldswa [%o1 + 0x8] %asi, %o2 +49: lduwa [%o1 + 0xc] %asi, %o3 +50: ldswa [%o1 + 0x10] %asi, %o4 jmpl %g1 + %lo(compat_sys_setsockopt), %g0 - ldswa [%o1 + 0x4] %asi, %o1 +51: ldswa [%o1 + 0x4] %asi, %o1 nop do_sys_getsockopt: /* compat_sys_getsockopt(int, int, int, u32, u32) */ - ldswa [%o1 + 0x0] %asi, %o0 +52: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(compat_sys_getsockopt), %g1 - ldswa [%o1 + 0x8] %asi, %o2 - lduwa [%o1 + 0xc] %asi, %o3 - lduwa [%o1 + 0x10] %asi, %o4 +53: ldswa [%o1 + 0x8] %asi, %o2 +54: lduwa [%o1 + 0xc] %asi, %o3 +55: lduwa [%o1 + 0x10] %asi, %o4 jmpl %g1 + %lo(compat_sys_getsockopt), %g0 - ldswa [%o1 + 0x4] %asi, %o1 +56: ldswa [%o1 + 0x4] %asi, %o1 nop do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */ - ldswa [%o1 + 0x0] %asi, %o0 +57: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(compat_sys_sendmsg), %g1 - lduwa [%o1 + 0x8] %asi, %o2 +58: lduwa [%o1 + 0x8] %asi, %o2 jmpl %g1 + %lo(compat_sys_sendmsg), %g0 - lduwa [%o1 + 0x4] %asi, %o1 +59: lduwa [%o1 + 0x4] %asi, %o1 nop nop nop do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */ - ldswa [%o1 + 0x0] %asi, %o0 +60: ldswa [%o1 + 0x0] %asi, %o0 sethi %hi(compat_sys_recvmsg), %g1 - lduwa [%o1 + 0x8] %asi, %o2 +61: lduwa [%o1 + 0x8] %asi, %o2 jmpl %g1 + %lo(compat_sys_recvmsg), %g0 - lduwa [%o1 + 0x4] %asi, %o1 
+62: lduwa [%o1 + 0x4] %asi, %o1 nop nop nop -__socketcall_table_end: do_einval: retl @@ -325,5 +325,20 @@ do_efault: .section __ex_table .align 4 - .word __socketcall_table_begin, 0, __socketcall_table_end, do_efault + .word 1b, do_efault, 2b, do_efault, 3b, do_efault, 4b, do_efault + .word 5b, do_efault, 6b, do_efault, 7b, do_efault, 8b, do_efault + .word 9b, do_efault, 10b, do_efault, 11b, do_efault, 12b, do_efault + .word 13b, do_efault, 14b, do_efault, 15b, do_efault, 16b, do_efault + .word 17b, do_efault, 18b, do_efault, 19b, do_efault, 20b, do_efault + .word 21b, do_efault, 22b, do_efault, 23b, do_efault, 24b, do_efault + .word 25b, do_efault, 26b, do_efault, 27b, do_efault, 28b, do_efault + .word 29b, do_efault, 30b, do_efault, 31b, do_efault, 32b, do_efault + .word 33b, do_efault, 34b, do_efault, 35b, do_efault, 36b, do_efault + .word 37b, do_efault, 38b, do_efault, 39b, do_efault, 40b, do_efault + .word 41b, do_efault, 42b, do_efault, 43b, do_efault, 44b, do_efault + .word 45b, do_efault, 46b, do_efault, 47b, do_efault, 48b, do_efault + .word 49b, do_efault, 50b, do_efault, 51b, do_efault, 52b, do_efault + .word 53b, do_efault, 54b, do_efault, 55b, do_efault, 56b, do_efault + .word 57b, do_efault, 58b, do_efault, 59b, do_efault, 60b, do_efault + .word 61b, do_efault, 62b, do_efault .previous diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index f8e7005fede9..1aa15990f5af 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -189,19 +189,18 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un if (regs->tstate & TSTATE_PRIV) { /* Test if this comes from uaccess places. */ - unsigned long fixup; - unsigned long g2 = regs->u_regs[UREG_G2]; + const struct exception_table_entry *entry; - if ((fixup = search_extables_range(regs->tpc, &g2))) { - /* Ouch, somebody is trying ugly VM hole tricks on us... */ + entry = search_exception_tables(regs->tpc); + if (entry) { + /* Ouch, somebody is trying VM hole tricks on us... */ #ifdef DEBUG_EXCEPTIONS printk("Exception: PC<%016lx> faddr\n", regs->tpc); - printk("EX_TABLE: insn<%016lx> fixup<%016lx> " - "g2<%016lx>\n", regs->tpc, fixup, g2); + printk("EX_TABLE: insn<%016lx> fixup<%016lx>\n", + regs->tpc, entry->fixup); #endif - regs->tpc = fixup; + regs->tpc = entry->fixup; regs->tnpc = regs->tpc + 4; - regs->u_regs[UREG_G2] = g2; return; } /* Shit... */ @@ -1610,10 +1609,10 @@ void cheetah_deferred_handler(struct pt_regs *regs, unsigned long afsr, unsigned /* OK, usermode access. */ recoverable = 1; } else { - unsigned long g2 = regs->u_regs[UREG_G2]; - unsigned long fixup = search_extables_range(regs->tpc, &g2); + const struct exception_table_entry *entry; - if (fixup != 0UL) { + entry = search_exception_tables(regs->tpc); + if (entry) { /* OK, kernel access to userspace. */ recoverable = 1; @@ -1632,9 +1631,8 @@ void cheetah_deferred_handler(struct pt_regs *regs, unsigned long afsr, unsigned * recoverable condition. 
*/ if (recoverable) { - regs->tpc = fixup; + regs->tpc = entry->fixup; regs->tnpc = regs->tpc + 4; - regs->u_regs[UREG_G2] = g2; } } } diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index 02af08ffec8f..9d6be20f4125 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -246,10 +246,10 @@ void kernel_mna_trap_fault(void) { struct pt_regs *regs = current_thread_info()->kern_una_regs; unsigned int insn = current_thread_info()->kern_una_insn; - unsigned long g2 = regs->u_regs[UREG_G2]; - unsigned long fixup = search_extables_range(regs->tpc, &g2); + const struct exception_table_entry *entry; - if (!fixup) { + entry = search_exception_tables(regs->tpc); + if (!entry) { unsigned long address; address = compute_effective_address(regs, insn, @@ -270,9 +270,8 @@ void kernel_mna_trap_fault(void) die_if_kernel("Oops", regs); /* Not reached */ } - regs->tpc = fixup; + regs->tpc = entry->fixup; regs->tnpc = regs->tpc + 4; - regs->u_regs [UREG_G2] = g2; regs->tstate &= ~TSTATE_ASI; regs->tstate |= (ASI_AIUS << 24UL); diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile index cda87333a77b..9d0960e69f48 100644 --- a/arch/sparc64/mm/Makefile +++ b/arch/sparc64/mm/Makefile @@ -5,6 +5,6 @@ EXTRA_AFLAGS := -ansi EXTRA_CFLAGS := -Werror -obj-y := ultra.o tlb.o fault.o init.o generic.o extable.o +obj-y := ultra.o tlb.o fault.o init.o generic.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/sparc64/mm/extable.c b/arch/sparc64/mm/extable.c deleted file mode 100644 index ec334297ff4f..000000000000 --- a/arch/sparc64/mm/extable.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * linux/arch/sparc64/mm/extable.c - */ - -#include -#include -#include - -extern const struct exception_table_entry __start___ex_table[]; -extern const struct exception_table_entry __stop___ex_table[]; - -void sort_extable(struct exception_table_entry *start, - struct exception_table_entry *finish) -{ -} - -/* Caller knows they are in a range if ret->fixup == 0 */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *start, - const struct exception_table_entry *last, - unsigned long value) -{ - const struct exception_table_entry *walk; - - /* Single insn entries are encoded as: - * word 1: insn address - * word 2: fixup code address - * - * Range entries are encoded as: - * word 1: first insn address - * word 2: 0 - * word 3: last insn address + 4 bytes - * word 4: fixup code address - * - * See asm/uaccess.h for more details. - */ - - /* 1. Try to find an exact match. */ - for (walk = start; walk <= last; walk++) { - if (walk->fixup == 0) { - /* A range entry, skip both parts. */ - walk++; - continue; - } - - if (walk->insn == value) - return walk; - } - - /* 2. Try to find a range match. */ - for (walk = start; walk <= (last - 1); walk++) { - if (walk->fixup) - continue; - - if (walk[0].insn <= value && walk[1].insn > value) - return walk; - - walk++; - } - - return NULL; -} - -/* Special extable search, which handles ranges. Returns fixup */ -unsigned long search_extables_range(unsigned long addr, unsigned long *g2) -{ - const struct exception_table_entry *entry; - - entry = search_exception_tables(addr); - if (!entry) - return 0; - - /* Inside range? 
Fix g2 and return correct fixup */ - if (!entry->fixup) { - *g2 = (addr - entry->insn) / 4; - return (entry + 1)->fixup; - } - - return entry->fixup; -} diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index db1e3310e907..59dc9a2ece5a 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -242,7 +242,6 @@ static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn) static void do_kernel_fault(struct pt_regs *regs, int si_code, int fault_code, unsigned int insn, unsigned long address) { - unsigned long g2; unsigned char asi = ASI_P; if ((!insn) && (regs->tstate & TSTATE_PRIV)) @@ -273,11 +272,9 @@ static void do_kernel_fault(struct pt_regs *regs, int si_code, int fault_code, } } - g2 = regs->u_regs[UREG_G2]; - /* Is this in ex_table? */ if (regs->tstate & TSTATE_PRIV) { - unsigned long fixup; + const struct exception_table_entry *entry; if (asi == ASI_P && (insn & 0xc0800000) == 0xc0800000) { if (insn & 0x2000) @@ -288,10 +285,9 @@ static void do_kernel_fault(struct pt_regs *regs, int si_code, int fault_code, /* Look in asi.h: All _S asis have LS bit set */ if ((asi & 0x1) && - (fixup = search_extables_range(regs->tpc, &g2))) { - regs->tpc = fixup; + (entry = search_exception_tables(regs->tpc))) { + regs->tpc = entry->fixup; regs->tnpc = regs->tpc + 4; - regs->u_regs[UREG_G2] = g2; return; } } else { diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h index 80a65d7e3dbf..c099aa339784 100644 --- a/include/asm-sparc64/uaccess.h +++ b/include/asm-sparc64/uaccess.h @@ -70,25 +70,12 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si * with the main instruction path. This means when everything is well, * we don't even have to jump over them. Further, they do not intrude * on our cache or tlb entries. - * - * There is a special way how to put a range of potentially faulting - * insns (like twenty ldd/std's with now intervening other instructions) - * You specify address of first in insn and 0 in fixup and in the next - * exception_table_entry you specify last potentially faulting insn + 1 - * and in fixup the routine which should handle the fault. - * That fixup code will get - * (faulting_insn_address - first_insn_in_the_range_address)/4 - * in %g2 (ie. index of the faulting instruction in the range). */ -struct exception_table_entry -{ - unsigned insn, fixup; +struct exception_table_entry { + unsigned int insn, fixup; }; -/* Special exable search, which handles ranges. Returns fixup */ -unsigned long search_extables_range(unsigned long addr, unsigned long *g2); - extern void __ret_efault(void); /* Uh, these should become the main single-value transfer routines.. -- cgit v1.2.3 From 5fd29752f09cabff582f65c0ce35518db4c64937 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2005 20:41:45 -0700 Subject: [SPARC64]: Fix fault handling in unaligned trap handler. We were not calling kernel_mna_trap_fault() correctly. Instead of being fancy, just return 0 vs. -EFAULT from the assembler stubs, and handle that return value as appropriate. Create an "__retl_efault" stub for assembler exception table entries and use it where possible. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/head.S | 7 ++-- arch/sparc64/kernel/sys32.S | 57 ++++++++++++++++++------------- arch/sparc64/kernel/una_asm.S | 65 ++++++++++++++++-------------------- arch/sparc64/kernel/unaligned.c | 36 +++++++++++--------- arch/sparc64/lib/strncpy_from_user.S | 16 ++++----- include/asm-sparc64/uaccess.h | 1 + 6 files changed, 95 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index ecc748fb9ad7..89406f9649a9 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -540,8 +540,11 @@ bootup_user_stack_end: prom_tba: .xword 0 tlb_type: .word 0 /* Must NOT end up in BSS */ .section ".fixup",#alloc,#execinstr - .globl __ret_efault + + .globl __ret_efault, __retl_efault __ret_efault: ret restore %g0, -EFAULT, %o0 - +__retl_efault: + retl + mov -EFAULT, %o0 diff --git a/arch/sparc64/kernel/sys32.S b/arch/sparc64/kernel/sys32.S index 4fb99e0bc7c3..9cd272ac3ac1 100644 --- a/arch/sparc64/kernel/sys32.S +++ b/arch/sparc64/kernel/sys32.S @@ -157,6 +157,9 @@ sys32_socketcall: /* %o0=call, %o1=args */ or %g2, %lo(__socketcall_table_begin), %g2 jmpl %g2 + %o0, %g0 nop +do_einval: + retl + mov -EINVAL, %o0 .align 32 __socketcall_table_begin: @@ -316,29 +319,37 @@ do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) nop nop -do_einval: - retl - mov -EINVAL, %o0 -do_efault: - retl - mov -EFAULT, %o0 - .section __ex_table .align 4 - .word 1b, do_efault, 2b, do_efault, 3b, do_efault, 4b, do_efault - .word 5b, do_efault, 6b, do_efault, 7b, do_efault, 8b, do_efault - .word 9b, do_efault, 10b, do_efault, 11b, do_efault, 12b, do_efault - .word 13b, do_efault, 14b, do_efault, 15b, do_efault, 16b, do_efault - .word 17b, do_efault, 18b, do_efault, 19b, do_efault, 20b, do_efault - .word 21b, do_efault, 22b, do_efault, 23b, do_efault, 24b, do_efault - .word 25b, do_efault, 26b, do_efault, 27b, do_efault, 28b, do_efault - .word 29b, do_efault, 30b, do_efault, 31b, do_efault, 32b, do_efault - .word 33b, do_efault, 34b, do_efault, 35b, do_efault, 36b, do_efault - .word 37b, do_efault, 38b, do_efault, 39b, do_efault, 40b, do_efault - .word 41b, do_efault, 42b, do_efault, 43b, do_efault, 44b, do_efault - .word 45b, do_efault, 46b, do_efault, 47b, do_efault, 48b, do_efault - .word 49b, do_efault, 50b, do_efault, 51b, do_efault, 52b, do_efault - .word 53b, do_efault, 54b, do_efault, 55b, do_efault, 56b, do_efault - .word 57b, do_efault, 58b, do_efault, 59b, do_efault, 60b, do_efault - .word 61b, do_efault, 62b, do_efault + .word 1b, __retl_efault, 2b, __retl_efault + .word 3b, __retl_efault, 4b, __retl_efault + .word 5b, __retl_efault, 6b, __retl_efault + .word 7b, __retl_efault, 8b, __retl_efault + .word 9b, __retl_efault, 10b, __retl_efault + .word 11b, __retl_efault, 12b, __retl_efault + .word 13b, __retl_efault, 14b, __retl_efault + .word 15b, __retl_efault, 16b, __retl_efault + .word 17b, __retl_efault, 18b, __retl_efault + .word 19b, __retl_efault, 20b, __retl_efault + .word 21b, __retl_efault, 22b, __retl_efault + .word 23b, __retl_efault, 24b, __retl_efault + .word 25b, __retl_efault, 26b, __retl_efault + .word 27b, __retl_efault, 28b, __retl_efault + .word 29b, __retl_efault, 30b, __retl_efault + .word 31b, __retl_efault, 32b, __retl_efault + .word 33b, __retl_efault, 34b, __retl_efault + .word 35b, __retl_efault, 36b, __retl_efault + .word 37b, __retl_efault, 38b, __retl_efault + .word 39b, __retl_efault, 40b, __retl_efault + .word 41b, __retl_efault, 42b, __retl_efault + 
.word 43b, __retl_efault, 44b, __retl_efault + .word 45b, __retl_efault, 46b, __retl_efault + .word 47b, __retl_efault, 48b, __retl_efault + .word 49b, __retl_efault, 50b, __retl_efault + .word 51b, __retl_efault, 52b, __retl_efault + .word 53b, __retl_efault, 54b, __retl_efault + .word 55b, __retl_efault, 56b, __retl_efault + .word 57b, __retl_efault, 58b, __retl_efault + .word 59b, __retl_efault, 60b, __retl_efault + .word 61b, __retl_efault, 62b, __retl_efault .previous diff --git a/arch/sparc64/kernel/una_asm.S b/arch/sparc64/kernel/una_asm.S index da48400bcc95..1f5b5b708ce7 100644 --- a/arch/sparc64/kernel/una_asm.S +++ b/arch/sparc64/kernel/una_asm.S @@ -6,13 +6,6 @@ .text -kernel_unaligned_trap_fault: - call kernel_mna_trap_fault - nop - retl - nop - .size kern_unaligned_trap_fault, .-kern_unaligned_trap_fault - .globl __do_int_store __do_int_store: rd %asi, %o4 @@ -51,24 +44,24 @@ __do_int_store: 0: wr %o4, 0x0, %asi retl - nop + mov 0, %o0 .size __do_int_store, .-__do_int_store .section __ex_table - .word 4b, kernel_unaligned_trap_fault - .word 5b, kernel_unaligned_trap_fault - .word 6b, kernel_unaligned_trap_fault - .word 7b, kernel_unaligned_trap_fault - .word 8b, kernel_unaligned_trap_fault - .word 9b, kernel_unaligned_trap_fault - .word 10b, kernel_unaligned_trap_fault - .word 11b, kernel_unaligned_trap_fault - .word 12b, kernel_unaligned_trap_fault - .word 13b, kernel_unaligned_trap_fault - .word 14b, kernel_unaligned_trap_fault - .word 15b, kernel_unaligned_trap_fault - .word 16b, kernel_unaligned_trap_fault - .word 17b, kernel_unaligned_trap_fault + .word 4b, __retl_efault + .word 5b, __retl_efault + .word 6b, __retl_efault + .word 7b, __retl_efault + .word 8b, __retl_efault + .word 9b, __retl_efault + .word 10b, __retl_efault + .word 11b, __retl_efault + .word 12b, __retl_efault + .word 13b, __retl_efault + .word 14b, __retl_efault + .word 15b, __retl_efault + .word 16b, __retl_efault + .word 17b, __retl_efault .previous .globl do_int_load @@ -133,21 +126,21 @@ do_int_load: 0: wr %o5, 0x0, %asi retl - nop + mov 0, %o0 .size __do_int_load, .-__do_int_load .section __ex_table - .word 4b, kernel_unaligned_trap_fault - .word 5b, kernel_unaligned_trap_fault - .word 6b, kernel_unaligned_trap_fault - .word 7b, kernel_unaligned_trap_fault - .word 8b, kernel_unaligned_trap_fault - .word 9b, kernel_unaligned_trap_fault - .word 10b, kernel_unaligned_trap_fault - .word 11b, kernel_unaligned_trap_fault - .word 12b, kernel_unaligned_trap_fault - .word 13b, kernel_unaligned_trap_fault - .word 14b, kernel_unaligned_trap_fault - .word 15b, kernel_unaligned_trap_fault - .word 16b, kernel_unaligned_trap_fault + .word 4b, __retl_efault + .word 5b, __retl_efault + .word 6b, __retl_efault + .word 7b, __retl_efault + .word 8b, __retl_efault + .word 9b, __retl_efault + .word 10b, __retl_efault + .word 11b, __retl_efault + .word 12b, __retl_efault + .word 13b, __retl_efault + .word 14b, __retl_efault + .word 15b, __retl_efault + .word 16b, __retl_efault .previous diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index 9d6be20f4125..70faf630603b 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -180,14 +180,14 @@ static void __attribute_used__ unaligned_panic(char *str, struct pt_regs *regs) die_if_kernel(str, regs); } -extern void do_int_load(unsigned long *dest_reg, int size, - unsigned long *saddr, int is_signed, int asi); +extern int do_int_load(unsigned long *dest_reg, int size, + unsigned long *saddr, int is_signed, int asi); 
-extern void __do_int_store(unsigned long *dst_addr, int size, - unsigned long src_val, int asi); +extern int __do_int_store(unsigned long *dst_addr, int size, + unsigned long src_val, int asi); -static inline void do_int_store(int reg_num, int size, unsigned long *dst_addr, - struct pt_regs *regs, int asi, int orig_asi) +static inline int do_int_store(int reg_num, int size, unsigned long *dst_addr, + struct pt_regs *regs, int asi, int orig_asi) { unsigned long zero = 0; unsigned long *src_val_p = &zero; @@ -219,7 +219,7 @@ static inline void do_int_store(int reg_num, int size, unsigned long *dst_addr, break; }; } - __do_int_store(dst_addr, size, src_val, asi); + return __do_int_store(dst_addr, size, src_val, asi); } static inline void advance(struct pt_regs *regs) @@ -242,7 +242,7 @@ static inline int ok_for_kernel(unsigned int insn) return !floating_point_load_or_store_p(insn); } -void kernel_mna_trap_fault(void) +static void kernel_mna_trap_fault(void) { struct pt_regs *regs = current_thread_info()->kern_una_regs; unsigned int insn = current_thread_info()->kern_una_insn; @@ -294,7 +294,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u kernel_mna_trap_fault(); } else { unsigned long addr, *reg_addr; - int orig_asi, asi; + int orig_asi, asi, err; addr = compute_effective_address(regs, insn, ((insn >> 25) & 0x1f)); @@ -319,9 +319,10 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u switch (dir) { case load: reg_addr = fetch_reg_addr(((insn>>25)&0x1f), regs); - do_int_load(reg_addr, size, (unsigned long *) addr, - decode_signedness(insn), asi); - if (unlikely(asi != orig_asi)) { + err = do_int_load(reg_addr, size, + (unsigned long *) addr, + decode_signedness(insn), asi); + if (likely(!err) && unlikely(asi != orig_asi)) { unsigned long val_in = *reg_addr; switch (size) { case 2: @@ -343,16 +344,19 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u break; case store: - do_int_store(((insn>>25)&0x1f), size, - (unsigned long *) addr, regs, - asi, orig_asi); + err = do_int_store(((insn>>25)&0x1f), size, + (unsigned long *) addr, regs, + asi, orig_asi); break; default: panic("Impossible kernel unaligned trap."); /* Not reached... */ } - advance(regs); + if (unlikely(err)) + kernel_mna_trap_fault(); + else + advance(regs); } } diff --git a/arch/sparc64/lib/strncpy_from_user.S b/arch/sparc64/lib/strncpy_from_user.S index 09cbbaa0ebf4..e1264650ca7a 100644 --- a/arch/sparc64/lib/strncpy_from_user.S +++ b/arch/sparc64/lib/strncpy_from_user.S @@ -125,15 +125,11 @@ __strncpy_from_user: add %o2, %o3, %o0 .size __strncpy_from_user, .-__strncpy_from_user - .section .fixup,#alloc,#execinstr - .align 4 -4: retl - mov -EFAULT, %o0 - .section __ex_table,#alloc .align 4 - .word 60b, 4b - .word 61b, 4b - .word 62b, 4b - .word 63b, 4b - .word 64b, 4b + .word 60b, __retl_efault + .word 61b, __retl_efault + .word 62b, __retl_efault + .word 63b, __retl_efault + .word 64b, __retl_efault + .previous diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h index c099aa339784..bc8ddbb1cbed 100644 --- a/include/asm-sparc64/uaccess.h +++ b/include/asm-sparc64/uaccess.h @@ -77,6 +77,7 @@ struct exception_table_entry { }; extern void __ret_efault(void); +extern void __retl_efault(void); /* Uh, these should become the main single-value transfer routines.. 
* They automatically use the right size if we just have the right -- cgit v1.2.3 From efdc1e2083e04cc70721d55803889b346c1a3de2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2005 21:06:47 -0700 Subject: [SPARC64]: Simplify user fault fixup handling. Instead of doing byte-at-a-time user accesses to figure out where the fault occurred, read the saved fault_address from the current thread structure. For the sake of defensive programming, if the fault_address does not fall into the user buffer range, simply assume the whole area faulted. This will cause the fixup for copy_from_user() to clear the entire kernel side buffer. Signed-off-by: David S. Miller --- arch/sparc64/lib/user_fixup.c | 63 ++++++++++++++++++++----------------------- arch/sparc64/mm/fault.c | 10 ++----- include/asm-sparc64/uaccess.h | 6 ++--- 3 files changed, 34 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/arch/sparc64/lib/user_fixup.c b/arch/sparc64/lib/user_fixup.c index 0278e34125db..19d1fdb17d0e 100644 --- a/arch/sparc64/lib/user_fixup.c +++ b/arch/sparc64/lib/user_fixup.c @@ -11,61 +11,56 @@ /* Calculating the exact fault address when using * block loads and stores can be very complicated. + * * Instead of trying to be clever and handling all * of the cases, just fix things up simply here. */ -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) +static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) { - char *dst = to; - const char __user *src = from; + unsigned long fault_addr = current_thread_info()->fault_address; + unsigned long end = start + size; - while (size) { - if (__get_user(*dst, src)) - break; - dst++; - src++; - size--; + if (fault_addr < start || fault_addr >= end) { + *offset = 0; + } else { + *offset = start - fault_addr; + size = end - fault_addr; } + return size; +} - if (size) - memset(dst, 0, size); +unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) +{ + unsigned long offset; + + size = compute_size((unsigned long) from, size, &offset); + if (likely(size)) + memset(to + offset, 0, size); return size; } unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) { - char __user *dst = to; - const char *src = from; - - while (size) { - if (__put_user(*src, dst)) - break; - dst++; - src++; - size--; - } + unsigned long offset; - return size; + return compute_size((unsigned long) to, size, &offset); } unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) { - char __user *dst = to; - char __user *src = from; + unsigned long fault_addr = current_thread_info()->fault_address; + unsigned long start = (unsigned long) to; + unsigned long end = start + size; - while (size) { - char tmp; + if (fault_addr >= start && fault_addr < end) + return end - fault_addr; - if (__get_user(tmp, src)) - break; - if (__put_user(tmp, dst)) - break; - dst++; - src++; - size--; - } + start = (unsigned long) from; + end = start + size; + if (fault_addr >= start && fault_addr < end) + return end - fault_addr; return size; } diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 59dc9a2ece5a..4a52e79d515f 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -457,7 +457,7 @@ good_area: } up_read(&mm->mmap_sem); - goto fault_done; + return; /* * Something tried to access memory that isn't in our memory map.. 
@@ -469,8 +469,7 @@ bad_area: handle_kernel_fault: do_kernel_fault(regs, si_code, fault_code, insn, address); - - goto fault_done; + return; /* * We ran out of memory, or some other thing happened to us that made @@ -501,9 +500,4 @@ do_sigbus: /* Kernel mode? Handle exceptions or die */ if (regs->tstate & TSTATE_PRIV) goto handle_kernel_fault; - -fault_done: - /* These values are no longer needed, clear them. */ - set_thread_fault_code(0); - current_thread_info()->fault_address = 0; } diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h index bc8ddbb1cbed..203e8eee6351 100644 --- a/include/asm-sparc64/uaccess.h +++ b/include/asm-sparc64/uaccess.h @@ -251,7 +251,7 @@ copy_from_user(void *to, const void __user *from, unsigned long size) { unsigned long ret = ___copy_from_user(to, from, size); - if (ret) + if (unlikely(ret)) ret = copy_from_user_fixup(to, from, size); return ret; } @@ -267,7 +267,7 @@ copy_to_user(void __user *to, const void *from, unsigned long size) { unsigned long ret = ___copy_to_user(to, from, size); - if (ret) + if (unlikely(ret)) ret = copy_to_user_fixup(to, from, size); return ret; } @@ -283,7 +283,7 @@ copy_in_user(void __user *to, void __user *from, unsigned long size) { unsigned long ret = ___copy_in_user(to, from, size); - if (ret) + if (unlikely(ret)) ret = copy_in_user_fixup(to, from, size); return ret; } -- cgit v1.2.3 From 801ab3c731e77324c055769491711e620100dbfb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2005 21:31:25 -0700 Subject: [SPARC]: Declare paging_init() in asm/pgtable.h Signed-off-by: David S. Miller --- arch/sparc/kernel/setup.c | 2 -- arch/sparc64/kernel/setup.c | 2 -- include/asm-sparc/pgtable.h | 2 ++ include/asm-sparc64/pgtable.h | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c index 53c192a4982f..3509e4305532 100644 --- a/arch/sparc/kernel/setup.c +++ b/arch/sparc/kernel/setup.c @@ -249,8 +249,6 @@ struct tt_entry *sparc_ttable; struct pt_regs fake_swapper_regs; -extern void paging_init(void); - void __init setup_arch(char **cmdline_p) { int i; diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 8e8baf2354df..516f4854955f 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -492,8 +492,6 @@ void register_prom_callbacks(void) "' linux-.soft2 to .soft2"); } -extern void paging_init(void); - void __init setup_arch(char **cmdline_p) { int i; diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h index 8f4f6a959651..8395ad2f1c09 100644 --- a/include/asm-sparc/pgtable.h +++ b/include/asm-sparc/pgtable.h @@ -82,6 +82,8 @@ extern unsigned long page_kernel; /* Top-level page directory */ extern pgd_t swapper_pg_dir[1024]; +extern void paging_init(void); + /* Page table for 0-4MB for everybody, on the Sparc this * holds the same as on the i386. */ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 53d612aba8d5..82273c801b07 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -341,6 +341,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p extern pgd_t swapper_pg_dir[2048]; extern pmd_t swapper_low_pmd_dir[2048]; +extern void paging_init(void); + /* These do nothing with the way I have things setup. 
*/ #define mmu_lockarea(vaddr, len) (vaddr) #define mmu_unlockarea(vaddr, len) do { } while(0) -- cgit v1.2.3 From 10147570f9eaff3920f0c67bad7244c2eb958d4f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2005 21:46:43 -0700 Subject: [SPARC64]: Kill all external references to sp_banks[] Thus, we can mark sp_banks[] static in arch/sparc64/mm/init.c Signed-off-by: David S. Miller --- arch/sparc64/kernel/traps.c | 33 ++++++--------------------------- arch/sparc64/mm/fault.c | 2 -- arch/sparc64/mm/init.c | 23 ++++++++++++++++++++++- include/asm-sparc64/page.h | 17 ----------------- include/asm-sparc64/pgtable.h | 1 + 5 files changed, 29 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 1aa15990f5af..eeb1e835c423 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -757,26 +757,12 @@ void __init cheetah_ecache_flush_init(void) ecache_flush_size = (2 * largest_size); ecache_flush_linesize = smallest_linesize; - /* Discover a physically contiguous chunk of physical - * memory in 'sp_banks' of size ecache_flush_size calculated - * above. Store the physical base of this area at - * ecache_flush_physbase. - */ - for (node = 0; ; node++) { - if (sp_banks[node].num_bytes == 0) - break; - if (sp_banks[node].num_bytes >= ecache_flush_size) { - ecache_flush_physbase = sp_banks[node].base_addr; - break; - } - } + ecache_flush_physbase = find_ecache_flush_span(ecache_flush_size); - /* Note: Zero would be a valid value of ecache_flush_physbase so - * don't use that as the success test. :-) - */ - if (sp_banks[node].num_bytes == 0) { + if (ecache_flush_physbase == ~0UL) { prom_printf("cheetah_ecache_flush_init: Cannot find %d byte " - "contiguous physical memory.\n", ecache_flush_size); + "contiguous physical memory.\n", + ecache_flush_size); prom_halt(); } @@ -1345,16 +1331,9 @@ static int cheetah_fix_ce(unsigned long physaddr) /* Return non-zero if PADDR is a valid physical memory address. */ static int cheetah_check_main_memory(unsigned long paddr) { - int i; + unsigned long vaddr = PAGE_OFFSET + paddr; - for (i = 0; ; i++) { - if (sp_banks[i].num_bytes == 0) - break; - if (paddr >= sp_banks[i].base_addr && - paddr < (sp_banks[i].base_addr + sp_banks[i].num_bytes)) - return 1; - } - return 0; + return kern_addr_valid(vaddr); } void cheetah_cee_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar) diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 80793d61f311..31fbc67719a1 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -32,8 +32,6 @@ #define ELEMENTS(arr) (sizeof (arr)/sizeof (arr[0])) -extern struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS]; - /* * To debug kernel to catch accesses to certain virtual/physical addresses. * Mode = 0 selects physical watchpoints, mode = 1 selects virtual watchpoints. 
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 63a7b9bafaff..48851a2e4fe1 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -41,7 +41,14 @@ extern void device_scan(void); -struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS]; +struct sparc_phys_banks { + unsigned long base_addr; + unsigned long num_bytes; +}; + +#define SPARC_PHYS_BANKS 32 + +static struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS]; unsigned long *sparc64_valid_addr_bitmap __read_mostly; @@ -1425,6 +1432,20 @@ void kernel_map_pages(struct page *page, int numpages, int enable) } #endif +unsigned long __init find_ecache_flush_span(unsigned long size) +{ + unsigned long i; + + for (i = 0; ; i++) { + if (sp_banks[i].num_bytes == 0) + break; + if (sp_banks[i].num_bytes >= size) + return sp_banks[i].base_addr; + } + + return ~0UL; +} + static void __init prom_probe_memory(void) { struct linux_mlist_p1275 *mlist; diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index 7f8d764abc47..5426bb28a993 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -140,23 +140,6 @@ extern unsigned long page_to_pfn(struct page *); #define virt_to_phys __pa #define phys_to_virt __va -/* The following structure is used to hold the physical - * memory configuration of the machine. This is filled in - * probe_memory() and is later used by mem_init() to set up - * mem_map[]. We statically allocate SPARC_PHYS_BANKS of - * these structs, this is arbitrary. The entry after the - * last valid one has num_bytes==0. - */ - -struct sparc_phys_banks { - unsigned long base_addr; - unsigned long num_bytes; -}; - -#define SPARC_PHYS_BANKS 32 - -extern struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS]; - #endif /* !(__ASSEMBLY__) */ #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 82273c801b07..8c6dfc6c7af6 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -342,6 +342,7 @@ extern pgd_t swapper_pg_dir[2048]; extern pmd_t swapper_low_pmd_dir[2048]; extern void paging_init(void); +extern unsigned long find_ecache_flush_span(unsigned long size); /* These do nothing with the way I have things setup. */ #define mmu_lockarea(vaddr, len) (vaddr) -- cgit v1.2.3 From 2ad4f86b60b649fd7428265c08d73a3bd360c81b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Sep 2005 00:09:02 +0100 Subject: [PATCH] arm/versatile iomem annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/arm/mach-versatile/core.c | 43 +++++++++++++++++++------------------ include/asm-arm/arch-versatile/io.h | 6 +++++- 2 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 3c8862fde51a..58c1330d8638 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -52,8 +52,9 @@ * * Setup a VA for the Versatile Vectored Interrupt Controller. 
*/ -#define VA_VIC_BASE IO_ADDRESS(VERSATILE_VIC_BASE) -#define VA_SIC_BASE IO_ADDRESS(VERSATILE_SIC_BASE) +#define __io_address(n) __io(IO_ADDRESS(n)) +#define VA_VIC_BASE __io_address(VERSATILE_VIC_BASE) +#define VA_SIC_BASE __io_address(VERSATILE_SIC_BASE) static void vic_mask_irq(unsigned int irq) { @@ -214,7 +215,7 @@ void __init versatile_map_io(void) iotable_init(versatile_io_desc, ARRAY_SIZE(versatile_io_desc)); } -#define VERSATILE_REFCOUNTER (IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET) +#define VERSATILE_REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET) /* * This is the Versatile sched_clock implementation. This has @@ -231,7 +232,7 @@ unsigned long long sched_clock(void) } -#define VERSATILE_FLASHCTRL (IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_FLASH_OFFSET) +#define VERSATILE_FLASHCTRL (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_FLASH_OFFSET) static int versatile_flash_init(void) { @@ -309,7 +310,7 @@ static struct platform_device smc91x_device = { .resource = smc91x_resources, }; -#define VERSATILE_SYSMCI (IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_MCI_OFFSET) +#define VERSATILE_SYSMCI (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_MCI_OFFSET) unsigned int mmc_status(struct device *dev) { @@ -343,11 +344,11 @@ static const struct icst307_params versatile_oscvco_params = { static void versatile_oscvco_set(struct clk *clk, struct icst307_vco vco) { - unsigned long sys_lock = IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_LOCK_OFFSET; + void __iomem *sys_lock = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_LOCK_OFFSET; #if defined(CONFIG_ARCH_VERSATILE_PB) - unsigned long sys_osc = IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSC4_OFFSET; + void __iomem *sys_osc = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSC4_OFFSET; #elif defined(CONFIG_MACH_VERSATILE_AB) - unsigned long sys_osc = IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSC1_OFFSET; + void __iomem *sys_osc = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSC1_OFFSET; #endif u32 val; @@ -483,7 +484,7 @@ static struct clcd_panel epson_2_2_in = { */ static struct clcd_panel *versatile_clcd_panel(void) { - unsigned long sys_clcd = IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_CLCD_OFFSET; + void __iomem *sys_clcd = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_CLCD_OFFSET; struct clcd_panel *panel = &vga; u32 val; @@ -510,7 +511,7 @@ static struct clcd_panel *versatile_clcd_panel(void) */ static void versatile_clcd_disable(struct clcd_fb *fb) { - unsigned long sys_clcd = IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_CLCD_OFFSET; + void __iomem *sys_clcd = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_CLCD_OFFSET; u32 val; val = readl(sys_clcd); @@ -522,7 +523,7 @@ static void versatile_clcd_disable(struct clcd_fb *fb) * If the LCD is Sanyo 2x5 in on the IB2 board, turn the back-light off */ if (fb->panel == &sanyo_2_5_in) { - unsigned long versatile_ib2_ctrl = IO_ADDRESS(VERSATILE_IB2_CTRL); + void __iomem *versatile_ib2_ctrl = __io_address(VERSATILE_IB2_CTRL); unsigned long ctrl; ctrl = readl(versatile_ib2_ctrl); @@ -537,7 +538,7 @@ static void versatile_clcd_disable(struct clcd_fb *fb) */ static void versatile_clcd_enable(struct clcd_fb *fb) { - unsigned long sys_clcd = IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_CLCD_OFFSET; + void __iomem *sys_clcd = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_CLCD_OFFSET; u32 val; val = readl(sys_clcd); @@ -571,7 +572,7 @@ static void versatile_clcd_enable(struct clcd_fb *fb) * If the LCD is Sanyo 
2x5 in on the IB2 board, turn the back-light on */ if (fb->panel == &sanyo_2_5_in) { - unsigned long versatile_ib2_ctrl = IO_ADDRESS(VERSATILE_IB2_CTRL); + void __iomem *versatile_ib2_ctrl = __io_address(VERSATILE_IB2_CTRL); unsigned long ctrl; ctrl = readl(versatile_ib2_ctrl); @@ -720,7 +721,7 @@ static struct amba_device *amba_devs[] __initdata = { }; #ifdef CONFIG_LEDS -#define VA_LEDS_BASE (IO_ADDRESS(VERSATILE_SYS_BASE) + VERSATILE_SYS_LED_OFFSET) +#define VA_LEDS_BASE (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_LED_OFFSET) static void versatile_leds_event(led_event_t ledevt) { @@ -778,11 +779,11 @@ void __init versatile_init(void) /* * Where is the timer (VA)? */ -#define TIMER0_VA_BASE IO_ADDRESS(VERSATILE_TIMER0_1_BASE) -#define TIMER1_VA_BASE (IO_ADDRESS(VERSATILE_TIMER0_1_BASE) + 0x20) -#define TIMER2_VA_BASE IO_ADDRESS(VERSATILE_TIMER2_3_BASE) -#define TIMER3_VA_BASE (IO_ADDRESS(VERSATILE_TIMER2_3_BASE) + 0x20) -#define VA_IC_BASE IO_ADDRESS(VERSATILE_VIC_BASE) +#define TIMER0_VA_BASE __io_address(VERSATILE_TIMER0_1_BASE) +#define TIMER1_VA_BASE (__io_address(VERSATILE_TIMER0_1_BASE) + 0x20) +#define TIMER2_VA_BASE __io_address(VERSATILE_TIMER2_3_BASE) +#define TIMER3_VA_BASE (__io_address(VERSATILE_TIMER2_3_BASE) + 0x20) +#define VA_IC_BASE __io_address(VERSATILE_VIC_BASE) /* * How long is the timer interval? @@ -877,12 +878,12 @@ static void __init versatile_timer_init(void) * VERSATILE_REFCLK is 32KHz * VERSATILE_TIMCLK is 1MHz */ - val = readl(IO_ADDRESS(VERSATILE_SCTL_BASE)); + val = readl(__io_address(VERSATILE_SCTL_BASE)); writel((VERSATILE_TIMCLK << VERSATILE_TIMER1_EnSel) | (VERSATILE_TIMCLK << VERSATILE_TIMER2_EnSel) | (VERSATILE_TIMCLK << VERSATILE_TIMER3_EnSel) | (VERSATILE_TIMCLK << VERSATILE_TIMER4_EnSel) | val, - IO_ADDRESS(VERSATILE_SCTL_BASE)); + __io_address(VERSATILE_SCTL_BASE)); /* * Initialise to a known state (all timers off) diff --git a/include/asm-arm/arch-versatile/io.h b/include/asm-arm/arch-versatile/io.h index 9f895bf61494..47e904cf25c7 100644 --- a/include/asm-arm/arch-versatile/io.h +++ b/include/asm-arm/arch-versatile/io.h @@ -22,7 +22,11 @@ #define IO_SPACE_LIMIT 0xffffffff -#define __io(a) ((void __iomem *)(a)) +static inline void __iomem *__io(unsigned long addr) +{ + return (void __iomem *)addr; +} +#define __io(a) __io(a) #define __mem_pci(a) (a) #define __mem_isa(a) (a) -- cgit v1.2.3 From 272cd2b71d478371e0a5bf2c76a1e50cbf178698 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Sep 2005 00:10:01 +0100 Subject: [PATCH] arm/rpc iomem annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/asm-arm/arch-rpc/hardware.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-rpc/hardware.h b/include/asm-arm/arch-rpc/hardware.h index be9754a05c19..9d7f87375aa7 100644 --- a/include/asm-arm/arch-rpc/hardware.h +++ b/include/asm-arm/arch-rpc/hardware.h @@ -15,7 +15,7 @@ #include #ifndef __ASSEMBLY__ -#define IOMEM(x) ((void __iomem *)(x)) +#define IOMEM(x) ((void __iomem *)(unsigned long)(x)) #else #define IOMEM(x) x #endif /* __ASSEMBLY__ */ @@ -52,7 +52,7 @@ /* * IO Addresses */ -#define VIDC_BASE (void __iomem *)0xe0400000 +#define VIDC_BASE IOMEM(0xe0400000) #define EXPMASK_BASE 0xe0360000 #define IOMD_BASE IOMEM(0xe0200000) #define IOC_BASE IOMEM(0xe0200000) -- cgit v1.2.3 From 0cc13a5442901835192ba47427f0f4e4d525d935 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Sep 2005 00:12:13 +0100 Subject: [PATCH] ia64 basic __user annotations - document places 
where we pass kernel address to low-level primitive that deals with kernel/user addresses - uintptr_t is unsigned long, not long Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/asm-ia64/uaccess.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-ia64/uaccess.h b/include/asm-ia64/uaccess.h index 3a7829bb5954..9adb51211c22 100644 --- a/include/asm-ia64/uaccess.h +++ b/include/asm-ia64/uaccess.h @@ -187,8 +187,8 @@ extern void __get_user_unknown (void); ({ \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ __typeof__ (size) __gu_size = (size); \ - long __gu_err = -EFAULT, __gu_val = 0; \ - \ + long __gu_err = -EFAULT; \ + unsigned long __gu_val = 0; \ if (!check || __access_ok(__gu_ptr, size, segment)) \ switch (__gu_size) { \ case 1: __get_user_size(__gu_val, __gu_ptr, 1, __gu_err); break; \ @@ -240,13 +240,13 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use static inline unsigned long __copy_to_user (void __user *to, const void *from, unsigned long count) { - return __copy_user(to, (void __user *) from, count); + return __copy_user(to, (__force void __user *) from, count); } static inline unsigned long __copy_from_user (void *to, const void __user *from, unsigned long count) { - return __copy_user((void __user *) to, from, count); + return __copy_user((__force void __user *) to, from, count); } #define __copy_to_user_inatomic __copy_to_user @@ -258,7 +258,7 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) long __cu_len = (n); \ \ if (__access_ok(__cu_to, __cu_len, get_fs())) \ - __cu_len = __copy_user(__cu_to, (void __user *) __cu_from, __cu_len); \ + __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ __cu_len; \ }) @@ -270,7 +270,7 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) \ __chk_user_ptr(__cu_from); \ if (__access_ok(__cu_from, __cu_len, get_fs())) \ - __cu_len = __copy_user((void __user *) __cu_to, __cu_from, __cu_len); \ + __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ __cu_len; \ }) -- cgit v1.2.3 From c28144763a7dcdceb2c16a5ac9c8e0022d547d28 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Sep 2005 00:16:02 +0100 Subject: [PATCH] s390 signal annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/s390/kernel/compat_signal.c | 6 +++--- arch/s390/kernel/signal.c | 4 ++-- include/asm-s390/sigcontext.h | 2 +- include/asm-s390/signal.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 7358cdb8441f..4ff6808456ea 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -143,7 +143,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -338,7 +338,7 @@ sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss, err |= __get_user(kss.ss_flags, &uss->ss_flags); if (err) return -EFAULT; - kss.ss_sp = (void *) ss_sp; + kss.ss_sp = (void __user *) ss_sp; } set_fs (KERNEL_DS); @@ -461,7 +461,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) goto badframe; err = __get_user(ss_sp, 
&frame->uc.uc_stack.ss_sp); - st.ss_sp = (void *) A((unsigned long)ss_sp); + st.ss_sp = compat_ptr(ss_sp); err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size); err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags); if (err) diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 6a3f5b7473a9..6e0110d71191 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -376,8 +376,8 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user((void *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __put_user((void __user *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->gprs[15]), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); diff --git a/include/asm-s390/sigcontext.h b/include/asm-s390/sigcontext.h index d57bc0cebdce..803545351dd8 100644 --- a/include/asm-s390/sigcontext.h +++ b/include/asm-s390/sigcontext.h @@ -61,7 +61,7 @@ typedef struct struct sigcontext { unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS]; - _sigregs *sregs; + _sigregs __user *sregs; }; diff --git a/include/asm-s390/signal.h b/include/asm-s390/signal.h index 3d6e11c6c1fd..7084626de215 100644 --- a/include/asm-s390/signal.h +++ b/include/asm-s390/signal.h @@ -165,7 +165,7 @@ struct sigaction { #endif /* __KERNEL__ */ typedef struct sigaltstack { - void *ss_sp; + void __user *ss_sp; int ss_flags; size_t ss_size; } stack_t; -- cgit v1.2.3 From ea8a918eb71b503be13b861dbe18b93932fd6b62 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Sep 2005 00:17:49 +0100 Subject: [PATCH] ppc64 get_user annotations long is not uintptr_t, unsigned long is. 
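
Not part of the patch: a minimal userspace sketch of the one-line rationale above, assuming an LP64 target where unsigned long and uintptr_t have the same width; the names x, scratch and p are invented for the example. It only shows that an unsigned long, like uintptr_t, round-trips a pointer-sized value exactly, which is why it is the natural scratch type when the fetched value may be a pointer.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int x = 42;
            /* unsigned long, like uintptr_t, is unsigned and pointer-wide here */
            unsigned long scratch = (uintptr_t)&x;
            int *p = (int *)scratch;        /* round-trips to the same pointer */

            printf("%d\n", *p);             /* prints 42 */
            return 0;
    }
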
Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/asm-ppc64/uaccess.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/uaccess.h b/include/asm-ppc64/uaccess.h index c181a60d868c..132c1276547b 100644 --- a/include/asm-ppc64/uaccess.h +++ b/include/asm-ppc64/uaccess.h @@ -164,7 +164,8 @@ do { \ #define __get_user_nocheck(x,ptr,size) \ ({ \ - long __gu_err, __gu_val; \ + long __gu_err; \ + unsigned long __gu_val; \ might_sleep(); \ __get_user_size(__gu_val,(ptr),(size),__gu_err,-EFAULT);\ (x) = (__typeof__(*(ptr)))__gu_val; \ @@ -173,7 +174,8 @@ do { \ #define __get_user_check(x,ptr,size) \ ({ \ - long __gu_err = -EFAULT, __gu_val = 0; \ + long __gu_err = -EFAULT; \ + unsigned long __gu_val = 0; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_sleep(); \ if (access_ok(VERIFY_READ,__gu_addr,size)) \ -- cgit v1.2.3 From a7625d6e49cb4fd94be7576d85422c33003101b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Sep 2005 00:34:30 +0100 Subject: [PATCH] mv64x60 iomem annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/ppc/syslib/mv64x60.c | 4 ++-- include/asm-ppc/mv64x60.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/ppc/syslib/mv64x60.c b/arch/ppc/syslib/mv64x60.c index 839f8872826f..4849850a59ed 100644 --- a/arch/ppc/syslib/mv64x60.c +++ b/arch/ppc/syslib/mv64x60.c @@ -34,7 +34,7 @@ u8 mv64x60_pci_exclude_bridge = 1; DEFINE_SPINLOCK(mv64x60_lock); static phys_addr_t mv64x60_bridge_pbase; -static void *mv64x60_bridge_vbase; +static void __iomem *mv64x60_bridge_vbase; static u32 mv64x60_bridge_type = MV64x60_TYPE_INVALID; static u32 mv64x60_bridge_rev; #if defined(CONFIG_SYSFS) && !defined(CONFIG_GT64260) @@ -938,7 +938,7 @@ mv64x60_setup_for_chip(struct mv64x60_handle *bh) * * Return the virtual address of the bridge's registers. */ -void * +void __iomem * mv64x60_get_bridge_vbase(void) { return mv64x60_bridge_vbase; diff --git a/include/asm-ppc/mv64x60.h b/include/asm-ppc/mv64x60.h index 75c2ffa26b26..ee2f9188cc64 100644 --- a/include/asm-ppc/mv64x60.h +++ b/include/asm-ppc/mv64x60.h @@ -233,7 +233,7 @@ struct mv64x60_chip_info { struct mv64x60_handle { u32 type; /* type of bridge */ u32 rev; /* revision of bridge */ - void *v_base; /* virtual base addr of bridge regs */ + void __iomem *v_base;/* virtual base addr of bridge regs */ phys_addr_t p_base; /* physical base addr of bridge regs */ u32 pci_mode_a; /* pci 0 mode: conventional pci, pci-x*/ @@ -303,7 +303,7 @@ void mv64x60_alloc_hose(struct mv64x60_handle *bh, u32 cfg_addr, u32 cfg_data, struct pci_controller **hose); int mv64x60_get_type(struct mv64x60_handle *bh); int mv64x60_setup_for_chip(struct mv64x60_handle *bh); -void *mv64x60_get_bridge_vbase(void); +void __iomem *mv64x60_get_bridge_vbase(void); u32 mv64x60_get_bridge_type(void); u32 mv64x60_get_bridge_rev(void); void mv64x60_get_mem_windows(struct mv64x60_handle *bh, -- cgit v1.2.3 From aa55a08687059aa169d10a313c41f238c2070488 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 29 Sep 2005 19:58:53 +0400 Subject: [PATCH] fix TASK_STOPPED vs TASK_NONINTERACTIVE interaction do_signal_stop: for_each_thread(t) { if (t->state < TASK_STOPPED) ++sig->group_stop_count; } However, TASK_NONINTERACTIVE > TASK_STOPPED, so this loop will not count TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE threads. See also wait_task_stopped(), which checks ->state > TASK_STOPPED. 
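
Not part of the patch: a minimal userspace sketch, using the pre-patch state values shown in the sched.h hunk that follows, of why the ordering test in do_signal_stop() skips a sleeping thread once TASK_NONINTERACTIVE is OR-ed into ->state, while a mask test (as suggested in the bracketed note below) would still see it.

    #include <stdio.h>

    /* values as in the pre-patch hunk of include/linux/sched.h */
    #define TASK_INTERRUPTIBLE      1
    #define TASK_STOPPED            4
    #define TASK_NONINTERACTIVE     64

    int main(void)
    {
            long state = TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE;  /* 65 */

            /* ordering test from do_signal_stop(): thread is not counted */
            printf("state < TASK_STOPPED: %d\n", state < TASK_STOPPED);     /* 0 */

            /* a bitmask test sees the sleeping thread regardless of extra bits */
            printf("state & TASK_INTERRUPTIBLE: %d\n",
                   !!(state & TASK_INTERRUPTIBLE));                         /* 1 */
            return 0;
    }
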
Signed-off-by: Oleg Nesterov [ We really probably should always use the appropriate bitmasks to test task states, not do it like this. Using something like #define TASK_RUNNABLE (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | TASK_NONINTERACTIVE) and then doing "if (task->state & TASK_RUNNABLE)" or similar. But the ordering of the task states is historical, and keeping the ordering does make sense regardless. ] Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 49e617fa0f66..afe6c61f13e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -110,11 +110,11 @@ extern unsigned long nr_iowait(void); #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_STOPPED 4 -#define TASK_TRACED 8 -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 -#define TASK_NONINTERACTIVE 64 +#define TASK_NONINTERACTIVE 4 +#define TASK_STOPPED 8 +#define TASK_TRACED 16 +#define EXIT_ZOMBIE 32 +#define EXIT_DEAD 64 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) -- cgit v1.2.3 From 4a8342d233a39ee582e9f7260e12d2f5fd194a05 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Sep 2005 15:18:21 -0700 Subject: Revert task flag re-ordering, add comments Roland points out that the flags end up having non-obvious dependencies elsewhere, so revert aa55a08687059aa169d10a313c41f238c2070488 and add some comments about why things are as they are. We'll just have to fix up the broken comparisons. Roland has a patch. Signed-off-by: Linus Torvalds --- include/linux/sched.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index afe6c61f13e5..c3ba31f210a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -107,14 +107,26 @@ extern unsigned long nr_iowait(void); #include +/* + * Task state bitmask. NOTE! These bits are also + * encoded in fs/proc/array.c: get_task_state(). + * + * We have two separate sets of flags: task->state + * is about runnability, while task->exit_state are + * about the task exiting. Confusing, but this way + * modifying one set can't modify the other one by + * mistake. + */ #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_NONINTERACTIVE 4 -#define TASK_STOPPED 8 -#define TASK_TRACED 16 -#define EXIT_ZOMBIE 32 -#define EXIT_DEAD 64 +#define TASK_STOPPED 4 +#define TASK_TRACED 8 +/* in tsk->exit_state */ +#define EXIT_ZOMBIE 16 +#define EXIT_DEAD 32 +/* in tsk->state again */ +#define TASK_NONINTERACTIVE 64 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) -- cgit v1.2.3 From 7d318d774789657c37a5e994a4a2cf59d4879ae7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 29 Sep 2005 22:05:55 +0200 Subject: [PATCH] Fix up TLB flush filter disabling I checked with AMD and they requested to only disable it for family 15. Also disable it for i386 too. And some style fixes. 
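[ Condensed sketch of what both hunks below do (local variable name chosen here
  for illustration): on SMP builds the TLB flush filter is now switched off only
  on family 15 (K8) parts, with a read-modify-write of the HWCR MSR:

	if (c->x86 == 15) {
		unsigned long hwcr;

		rdmsrl(MSR_K8_HWCR, hwcr);	/* MSR_K7_HWCR in the i386 hunk */
		hwcr |= 1 << 6;			/* HWCR.FFDIS */
		wrmsrl(MSR_K8_HWCR, hwcr);
	}
  ]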
Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 16 ++++++++++++++++ arch/x86_64/kernel/setup.c | 22 ++++++++++++---------- include/asm-x86_64/msr.h | 1 + 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 73aeaf5a9d4e..4c1ddf2b57cc 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -28,6 +28,22 @@ static void __init init_amd(struct cpuinfo_x86 *c) int mbytes = num_physpages >> (20-PAGE_SHIFT); int r; +#ifdef CONFIG_SMP + unsigned long value; + + /* Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K7_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K7_HWCR, value); + } +#endif + /* * FIXME: We should handle the K5 here. Set up the write * range and also turn on MSR 83 bits 4 and 31 (write alloc, diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 238f73e1a834..257f5ba17902 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -831,8 +831,6 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) #endif } -#define HWCR 0xc0010015 - static int __init init_amd(struct cpuinfo_x86 *c) { int r; @@ -841,14 +839,18 @@ static int __init init_amd(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP unsigned long value; - // Disable TLB flush filter by setting HWCR.FFDIS: - // bit 6 of msr C001_0015 - // - // Errata 63 for SH-B3 steppings - // Errata 122 for all(?) steppings - rdmsrl(HWCR, value); - value |= 1 << 6; - wrmsrl(HWCR, value); + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K8_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K8_HWCR, value); + } #endif /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index 4d727f3f5550..5a7fe3c6c3d8 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -234,6 +234,7 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_K8_TOP_MEM1 0xC001001A #define MSR_K8_TOP_MEM2 0xC001001D #define MSR_K8_SYSCFG 0xC0010010 +#define MSR_K8_HWCR 0xC0010015 /* K6 MSRs */ #define MSR_K6_EFER 0xC0000080 -- cgit v1.2.3 From dce79affd5d04e9cbabe35016eda55213b9b36f6 Mon Sep 17 00:00:00 2001 From: Daniel Jacobowitz Date: Fri, 30 Sep 2005 00:17:35 +0100 Subject: [ARM] 2941/1: Fix running legacy binaries from a soft-float root filesystem with CONFIG_IWMMXT. Patch from Daniel Jacobowitz Thread flags are inherited on fork(). In order for a binary which has the iWMMXt coprocessor enabled to run a binary which needs the FPA emulation, we need to explicitly clear TIF_USING_IWMMXT if we are not going to set it. 
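[ The fix itself is the two-line else branch in the hunk below; the general shape,
  with binary_wants_iwmmxt standing in for the EF_ARM_EABI_MASK / EF_ARM_SOFT_FLOAT
  test in the elf.h personality setup, is:

	if (binary_wants_iwmmxt)
		set_thread_flag(TIF_USING_IWMMXT);
	else
		clear_thread_flag(TIF_USING_IWMMXT);

  Without the else, a flag set for the previous iWMMXt-using image stays set
  across exec and leaks into binaries that need the FPA emulation instead. ]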
Signed-off-by: Daniel Jacobowitz Signed-off-by: Russell King --- include/asm-arm/elf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-arm/elf.h b/include/asm-arm/elf.h index a1696ba238d3..7da97a937548 100644 --- a/include/asm-arm/elf.h +++ b/include/asm-arm/elf.h @@ -124,6 +124,8 @@ do { \ if (((ex).e_flags & EF_ARM_EABI_MASK) || \ ((ex).e_flags & EF_ARM_SOFT_FLOAT)) \ set_thread_flag(TIF_USING_IWMMXT); \ + else \ + clear_thread_flag(TIF_USING_IWMMXT); \ } while (0) #endif -- cgit v1.2.3 From 13edad7a5cef1c952459742482482a6b05e1a8a1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 29 Sep 2005 17:58:26 -0700 Subject: [SPARC64]: Rewrite convoluted physical memory probing. Delete all of the code working with sp_banks[] and replace with clean acquisition and sorting of physical memory parameters from the firmware. Signed-off-by: David S. Miller --- arch/sparc64/kernel/traps.c | 2 +- arch/sparc64/mm/init.c | 302 +++++++++++++++-------------------------- include/asm-sparc64/openprom.h | 4 +- 3 files changed, 114 insertions(+), 194 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 7f190fc57545..5570e7bb22bb 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -1333,7 +1333,7 @@ static int cheetah_check_main_memory(unsigned long paddr) { unsigned long vaddr = PAGE_OFFSET + paddr; - if (vaddr > high_memory) + if (vaddr > (unsigned long) high_memory) return 0; return kern_addr_valid(vaddr); diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 48851a2e4fe1..5db50524f20d 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -41,14 +42,72 @@ extern void device_scan(void); -struct sparc_phys_banks { - unsigned long base_addr; - unsigned long num_bytes; -}; +#define MAX_BANKS 32 + +static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; +static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; +static int pavail_ents __initdata; +static int pavail_rescan_ents __initdata; + +static int cmp_p64(const void *a, const void *b) +{ + const struct linux_prom64_registers *x = a, *y = b; + + if (x->phys_addr > y->phys_addr) + return 1; + if (x->phys_addr < y->phys_addr) + return -1; + return 0; +} + +static void __init read_obp_memory(const char *property, + struct linux_prom64_registers *regs, + int *num_ents) +{ + int node = prom_finddevice("/memory"); + int prop_size = prom_getproplen(node, property); + int ents, ret, i; + + ents = prop_size / sizeof(struct linux_prom64_registers); + if (ents > MAX_BANKS) { + prom_printf("The machine has more %s property entries than " + "this kernel can support (%d).\n", + property, MAX_BANKS); + prom_halt(); + } + + ret = prom_getproperty(node, property, (char *) regs, prop_size); + if (ret == -1) { + prom_printf("Couldn't get %s property from /memory.\n"); + prom_halt(); + } + + *num_ents = ents; -#define SPARC_PHYS_BANKS 32 + /* Sanitize what we got from the firmware, by page aligning + * everything. 
+ */ + for (i = 0; i < ents; i++) { + unsigned long base, size; + + base = regs[i].phys_addr; + size = regs[i].reg_size; -static struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS]; + size &= PAGE_MASK; + if (base & ~PAGE_MASK) { + unsigned long new_base = PAGE_ALIGN(base); + + size -= new_base - base; + if ((long) size < 0L) + size = 0UL; + base = new_base; + } + regs[i].phys_addr = base; + regs[i].reg_size = size; + } + sort(regs, ents, sizeof(struct linux_prom64_registers), + cmp_p64, NULL); +} unsigned long *sparc64_valid_addr_bitmap __read_mostly; @@ -1213,14 +1272,14 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) int i; #ifdef CONFIG_DEBUG_BOOTMEM - prom_printf("bootmem_init: Scan sp_banks, "); + prom_printf("bootmem_init: Scan pavail, "); #endif bytes_avail = 0UL; - for (i = 0; sp_banks[i].num_bytes != 0; i++) { - end_of_phys_memory = sp_banks[i].base_addr + - sp_banks[i].num_bytes; - bytes_avail += sp_banks[i].num_bytes; + for (i = 0; i < pavail_ents; i++) { + end_of_phys_memory = pavail[i].phys_addr + + pavail[i].reg_size; + bytes_avail += pavail[i].reg_size; if (cmdline_memory_size) { if (bytes_avail > cmdline_memory_size) { unsigned long slack = bytes_avail - cmdline_memory_size; @@ -1228,12 +1287,15 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) bytes_avail -= slack; end_of_phys_memory -= slack; - sp_banks[i].num_bytes -= slack; - if (sp_banks[i].num_bytes == 0) { - sp_banks[i].base_addr = 0xdeadbeef; + pavail[i].reg_size -= slack; + if ((long)pavail[i].reg_size <= 0L) { + pavail[i].phys_addr = 0xdeadbeefUL; + pavail[i].reg_size = 0UL; + pavail_ents = i; } else { - sp_banks[i+1].num_bytes = 0; - sp_banks[i+1].base_addr = 0xdeadbeef; + pavail[i+1].reg_size = 0Ul; + pavail[i+1].phys_addr = 0xdeadbeefUL; + pavail_ents = i + 1; } break; } @@ -1287,12 +1349,12 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) /* Now register the available physical memory with the * allocator. 
*/ - for (i = 0; sp_banks[i].num_bytes != 0; i++) { + for (i = 0; i < pavail_ents; i++) { #ifdef CONFIG_DEBUG_BOOTMEM - prom_printf("free_bootmem(sp_banks:%d): base[%lx] size[%lx]\n", - i, sp_banks[i].base_addr, sp_banks[i].num_bytes); + prom_printf("free_bootmem(pavail:%d): base[%lx] size[%lx]\n", + i, pavail[i].phys_addr, pavail[i].reg_size); #endif - free_bootmem(sp_banks[i].base_addr, sp_banks[i].num_bytes); + free_bootmem(pavail[i].phys_addr, pavail[i].reg_size); } #ifdef CONFIG_BLK_DEV_INITRD @@ -1341,7 +1403,7 @@ static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, unsigned long alloc_bytes = 0UL; if ((vstart & ~PAGE_MASK) || (vend & ~PAGE_MASK)) { - prom_printf("kernel_map: Unaligned sp_banks[%lx:%lx]\n", + prom_printf("kernel_map: Unaligned physmem[%lx:%lx]\n", vstart, vend); prom_halt(); } @@ -1388,23 +1450,24 @@ static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, return alloc_bytes; } -extern struct linux_mlist_p1275 *prom_ptot_ptr; +static struct linux_prom64_registers pall[MAX_BANKS] __initdata; +static int pall_ents __initdata; + extern unsigned int kvmap_linear_patch[1]; static void __init kernel_physical_mapping_init(void) { - struct linux_mlist_p1275 *p = prom_ptot_ptr; - unsigned long mem_alloced = 0UL; + unsigned long i, mem_alloced = 0UL; - while (p) { + read_obp_memory("reg", &pall[0], &pall_ents); + + for (i = 0; i < pall_ents; i++) { unsigned long phys_start, phys_end; - phys_start = p->start_adr; - phys_end = phys_start + p->num_bytes; + phys_start = pall[i].phys_addr; + phys_end = phys_start + pall[i].reg_size; mem_alloced += kernel_map_range(phys_start, phys_end, PAGE_KERNEL); - - p = p->theres_more; } printk("Allocated %ld bytes for kernel page tables.\n", @@ -1434,60 +1497,14 @@ void kernel_map_pages(struct page *page, int numpages, int enable) unsigned long __init find_ecache_flush_span(unsigned long size) { - unsigned long i; - - for (i = 0; ; i++) { - if (sp_banks[i].num_bytes == 0) - break; - if (sp_banks[i].num_bytes >= size) - return sp_banks[i].base_addr; - } - - return ~0UL; -} - -static void __init prom_probe_memory(void) -{ - struct linux_mlist_p1275 *mlist; - unsigned long bytes, base_paddr, tally; int i; - i = 0; - mlist = *prom_meminfo()->p1275_available; - bytes = tally = mlist->num_bytes; - base_paddr = mlist->start_adr; - - sp_banks[0].base_addr = base_paddr; - sp_banks[0].num_bytes = bytes; - - while (mlist->theres_more != (void *) 0) { - i++; - mlist = mlist->theres_more; - bytes = mlist->num_bytes; - tally += bytes; - if (i >= SPARC_PHYS_BANKS-1) { - printk ("The machine has more banks than " - "this kernel can support\n" - "Increase the SPARC_PHYS_BANKS " - "setting (currently %d)\n", - SPARC_PHYS_BANKS); - i = SPARC_PHYS_BANKS-1; - break; - } - - sp_banks[i].base_addr = mlist->start_adr; - sp_banks[i].num_bytes = mlist->num_bytes; + for (i = 0; i < pavail_ents; i++) { + if (pavail[i].reg_size >= size) + return pavail[i].phys_addr; } - i++; - sp_banks[i].base_addr = 0xdeadbeefbeefdeadUL; - sp_banks[i].num_bytes = 0; - - /* Now mask all bank sizes on a page boundary, it is all we can - * use anyways. - */ - for (i = 0; sp_banks[i].num_bytes != 0; i++) - sp_banks[i].num_bytes &= PAGE_MASK; + return ~0UL; } /* paging_init() sets up the page tables */ @@ -1502,17 +1519,13 @@ void __init paging_init(void) unsigned long end_pfn, pages_avail, shift; unsigned long real_end, i; - prom_probe_memory(); + /* Find available physical memory... 
*/ + read_obp_memory("available", &pavail[0], &pavail_ents); phys_base = 0xffffffffffffffffUL; - for (i = 0; sp_banks[i].num_bytes != 0; i++) { - unsigned long top; + for (i = 0; i < pavail_ents; i++) + phys_base = min(phys_base, pavail[i].phys_addr); - if (sp_banks[i].base_addr < phys_base) - phys_base = sp_banks[i].base_addr; - top = sp_banks[i].base_addr + - sp_banks[i].num_bytes; - } pfn_base = phys_base >> PAGE_SHIFT; kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; @@ -1588,128 +1601,35 @@ void __init paging_init(void) device_scan(); } -/* Ok, it seems that the prom can allocate some more memory chunks - * as a side effect of some prom calls we perform during the - * boot sequence. My most likely theory is that it is from the - * prom_set_traptable() call, and OBP is allocating a scratchpad - * for saving client program register state etc. - */ -static void __init sort_memlist(struct linux_mlist_p1275 *thislist) -{ - int swapi = 0; - int i, mitr; - unsigned long tmpaddr, tmpsize; - unsigned long lowest; - - for (i = 0; thislist[i].theres_more != 0; i++) { - lowest = thislist[i].start_adr; - for (mitr = i+1; thislist[mitr-1].theres_more != 0; mitr++) - if (thislist[mitr].start_adr < lowest) { - lowest = thislist[mitr].start_adr; - swapi = mitr; - } - if (lowest == thislist[i].start_adr) - continue; - tmpaddr = thislist[swapi].start_adr; - tmpsize = thislist[swapi].num_bytes; - for (mitr = swapi; mitr > i; mitr--) { - thislist[mitr].start_adr = thislist[mitr-1].start_adr; - thislist[mitr].num_bytes = thislist[mitr-1].num_bytes; - } - thislist[i].start_adr = tmpaddr; - thislist[i].num_bytes = tmpsize; - } -} - -void __init rescan_sp_banks(void) -{ - struct linux_prom64_registers memlist[64]; - struct linux_mlist_p1275 avail[64], *mlist; - unsigned long bytes, base_paddr; - int num_regs, node = prom_finddevice("/memory"); - int i; - - num_regs = prom_getproperty(node, "available", - (char *) memlist, sizeof(memlist)); - num_regs = (num_regs / sizeof(struct linux_prom64_registers)); - for (i = 0; i < num_regs; i++) { - avail[i].start_adr = memlist[i].phys_addr; - avail[i].num_bytes = memlist[i].reg_size; - avail[i].theres_more = &avail[i + 1]; - } - avail[i - 1].theres_more = NULL; - sort_memlist(avail); - - mlist = &avail[0]; - i = 0; - bytes = mlist->num_bytes; - base_paddr = mlist->start_adr; - - sp_banks[0].base_addr = base_paddr; - sp_banks[0].num_bytes = bytes; - - while (mlist->theres_more != NULL){ - i++; - mlist = mlist->theres_more; - bytes = mlist->num_bytes; - if (i >= SPARC_PHYS_BANKS-1) { - printk ("The machine has more banks than " - "this kernel can support\n" - "Increase the SPARC_PHYS_BANKS " - "setting (currently %d)\n", - SPARC_PHYS_BANKS); - i = SPARC_PHYS_BANKS-1; - break; - } - - sp_banks[i].base_addr = mlist->start_adr; - sp_banks[i].num_bytes = mlist->num_bytes; - } - - i++; - sp_banks[i].base_addr = 0xdeadbeefbeefdeadUL; - sp_banks[i].num_bytes = 0; - - for (i = 0; sp_banks[i].num_bytes != 0; i++) - sp_banks[i].num_bytes &= PAGE_MASK; -} - static void __init taint_real_pages(void) { - struct sparc_phys_banks saved_sp_banks[SPARC_PHYS_BANKS]; int i; - for (i = 0; i < SPARC_PHYS_BANKS; i++) { - saved_sp_banks[i].base_addr = - sp_banks[i].base_addr; - saved_sp_banks[i].num_bytes = - sp_banks[i].num_bytes; - } - - rescan_sp_banks(); + read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); - /* Find changes discovered in the sp_bank rescan and + /* Find changes discovered in the physmem available rescan and * reserve the lost portions in the 
bootmem maps. */ - for (i = 0; saved_sp_banks[i].num_bytes; i++) { + for (i = 0; i < pavail_ents; i++) { unsigned long old_start, old_end; - old_start = saved_sp_banks[i].base_addr; + old_start = pavail[i].phys_addr; old_end = old_start + - saved_sp_banks[i].num_bytes; + pavail[i].reg_size; while (old_start < old_end) { int n; - for (n = 0; sp_banks[n].num_bytes; n++) { + for (n = 0; pavail_rescan_ents; n++) { unsigned long new_start, new_end; - new_start = sp_banks[n].base_addr; - new_end = new_start + sp_banks[n].num_bytes; + new_start = pavail_rescan[n].phys_addr; + new_end = new_start + + pavail_rescan[n].reg_size; if (new_start <= old_start && new_end >= (old_start + PAGE_SIZE)) { - set_bit (old_start >> 22, - sparc64_valid_addr_bitmap); + set_bit(old_start >> 22, + sparc64_valid_addr_bitmap); goto do_next_page; } } diff --git a/include/asm-sparc64/openprom.h b/include/asm-sparc64/openprom.h index 0a336901d585..b4959d2b0d99 100644 --- a/include/asm-sparc64/openprom.h +++ b/include/asm-sparc64/openprom.h @@ -186,8 +186,8 @@ struct linux_prom_registers { }; struct linux_prom64_registers { - long phys_addr; - long reg_size; + unsigned long phys_addr; + unsigned long reg_size; }; struct linux_prom_irqs { -- cgit v1.2.3 From 4cb29d18129fb425c6202ab535c3fc1856391b99 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 29 Sep 2005 18:05:28 -0700 Subject: [SPARC64]: Kill arch/sparc64/prom/memory.c No longer used. Signed-off-by: David S. Miller --- arch/sparc64/prom/Makefile | 2 +- arch/sparc64/prom/init.c | 3 - arch/sparc64/prom/memory.c | 152 -------------------------------------------- include/asm-sparc64/oplib.h | 14 ---- 4 files changed, 1 insertion(+), 170 deletions(-) delete mode 100644 arch/sparc64/prom/memory.c (limited to 'include') diff --git a/arch/sparc64/prom/Makefile b/arch/sparc64/prom/Makefile index c7898a5ee456..3d33ed27bc27 100644 --- a/arch/sparc64/prom/Makefile +++ b/arch/sparc64/prom/Makefile @@ -6,5 +6,5 @@ EXTRA_AFLAGS := -ansi EXTRA_CFLAGS := -Werror -lib-y := bootstr.o devops.o init.o memory.o misc.o \ +lib-y := bootstr.o devops.o init.o misc.o \ tree.o console.o printf.o p1275.o cif.o diff --git a/arch/sparc64/prom/init.c b/arch/sparc64/prom/init.c index 8b4b622d0909..f3cc2d8578b2 100644 --- a/arch/sparc64/prom/init.c +++ b/arch/sparc64/prom/init.c @@ -27,7 +27,6 @@ int prom_chosen_node; * failure. It gets passed the pointer to the PROM vector. */ -extern void prom_meminit(void); extern void prom_cif_init(void *, void *); void __init prom_init(void *cif_handler, void *cif_stack) @@ -90,8 +89,6 @@ void __init prom_init(void *cif_handler, void *cif_stack) printk ("PROMLIB: Sun IEEE Boot Prom %s\n", buffer + bufadjust); - prom_meminit(); - /* Initialization successful. */ return; diff --git a/arch/sparc64/prom/memory.c b/arch/sparc64/prom/memory.c deleted file mode 100644 index f4a8143e052c..000000000000 --- a/arch/sparc64/prom/memory.c +++ /dev/null @@ -1,152 +0,0 @@ -/* $Id: memory.c,v 1.5 1999/08/31 06:55:04 davem Exp $ - * memory.c: Prom routine for acquiring various bits of information - * about RAM on the machine, both virtual and physical. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#include -#include - -#include -#include - -/* This routine, for consistency, returns the ram parameters in the - * V0 prom memory descriptor format. I choose this format because I - * think it was the easiest to work with. I feel the religious - * arguments now... 
;) Also, I return the linked lists sorted to - * prevent paging_init() upset stomach as I have not yet written - * the pepto-bismol kernel module yet. - */ - -struct linux_prom64_registers prom_reg_memlist[64]; -struct linux_prom64_registers prom_reg_tmp[64]; - -struct linux_mlist_p1275 prom_phys_total[64]; -struct linux_mlist_p1275 prom_prom_taken[64]; -struct linux_mlist_p1275 prom_phys_avail[64]; - -struct linux_mlist_p1275 *prom_ptot_ptr = prom_phys_total; -struct linux_mlist_p1275 *prom_ptak_ptr = prom_prom_taken; -struct linux_mlist_p1275 *prom_pavl_ptr = prom_phys_avail; - -struct linux_mem_p1275 prom_memlist; - - -/* Internal Prom library routine to sort a linux_mlist_p1275 memory - * list. Used below in initialization. - */ -static void __init -prom_sortmemlist(struct linux_mlist_p1275 *thislist) -{ - int swapi = 0; - int i, mitr; - unsigned long tmpaddr, tmpsize; - unsigned long lowest; - - for(i=0; thislist[i].theres_more; i++) { - lowest = thislist[i].start_adr; - for(mitr = i+1; thislist[mitr-1].theres_more; mitr++) - if(thislist[mitr].start_adr < lowest) { - lowest = thislist[mitr].start_adr; - swapi = mitr; - } - if(lowest == thislist[i].start_adr) continue; - tmpaddr = thislist[swapi].start_adr; - tmpsize = thislist[swapi].num_bytes; - for(mitr = swapi; mitr > i; mitr--) { - thislist[mitr].start_adr = thislist[mitr-1].start_adr; - thislist[mitr].num_bytes = thislist[mitr-1].num_bytes; - } - thislist[i].start_adr = tmpaddr; - thislist[i].num_bytes = tmpsize; - } -} - -/* Initialize the memory lists based upon the prom version. */ -void __init prom_meminit(void) -{ - int node = 0; - unsigned int iter, num_regs; - - node = prom_finddevice("/memory"); - num_regs = prom_getproperty(node, "available", - (char *) prom_reg_memlist, - sizeof(prom_reg_memlist)); - num_regs = (num_regs/sizeof(struct linux_prom64_registers)); - for(iter=0; iter Date: Fri, 30 Sep 2005 03:29:05 +0100 Subject: [PATCH] uml get_user() NULL noise removal Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/asm-um/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h index 801710d00a40..2ee028b8de9d 100644 --- a/include/asm-um/uaccess.h +++ b/include/asm-um/uaccess.h @@ -44,7 +44,7 @@ const __typeof__(ptr) __private_ptr = ptr; \ __typeof__(*(__private_ptr)) __private_val; \ int __private_ret = -EFAULT; \ - (x) = 0; \ + (x) = (__typeof__(*(__private_ptr)))0; \ if (__copy_from_user(&__private_val, (__private_ptr), \ sizeof(*(__private_ptr))) == 0) {\ (x) = (__typeof__(*(__private_ptr))) __private_val; \ -- cgit v1.2.3 From a3ca066efb18524bf3f07137a8f8ff434022c4f2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Sep 2005 04:20:57 +0100 Subject: [PATCH] missing qualifiers in readb() et.al. on ppc Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/asm-ppc/io.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index 7eb7cf6360bd..94d83998a759 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -56,7 +56,7 @@ extern unsigned long pci_dram_offset; * is actually performed (i.e. the data has come back) before we start * executing any following instructions. 
*/ -extern inline int in_8(volatile unsigned char __iomem *addr) +extern inline int in_8(const volatile unsigned char __iomem *addr) { int ret; @@ -72,7 +72,7 @@ extern inline void out_8(volatile unsigned char __iomem *addr, int val) __asm__ __volatile__("stb%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r" (val)); } -extern inline int in_le16(volatile unsigned short __iomem *addr) +extern inline int in_le16(const volatile unsigned short __iomem *addr) { int ret; @@ -83,7 +83,7 @@ extern inline int in_le16(volatile unsigned short __iomem *addr) return ret; } -extern inline int in_be16(volatile unsigned short __iomem *addr) +extern inline int in_be16(const volatile unsigned short __iomem *addr) { int ret; @@ -104,7 +104,7 @@ extern inline void out_be16(volatile unsigned short __iomem *addr, int val) __asm__ __volatile__("sth%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r" (val)); } -extern inline unsigned in_le32(volatile unsigned __iomem *addr) +extern inline unsigned in_le32(const volatile unsigned __iomem *addr) { unsigned ret; @@ -115,7 +115,7 @@ extern inline unsigned in_le32(volatile unsigned __iomem *addr) return ret; } -extern inline unsigned in_be32(volatile unsigned __iomem *addr) +extern inline unsigned in_be32(const volatile unsigned __iomem *addr) { unsigned ret; @@ -139,7 +139,7 @@ extern inline void out_be32(volatile unsigned __iomem *addr, int val) #define readb(addr) in_8((volatile u8 *)(addr)) #define writeb(b,addr) out_8((volatile u8 *)(addr), (b)) #else -static inline __u8 readb(volatile void __iomem *addr) +static inline __u8 readb(const volatile void __iomem *addr) { return in_8(addr); } @@ -150,11 +150,11 @@ static inline void writeb(__u8 b, volatile void __iomem *addr) #endif #if defined(CONFIG_APUS) -static inline __u16 readw(volatile void __iomem *addr) +static inline __u16 readw(const volatile void __iomem *addr) { return *(__force volatile __u16 *)(addr); } -static inline __u32 readl(volatile void __iomem *addr) +static inline __u32 readl(const volatile void __iomem *addr) { return *(__force volatile __u32 *)(addr); } @@ -173,11 +173,11 @@ static inline void writel(__u32 b, volatile void __iomem *addr) #define writew(b,addr) out_le16((volatile u16 *)(addr),(b)) #define writel(b,addr) out_le32((volatile u32 *)(addr),(b)) #else -static inline __u16 readw(volatile void __iomem *addr) +static inline __u16 readw(const volatile void __iomem *addr) { return in_le16(addr); } -static inline __u32 readl(volatile void __iomem *addr) +static inline __u32 readl(const volatile void __iomem *addr) { return in_le32(addr); } -- cgit v1.2.3 From 1294b118cb53fb14515666e2b218ad5ab40318c1 Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Fri, 30 Sep 2005 10:32:19 +0400 Subject: [PATCH] x86_64: Add missing () around arguments of pte_index macro x86-64: Add missing () around arguments of pte_index macro Signed-Off-By: Alexey Kuznetsov Signed-Off-By: Kirill Korotaev Signed-off-by: Linus Torvalds --- include/asm-x86_64/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 2cb483516459..dd8711ecaf2f 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -384,7 +384,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) } #define pte_index(address) \ - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ pte_index(address)) -- cgit 
v1.2.3 From 897f15fb587fd2772b9e7ff6ec0265057f3c3975 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:55 -0700 Subject: [PATCH] aio: remove unlocked task_list test and resulting race Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it then one of them can end up referencing a freed iocb. The kick path could delete the task_list item from the wait queue before getting the ctx_lock and putting the iocb on the run list. The run path was testing the task_list item outside the lock so that it could catch ki_retry methods that return -EIOCBRETRY *without* putting the iocb on a wait queue and promising to call kick_iocb. This unlocked check could then race with the kick path to cause both to try and put the iocb on the run list. The patch stops the run path from testing task_list by requiring that any ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY users, are updated. Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 79 ++++++++++++++++++++++------------------------- include/linux/aio.h | 34 +++++++++++++++++++++++ 2 files changed, 67 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/fs/aio.c b/fs/aio.c index b8f296999c04..9edc0e4a1219 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -741,19 +741,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) ret = retry(iocb); current->io_wait = NULL; - if (-EIOCBRETRY != ret) { - if (-EIOCBQUEUED != ret) { - BUG_ON(!list_empty(&iocb->ki_wait.task_list)); - aio_complete(iocb, ret, 0); - /* must not access the iocb after this */ - } - } else { - /* - * Issue an additional retry to avoid waiting forever if - * no waits were queued (e.g. in case of a short read). - */ - if (list_empty(&iocb->ki_wait.task_list)) - kiocbSetKicked(iocb); + if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { + BUG_ON(!list_empty(&iocb->ki_wait.task_list)); + aio_complete(iocb, ret, 0); } out: spin_lock_irq(&ctx->ctx_lock); @@ -1327,8 +1317,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx) } /* - * Default retry method for aio_read (also used for first time submit) - * Responsible for updating iocb state as retries progress + * aio_p{read,write} are the default ki_retry methods for + * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially + * multiple calls to f_op->aio_read(). They loop around partial progress + * instead of returning -EIOCBRETRY because they don't have the means to call + * kick_iocb(). */ static ssize_t aio_pread(struct kiocb *iocb) { @@ -1337,25 +1330,25 @@ static ssize_t aio_pread(struct kiocb *iocb) struct inode *inode = mapping->host; ssize_t ret = 0; - ret = file->f_op->aio_read(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + do { + ret = file->f_op->aio_read(iocb, iocb->ki_buf, + iocb->ki_left, iocb->ki_pos); + /* + * Can't just depend on iocb->ki_left to determine + * whether we are done. This may have been a short read. + */ + if (ret > 0) { + iocb->ki_buf += ret; + iocb->ki_left -= ret; + } - /* - * Can't just depend on iocb->ki_left to determine - * whether we are done. This may have been a short read. - */ - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; /* - * For pipes and sockets we return once we have - * some data; for regular files we retry till we - * complete the entire read or find that we can't - * read any more data (e.g short reads).
+ * For pipes and sockets we return once we have some data; for + * regular files we retry till we complete the entire read or + * find that we can't read any more data (e.g short reads). */ - if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)) - ret = -EIOCBRETRY; - } + } while (ret > 0 && + !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); /* This means we must have transferred all that we could */ /* No need to retry anymore */ @@ -1365,27 +1358,21 @@ static ssize_t aio_pread(struct kiocb *iocb) return ret; } -/* - * Default retry method for aio_write (also used for first time submit) - * Responsible for updating iocb state as retries progress - */ +/* see aio_pread() */ static ssize_t aio_pwrite(struct kiocb *iocb) { struct file *file = iocb->ki_filp; ssize_t ret = 0; - ret = file->f_op->aio_write(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); - - if (ret > 0) { - iocb->ki_buf += ret; - iocb->ki_left -= ret; - - ret = -EIOCBRETRY; - } + do { + ret = file->f_op->aio_write(iocb, iocb->ki_buf, + iocb->ki_left, iocb->ki_pos); + if (ret > 0) { + iocb->ki_buf += ret; + iocb->ki_left -= ret; + } + } while (ret > 0); - /* This means we must have transferred all that we could */ - /* No need to retry anymore */ if ((ret == 0) || (iocb->ki_left == 0)) ret = iocb->ki_nbytes - iocb->ki_left; diff --git a/include/linux/aio.h b/include/linux/aio.h index a4d5af907f90..60def658b246 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -43,6 +43,40 @@ struct kioctx; #define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags) #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags) +/* is there a better place to document function pointer methods? */ +/** + * ki_retry - iocb forward progress callback + * @kiocb: The kiocb struct to advance by performing an operation. + * + * This callback is called when the AIO core wants a given AIO operation + * to make forward progress. The kiocb argument describes the operation + * that is to be performed. As the operation proceeds, perhaps partially, + * ki_retry is expected to update the kiocb with progress made. Typically + * ki_retry is set in the AIO core and it itself calls file_operations + * helpers. + * + * ki_retry's return value determines when the AIO operation is completed + * and an event is generated in the AIO event ring. Except the special + * return values described below, the value that is returned from ki_retry + * is transferred directly into the completion ring as the operation's + * resulting status. Once this has happened ki_retry *MUST NOT* reference + * the kiocb pointer again. + * + * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete() + * will be called on the kiocb pointer in the future. The AIO core will + * not ask the method again -- ki_retry must ensure forward progress. + * aio_complete() must be called once and only once in the future, multiple + * calls may result in undefined behaviour. + * + * If ki_retry returns -EIOCBRETRY it has made a promise that kick_iocb() + * will be called on the kiocb pointer in the future. This may happen + * through generic helpers that associate kiocb->ki_wait with a wait + * queue head that ki_retry uses via current->io_wait. It can also happen + * with custom tracking and manual calls to kick_iocb(), though that is + * discouraged. In either case, kick_iocb() must be called once and only + * once. ki_retry must ensure forward progress, the AIO core will wait + * indefinitely for kick_iocb() to be called. 
+ */ struct kiocb { struct list_head ki_run_list; long ki_flags; -- cgit v1.2.3 From d96c4e7bb039ae16c9b7e6809feb4fcfc45fcc87 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 30 Sep 2005 11:58:57 -0700 Subject: [PATCH] x86: hw_irq.h warning fix include/asm/hw_irq.h:70: warning: `struct hw_interrupt_type' declared inside parameter list include/asm/hw_irq.h:70: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/hw_irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-i386/hw_irq.h b/include/asm-i386/hw_irq.h index 4ac84cc6f01a..622815bf3243 100644 --- a/include/asm-i386/hw_irq.h +++ b/include/asm-i386/hw_irq.h @@ -18,6 +18,8 @@ #include #include +struct hw_interrupt_type; + /* * Various low-level irq details needed by irq.c, process.c, * time.c, io_apic.c and smp.c -- cgit v1.2.3 From fd2e54b35bd70d11c160ded4834e2378e915356e Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Sat, 1 Oct 2005 17:00:48 +0200 Subject: [PATCH] trivial #if -> #ifdef Use '#ifdef' consistently on __KERNEL__. This was reported as bug #5340 (isn't easier to send a fix than report the bug?!) Signed-off-by: Diego Calleja Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4ed2107bc020..2f0299a448f6 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -183,7 +183,7 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; -#if __KERNEL__ +#ifdef __KERNEL__ void *data; #else kernel_ulong_t data; -- cgit v1.2.3 From 325ed8239309cb29f10ea58c5a668058ead11479 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 13:57:23 -0700 Subject: [NET]: Fix packet timestamping. I've found the problem in general. It affects any 64-bit architecture. The problem occurs when you change the system time. Suppose that when you boot your system clock is forward by a day. This gets recorded down in skb_tv_base. You then wind the clock back by a day. From that point onwards the offset will be negative which essentially overflows the 32-bit variables they're stored in. In fact, why don't we just store the real time stamp in those 32-bit variables? After all, we're not going to overflow for quite a while yet. When we do overflow, we'll need a better solution of course. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 12 +++--------- net/core/skbuff.c | 5 ----- net/ipv4/netfilter/ip_queue.c | 4 ++-- net/ipv4/netfilter/ipt_ULOG.c | 4 ++-- net/ipv6/netfilter/ip6_queue.c | 4 ++-- net/netfilter/nfnetlink_log.c | 4 ++-- net/netfilter/nfnetlink_queue.c | 4 ++-- net/packet/af_packet.c | 4 ++-- 8 files changed, 15 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2741c0c55e83..466c879f82b8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,8 +155,6 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) -extern struct timeval skb_tv_base; - struct skb_timeval { u32 off_sec; u32 off_usec; @@ -175,7 +173,7 @@ enum { * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @tstamp: Time we arrived stored as offset to skb_tv_base + * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -1255,10 +1253,6 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * { stamp->tv_sec = skb->tstamp.off_sec; stamp->tv_usec = skb->tstamp.off_usec; - if (skb->tstamp.off_sec) { - stamp->tv_sec += skb_tv_base.tv_sec; - stamp->tv_usec += skb_tv_base.tv_usec; - } } /** @@ -1272,8 +1266,8 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * */ static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) { - skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; - skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; + skb->tstamp.off_sec = stamp->tv_sec; + skb->tstamp.off_usec = stamp->tv_usec; } extern void __net_timestamp(struct sk_buff *skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f80a28785610..0e9431b59fb2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -71,8 +71,6 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; -struct timeval __read_mostly skb_tv_base; - /* * Keep out-of-line to prevent kernel bloat. 
* __builtin_return_address is not used because it is not always @@ -1708,8 +1706,6 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_fclone_cache) panic("cannot create skbuff cache"); - - do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); @@ -1743,4 +1739,3 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); -EXPORT_SYMBOL(skb_tv_base); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d54f14d926f6..36339eb39e17 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index e2c14f3cb2fc..2883ccd8a91d 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -225,8 +225,8 @@ static void ipt_ulog_packet(unsigned int hooknum, /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; - pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; + pm->timestamp_sec = skb->tstamp.off_sec; + pm->timestamp_usec = skb->tstamp.off_usec; pm->mark = skb->nfmark; pm->hook = hooknum; if (prefix != NULL) diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index aa11cf366efa..5027bbe6415e 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -238,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index ff5601ceedcb..efcd10f996ba 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -494,8 +494,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->tstamp.off_sec) { struct nfulnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec); + ts.sec = cpu_to_be64(skb->tstamp.off_sec); + ts.usec = cpu_to_be64(skb->tstamp.off_usec); NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f81fe8c52e99..eaa44c49567b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -492,8 +492,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entry->skb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb_tv_base.tv_usec + 
entry->skb->tstamp.off_usec); + ts.sec = cpu_to_be64(entry->skb->tstamp.off_sec); + ts.usec = cpu_to_be64(entry->skb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6a67a87384cc..499ae3df4a44 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -654,8 +654,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe __net_timestamp(skb); sock_enable_timestamp(sk); } - h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; - h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; + h->tp_sec = skb->tstamp.off_sec; + h->tp_usec = skb->tstamp.off_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); sll->sll_halen = 0; -- cgit v1.2.3 From 81c3d5470ecc70564eb9209946730fe2be93ad06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Oct 2005 14:13:38 -0700 Subject: [INET]: speedup inet (tcp/dccp) lookups Arnaldo and I agreed it could be applied now, because I have other pending patches depending on this one (Thank you Arnaldo) (The other important patch moves skc_refcnt into a separate cache line, so that the SMP/NUMA performance doesn't suffer from cache line ping pongs) 1) First some performance data: -------------------------------- tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established() The most time-critical code is: sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } The sk_for_each() does use prefetch() hints but only the beginning of "struct sock" is prefetched. As INET_MATCH's first comparison uses inet_sk(__sk)->daddr, which is far away from the beginning of "struct sock", it has to bring a cold cache line into the CPU cache. Each iteration has to use at least 2 cache lines. This can be problematic if some chains are very long. 2) The goal ----------- The idea I had is to change things so that INET_MATCH() may return FALSE in 99% of cases only using the data already in the CPU cache, using one cache line per iteration. 3) Description of the patch --------------------------- Adds a new 'unsigned int skc_hash' field in 'struct sock_common', filling a 32-bit hole on 64-bit platforms. struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; Store in this 32-bit field the full hash, not masked by (ehash_size - 1). Using this full hash as the first comparison done in INET_MATCH permits us to immediately skip the element without touching a second cache line in case of a miss.
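[ In distilled form (illustrative only, the real test is the INET_MATCH macro
  quoted next): sk_hash sits in sock_common, next to the list linkage the walk
  already touches, so a check of the form

	if (sk->sk_hash != hash)
		continue;	/* rejected without touching a second cache line */

  filters out almost every non-matching entry using only the leading part of
  struct sock that the existing prefetch() hints already pull in. ]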
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to sk_hash and tw_hash) already contains the slot number if we mask with (ehash_size - 1) File include/net/inet_hashtables.h 64 bits platforms : #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) ((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 32bits platforms: #define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) - Adds a prefetch(head->chain.first) in __inet_lookup_established()/__tcp_v4_check_established() and __inet6_lookup_established()/__tcp_v6_check_established() and __dccp_v4_check_established() to bring into cache the first element of the list, before the {read|write}_lock(&head->lock); Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +-- include/linux/tc_ematch/tc_em_meta.h | 2 +- include/net/inet6_hashtables.h | 21 ++++++------ include/net/inet_hashtables.h | 64 +++++++++++++++++++++--------------- include/net/inet_timewait_sock.h | 2 +- include/net/sock.h | 5 +-- net/atm/common.c | 2 +- net/dccp/ipv4.c | 12 +++---- net/ipv4/inet_timewait_sock.c | 6 ++-- net/ipv4/tcp_ipv4.c | 11 ++++--- net/ipv6/tcp_ipv6.c | 18 +++++----- net/sched/em_meta.c | 6 ++-- 12 files changed, 85 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bb6f88e14061..e0b922785d98 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -372,8 +372,9 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ +#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 081b1ee8516e..e21937cf91d0 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -71,7 +71,7 @@ enum TCF_META_ID_SK_SNDBUF, TCF_META_ID_SK_ALLOCS, TCF_META_ID_SK_ROUTE_CAPS, - TCF_META_ID_SK_HASHENT, + TCF_META_ID_SK_HASH, TCF_META_ID_SK_LINGERTIME, TCF_META_ID_SK_ACK_BACKLOG, TCF_META_ID_SK_MAX_ACK_BACKLOG, diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 03df3b157960..5a2beed5a770 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -26,19 +26,18 @@ struct inet_hashinfo; /* I have no idea if this is a good hash for v6 or not. 
-DaveM */ -static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, - const struct in6_addr *faddr, const u16 fport, - const int ehash_size) +static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport) { - int hashent = (lport ^ fport); + unsigned int hashent = (lport ^ fport); hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); hashent ^= hashent >> 16; hashent ^= hashent >> 8; - return (hashent & (ehash_size - 1)); + return hashent; } -static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) +static inline int inet6_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ipv6_pinfo *np = inet6_sk(sk); @@ -46,7 +45,7 @@ static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) const struct in6_addr *faddr = &np->daddr; const __u16 lport = inet->num; const __u16 fport = inet->dport; - return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); + return inet6_ehashfn(laddr, lport, faddr, fport); } /* @@ -69,14 +68,14 @@ static inline struct sock * /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, - hashinfo->ehash_size); - struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 646b6ea7fe26..35f49e65e295 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -108,7 +108,7 @@ struct inet_hashinfo { struct inet_bind_hashbucket *bhash; int bhash_size; - int ehash_size; + unsigned int ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. 
Hash function here @@ -130,17 +130,16 @@ struct inet_hashinfo { int port_rover; }; -static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, - const __u32 faddr, const __u16 fport, - const int ehash_size) +static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport) { - int h = (laddr ^ lport) ^ (faddr ^ fport); + unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); h ^= h >> 16; h ^= h >> 8; - return h & (ehash_size - 1); + return h; } -static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) +static inline int inet_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const __u32 laddr = inet->rcv_saddr; @@ -148,7 +147,14 @@ static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) const __u32 faddr = inet->daddr; const __u16 fport = inet->dport; - return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); + return inet_ehashfn(laddr, lport, faddr, fport); +} + +static inline struct inet_ehash_bucket *inet_ehash_bucket( + struct inet_hashinfo *hashinfo, + unsigned int hash) +{ + return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } extern struct inet_bind_bucket * @@ -235,9 +241,11 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, lock = &hashinfo->lhash_lock; inet_listen_wlock(hashinfo); } else { - sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); - list = &hashinfo->ehash[sk->sk_hashent].chain; - lock = &hashinfo->ehash[sk->sk_hashent].lock; + struct inet_ehash_bucket *head; + sk->sk_hash = inet_sk_ehashfn(sk); + head = inet_ehash_bucket(hashinfo, sk->sk_hash); + list = &head->chain; + lock = &head->lock; write_lock(lock); } __sk_add_node(sk, list); @@ -268,9 +276,8 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; } else { - struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent]; - lock = &head->lock; - write_lock_bh(&head->lock); + lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock; + write_lock_bh(lock); } if (__sk_del_node_init(sk)) @@ -337,23 +344,27 @@ sherry_cache: #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ +#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) -#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_sk(__sk)->daddr == (__saddr)) && \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + 
(inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ +#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) @@ -378,18 +389,19 @@ static inline struct sock * /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size); - struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { - if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) goto hit; } sk = NULL; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 3b070352e869..4ade56ef3a4d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -112,6 +112,7 @@ struct inet_timewait_sock { #define tw_node __tw_common.skc_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt +#define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot volatile unsigned char tw_substate; /* 3 bits hole, try to pack */ @@ -126,7 +127,6 @@ struct inet_timewait_sock { /* And these are ours. */ __u8 tw_ipv6only:1; /* 31 bits hole, try to pack */ - int tw_hashent; int tw_timeout; unsigned long tw_ttd; struct inet_bind_bucket *tw_tb; diff --git a/include/net/sock.h b/include/net/sock.h index 8c48fbecb7cf..b6440805c420 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -99,6 +99,7 @@ struct proto; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header @@ -112,6 +113,7 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; @@ -139,7 +141,6 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_lingertime: %SO_LINGER l_linger setting - * @sk_hashent: hash entry in several tables (e.g. 
inet_hashinfo.ehash) * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used @@ -186,6 +187,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, @@ -208,7 +210,6 @@ struct sock { unsigned int sk_allocation; int sk_sndbuf; int sk_route_caps; - int sk_hashent; unsigned long sk_flags; unsigned long sk_lingertime; /* diff --git a/net/atm/common.c b/net/atm/common.c index 801a5813ec60..63feea49fb13 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -46,7 +46,7 @@ static void __vcc_insert_socket(struct sock *sk) struct atm_vcc *vcc = atm_sk(sk); struct hlist_head *head = &vcc_hash[vcc->vci & (VCC_HTABLE_SIZE - 1)]; - sk->sk_hashent = vcc->vci & (VCC_HTABLE_SIZE - 1); + sk->sk_hash = vcc->vci & (VCC_HTABLE_SIZE - 1); sk_add_node(sk, head); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 40fe6afacde6..ae088d1347af 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -62,27 +62,27 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, const int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, - dccp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); const struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } tw = NULL; /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -90,7 +90,7 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - sk->sk_hashent = hash; + sk->sk_hash = hash; BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4d1502a49852..f9076ef3a1a8 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,7 +20,7 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. 
*/ - struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); write_lock(&ehead->lock); if (hlist_unhashed(&tw->tw_node)) { @@ -60,7 +60,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -106,7 +106,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_dport = inet->dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; - tw->tw_hashent = sk->sk_hashent; + tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_prot = sk->sk_prot_creator; atomic_set(&tw->tw_refcnt, 1); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13dfb391cdf1..c85819d8474b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -130,19 +130,20 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); @@ -179,7 +180,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -188,7 +189,7 @@ unique: * in hash table socket with a funny identity. 
*/ inet->num = lport; inet->sport = htons(lport); - sk->sk_hashent = hash; + sk->sk_hash = hash; BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 80643e6b346b..d693cb988b78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -209,9 +209,11 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) lock = &tcp_hashinfo.lhash_lock; inet_listen_wlock(&tcp_hashinfo); } else { - sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size); - list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; - lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; + unsigned int hash; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); + hash &= (tcp_hashinfo.ehash_size - 1); + list = &tcp_hashinfo.ehash[hash].chain; + lock = &tcp_hashinfo.ehash[hash].lock; write_lock(lock); } @@ -322,13 +324,13 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport, - tcp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ @@ -365,14 +367,14 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) goto not_unique; } unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); - sk->sk_hashent = hash; + sk->sk_hash = hash; sock_prot_inc_use(sk->sk_prot); write_unlock(&head->lock); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 00eae5f9a01a..cf68a59fdc5a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -393,10 +393,10 @@ META_COLLECTOR(int_sk_route_caps) dst->value = skb->sk->sk_route_caps; } -META_COLLECTOR(int_sk_hashent) +META_COLLECTOR(int_sk_hash) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_hashent; + dst->value = skb->sk->sk_hash; } META_COLLECTOR(int_sk_lingertime) @@ -515,7 +515,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps), - [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent), + [META_ID(SK_HASH)] = META_FUNC(int_sk_hash), [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl), -- cgit v1.2.3 From e5ed639913eea3e4783a550291775ab78dd84966 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 14:35:55 -0700 Subject: [IPV4]: Replace __in_dev_get with __in_dev_get_rcu/rtnl The following patch renames __in_dev_get() to __in_dev_get_rtnl() and introduces __in_dev_get_rcu() to cover the second case. 1) RCU with refcnt should use in_dev_get(). 2) RCU without refcnt should use __in_dev_get_rcu(). 3) All others must hold RTNL and use __in_dev_get_rtnl(). 
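To make the three cases concrete, here is a minimal sketch (illustrative only, not
taken from this patch; "dev" stands for any valid struct net_device pointer):

	struct in_device *in_dev;

	/* 1) RCU with refcnt: in_dev_get() takes rcu_read_lock() itself,
	 *    bumps the refcount and returns a pointer the caller may keep
	 *    using outside any RCU section, released with in_dev_put().
	 */
	in_dev = in_dev_get(dev);
	if (in_dev) {
		/* ... use in_dev ... */
		in_dev_put(in_dev);
	}

	/* 2) RCU without refcnt: pointer is only valid inside the
	 *    read-side critical section.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		/* ... use in_dev, do not keep it past rcu_read_unlock() ... */
	}
	rcu_read_unlock();

	/* 3) All others: the caller already holds the RTNL. */
	ASSERT_RTNL();
	in_dev = __in_dev_get_rtnl(dev);

Since __in_dev_get_rcu() performs the rcu_dereference() itself, callers that used
to write rcu_dereference(__in_dev_get(dev)) can now call it directly.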
There is one exception in net/ipv4/route.c which is in fact a pre-existing race condition. I've marked it as such so that we remember to fix it. This patch is based on suggestions and prior work by Suzanne Wood and Paul McKenney. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/wan/sdlamain.c | 23 ++++++++++++++--------- drivers/net/wan/syncppp.c | 2 +- drivers/net/wireless/strip.c | 4 ++-- drivers/parisc/led.c | 5 ++++- drivers/s390/net/qeth_main.c | 4 ++-- include/linux/inetdevice.h | 12 ++++++++++-- net/atm/clip.c | 2 +- net/core/netpoll.c | 2 +- net/core/pktgen.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 10 +++++----- net/ipv4/devinet.c | 22 +++++++++++----------- net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/igmp.c | 2 +- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ipmr.c | 6 +++--- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 2 +- net/ipv4/netfilter/ipt_REDIRECT.c | 2 +- net/ipv4/route.c | 6 ++++-- net/ipv6/addrconf.c | 2 +- net/irda/irlan/irlan_eth.c | 2 +- net/sctp/protocol.c | 2 +- 24 files changed, 73 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6d00c3de1a83..bf81cd45e4d4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2776,7 +2776,7 @@ static u32 bond_glean_dev_ip(struct net_device *dev) return 0; rcu_read_lock(); - idev = __in_dev_get(dev); + idev = __in_dev_get_rcu(dev); if (!idev) goto out; diff --git a/drivers/net/wan/sdlamain.c b/drivers/net/wan/sdlamain.c index 74e151acef3e..7a8b22a7ea31 100644 --- a/drivers/net/wan/sdlamain.c +++ b/drivers/net/wan/sdlamain.c @@ -57,6 +57,7 @@ #include /* request_region(), release_region() */ #include /* WAN router definitions */ #include /* WANPIPE common user API definitions */ +#include #include #include /* phys_to_virt() */ @@ -1268,37 +1269,41 @@ unsigned long get_ip_address(struct net_device *dev, int option) struct in_ifaddr *ifaddr; struct in_device *in_dev; + unsigned long addr = 0; - if ((in_dev = __in_dev_get(dev)) == NULL){ - return 0; + rcu_read_lock(); + if ((in_dev = __in_dev_get_rcu(dev)) == NULL){ + goto out; } if ((ifaddr = in_dev->ifa_list)== NULL ){ - return 0; + goto out; } switch (option){ case WAN_LOCAL_IP: - return ifaddr->ifa_local; + addr = ifaddr->ifa_local; break; case WAN_POINTOPOINT_IP: - return ifaddr->ifa_address; + addr = ifaddr->ifa_address; break; case WAN_NETMASK_IP: - return ifaddr->ifa_mask; + addr = ifaddr->ifa_mask; break; case WAN_BROADCAST_IP: - return ifaddr->ifa_broadcast; + addr = ifaddr->ifa_broadcast; break; default: - return 0; + break; } - return 0; +out: + rcu_read_unlock(); + return addr; } void add_gateway(sdla_t *card, struct net_device *dev) diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index b56a7b516d24..a6d3b55013a5 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -769,7 +769,7 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb) u32 addr = 0, mask = ~0; /* FIXME: is the mask correct? 
*/ #ifdef CONFIG_INET rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) != NULL) + if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { for (ifa=in_dev->ifa_list; ifa != NULL; ifa=ifa->ifa_next) { diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 4b0acae22b0d..7bc7fc823128 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -1352,7 +1352,7 @@ static unsigned char *strip_make_packet(unsigned char *buffer, struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(strip_info->dev); + in_dev = __in_dev_get_rcu(strip_info->dev); if (in_dev == NULL) { rcu_read_unlock(); return NULL; @@ -1508,7 +1508,7 @@ static void strip_send(struct strip *strip_info, struct sk_buff *skb) brd = addr = 0; rcu_read_lock(); - in_dev = __in_dev_get(strip_info->dev); + in_dev = __in_dev_get_rcu(strip_info->dev); if (in_dev) { if (in_dev->ifa_list) { brd = in_dev->ifa_list->ifa_broadcast; diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index e90fb72a6962..286902298e33 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -358,9 +359,10 @@ static __inline__ int led_get_net_activity(void) /* we are running as tasklet, so locking dev_base * for reading should be OK */ read_lock(&dev_base_lock); + rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { struct net_device_stats *stats; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) continue; if (LOOPBACK(in_dev->ifa_list->ifa_local)) @@ -371,6 +373,7 @@ static __inline__ int led_get_net_activity(void) rx_total += stats->rx_packets; tx_total += stats->tx_packets; } + rcu_read_unlock(); read_unlock(&dev_base_lock); retval = 0; diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 86582cf1e19e..71de834ece1a 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -5200,7 +5200,7 @@ qeth_free_vlan_addresses4(struct qeth_card *card, unsigned short vid) if (!card->vlangrp) return; rcu_read_lock(); - in_dev = __in_dev_get(card->vlangrp->vlan_devices[vid]); + in_dev = __in_dev_get_rcu(card->vlangrp->vlan_devices[vid]); if (!in_dev) goto out; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { @@ -7725,7 +7725,7 @@ qeth_arp_constructor(struct neighbour *neigh) goto out; rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 7e1e15f934f3..fd7af86151b1 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -142,13 +142,21 @@ static __inline__ int bad_mask(u32 mask, u32 addr) #define endfor_ifa(in_dev) } +static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) +{ + struct in_device *in_dev = dev->ip_ptr; + if (in_dev) + in_dev = rcu_dereference(in_dev); + return in_dev; +} + static __inline__ struct in_device * in_dev_get(const struct net_device *dev) { struct in_device *in_dev; rcu_read_lock(); - in_dev = dev->ip_ptr; + in_dev = __in_dev_get_rcu(dev); if (in_dev) atomic_inc(&in_dev->refcnt); rcu_read_unlock(); @@ -156,7 +164,7 @@ in_dev_get(const struct net_device *dev) } static __inline__ struct in_device * -__in_dev_get(const struct net_device *dev) +__in_dev_get_rtnl(const struct net_device *dev) { return (struct in_device*)dev->ip_ptr; } diff --git 
a/net/atm/clip.c b/net/atm/clip.c index 28dab55a4387..4f54c9a5e84a 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -310,7 +310,7 @@ static int clip_constructor(struct neighbour *neigh) if (neigh->type != RTN_UNICAST) return -EINVAL; rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) { rcu_read_unlock(); return -EINVAL; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5265dfd69928..802fe11efad0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -703,7 +703,7 @@ int netpoll_setup(struct netpoll *np) if (!np->local_ip) { rcu_read_lock(); - in_dev = __in_dev_get(ndev); + in_dev = __in_dev_get_rcu(ndev); if (!in_dev || !in_dev->ifa_list) { rcu_read_unlock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b7f2d65a614f..44de070b6045 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1667,7 +1667,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(pkt_dev->odev); + in_dev = __in_dev_get_rcu(pkt_dev->odev); if (in_dev) { if (in_dev->ifa_list) { pkt_dev->saddr_min = in_dev->ifa_list->ifa_address; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 4a62093eb343..34fdac51df96 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -406,7 +406,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned long network = 0; rcu_read_lock(); - idev = __in_dev_get(dev); + idev = __in_dev_get_rcu(dev); if (idev) { if (idev->ifa_list) network = ntohl(idev->ifa_list->ifa_address) & diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ec0e36893b01..b425748f02d7 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -241,7 +241,7 @@ static int arp_constructor(struct neighbour *neigh) neigh->type = inet_addr_type(addr); rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; @@ -989,8 +989,8 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 1; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 1; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1; return 0; } return -ENXIO; @@ -1095,8 +1095,8 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 0; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 0; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0; return 0; } return -ENXIO; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ba2895ae8151..74f2207e131a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -351,7 +351,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -449,7 +449,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg goto out; rc = -ENOBUFS; - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { in_dev = inetdev_init(dev); if (!in_dev) goto out; @@ -584,7 +584,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) if (colon) *colon = ':'; - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD 
style) */ @@ -748,7 +748,7 @@ rarok: static int inet_gifconf(struct net_device *dev, char __user *buf, int len) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; @@ -791,7 +791,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; @@ -818,7 +818,7 @@ no_in_dev: read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; for_primary_ifa(in_dev) { @@ -887,7 +887,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop if (dev) { rcu_read_lock(); - if ((in_dev = __in_dev_get(dev))) + if ((in_dev = __in_dev_get_rcu(dev))) addr = confirm_addr_indev(in_dev, dst, local, scope); rcu_read_unlock(); @@ -897,7 +897,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev))) { + if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; @@ -957,7 +957,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1078,7 +1078,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) s_ip_idx = 0; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); continue; } @@ -1149,7 +1149,7 @@ void inet_forward_change(void) for (dev = dev_base; dev; dev = dev->next) { struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) in_dev->cnf.forwarding = on; rcu_read_unlock(); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e1379f71269..e61bc7177eb1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -173,7 +173,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, no_addr = rpf = 0; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); @@ -607,7 +607,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d41219e8037c..186f20c4a45e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1087,7 +1087,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, rta->rta_oif = &dev->ifindex; if (colon) { struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return -ENODEV; *colon = ':'; @@ -1268,7 +1268,7 @@ int fib_sync_up(struct net_device *dev) } if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 
continue; - if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) + if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) continue; alive++; spin_lock_bh(&fib_multipath_lock); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 70c44e4c3ceb..8b6d3939e1e6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1323,7 +1323,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) } if (dev) { imr->imr_ifindex = dev->ifindex; - idev = __in_dev_get(dev); + idev = __in_dev_get_rtnl(dev); } return idev; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f0d5740d7e22..896ce3f8f53a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1104,10 +1104,10 @@ static int ipgre_open(struct net_device *dev) return -EADDRNOTAVAIL; dev = rt->u.dst.dev; ip_rt_put(rt); - if (__in_dev_get(dev) == NULL) + if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; - ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr); + ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); } return 0; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9dbf5909f3a6..302b7eb507c9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rtnl(dev); if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) goto failure; in_dev->cnf.rp_filter = 0; @@ -278,7 +278,7 @@ static int vif_delete(int vifi) dev_set_allmulti(dev, -1); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { in_dev->cnf.mc_forwarding--; ip_rt_multicast_event(in_dev); } @@ -421,7 +421,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return -EINVAL; } - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; in_dev->cnf.mc_forwarding++; dev_set_allmulti(dev, +1); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 577bac22dcc6..186646eb249f 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -58,7 +58,7 @@ static int help(struct sk_buff **pskb, goto out; rcu_read_lock(); - in_dev = __in_dev_get(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->u.dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 715cb613405c..5245bfd33d52 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -93,7 +93,7 @@ redirect_target(struct sk_buff **pskb, newdst = 0; rcu_read_lock(); - indev = __in_dev_get((*pskb)->dev); + indev = __in_dev_get_rcu((*pskb)->dev); if (indev && (ifa = indev->ifa_list)) newdst = ifa->ifa_local; rcu_read_unlock(); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8549f26e2495..381dd6a6aebb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2128,7 +2128,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, struct in_device *in_dev; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { int our = ip_check_mc(in_dev, daddr, saddr, skb->nh.iph->protocol); if (our @@ -2443,7 +2443,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) err = -ENODEV; if (dev_out == NULL) goto out; - if 
(__in_dev_get(dev_out) == NULL) { + + /* RACE: Check return value of inet_select_addr instead. */ + if (__in_dev_get_rtnl(dev_out) == NULL) { dev_put(dev_out); goto out; /* Wrong error code */ } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4e509e52fbc1..a970b4727ce8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1806,7 +1806,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } for (dev = dev_base; dev != NULL; dev = dev->next) { - struct in_device * in_dev = __in_dev_get(dev); + struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 071cd2cefd8a..953e255d2bc8 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -310,7 +310,7 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) #ifdef CONFIG_INET IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n"); rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) goto out; if (in_dev->ifa_list) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e7025be77691..f01d1c9002a1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -147,7 +147,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, struct sctp_sockaddr_entry *addr; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); return; } -- cgit v1.2.3 From 3115624eda34d0f4e673fc6bcea36b7ad701ee33 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 3 Oct 2005 17:37:02 -0700 Subject: [SPARC]: "extern inline" doesn't make much sense. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- arch/sparc/kernel/time.c | 2 +- arch/sparc/mm/srmmu.c | 2 +- include/asm-sparc/btfixup.h | 12 ++++++------ include/asm-sparc/cache.h | 18 +++++++++--------- include/asm-sparc/cypress.h | 8 ++++---- include/asm-sparc/delay.h | 2 +- include/asm-sparc/dma.h | 2 +- include/asm-sparc/iommu.h | 4 ++-- include/asm-sparc/kdebug.h | 2 +- include/asm-sparc/mbus.h | 4 ++-- include/asm-sparc/msi.h | 2 +- include/asm-sparc/mxcc.h | 8 ++++---- include/asm-sparc/obio.h | 30 +++++++++++++++--------------- include/asm-sparc/pci.h | 6 +++--- include/asm-sparc/pgtable.h | 28 ++++++++++++++-------------- include/asm-sparc/pgtsrmmu.h | 30 +++++++++++++++--------------- include/asm-sparc/processor.h | 2 +- include/asm-sparc/psr.h | 6 +++--- include/asm-sparc/sbi.h | 10 +++++----- include/asm-sparc/sbus.h | 6 +++--- include/asm-sparc/smp.h | 26 +++++++++++++------------- include/asm-sparc/smpprim.h | 8 ++++---- include/asm-sparc/spinlock.h | 10 +++++----- include/asm-sparc/system.h | 2 +- include/asm-sparc/traps.h | 2 +- 25 files changed, 116 insertions(+), 116 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c index bc015e980341..279a62627c10 100644 --- a/arch/sparc/kernel/time.c +++ b/arch/sparc/kernel/time.c @@ -457,7 +457,7 @@ void __init time_init(void) sbus_time_init(); } -extern __inline__ unsigned long do_gettimeoffset(void) +static inline unsigned long do_gettimeoffset(void) { return (*master_l10_counter >> 10) & 0x1fffff; } diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index c89a803cbc20..c664b962987c 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -260,7 +260,7 @@ static inline pte_t srmmu_pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & SRMMU_CHG_MASK) | 
pgprot_val(newprot)); } /* to find an entry in a top-level page table... */ -extern inline pgd_t *srmmu_pgd_offset(struct mm_struct * mm, unsigned long address) +static inline pgd_t *srmmu_pgd_offset(struct mm_struct * mm, unsigned long address) { return mm->pgd + (address >> SRMMU_PGDIR_SHIFT); } /* Find an entry in the second-level page table.. */ diff --git a/include/asm-sparc/btfixup.h b/include/asm-sparc/btfixup.h index b84c96c89581..6b29503256fd 100644 --- a/include/asm-sparc/btfixup.h +++ b/include/asm-sparc/btfixup.h @@ -51,7 +51,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); #define BTFIXUPDEF_SIMM13(__name) \ extern unsigned int ___sf_##__name(void) __attribute_const__; \ extern unsigned ___ss_##__name[2]; \ - extern __inline__ unsigned int ___sf_##__name(void) { \ + static inline unsigned int ___sf_##__name(void) { \ unsigned int ret; \ __asm__ ("or %%g0, ___s_" #__name ", %0" : "=r"(ret)); \ return ret; \ @@ -59,7 +59,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); #define BTFIXUPDEF_SIMM13_INIT(__name,__val) \ extern unsigned int ___sf_##__name(void) __attribute_const__; \ extern unsigned ___ss_##__name[2]; \ - extern __inline__ unsigned int ___sf_##__name(void) { \ + static inline unsigned int ___sf_##__name(void) { \ unsigned int ret; \ __asm__ ("or %%g0, ___s_" #__name "__btset_" #__val ", %0" : "=r"(ret));\ return ret; \ @@ -73,7 +73,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); #define BTFIXUPDEF_HALF(__name) \ extern unsigned int ___af_##__name(void) __attribute_const__; \ extern unsigned ___as_##__name[2]; \ - extern __inline__ unsigned int ___af_##__name(void) { \ + static inline unsigned int ___af_##__name(void) { \ unsigned int ret; \ __asm__ ("or %%g0, ___a_" #__name ", %0" : "=r"(ret)); \ return ret; \ @@ -81,7 +81,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); #define BTFIXUPDEF_HALF_INIT(__name,__val) \ extern unsigned int ___af_##__name(void) __attribute_const__; \ extern unsigned ___as_##__name[2]; \ - extern __inline__ unsigned int ___af_##__name(void) { \ + static inline unsigned int ___af_##__name(void) { \ unsigned int ret; \ __asm__ ("or %%g0, ___a_" #__name "__btset_" #__val ", %0" : "=r"(ret));\ return ret; \ @@ -92,7 +92,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); #define BTFIXUPDEF_SETHI(__name) \ extern unsigned int ___hf_##__name(void) __attribute_const__; \ extern unsigned ___hs_##__name[2]; \ - extern __inline__ unsigned int ___hf_##__name(void) { \ + static inline unsigned int ___hf_##__name(void) { \ unsigned int ret; \ __asm__ ("sethi %%hi(___h_" #__name "), %0" : "=r"(ret)); \ return ret; \ @@ -100,7 +100,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); #define BTFIXUPDEF_SETHI_INIT(__name,__val) \ extern unsigned int ___hf_##__name(void) __attribute_const__; \ extern unsigned ___hs_##__name[2]; \ - extern __inline__ unsigned int ___hf_##__name(void) { \ + static inline unsigned int ___hf_##__name(void) { \ unsigned int ret; \ __asm__ ("sethi %%hi(___h_" #__name "__btset_" #__val "), %0" : \ "=r"(ret)); \ diff --git a/include/asm-sparc/cache.h b/include/asm-sparc/cache.h index e6316fd7e1a4..a10522cb21b7 100644 --- a/include/asm-sparc/cache.h +++ b/include/asm-sparc/cache.h @@ -27,7 +27,7 @@ */ /* First, cache-tag access. 
*/ -extern __inline__ unsigned int get_icache_tag(int setnum, int tagnum) +static inline unsigned int get_icache_tag(int setnum, int tagnum) { unsigned int vaddr, retval; @@ -38,7 +38,7 @@ extern __inline__ unsigned int get_icache_tag(int setnum, int tagnum) return retval; } -extern __inline__ void put_icache_tag(int setnum, int tagnum, unsigned int entry) +static inline void put_icache_tag(int setnum, int tagnum, unsigned int entry) { unsigned int vaddr; @@ -51,7 +51,7 @@ extern __inline__ void put_icache_tag(int setnum, int tagnum, unsigned int entry /* Second cache-data access. The data is returned two-32bit quantities * at a time. */ -extern __inline__ void get_icache_data(int setnum, int tagnum, int subblock, +static inline void get_icache_data(int setnum, int tagnum, int subblock, unsigned int *data) { unsigned int value1, value2, vaddr; @@ -67,7 +67,7 @@ extern __inline__ void get_icache_data(int setnum, int tagnum, int subblock, data[0] = value1; data[1] = value2; } -extern __inline__ void put_icache_data(int setnum, int tagnum, int subblock, +static inline void put_icache_data(int setnum, int tagnum, int subblock, unsigned int *data) { unsigned int value1, value2, vaddr; @@ -92,35 +92,35 @@ extern __inline__ void put_icache_data(int setnum, int tagnum, int subblock, */ /* Flushes which clear out both the on-chip and external caches */ -extern __inline__ void flush_ei_page(unsigned int addr) +static inline void flush_ei_page(unsigned int addr) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r" (addr), "i" (ASI_M_FLUSH_PAGE) : "memory"); } -extern __inline__ void flush_ei_seg(unsigned int addr) +static inline void flush_ei_seg(unsigned int addr) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r" (addr), "i" (ASI_M_FLUSH_SEG) : "memory"); } -extern __inline__ void flush_ei_region(unsigned int addr) +static inline void flush_ei_region(unsigned int addr) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r" (addr), "i" (ASI_M_FLUSH_REGION) : "memory"); } -extern __inline__ void flush_ei_ctx(unsigned int addr) +static inline void flush_ei_ctx(unsigned int addr) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r" (addr), "i" (ASI_M_FLUSH_CTX) : "memory"); } -extern __inline__ void flush_ei_user(unsigned int addr) +static inline void flush_ei_user(unsigned int addr) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r" (addr), "i" (ASI_M_FLUSH_USER) : diff --git a/include/asm-sparc/cypress.h b/include/asm-sparc/cypress.h index fc92fc839c3f..99599533efbc 100644 --- a/include/asm-sparc/cypress.h +++ b/include/asm-sparc/cypress.h @@ -48,25 +48,25 @@ #define CYPRESS_NFAULT 0x00000002 #define CYPRESS_MENABLE 0x00000001 -extern __inline__ void cypress_flush_page(unsigned long page) +static inline void cypress_flush_page(unsigned long page) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r" (page), "i" (ASI_M_FLUSH_PAGE)); } -extern __inline__ void cypress_flush_segment(unsigned long addr) +static inline void cypress_flush_segment(unsigned long addr) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r" (addr), "i" (ASI_M_FLUSH_SEG)); } -extern __inline__ void cypress_flush_region(unsigned long addr) +static inline void cypress_flush_region(unsigned long addr) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r" (addr), "i" (ASI_M_FLUSH_REGION)); } -extern __inline__ void cypress_flush_context(void) +static inline void cypress_flush_context(void) { __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : : "i" (ASI_M_FLUSH_CTX)); diff --git a/include/asm-sparc/delay.h 
b/include/asm-sparc/delay.h index 6edf2cbb246b..7ec8e9f7ad4f 100644 --- a/include/asm-sparc/delay.h +++ b/include/asm-sparc/delay.h @@ -10,7 +10,7 @@ #include #include -extern __inline__ void __delay(unsigned long loops) +static inline void __delay(unsigned long loops) { __asm__ __volatile__("cmp %0, 0\n\t" "1: bne 1b\n\t" diff --git a/include/asm-sparc/dma.h b/include/asm-sparc/dma.h index 07e6368a2521..8ec206aa5f2e 100644 --- a/include/asm-sparc/dma.h +++ b/include/asm-sparc/dma.h @@ -198,7 +198,7 @@ extern void dvma_init(struct sbus_bus *); /* Pause until counter runs out or BIT isn't set in the DMA condition * register. */ -extern __inline__ void sparc_dma_pause(struct sparc_dma_registers *regs, +static inline void sparc_dma_pause(struct sparc_dma_registers *regs, unsigned long bit) { int ctr = 50000; /* Let's find some bugs ;) */ diff --git a/include/asm-sparc/iommu.h b/include/asm-sparc/iommu.h index 8171362d56b9..70c589c05a10 100644 --- a/include/asm-sparc/iommu.h +++ b/include/asm-sparc/iommu.h @@ -108,12 +108,12 @@ struct iommu_struct { struct bit_map usemap; }; -extern __inline__ void iommu_invalidate(struct iommu_regs *regs) +static inline void iommu_invalidate(struct iommu_regs *regs) { regs->tlbflush = 0; } -extern __inline__ void iommu_invalidate_page(struct iommu_regs *regs, unsigned long ba) +static inline void iommu_invalidate_page(struct iommu_regs *regs, unsigned long ba) { regs->pageflush = (ba & PAGE_MASK); } diff --git a/include/asm-sparc/kdebug.h b/include/asm-sparc/kdebug.h index 3ea4916635ee..fba92485fdba 100644 --- a/include/asm-sparc/kdebug.h +++ b/include/asm-sparc/kdebug.h @@ -46,7 +46,7 @@ struct kernel_debug { extern struct kernel_debug *linux_dbvec; /* Use this macro in C-code to enter the debugger. */ -extern __inline__ void sp_enter_debugger(void) +static inline void sp_enter_debugger(void) { __asm__ __volatile__("jmpl %0, %%o7\n\t" "nop\n\t" : : diff --git a/include/asm-sparc/mbus.h b/include/asm-sparc/mbus.h index 5f2749015342..ecacdf4075d7 100644 --- a/include/asm-sparc/mbus.h +++ b/include/asm-sparc/mbus.h @@ -83,7 +83,7 @@ extern unsigned int hwbug_bitmask; */ #define TBR_ID_SHIFT 20 -extern __inline__ int get_cpuid(void) +static inline int get_cpuid(void) { register int retval; __asm__ __volatile__("rd %%tbr, %0\n\t" @@ -93,7 +93,7 @@ extern __inline__ int get_cpuid(void) return (retval & 3); } -extern __inline__ int get_modid(void) +static inline int get_modid(void) { return (get_cpuid() | 0x8); } diff --git a/include/asm-sparc/msi.h b/include/asm-sparc/msi.h index b69543dd3b46..ff72cbd946a4 100644 --- a/include/asm-sparc/msi.h +++ b/include/asm-sparc/msi.h @@ -19,7 +19,7 @@ #define MSI_ASYNC_MODE 0x80000000 /* Operate the MSI asynchronously */ -extern __inline__ void msi_set_sync(void) +static inline void msi_set_sync(void) { __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t" "andn %%g3, %2, %%g3\n\t" diff --git a/include/asm-sparc/mxcc.h b/include/asm-sparc/mxcc.h index 60ef9d6fe7bc..128fe9708135 100644 --- a/include/asm-sparc/mxcc.h +++ b/include/asm-sparc/mxcc.h @@ -85,7 +85,7 @@ #ifndef __ASSEMBLY__ -extern __inline__ void mxcc_set_stream_src(unsigned long *paddr) +static inline void mxcc_set_stream_src(unsigned long *paddr) { unsigned long data0 = paddr[0]; unsigned long data1 = paddr[1]; @@ -98,7 +98,7 @@ extern __inline__ void mxcc_set_stream_src(unsigned long *paddr) "i" (ASI_M_MXCC) : "g2", "g3"); } -extern __inline__ void mxcc_set_stream_dst(unsigned long *paddr) +static inline void mxcc_set_stream_dst(unsigned long *paddr) { unsigned long 
data0 = paddr[0]; unsigned long data1 = paddr[1]; @@ -111,7 +111,7 @@ extern __inline__ void mxcc_set_stream_dst(unsigned long *paddr) "i" (ASI_M_MXCC) : "g2", "g3"); } -extern __inline__ unsigned long mxcc_get_creg(void) +static inline unsigned long mxcc_get_creg(void) { unsigned long mxcc_control; @@ -125,7 +125,7 @@ extern __inline__ unsigned long mxcc_get_creg(void) return mxcc_control; } -extern __inline__ void mxcc_set_creg(unsigned long mxcc_control) +static inline void mxcc_set_creg(unsigned long mxcc_control) { __asm__ __volatile__("sta %0, [%1] %2\n\t" : : "r" (mxcc_control), "r" (MXCC_CREG), diff --git a/include/asm-sparc/obio.h b/include/asm-sparc/obio.h index 62e1d77965f3..47854a2a12cf 100644 --- a/include/asm-sparc/obio.h +++ b/include/asm-sparc/obio.h @@ -98,7 +98,7 @@ #ifndef __ASSEMBLY__ -extern __inline__ int bw_get_intr_mask(int sbus_level) +static inline int bw_get_intr_mask(int sbus_level) { int mask; @@ -109,7 +109,7 @@ extern __inline__ int bw_get_intr_mask(int sbus_level) return mask; } -extern __inline__ void bw_clear_intr_mask(int sbus_level, int mask) +static inline void bw_clear_intr_mask(int sbus_level, int mask) { __asm__ __volatile__ ("stha %0, [%1] %2" : : "r" (mask), @@ -117,7 +117,7 @@ extern __inline__ void bw_clear_intr_mask(int sbus_level, int mask) "i" (ASI_M_CTL)); } -extern __inline__ unsigned bw_get_prof_limit(int cpu) +static inline unsigned bw_get_prof_limit(int cpu) { unsigned limit; @@ -128,7 +128,7 @@ extern __inline__ unsigned bw_get_prof_limit(int cpu) return limit; } -extern __inline__ void bw_set_prof_limit(int cpu, unsigned limit) +static inline void bw_set_prof_limit(int cpu, unsigned limit) { __asm__ __volatile__ ("sta %0, [%1] %2" : : "r" (limit), @@ -136,7 +136,7 @@ extern __inline__ void bw_set_prof_limit(int cpu, unsigned limit) "i" (ASI_M_CTL)); } -extern __inline__ unsigned bw_get_ctrl(int cpu) +static inline unsigned bw_get_ctrl(int cpu) { unsigned ctrl; @@ -147,7 +147,7 @@ extern __inline__ unsigned bw_get_ctrl(int cpu) return ctrl; } -extern __inline__ void bw_set_ctrl(int cpu, unsigned ctrl) +static inline void bw_set_ctrl(int cpu, unsigned ctrl) { __asm__ __volatile__ ("sta %0, [%1] %2" : : "r" (ctrl), @@ -157,7 +157,7 @@ extern __inline__ void bw_set_ctrl(int cpu, unsigned ctrl) extern unsigned char cpu_leds[32]; -extern __inline__ void show_leds(int cpuid) +static inline void show_leds(int cpuid) { cpuid &= 0x1e; __asm__ __volatile__ ("stba %0, [%1] %2" : : @@ -166,7 +166,7 @@ extern __inline__ void show_leds(int cpuid) "i" (ASI_M_CTL)); } -extern __inline__ unsigned cc_get_ipen(void) +static inline unsigned cc_get_ipen(void) { unsigned pending; @@ -177,7 +177,7 @@ extern __inline__ unsigned cc_get_ipen(void) return pending; } -extern __inline__ void cc_set_iclr(unsigned clear) +static inline void cc_set_iclr(unsigned clear) { __asm__ __volatile__ ("stha %0, [%1] %2" : : "r" (clear), @@ -185,7 +185,7 @@ extern __inline__ void cc_set_iclr(unsigned clear) "i" (ASI_M_MXCC)); } -extern __inline__ unsigned cc_get_imsk(void) +static inline unsigned cc_get_imsk(void) { unsigned mask; @@ -196,7 +196,7 @@ extern __inline__ unsigned cc_get_imsk(void) return mask; } -extern __inline__ void cc_set_imsk(unsigned mask) +static inline void cc_set_imsk(unsigned mask) { __asm__ __volatile__ ("stha %0, [%1] %2" : : "r" (mask), @@ -204,7 +204,7 @@ extern __inline__ void cc_set_imsk(unsigned mask) "i" (ASI_M_MXCC)); } -extern __inline__ unsigned cc_get_imsk_other(int cpuid) +static inline unsigned cc_get_imsk_other(int cpuid) { unsigned mask; 
@@ -215,7 +215,7 @@ extern __inline__ unsigned cc_get_imsk_other(int cpuid) return mask; } -extern __inline__ void cc_set_imsk_other(int cpuid, unsigned mask) +static inline void cc_set_imsk_other(int cpuid, unsigned mask) { __asm__ __volatile__ ("stha %0, [%1] %2" : : "r" (mask), @@ -223,7 +223,7 @@ extern __inline__ void cc_set_imsk_other(int cpuid, unsigned mask) "i" (ASI_M_CTL)); } -extern __inline__ void cc_set_igen(unsigned gen) +static inline void cc_set_igen(unsigned gen) { __asm__ __volatile__ ("sta %0, [%1] %2" : : "r" (gen), @@ -239,7 +239,7 @@ extern __inline__ void cc_set_igen(unsigned gen) #define IGEN_MESSAGE(bcast, devid, sid, levels) \ (((bcast) << 31) | ((devid) << 23) | ((sid) << 15) | (levels)) -extern __inline__ void sun4d_send_ipi(int cpu, int level) +static inline void sun4d_send_ipi(int cpu, int level) { cc_set_igen(IGEN_MESSAGE(0, cpu << 3, 6 + ((level >> 1) & 7), 1 << (level - 1))); } diff --git a/include/asm-sparc/pci.h b/include/asm-sparc/pci.h index 97052baf90c1..38644742f011 100644 --- a/include/asm-sparc/pci.h +++ b/include/asm-sparc/pci.h @@ -15,12 +15,12 @@ #define PCI_IRQ_NONE 0xffffffff -extern inline void pcibios_set_master(struct pci_dev *dev) +static inline void pcibios_set_master(struct pci_dev *dev) { /* No special bus mastering setup handling */ } -extern inline void pcibios_penalize_isa_irq(int irq, int active) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } @@ -137,7 +137,7 @@ extern void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) +static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h index 8395ad2f1c09..ae883f295f1f 100644 --- a/include/asm-sparc/pgtable.h +++ b/include/asm-sparc/pgtable.h @@ -154,7 +154,7 @@ BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t) BTFIXUPDEF_CALL(void, pte_clear, pte_t *) BTFIXUPDEF_CALL(int, pte_read, pte_t) -extern __inline__ int pte_none(pte_t pte) +static inline int pte_none(pte_t pte) { return !(pte_val(pte) & ~BTFIXUP_SETHI(none_mask)); } @@ -167,7 +167,7 @@ BTFIXUPDEF_CALL_CONST(int, pmd_bad, pmd_t) BTFIXUPDEF_CALL_CONST(int, pmd_present, pmd_t) BTFIXUPDEF_CALL(void, pmd_clear, pmd_t *) -extern __inline__ int pmd_none(pmd_t pmd) +static inline int pmd_none(pmd_t pmd) { return !(pmd_val(pmd) & ~BTFIXUP_SETHI(none_mask)); } @@ -195,19 +195,19 @@ BTFIXUPDEF_HALF(pte_dirtyi) BTFIXUPDEF_HALF(pte_youngi) extern int pte_write(pte_t pte) __attribute_const__; -extern __inline__ int pte_write(pte_t pte) +static inline int pte_write(pte_t pte) { return pte_val(pte) & BTFIXUP_HALF(pte_writei); } extern int pte_dirty(pte_t pte) __attribute_const__; -extern __inline__ int pte_dirty(pte_t pte) +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & BTFIXUP_HALF(pte_dirtyi); } extern int pte_young(pte_t pte) __attribute_const__; -extern __inline__ int pte_young(pte_t pte) +static inline int pte_young(pte_t pte) { return pte_val(pte) & BTFIXUP_HALF(pte_youngi); } @@ -218,7 +218,7 @@ extern __inline__ int pte_young(pte_t pte) BTFIXUPDEF_HALF(pte_filei) extern int pte_file(pte_t pte) __attribute_const__; -extern __inline__ int pte_file(pte_t pte) +static inline int pte_file(pte_t pte) { return pte_val(pte) & BTFIXUP_HALF(pte_filei); } @@ -230,19 
+230,19 @@ BTFIXUPDEF_HALF(pte_mkcleani) BTFIXUPDEF_HALF(pte_mkoldi) extern pte_t pte_wrprotect(pte_t pte) __attribute_const__; -extern __inline__ pte_t pte_wrprotect(pte_t pte) +static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~BTFIXUP_HALF(pte_wrprotecti)); } extern pte_t pte_mkclean(pte_t pte) __attribute_const__; -extern __inline__ pte_t pte_mkclean(pte_t pte) +static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~BTFIXUP_HALF(pte_mkcleani)); } extern pte_t pte_mkold(pte_t pte) __attribute_const__; -extern __inline__ pte_t pte_mkold(pte_t pte) +static inline pte_t pte_mkold(pte_t pte) { return __pte(pte_val(pte) & ~BTFIXUP_HALF(pte_mkoldi)); } @@ -279,7 +279,7 @@ BTFIXUPDEF_CALL_CONST(pte_t, mk_pte_io, unsigned long, pgprot_t, int) BTFIXUPDEF_INT(pte_modify_mask) extern pte_t pte_modify(pte_t pte, pgprot_t newprot) __attribute_const__; -extern __inline__ pte_t pte_modify(pte_t pte, pgprot_t newprot) +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & BTFIXUP_INT(pte_modify_mask)) | pgprot_val(newprot)); @@ -386,13 +386,13 @@ extern struct ctx_list ctx_used; /* Head of used contexts list */ #define NO_CONTEXT -1 -extern __inline__ void remove_from_ctx_list(struct ctx_list *entry) +static inline void remove_from_ctx_list(struct ctx_list *entry) { entry->next->prev = entry->prev; entry->prev->next = entry->next; } -extern __inline__ void add_to_ctx_list(struct ctx_list *head, struct ctx_list *entry) +static inline void add_to_ctx_list(struct ctx_list *head, struct ctx_list *entry) { entry->next = head; (entry->prev = head->prev)->next = entry; @@ -401,7 +401,7 @@ extern __inline__ void add_to_ctx_list(struct ctx_list *head, struct ctx_list *e #define add_to_free_ctxlist(entry) add_to_ctx_list(&ctx_free, entry) #define add_to_used_ctxlist(entry) add_to_ctx_list(&ctx_used, entry) -extern __inline__ unsigned long +static inline unsigned long __get_phys (unsigned long addr) { switch (sparc_cpu_model){ @@ -416,7 +416,7 @@ __get_phys (unsigned long addr) } } -extern __inline__ int +static inline int __get_iospace (unsigned long addr) { switch (sparc_cpu_model){ diff --git a/include/asm-sparc/pgtsrmmu.h b/include/asm-sparc/pgtsrmmu.h index ee3b9d93187c..edeb9811e728 100644 --- a/include/asm-sparc/pgtsrmmu.h +++ b/include/asm-sparc/pgtsrmmu.h @@ -148,7 +148,7 @@ extern void *srmmu_nocache_pool; #define __nocache_fix(VADDR) __va(__nocache_pa(VADDR)) /* Accessing the MMU control register. 
*/ -extern __inline__ unsigned int srmmu_get_mmureg(void) +static inline unsigned int srmmu_get_mmureg(void) { unsigned int retval; __asm__ __volatile__("lda [%%g0] %1, %0\n\t" : @@ -157,14 +157,14 @@ extern __inline__ unsigned int srmmu_get_mmureg(void) return retval; } -extern __inline__ void srmmu_set_mmureg(unsigned long regval) +static inline void srmmu_set_mmureg(unsigned long regval) { __asm__ __volatile__("sta %0, [%%g0] %1\n\t" : : "r" (regval), "i" (ASI_M_MMUREGS) : "memory"); } -extern __inline__ void srmmu_set_ctable_ptr(unsigned long paddr) +static inline void srmmu_set_ctable_ptr(unsigned long paddr) { paddr = ((paddr >> 4) & SRMMU_CTX_PMASK); __asm__ __volatile__("sta %0, [%1] %2\n\t" : : @@ -173,7 +173,7 @@ extern __inline__ void srmmu_set_ctable_ptr(unsigned long paddr) "memory"); } -extern __inline__ unsigned long srmmu_get_ctable_ptr(void) +static inline unsigned long srmmu_get_ctable_ptr(void) { unsigned int retval; @@ -184,14 +184,14 @@ extern __inline__ unsigned long srmmu_get_ctable_ptr(void) return (retval & SRMMU_CTX_PMASK) << 4; } -extern __inline__ void srmmu_set_context(int context) +static inline void srmmu_set_context(int context) { __asm__ __volatile__("sta %0, [%1] %2\n\t" : : "r" (context), "r" (SRMMU_CTX_REG), "i" (ASI_M_MMUREGS) : "memory"); } -extern __inline__ int srmmu_get_context(void) +static inline int srmmu_get_context(void) { register int retval; __asm__ __volatile__("lda [%1] %2, %0\n\t" : @@ -201,7 +201,7 @@ extern __inline__ int srmmu_get_context(void) return retval; } -extern __inline__ unsigned int srmmu_get_fstatus(void) +static inline unsigned int srmmu_get_fstatus(void) { unsigned int retval; @@ -211,7 +211,7 @@ extern __inline__ unsigned int srmmu_get_fstatus(void) return retval; } -extern __inline__ unsigned int srmmu_get_faddr(void) +static inline unsigned int srmmu_get_faddr(void) { unsigned int retval; @@ -222,7 +222,7 @@ extern __inline__ unsigned int srmmu_get_faddr(void) } /* This is guaranteed on all SRMMU's. */ -extern __inline__ void srmmu_flush_whole_tlb(void) +static inline void srmmu_flush_whole_tlb(void) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t": : "r" (0x400), /* Flush entire TLB!! */ @@ -231,7 +231,7 @@ extern __inline__ void srmmu_flush_whole_tlb(void) } /* These flush types are not available on all chips... */ -extern __inline__ void srmmu_flush_tlb_ctx(void) +static inline void srmmu_flush_tlb_ctx(void) { __asm__ __volatile__("sta %%g0, [%0] %1\n\t": : "r" (0x300), /* Flush TLB ctx.. 
*/ @@ -239,7 +239,7 @@ extern __inline__ void srmmu_flush_tlb_ctx(void) } -extern __inline__ void srmmu_flush_tlb_region(unsigned long addr) +static inline void srmmu_flush_tlb_region(unsigned long addr) { addr &= SRMMU_PGDIR_MASK; __asm__ __volatile__("sta %%g0, [%0] %1\n\t": : @@ -249,7 +249,7 @@ extern __inline__ void srmmu_flush_tlb_region(unsigned long addr) } -extern __inline__ void srmmu_flush_tlb_segment(unsigned long addr) +static inline void srmmu_flush_tlb_segment(unsigned long addr) { addr &= SRMMU_REAL_PMD_MASK; __asm__ __volatile__("sta %%g0, [%0] %1\n\t": : @@ -258,7 +258,7 @@ extern __inline__ void srmmu_flush_tlb_segment(unsigned long addr) } -extern __inline__ void srmmu_flush_tlb_page(unsigned long page) +static inline void srmmu_flush_tlb_page(unsigned long page) { page &= PAGE_MASK; __asm__ __volatile__("sta %%g0, [%0] %1\n\t": : @@ -267,7 +267,7 @@ extern __inline__ void srmmu_flush_tlb_page(unsigned long page) } -extern __inline__ unsigned long srmmu_hwprobe(unsigned long vaddr) +static inline unsigned long srmmu_hwprobe(unsigned long vaddr) { unsigned long retval; @@ -279,7 +279,7 @@ extern __inline__ unsigned long srmmu_hwprobe(unsigned long vaddr) return retval; } -extern __inline__ int +static inline int srmmu_get_pte (unsigned long addr) { register unsigned long entry; diff --git a/include/asm-sparc/processor.h b/include/asm-sparc/processor.h index 5a7a1a8d29ac..6fbb3f0af8d8 100644 --- a/include/asm-sparc/processor.h +++ b/include/asm-sparc/processor.h @@ -79,7 +79,7 @@ struct thread_struct { extern unsigned long thread_saved_pc(struct task_struct *t); /* Do necessary setup to start up a newly executed thread. */ -extern __inline__ void start_thread(struct pt_regs * regs, unsigned long pc, +static inline void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) { register unsigned long zero asm("g1"); diff --git a/include/asm-sparc/psr.h b/include/asm-sparc/psr.h index 9778b8c8b15b..19c978051118 100644 --- a/include/asm-sparc/psr.h +++ b/include/asm-sparc/psr.h @@ -38,7 +38,7 @@ #ifndef __ASSEMBLY__ /* Get the %psr register. 
*/ -extern __inline__ unsigned int get_psr(void) +static inline unsigned int get_psr(void) { unsigned int psr; __asm__ __volatile__( @@ -53,7 +53,7 @@ extern __inline__ unsigned int get_psr(void) return psr; } -extern __inline__ void put_psr(unsigned int new_psr) +static inline void put_psr(unsigned int new_psr) { __asm__ __volatile__( "wr %0, 0x0, %%psr\n\t" @@ -72,7 +72,7 @@ extern __inline__ void put_psr(unsigned int new_psr) extern unsigned int fsr_storage; -extern __inline__ unsigned int get_fsr(void) +static inline unsigned int get_fsr(void) { unsigned int fsr = 0; diff --git a/include/asm-sparc/sbi.h b/include/asm-sparc/sbi.h index 739ccac5dcf2..86a603ac7b20 100644 --- a/include/asm-sparc/sbi.h +++ b/include/asm-sparc/sbi.h @@ -65,7 +65,7 @@ struct sbi_regs { #ifndef __ASSEMBLY__ -extern __inline__ int acquire_sbi(int devid, int mask) +static inline int acquire_sbi(int devid, int mask) { __asm__ __volatile__ ("swapa [%2] %3, %0" : "=r" (mask) : @@ -75,7 +75,7 @@ extern __inline__ int acquire_sbi(int devid, int mask) return mask; } -extern __inline__ void release_sbi(int devid, int mask) +static inline void release_sbi(int devid, int mask) { __asm__ __volatile__ ("sta %0, [%1] %2" : : "r" (mask), @@ -83,7 +83,7 @@ extern __inline__ void release_sbi(int devid, int mask) "i" (ASI_M_CTL)); } -extern __inline__ void set_sbi_tid(int devid, int targetid) +static inline void set_sbi_tid(int devid, int targetid) { __asm__ __volatile__ ("sta %0, [%1] %2" : : "r" (targetid), @@ -91,7 +91,7 @@ extern __inline__ void set_sbi_tid(int devid, int targetid) "i" (ASI_M_CTL)); } -extern __inline__ int get_sbi_ctl(int devid, int cfgno) +static inline int get_sbi_ctl(int devid, int cfgno) { int cfg; @@ -102,7 +102,7 @@ extern __inline__ int get_sbi_ctl(int devid, int cfgno) return cfg; } -extern __inline__ void set_sbi_ctl(int devid, int cfgno, int cfg) +static inline void set_sbi_ctl(int devid, int cfgno, int cfg) { __asm__ __volatile__ ("sta %0, [%1] %2" : : "r" (cfg), diff --git a/include/asm-sparc/sbus.h b/include/asm-sparc/sbus.h index 3a8b3908728a..a13cddcecec5 100644 --- a/include/asm-sparc/sbus.h +++ b/include/asm-sparc/sbus.h @@ -28,12 +28,12 @@ * numbers + offsets, and vice versa. 
*/ -extern __inline__ unsigned long sbus_devaddr(int slotnum, unsigned long offset) +static inline unsigned long sbus_devaddr(int slotnum, unsigned long offset) { return (unsigned long) (SUN_SBUS_BVADDR+((slotnum)<<25)+(offset)); } -extern __inline__ int sbus_dev_slot(unsigned long dev_addr) +static inline int sbus_dev_slot(unsigned long dev_addr) { return (int) (((dev_addr)-SUN_SBUS_BVADDR)>>25); } @@ -80,7 +80,7 @@ struct sbus_bus { extern struct sbus_bus *sbus_root; -extern __inline__ int +static inline int sbus_is_slave(struct sbus_dev *dev) { /* XXX Have to write this for sun4c's */ diff --git a/include/asm-sparc/smp.h b/include/asm-sparc/smp.h index 4f96d8333a12..580c51d011df 100644 --- a/include/asm-sparc/smp.h +++ b/include/asm-sparc/smp.h @@ -60,22 +60,22 @@ BTFIXUPDEF_BLACKBOX(load_current) #define smp_cross_call(func,arg1,arg2,arg3,arg4,arg5) BTFIXUP_CALL(smp_cross_call)(func,arg1,arg2,arg3,arg4,arg5) #define smp_message_pass(target,msg,data,wait) BTFIXUP_CALL(smp_message_pass)(target,msg,data,wait) -extern __inline__ void xc0(smpfunc_t func) { smp_cross_call(func, 0, 0, 0, 0, 0); } -extern __inline__ void xc1(smpfunc_t func, unsigned long arg1) +static inline void xc0(smpfunc_t func) { smp_cross_call(func, 0, 0, 0, 0, 0); } +static inline void xc1(smpfunc_t func, unsigned long arg1) { smp_cross_call(func, arg1, 0, 0, 0, 0); } -extern __inline__ void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2) +static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2) { smp_cross_call(func, arg1, arg2, 0, 0, 0); } -extern __inline__ void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2, +static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3) { smp_cross_call(func, arg1, arg2, arg3, 0, 0); } -extern __inline__ void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2, +static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { smp_cross_call(func, arg1, arg2, arg3, arg4, 0); } -extern __inline__ void xc5(smpfunc_t func, unsigned long arg1, unsigned long arg2, +static inline void xc5(smpfunc_t func, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { smp_cross_call(func, arg1, arg2, arg3, arg4, arg5); } -extern __inline__ int smp_call_function(void (*func)(void *info), void *info, int nonatomic, int wait) +static inline int smp_call_function(void (*func)(void *info), void *info, int nonatomic, int wait) { xc1((smpfunc_t)func, (unsigned long)info); return 0; @@ -84,16 +84,16 @@ extern __inline__ int smp_call_function(void (*func)(void *info), void *info, in extern __volatile__ int __cpu_number_map[NR_CPUS]; extern __volatile__ int __cpu_logical_map[NR_CPUS]; -extern __inline__ int cpu_logical_map(int cpu) +static inline int cpu_logical_map(int cpu) { return __cpu_logical_map[cpu]; } -extern __inline__ int cpu_number_map(int cpu) +static inline int cpu_number_map(int cpu) { return __cpu_number_map[cpu]; } -extern __inline__ int hard_smp4m_processor_id(void) +static inline int hard_smp4m_processor_id(void) { int cpuid; @@ -104,7 +104,7 @@ extern __inline__ int hard_smp4m_processor_id(void) return cpuid; } -extern __inline__ int hard_smp4d_processor_id(void) +static inline int hard_smp4d_processor_id(void) { int cpuid; @@ -114,7 +114,7 @@ extern __inline__ int hard_smp4d_processor_id(void) } #ifndef MODULE -extern __inline__ int hard_smp_processor_id(void) +static inline int 
hard_smp_processor_id(void) { int cpuid; @@ -136,7 +136,7 @@ extern __inline__ int hard_smp_processor_id(void) return cpuid; } #else -extern __inline__ int hard_smp_processor_id(void) +static inline int hard_smp_processor_id(void) { int cpuid; diff --git a/include/asm-sparc/smpprim.h b/include/asm-sparc/smpprim.h index 9b9c28ed748e..e7b6d346ae10 100644 --- a/include/asm-sparc/smpprim.h +++ b/include/asm-sparc/smpprim.h @@ -15,7 +15,7 @@ * atomic. */ -extern __inline__ __volatile__ char test_and_set(void *addr) +static inline __volatile__ char test_and_set(void *addr) { char state = 0; @@ -27,7 +27,7 @@ extern __inline__ __volatile__ char test_and_set(void *addr) } /* Initialize a spin-lock. */ -extern __inline__ __volatile__ smp_initlock(void *spinlock) +static inline __volatile__ smp_initlock(void *spinlock) { /* Unset the lock. */ *((unsigned char *) spinlock) = 0; @@ -36,7 +36,7 @@ extern __inline__ __volatile__ smp_initlock(void *spinlock) } /* This routine spins until it acquires the lock at ADDR. */ -extern __inline__ __volatile__ smp_lock(void *addr) +static inline __volatile__ smp_lock(void *addr) { while(test_and_set(addr) == 0xff) ; @@ -46,7 +46,7 @@ extern __inline__ __volatile__ smp_lock(void *addr) } /* This routine releases the lock at ADDR. */ -extern __inline__ __volatile__ smp_unlock(void *addr) +static inline __volatile__ smp_unlock(void *addr) { *((unsigned char *) addr) = 0; } diff --git a/include/asm-sparc/spinlock.h b/include/asm-sparc/spinlock.h index 111727a2bb4e..e344c98a6f5f 100644 --- a/include/asm-sparc/spinlock.h +++ b/include/asm-sparc/spinlock.h @@ -17,7 +17,7 @@ #define __raw_spin_unlock_wait(lock) \ do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) -extern __inline__ void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(raw_spinlock_t *lock) { __asm__ __volatile__( "\n1:\n\t" @@ -37,7 +37,7 @@ extern __inline__ void __raw_spin_lock(raw_spinlock_t *lock) : "g2", "memory", "cc"); } -extern __inline__ int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(raw_spinlock_t *lock) { unsigned int result; __asm__ __volatile__("ldstub [%1], %0" @@ -47,7 +47,7 @@ extern __inline__ int __raw_spin_trylock(raw_spinlock_t *lock) return (result == 0); } -extern __inline__ void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(raw_spinlock_t *lock) { __asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory"); } @@ -78,7 +78,7 @@ extern __inline__ void __raw_spin_unlock(raw_spinlock_t *lock) * * Unfortunately this scheme limits us to ~16,000,000 cpus. 
*/ -extern __inline__ void __read_lock(raw_rwlock_t *rw) +static inline void __read_lock(raw_rwlock_t *rw) { register raw_rwlock_t *lp asm("g1"); lp = rw; @@ -98,7 +98,7 @@ do { unsigned long flags; \ local_irq_restore(flags); \ } while(0) -extern __inline__ void __read_unlock(raw_rwlock_t *rw) +static inline void __read_unlock(raw_rwlock_t *rw) { register raw_rwlock_t *lp asm("g1"); lp = rw; diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h index 3557781a4bfd..1f6b71f9e1b6 100644 --- a/include/asm-sparc/system.h +++ b/include/asm-sparc/system.h @@ -204,7 +204,7 @@ static inline unsigned long getipl(void) BTFIXUPDEF_CALL(void, ___xchg32, void) #endif -extern __inline__ unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val) +static inline unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val) { #ifdef CONFIG_SMP __asm__ __volatile__("swap [%2], %0" diff --git a/include/asm-sparc/traps.h b/include/asm-sparc/traps.h index 6690ab956ea6..f62c7f878ee1 100644 --- a/include/asm-sparc/traps.h +++ b/include/asm-sparc/traps.h @@ -22,7 +22,7 @@ struct tt_entry { /* We set this to _start in system setup. */ extern struct tt_entry *sparc_ttable; -extern __inline__ unsigned long get_tbr(void) +static inline unsigned long get_tbr(void) { unsigned long tbr; -- cgit v1.2.3 From ce0fe7e70a0ad11097a3773e9f3f0de3d859edf0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Oct 2005 17:43:06 +0100 Subject: [PATCH] bfs endianness annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/bfs/dir.c | 2 +- fs/bfs/inode.c | 2 +- include/linux/bfs_fs.h | 42 +++++++++++++++++++++--------------------- 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index e240c335eb23..5af928fa0449 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -108,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, inode->i_mapping->a_ops = &bfs_aops; inode->i_mode = mode; inode->i_ino = ino; - BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino); + BFS_I(inode)->i_dsk_ino = ino; BFS_I(inode)->i_sblock = 0; BFS_I(inode)->i_eblock = 0; insert_inode_hash(inode); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index c7b39aa279d7..868af0f224fb 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -357,7 +357,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) } info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ - info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS; + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start))>>BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; info->si_lf_sblk = 0; diff --git a/include/linux/bfs_fs.h b/include/linux/bfs_fs.h index c1237aa92e38..8ed6dfdcd783 100644 --- a/include/linux/bfs_fs.h +++ b/include/linux/bfs_fs.h @@ -20,19 +20,19 @@ /* BFS inode layout on disk */ struct bfs_inode { - __u16 i_ino; + __le16 i_ino; __u16 i_unused; - __u32 i_sblock; - __u32 i_eblock; - __u32 i_eoffset; - __u32 i_vtype; - __u32 i_mode; - __s32 i_uid; - __s32 i_gid; - __u32 i_nlink; - __u32 i_atime; - __u32 i_mtime; - __u32 i_ctime; + __le32 i_sblock; + __le32 i_eblock; + __le32 i_eoffset; + __le32 i_vtype; + __le32 i_mode; + __le32 i_uid; + __le32 i_gid; + __le32 i_nlink; + __le32 i_atime; + __le32 i_mtime; + __le32 i_ctime; __u32 i_padding[4]; }; @@ -41,17 +41,17 @@ struct bfs_inode { #define 
BFS_DIRS_PER_BLOCK 32 struct bfs_dirent { - __u16 ino; + __le16 ino; char name[BFS_NAMELEN]; }; /* BFS superblock layout on disk */ struct bfs_super_block { - __u32 s_magic; - __u32 s_start; - __u32 s_end; - __s32 s_from; - __s32 s_to; + __le32 s_magic; + __le32 s_start; + __le32 s_end; + __le32 s_from; + __le32 s_to; __s32 s_bfrom; __s32 s_bto; char s_fsname[6]; @@ -66,15 +66,15 @@ struct bfs_super_block { #define BFS_INO2OFF(ino) \ ((__u32)(((ino) - BFS_ROOT_INO) * sizeof(struct bfs_inode)) + BFS_BSIZE) #define BFS_NZFILESIZE(ip) \ - ((cpu_to_le32((ip)->i_eoffset) + 1) - cpu_to_le32((ip)->i_sblock) * BFS_BSIZE) + ((le32_to_cpu((ip)->i_eoffset) + 1) - le32_to_cpu((ip)->i_sblock) * BFS_BSIZE) #define BFS_FILESIZE(ip) \ ((ip)->i_sblock == 0 ? 0 : BFS_NZFILESIZE(ip)) #define BFS_FILEBLOCKS(ip) \ - ((ip)->i_sblock == 0 ? 0 : (cpu_to_le32((ip)->i_eblock) + 1) - cpu_to_le32((ip)->i_sblock)) + ((ip)->i_sblock == 0 ? 0 : (le32_to_cpu((ip)->i_eblock) + 1) - le32_to_cpu((ip)->i_sblock)) #define BFS_UNCLEAN(bfs_sb, sb) \ - ((cpu_to_le32(bfs_sb->s_from) != -1) && (cpu_to_le32(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) + ((le32_to_cpu(bfs_sb->s_from) != -1) && (le32_to_cpu(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) #endif /* _LINUX_BFS_FS_H */ -- cgit v1.2.3 From fad1c45c939bb246a488be1fa06f539e85b80545 Mon Sep 17 00:00:00 2001 From: Allan Graves Date: Tue, 4 Oct 2005 14:53:52 -0400 Subject: [PATCH] uml: Fix sysrq-r support for skas mode The old code had the IP and SP coming from the registers in the thread struct, which are completely wrong since those are the userspace registers. This fixes that by pulling the correct values from the jmp_buf in which the kernel state of each thread is stored. Signed-off-by: Allan Graves Signed-off-by: Jeff Dike Signed-off-by: Linus Torvalds --- arch/um/include/registers.h | 12 +----------- arch/um/include/sysdep-x86_64/ptrace.h | 4 ---- arch/um/kernel/sysrq.c | 8 +------- arch/um/os-Linux/sys-i386/registers.c | 19 +++++++++---------- arch/um/os-Linux/sys-x86_64/registers.c | 19 +++++++++---------- arch/um/sys-i386/sysrq.c | 13 +------------ include/asm-um/processor-generic.h | 23 ++++++++++------------- include/asm-um/processor-i386.h | 15 ++++----------- include/asm-um/processor-x86_64.h | 14 +++----------- 9 files changed, 38 insertions(+), 89 deletions(-) (limited to 'include') diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h index 0a35e6d0baa0..4892e5fcef07 100644 --- a/arch/um/include/registers.h +++ b/arch/um/include/registers.h @@ -15,16 +15,6 @@ extern void save_registers(int pid, union uml_pt_regs *regs); extern void restore_registers(int pid, union uml_pt_regs *regs); extern void init_registers(int pid); extern void get_safe_registers(unsigned long * regs); +extern void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h index 331aa2d1f3f5..8f0656766c21 100644 --- a/arch/um/include/sysdep-x86_64/ptrace.h +++ b/arch/um/include/sysdep-x86_64/ptrace.h @@ -218,10 +218,6 @@ struct syscall_args { case RBP: UPT_RBP(regs) = __upt_val; break; \ case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break; \ case CS: UPT_CS(regs) = __upt_val; break; \ - case DS: UPT_DS(regs) = __upt_val; break; \ - case ES: UPT_ES(regs) = __upt_val; break; \ - case FS: UPT_FS(regs) = __upt_val; break; \ - case GS: UPT_GS(regs) = __upt_val; break; \ case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break; \ default : \ panic("Bad register in UPT_SET : %d\n", reg); \ diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index f80850091e79..b331e970002f 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -62,13 +62,7 @@ void show_stack(struct task_struct *task, unsigned long *esp) if (esp == NULL) { if (task != current && task != NULL) { - /* XXX: Isn't this bogus? I.e. isn't this the - * *userspace* stack of this task? If not so, use this - * even when task == current (as in i386). - */ esp = (unsigned long *) KSTK_ESP(task); - /* Which one? No actual difference - just coding style.*/ - //esp = (unsigned long *) PT_REGS_IP(&task->thread.regs); } else { esp = (unsigned long *) &esp; } @@ -84,5 +78,5 @@ void show_stack(struct task_struct *task, unsigned long *esp) } printk("Call Trace: \n"); - show_trace(current, esp); + show_trace(task, esp); } diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c index 3125d320722c..aee4812333c6 100644 --- a/arch/um/os-Linux/sys-i386/registers.c +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -5,6 +5,7 @@ #include #include +#include #include "sysdep/ptrace_user.h" #include "sysdep/ptrace.h" #include "uml-config.h" @@ -126,13 +127,11 @@ void get_safe_registers(unsigned long *regs) memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) +{ + struct __jmp_buf_tag *jmpbuf = buffer; + + UPT_SET(uml_regs, EIP, jmpbuf->__jmpbuf[JB_PC]); + UPT_SET(uml_regs, UESP, jmpbuf->__jmpbuf[JB_SP]); + UPT_SET(uml_regs, EBP, jmpbuf->__jmpbuf[JB_BP]); +} diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c index 44438d15c3d6..4b638dfb52b0 100644 --- a/arch/um/os-Linux/sys-x86_64/registers.c +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -5,6 +5,7 @@ #include #include +#include #include "ptrace_user.h" #include "uml-config.h" #include "skas_ptregs.h" @@ -74,13 +75,11 @@ void get_safe_registers(unsigned long *regs) memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) +{ + struct __jmp_buf_tag *jmpbuf = buffer; + + UPT_SET(uml_regs, RIP, jmpbuf->__jmpbuf[JB_PC]); + UPT_SET(uml_regs, RSP, jmpbuf->__jmpbuf[JB_RSP]); + UPT_SET(uml_regs, RBP, jmpbuf->__jmpbuf[JB_RBP]); +} diff --git a/arch/um/sys-i386/sysrq.c b/arch/um/sys-i386/sysrq.c index e3706d15c4f5..d5244f070539 100644 --- a/arch/um/sys-i386/sysrq.c +++ b/arch/um/sys-i386/sysrq.c @@ -88,9 +88,7 @@ void show_trace(struct task_struct* task, unsigned long * stack) task = current; if (task != current) { - //ebp = (unsigned long) KSTK_EBP(task); - /* Which one? No actual difference - just coding style.*/ - ebp = (unsigned long) PT_REGS_EBP(&task->thread.regs); + ebp = (unsigned long) KSTK_EBP(task); } else { asm ("movl %%ebp, %0" : "=r" (ebp) : ); } @@ -99,15 +97,6 @@ void show_trace(struct task_struct* task, unsigned long * stack) ((unsigned long)stack & (~(THREAD_SIZE - 1))); print_context_stack(context, stack, ebp); - /*while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack; - if (__kernel_text_address(addr)) { - printk("%08lx: [<%08lx>]", (unsigned long) stack, addr); - print_symbol(" %s", addr); - printk("\n"); - } - stack++; - }*/ printk("\n"); } diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h index 2d242360c3d6..075771c371f6 100644 --- a/include/asm-um/processor-generic.h +++ b/include/asm-um/processor-generic.h @@ -13,6 +13,7 @@ struct task_struct; #include "linux/config.h" #include "asm/ptrace.h" #include "choose-mode.h" +#include "registers.h" struct mm_struct; @@ -136,19 +137,15 @@ extern struct cpuinfo_um cpu_data[]; #define current_cpu_data boot_cpu_data #endif -#define KSTK_EIP(tsk) (PT_REGS_IP(&tsk->thread.regs)) -#define KSTK_ESP(tsk) (PT_REGS_SP(&tsk->thread.regs)) -#define get_wchan(p) (0) +#ifdef CONFIG_MODE_SKAS +#define KSTK_REG(tsk, reg) \ + ({ union uml_pt_regs regs; \ + get_thread_regs(®s, tsk->thread.mode.skas.switch_buf); \ + UPT_REG(®s, reg); }) +#else +#define KSTK_REG(tsk, reg) (0xbadbabe) #endif +#define get_wchan(p) (0) -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +#endif diff --git a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h index 431bad3ae9d7..4108a579eb92 100644 --- a/include/asm-um/processor-i386.h +++ b/include/asm-um/processor-i386.h @@ -43,17 +43,10 @@ static inline void rep_nop(void) #define ARCH_IS_STACKGROW(address) \ (address + 32 >= UPT_SP(¤t->thread.regs.regs)) +#define KSTK_EIP(tsk) KSTK_REG(tsk, EIP) +#define KSTK_ESP(tsk) KSTK_REG(tsk, UESP) +#define KSTK_EBP(tsk) KSTK_REG(tsk, EBP) + #include "asm/processor-generic.h" #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/include/asm-um/processor-x86_64.h b/include/asm-um/processor-x86_64.h index 0beb9a42ae05..e1e1255a1d36 100644 --- a/include/asm-um/processor-x86_64.h +++ b/include/asm-um/processor-x86_64.h @@ -36,17 +36,9 @@ extern inline void rep_nop(void) #define ARCH_IS_STACKGROW(address) \ (address + 128 >= UPT_SP(¤t->thread.regs.regs)) +#define KSTK_EIP(tsk) KSTK_REG(tsk, RIP) +#define KSTK_ESP(tsk) KSTK_REG(tsk, RSP) + #include "asm/processor-generic.h" #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From d78795b6930956fb66238d4d26242482d4a31470 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 4 Oct 2005 23:17:51 +0100 Subject: [ARM] 2949/1: Hynix h720x Run mode Patch from Sascha Hauer After coming out of idle mode the h720x goes into slow mode. Switch it back to run mode. Signed-off-by: Sascha Hauer Signed-off-by: Russell King --- include/asm-arm/arch-h720x/system.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-h720x/system.h b/include/asm-arm/arch-h720x/system.h index 0b025e227ec2..09eda84592ff 100644 --- a/include/asm-arm/arch-h720x/system.h +++ b/include/asm-arm/arch-h720x/system.h @@ -17,9 +17,11 @@ static void arch_idle(void) { CPU_REG (PMU_BASE, PMU_MODE) = PMU_MODE_IDLE; - __asm__ __volatile__( - "mov r0, r0\n\t" - "mov r0, r0"); + nop(); + nop(); + CPU_REG (PMU_BASE, PMU_MODE) = PMU_MODE_RUN; + nop(); + nop(); } -- cgit v1.2.3 From 0a5b0aa8a331f4346b4b02bc653107304a6abdc5 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 4 Oct 2005 23:17:52 +0100 Subject: [ARM] 2950/1: i.MX gpio setup function Patch from Sascha Hauer Current implementation of imx_gpio_mode does not allow to configure all alternate routing possibilities of the i.MX. With this patch every bit in the gpio setup registers has a corresponding bit in the gpio_mode parameter, so every routing should be possible now. 
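As a reading aid for the new encoding, here is a small standalone sketch that decodes a gpio_mode word using the bit-field macros added by the hunk below (GPIO_PORT_SHIFT, GPIO_OCR_SHIFT, GPIO_AOUT_SHIFT, GPIO_BOUT_SHIFT, GPIO_GIUS). decode_gpio_mode() is a hypothetical helper for illustration only: it mirrors the field extraction done by the patched imx_gpio_mode() but performs no register access, and GPIO_OUT is left out of the example mode word because its value is not visible in this hunk.

#include <stdio.h>

/* Bit-field macros as defined in the patched imx-regs.h */
#define GPIO_PIN_MASK	0x1f
#define GPIO_PORT_SHIFT	5
#define GPIO_PORT_MASK	(0x3 << 5)
#define GPIO_PORTC	(2 << 5)
#define GPIO_OCR_SHIFT	10
#define GPIO_OCR_MASK	(3 << 10)
#define GPIO_BIN	(1 << 10)
#define GPIO_AOUT_SHIFT	12
#define GPIO_BOUT_SHIFT	14
#define GPIO_GIUS	(1 << 16)

/* Decode a gpio_mode word the same way the patched imx_gpio_mode()
 * does before it writes the GIUS/OCR/ICONFA/ICONFB registers. */
static void decode_gpio_mode(unsigned int gpio_mode)
{
	unsigned int pin    = gpio_mode & GPIO_PIN_MASK;
	unsigned int port   = (gpio_mode & GPIO_PORT_MASK) >> GPIO_PORT_SHIFT;
	unsigned int ocr    = (gpio_mode & GPIO_OCR_MASK) >> GPIO_OCR_SHIFT;
	unsigned int iconfa = (gpio_mode >> GPIO_AOUT_SHIFT) & 3;
	unsigned int iconfb = (gpio_mode >> GPIO_BOUT_SHIFT) & 3;

	printf("port %c pin %2u: ocr=%u iconfa=%u iconfb=%u gius=%d\n",
	       'A' + port, pin, ocr, iconfa, iconfb,
	       (gpio_mode & GPIO_GIUS) != 0);
}

int main(void)
{
	/* Based on PC24_BIN_UART3_RI from the patch, minus GPIO_OUT
	 * (its value is not shown in this hunk). */
	decode_gpio_mode(GPIO_GIUS | GPIO_PORTC | GPIO_BIN | 24);
	return 0;
}

With the example word this prints "port C pin 24: ocr=1 iconfa=0 iconfb=0 gius=1", i.e. the pin is handed to the GPIO In Use register and its output mux selects the B_IN path, which is exactly the routing the new PC24_BIN_UART3_RI define requests.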
Signed-off-by: Sascha Hauer Signed-off-by: Russell King --- arch/arm/mach-imx/generic.c | 23 ++++++++++--------- arch/arm/mach-imx/mx1ads.c | 2 +- include/asm-arm/arch-imx/imx-regs.h | 46 +++++++++++++++++++++++++++---------- 3 files changed, 47 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-imx/generic.c b/arch/arm/mach-imx/generic.c index 41e5849ae8da..f8a742bb2d5b 100644 --- a/arch/arm/mach-imx/generic.c +++ b/arch/arm/mach-imx/generic.c @@ -28,14 +28,15 @@ #include #include #include +#include #include void imx_gpio_mode(int gpio_mode) { unsigned int pin = gpio_mode & GPIO_PIN_MASK; - unsigned int port = (gpio_mode & GPIO_PORT_MASK) >> 5; - unsigned int ocr = (gpio_mode & GPIO_OCR_MASK) >> 10; + unsigned int port = (gpio_mode & GPIO_PORT_MASK) >> GPIO_PORT_SHIFT; + unsigned int ocr = (gpio_mode & GPIO_OCR_MASK) >> GPIO_OCR_SHIFT; unsigned int tmp; /* Pullup enable */ @@ -57,7 +58,7 @@ void imx_gpio_mode(int gpio_mode) GPR(port) &= ~(1<> GPIO_AOUT_SHIFT) & 3) << (pin * 2); + ICONFB1(port) &= ~( 3<<(pin*2)); + ICONFB1(port) |= ((gpio_mode >> GPIO_BOUT_SHIFT) & 3) << (pin * 2); } else { tmp = OCR2(port); tmp &= ~( 3<<((pin-16)*2)); tmp |= (ocr << ((pin-16)*2)); OCR2(port) = tmp; - if( gpio_mode & GPIO_AOUT ) - ICONFA2(port) &= ~( 3<<((pin-16)*2)); - if( gpio_mode & GPIO_BOUT ) - ICONFB2(port) &= ~( 3<<((pin-16)*2)); + ICONFA2(port) &= ~( 3<<((pin-16)*2)); + ICONFA2(port) |= ((gpio_mode >> GPIO_AOUT_SHIFT) & 3) << ((pin-16) * 2); + ICONFB2(port) &= ~( 3<<((pin-16)*2)); + ICONFB2(port) |= ((gpio_mode >> GPIO_BOUT_SHIFT) & 3) << ((pin-16) * 2); } } diff --git a/arch/arm/mach-imx/mx1ads.c b/arch/arm/mach-imx/mx1ads.c index 5d25434d332c..a7511ddfe364 100644 --- a/arch/arm/mach-imx/mx1ads.c +++ b/arch/arm/mach-imx/mx1ads.c @@ -55,7 +55,7 @@ static void __init mx1ads_init(void) { #ifdef CONFIG_LEDS - imx_gpio_mode(GPIO_PORTA | GPIO_OUT | GPIO_GPIO | 2); + imx_gpio_mode(GPIO_PORTA | GPIO_OUT | 2); #endif platform_add_devices(devices, ARRAY_SIZE(devices)); } diff --git a/include/asm-arm/arch-imx/imx-regs.h b/include/asm-arm/arch-imx/imx-regs.h index 93b840e8fa60..229f7008d74f 100644 --- a/include/asm-arm/arch-imx/imx-regs.h +++ b/include/asm-arm/arch-imx/imx-regs.h @@ -76,6 +76,7 @@ #define GPIO_PIN_MASK 0x1f #define GPIO_PORT_MASK (0x3 << 5) +#define GPIO_PORT_SHIFT 5 #define GPIO_PORTA (0<<5) #define GPIO_PORTB (1<<5) #define GPIO_PORTC (2<<5) @@ -88,24 +89,37 @@ #define GPIO_PF (0<<9) #define GPIO_AF (1<<9) +#define GPIO_OCR_SHIFT 10 #define GPIO_OCR_MASK (3<<10) #define GPIO_AIN (0<<10) #define GPIO_BIN (1<<10) #define GPIO_CIN (2<<10) -#define GPIO_GPIO (3<<10) +#define GPIO_DR (3<<10) -#define GPIO_AOUT (1<<12) -#define GPIO_BOUT (1<<13) +#define GPIO_AOUT_SHIFT 12 +#define GPIO_AOUT_MASK (3<<12) +#define GPIO_AOUT (0<<12) +#define GPIO_AOUT_ISR (1<<12) +#define GPIO_AOUT_0 (2<<12) +#define GPIO_AOUT_1 (3<<12) + +#define GPIO_BOUT_SHIFT 14 +#define GPIO_BOUT_MASK (3<<14) +#define GPIO_BOUT (0<<14) +#define GPIO_BOUT_ISR (1<<14) +#define GPIO_BOUT_0 (2<<14) +#define GPIO_BOUT_1 (3<<14) + +#define GPIO_GIUS (1<<16) /* assignements for GPIO alternate/primary functions */ /* FIXME: This list is not completed. 
The correct directions are * missing on some (many) pins */ -#define PA0_PF_A24 ( GPIO_PORTA | GPIO_PF | 0 ) -#define PA0_AIN_SPI2_CLK ( GPIO_PORTA | GPIO_OUT | GPIO_AIN | 0 ) +#define PA0_AIN_SPI2_CLK ( GPIO_GIUS | GPIO_PORTA | GPIO_OUT | 0 ) #define PA0_AF_ETMTRACESYNC ( GPIO_PORTA | GPIO_AF | 0 ) -#define PA1_AOUT_SPI2_RXD ( GPIO_PORTA | GPIO_IN | GPIO_AOUT | 1 ) +#define PA1_AOUT_SPI2_RXD ( GPIO_GIUS | GPIO_PORTA | GPIO_IN | 1 ) #define PA1_PF_TIN ( GPIO_PORTA | GPIO_PF | 1 ) #define PA2_PF_PWM0 ( GPIO_PORTA | GPIO_OUT | GPIO_PF | 2 ) #define PA3_PF_CSI_MCLK ( GPIO_PORTA | GPIO_PF | 3 ) @@ -123,7 +137,7 @@ #define PA15_PF_I2C_SDA ( GPIO_PORTA | GPIO_OUT | GPIO_PF | 15 ) #define PA16_PF_I2C_SCL ( GPIO_PORTA | GPIO_OUT | GPIO_PF | 16 ) #define PA17_AF_ETMTRACEPKT4 ( GPIO_PORTA | GPIO_AF | 17 ) -#define PA17_AIN_SPI2_SS ( GPIO_PORTA | GPIO_AIN | 17 ) +#define PA17_AIN_SPI2_SS ( GPIO_GIUS | GPIO_PORTA | GPIO_OUT | 17 ) #define PA18_AF_ETMTRACEPKT5 ( GPIO_PORTA | GPIO_AF | 18 ) #define PA19_AF_ETMTRACEPKT6 ( GPIO_PORTA | GPIO_AF | 19 ) #define PA20_AF_ETMTRACEPKT7 ( GPIO_PORTA | GPIO_AF | 20 ) @@ -191,19 +205,27 @@ #define PC15_PF_SPI1_SS ( GPIO_PORTC | GPIO_PF | 15 ) #define PC16_PF_SPI1_MISO ( GPIO_PORTC | GPIO_PF | 16 ) #define PC17_PF_SPI1_MOSI ( GPIO_PORTC | GPIO_PF | 17 ) +#define PC24_BIN_UART3_RI ( GPIO_GIUS | GPIO_PORTC | GPIO_OUT | GPIO_BIN | 24 ) +#define PC25_BIN_UART3_DSR ( GPIO_GIUS | GPIO_PORTC | GPIO_OUT | GPIO_BIN | 25 ) +#define PC26_AOUT_UART3_DTR ( GPIO_GIUS | GPIO_PORTC | GPIO_IN | 26 ) +#define PC27_BIN_UART3_DCD ( GPIO_GIUS | GPIO_PORTC | GPIO_OUT | GPIO_BIN | 27 ) +#define PC28_BIN_UART3_CTS ( GPIO_GIUS | GPIO_PORTC | GPIO_OUT | GPIO_BIN | 28 ) +#define PC29_AOUT_UART3_RTS ( GPIO_GIUS | GPIO_PORTC | GPIO_IN | 29 ) +#define PC30_BIN_UART3_TX ( GPIO_GIUS | GPIO_PORTC | GPIO_BIN | 30 ) +#define PC31_AOUT_UART3_RX ( GPIO_GIUS | GPIO_PORTC | GPIO_IN | 31) #define PD6_PF_LSCLK ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 6 ) #define PD7_PF_REV ( GPIO_PORTD | GPIO_PF | 7 ) #define PD7_AF_UART2_DTR ( GPIO_PORTD | GPIO_IN | GPIO_AF | 7 ) -#define PD7_AIN_SPI2_SCLK ( GPIO_PORTD | GPIO_AIN | 7 ) +#define PD7_AIN_SPI2_SCLK ( GPIO_GIUS | GPIO_PORTD | GPIO_AIN | 7 ) #define PD8_PF_CLS ( GPIO_PORTD | GPIO_PF | 8 ) #define PD8_AF_UART2_DCD ( GPIO_PORTD | GPIO_OUT | GPIO_AF | 8 ) -#define PD8_AIN_SPI2_SS ( GPIO_PORTD | GPIO_AIN | 8 ) +#define PD8_AIN_SPI2_SS ( GPIO_GIUS | GPIO_PORTD | GPIO_AIN | 8 ) #define PD9_PF_PS ( GPIO_PORTD | GPIO_PF | 9 ) #define PD9_AF_UART2_RI ( GPIO_PORTD | GPIO_OUT | GPIO_AF | 9 ) -#define PD9_AOUT_SPI2_RXD ( GPIO_PORTD | GPIO_IN | GPIO_AOUT | 9 ) +#define PD9_AOUT_SPI2_RXD ( GPIO_GIUS | GPIO_PORTD | GPIO_IN | 9 ) #define PD10_PF_SPL_SPR ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 10 ) #define PD10_AF_UART2_DSR ( GPIO_PORTD | GPIO_OUT | GPIO_AF | 10 ) -#define PD10_AIN_SPI2_TXD ( GPIO_PORTD | GPIO_OUT | GPIO_AIN | 10 ) +#define PD10_AIN_SPI2_TXD ( GPIO_GIUS | GPIO_PORTD | GPIO_OUT | 10 ) #define PD11_PF_CONTRAST ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 11 ) #define PD12_PF_ACD_OE ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 12 ) #define PD13_PF_LP_HSYNC ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 13 ) @@ -225,7 +247,7 @@ #define PD29_PF_LD14 ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 29 ) #define PD30_PF_LD15 ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 30 ) #define PD31_PF_TMR2OUT ( GPIO_PORTD | GPIO_PF | 31 ) -#define PD31_BIN_SPI2_TXD ( GPIO_PORTD | GPIO_BIN | 31 ) +#define PD31_BIN_SPI2_TXD ( GPIO_GIUS | GPIO_PORTD | GPIO_BIN | 31 ) /* * PWM controller -- cgit v1.2.3 From 
6d2553612fa329979e6423a5f2410fd7be5aa902 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Oct 2005 15:55:51 -0700 Subject: [INET]: Shrink struct inet_ehash_bucket on 32 bits UP No need to align struct inet_ehash_bucket on a 8 bytes boundary. On 32 bits Uniprocessor, that's a waste of 4 bytes per struct (50 %) On other platforms, the attribute is useless, natual alignement is already 8. platform | Size before | Size after patch -------------+-------------+------------------ 32 bits, UP | 8 | 4 32 bits, SMP | 8 | 8 64 bits, UP | 8 | 8 64 bits, SMP | 16 | 16 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 35f49e65e295..f50f95968340 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -40,7 +40,7 @@ struct inet_ehash_bucket { rwlock_t lock; struct hlist_head chain; -} __attribute__((__aligned__(8))); +}; /* There are a few simple rules, which allow for local port reuse by * an application. In essence: -- cgit v1.2.3 From ce12467d44d7394731ec9e91e032d50b04e502f6 Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Tue, 4 Oct 2005 16:32:38 -0700 Subject: [PATCH] Fix broken IXP4xx GPIO macro Macro ended up backwards during one of cleanups. Found by Alessandro Zummo. Signed-off-by: Deepak Saxena Signed-off-by: Linus Torvalds --- include/asm-arm/arch-ixp4xx/platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/arch-ixp4xx/platform.h b/include/asm-arm/arch-ixp4xx/platform.h index d13ee7f78c70..f14ed63590c3 100644 --- a/include/asm-arm/arch-ixp4xx/platform.h +++ b/include/asm-arm/arch-ixp4xx/platform.h @@ -93,7 +93,7 @@ extern struct pci_bus *ixp4xx_scan_bus(int nr, struct pci_sys_data *sys); static inline void gpio_line_config(u8 line, u32 direction) { - if (direction == IXP4XX_GPIO_OUT) + if (direction == IXP4XX_GPIO_IN) *IXP4XX_GPIO_GPOER |= (1 << line); else *IXP4XX_GPIO_GPOER &= ~(1 << line); -- cgit v1.2.3 From 7b5b3f3d826ea87c224c66de9c95c09e7f110ecd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:38:44 -0700 Subject: [ATM]: fix sparse gfp nocast warnings Fix implicit nocast warnings in atm code: net/atm/atm_misc.c:35:44: warning: implicit cast to nocast type drivers/atm/fore200e.c:183:33: warning: implicit cast to nocast type Also use kzalloc() instead of kmalloc(). Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- drivers/atm/fore200e.c | 10 ++++------ include/linux/atmdev.h | 2 +- net/atm/atm_misc.c | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 2bf723a7b6e6..6f1a83c9d9e0 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -178,14 +178,12 @@ fore200e_irq_itoa(int irq) static void* -fore200e_kmalloc(int size, int flags) +fore200e_kmalloc(int size, unsigned int __nocast flags) { - void* chunk = kmalloc(size, flags); + void *chunk = kzalloc(size, flags); - if (chunk) - memset(chunk, 0x00, size); - else - printk(FORE200E "kmalloc() failed, requested size = %d, flags = 0x%x\n", size, flags); + if (!chunk) + printk(FORE200E "kmalloc() failed, requested size = %d, flags = 0x%x\n", size, flags); return chunk; } diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9f374cfa1b05..f1fd849e5535 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -457,7 +457,7 @@ static inline void atm_dev_put(struct atm_dev *dev) int atm_charge(struct atm_vcc *vcc,int truesize); struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - int gfp_flags); + unsigned int __nocast gfp_flags); int atm_pcr_goal(struct atm_trafprm *tp); void vcc_release_async(struct atm_vcc *vcc, int reply); diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index b2113c3454ae..71abc99ec815 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -25,7 +25,7 @@ int atm_charge(struct atm_vcc *vcc,int truesize) struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - int gfp_flags) + unsigned int __nocast gfp_flags) { struct sock *sk = sk_atm(vcc); int guess = atm_guess_pdu2truesize(pdu_size); -- cgit v1.2.3 From 17b698856328a42d5874ac87640e2cd84a824eef Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:41:16 -0700 Subject: [CONNECTOR]: fix sparse gfp nocast warnings Fix implicit nocast warnings in connector code: drivers/connector/connector.c:102:24: warning: implicit cast to nocast type drivers/connector/connector.c:114:45: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/connector/connector.c | 3 ++- include/linux/connector.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index bb0b3a8de14b..1422285d537c 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -69,7 +69,8 @@ int cn_already_initialized = 0; * a new message. 
* */ -int cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask) +int cn_netlink_send(struct cn_msg *msg, u32 __group, + unsigned int __nocast gfp_mask) { struct cn_callback_entry *__cbq; unsigned int size; diff --git a/include/linux/connector.h b/include/linux/connector.h index 86d4b0a81713..96582c9911ac 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -149,7 +149,7 @@ struct cn_dev { int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, int); +int cn_netlink_send(struct cn_msg *, u32, unsigned int __nocast); int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); -- cgit v1.2.3 From f4a19a56e38442e434b8809915d756469f1e89a2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:41:48 -0700 Subject: [DECNET]: fix sparse gfp nocast warnings Fix implicit nocast warnings in decnet code: net/decnet/af_decnet.c:458:40: warning: implicit cast to nocast type net/decnet/dn_nsp_out.c:125:35: warning: implicit cast to nocast type net/decnet/dn_nsp_out.c:219:29: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/dn_nsp.h | 8 ++++---- include/net/dn_route.h | 2 +- net/decnet/af_decnet.c | 6 ++++-- net/decnet/dn_nsp_out.c | 23 ++++++++++++++--------- 4 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 6bbeafa73e8b..8a0891e2e888 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -19,9 +19,9 @@ extern void dn_nsp_send_data_ack(struct sock *sk); extern void dn_nsp_send_oth_ack(struct sock *sk); extern void dn_nsp_delayed_ack(struct sock *sk); extern void dn_send_conn_ack(struct sock *sk); -extern void dn_send_conn_conf(struct sock *sk, int gfp); +extern void dn_send_conn_conf(struct sock *sk, unsigned int __nocast gfp); extern void dn_nsp_send_disc(struct sock *sk, unsigned char type, - unsigned short reason, int gfp); + unsigned short reason, unsigned int __nocast gfp); extern void dn_nsp_return_disc(struct sk_buff *skb, unsigned char type, unsigned short reason); extern void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval); @@ -29,14 +29,14 @@ extern void dn_nsp_send_conninit(struct sock *sk, unsigned char flags); extern void dn_nsp_output(struct sock *sk); extern int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum); -extern void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, int gfp, int oob); +extern void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, unsigned int __nocast gfp, int oob); extern unsigned long dn_nsp_persist(struct sock *sk); extern int dn_nsp_xmit_timeout(struct sock *sk); extern int dn_nsp_rx(struct sk_buff *); extern int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb); -extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, int pri); +extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, unsigned int __nocast pri); extern struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, long timeo, int *err); #define NSP_REASON_OK 0 /* No error */ diff --git a/include/net/dn_route.h b/include/net/dn_route.h index d084721db198..11fe973cf383 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -15,7 +15,7 @@ GNU 
General Public License for more details. *******************************************************************************/ -extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, int pri); +extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, unsigned int __nocast pri); extern int dn_route_output_sock(struct dst_entry **pprt, struct flowi *, struct sock *sk, int flags); extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); extern int dn_cache_getroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 348f36b529f7..34d4128d56d5 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -452,7 +452,8 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, int gfp) +static struct sock *dn_alloc_sock(struct socket *sock, + unsigned int __nocast gfp) { struct dn_scp *scp; struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); @@ -804,7 +805,8 @@ static int dn_auto_bind(struct socket *sock) return rv; } -static int dn_confirm_accept(struct sock *sk, long *timeo, int allocation) +static int dn_confirm_accept(struct sock *sk, long *timeo, + unsigned int __nocast allocation) { struct dn_scp *scp = DN_SK(sk); DEFINE_WAIT(wait); diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 53633d352868..cd08244aa10c 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -117,7 +117,8 @@ try_again: * The eventual aim is for each socket to have a cached header size * for its outgoing packets, and to set hdr from this when sk != NULL. */ -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, int pri) +struct sk_buff *dn_alloc_skb(struct sock *sk, int size, + unsigned int __nocast pri) { struct sk_buff *skb; int hdr = 64; @@ -210,7 +211,8 @@ static void dn_nsp_rtt(struct sock *sk, long rtt) * * Returns: The number of times the packet has been sent previously */ -static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, int gfp) +static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, + unsigned int __nocast gfp) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sk_buff *skb2; @@ -350,7 +352,8 @@ static unsigned short *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *sk return ptr; } -void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, int gfp, int oth) +void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, + unsigned int __nocast gfp, int oth) { struct dn_scp *scp = DN_SK(sk); struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -517,7 +520,7 @@ static int dn_nsp_retrans_conn_conf(struct sock *sk) return 0; } -void dn_send_conn_conf(struct sock *sk, int gfp) +void dn_send_conn_conf(struct sock *sk, unsigned int __nocast gfp) { struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb = NULL; @@ -549,7 +552,8 @@ void dn_send_conn_conf(struct sock *sk, int gfp) static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, int gfp, struct dst_entry *dst, + unsigned short reason, unsigned int __nocast gfp, + struct dst_entry *dst, int ddl, unsigned char *dd, __u16 rem, __u16 loc) { struct sk_buff *skb = NULL; @@ -591,7 +595,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, int gfp) + unsigned short reason, unsigned int __nocast gfp) { struct dn_scp *scp = DN_SK(sk); int ddl = 0; @@ -612,7 +616,7 @@ void 
dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg, { struct dn_skb_cb *cb = DN_SKB_CB(skb); int ddl = 0; - int gfp = GFP_ATOMIC; + unsigned int __nocast gfp = GFP_ATOMIC; dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb->dst, ddl, NULL, cb->src_port, cb->dst_port); @@ -624,7 +628,7 @@ void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval) struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb; unsigned char *ptr; - int gfp = GFP_ATOMIC; + unsigned int __nocast gfp = GFP_ATOMIC; if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL) return; @@ -659,7 +663,8 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) unsigned char menuver; struct dn_skb_cb *cb; unsigned char type = 1; - int allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; + unsigned int __nocast allocation = + (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation); if (!skb) -- cgit v1.2.3 From 8eea00a44d9f493869f8d30b72e3ed18475be556 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:42:15 -0700 Subject: [IPVS]: fix sparse gfp nocast warnings From: Randy Dunlap Fix implicit nocast warnings in ip_vs code: net/ipv4/ipvs/ip_vs_app.c:631:54: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/ip_vs.h | 2 +- net/ipv4/ipvs/ip_vs_app.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 06b4235aa016..ecb2b061f597 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -832,7 +832,7 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff **pskb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff **pskb); -extern int ip_vs_skb_replace(struct sk_buff *skb, int pri, +extern int ip_vs_skb_replace(struct sk_buff *skb, unsigned int __nocast pri, char *o_buf, int o_len, char *n_buf, int n_len); extern int ip_vs_app_init(void); extern void ip_vs_app_cleanup(void); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 6e092dadb388..b942ff3c8860 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -604,7 +604,7 @@ static struct file_operations ip_vs_app_fops = { /* * Replace a segment of data with a new segment */ -int ip_vs_skb_replace(struct sk_buff *skb, int pri, +int ip_vs_skb_replace(struct sk_buff *skb, unsigned int __nocast pri, char *o_buf, int o_len, char *n_buf, int n_len) { struct iphdr *iph; -- cgit v1.2.3 From dd13a285b79ba77416b96ee10f49097f4aaf48c5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:44:45 -0700 Subject: [RPC]: fix sparse gfp nocast warnings Fix nocast sparse warnings: net/rxrpc/call.c:2013:25: warning: implicit cast to nocast type net/rxrpc/connection.c:538:46: warning: implicit cast to nocast type net/sunrpc/sched.c:730:36: warning: implicit cast to nocast type net/sunrpc/sched.c:734:56: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- include/rxrpc/call.h | 2 +- include/rxrpc/message.h | 2 +- net/rxrpc/call.c | 2 +- net/rxrpc/connection.c | 2 +- net/sunrpc/sched.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/rxrpc/call.h b/include/rxrpc/call.h index f48f27e9e0ab..8118731e7d96 100644 --- a/include/rxrpc/call.h +++ b/include/rxrpc/call.h @@ -203,7 +203,7 @@ extern int rxrpc_call_write_data(struct rxrpc_call *call, size_t sioc, struct kvec *siov, uint8_t rxhdr_flags, - int alloc_flags, + unsigned int __nocast alloc_flags, int dup_data, size_t *size_sent); diff --git a/include/rxrpc/message.h b/include/rxrpc/message.h index 3a59df6870b2..983d9f9eee1a 100644 --- a/include/rxrpc/message.h +++ b/include/rxrpc/message.h @@ -63,7 +63,7 @@ extern int rxrpc_conn_newmsg(struct rxrpc_connection *conn, uint8_t type, int count, struct kvec *diov, - int alloc_flags, + unsigned int __nocast alloc_flags, struct rxrpc_message **_msg); extern int rxrpc_conn_sendmsg(struct rxrpc_connection *conn, struct rxrpc_message *msg); diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c index 5cfd4cadee42..86f777052633 100644 --- a/net/rxrpc/call.c +++ b/net/rxrpc/call.c @@ -1923,7 +1923,7 @@ int rxrpc_call_write_data(struct rxrpc_call *call, size_t sioc, struct kvec *siov, u8 rxhdr_flags, - int alloc_flags, + unsigned int __nocast alloc_flags, int dup_data, size_t *size_sent) { diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c index 61463c74f8cc..be4b2be58956 100644 --- a/net/rxrpc/connection.c +++ b/net/rxrpc/connection.c @@ -522,7 +522,7 @@ int rxrpc_conn_newmsg(struct rxrpc_connection *conn, uint8_t type, int dcount, struct kvec diov[], - int alloc_flags, + unsigned int __nocast alloc_flags, struct rxrpc_message **_msg) { struct rxrpc_message *msg; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f3104035e35d..ade730eaf401 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -719,7 +719,7 @@ static void rpc_async_schedule(void *arg) void * rpc_malloc(struct rpc_task *task, size_t size) { - int gfp; + unsigned int __nocast gfp; if (task->tk_flags & RPC_TASK_SWAPPER) gfp = GFP_ATOMIC; -- cgit v1.2.3 From 3d2aef668920e8d93b77f145f8f647f62abe75db Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:45:14 -0700 Subject: [TEXTSEARCH]: fix sparse gfp nocast warnings Fix nocast sparse warnings: include/linux/textsearch.h:165:57: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- include/linux/textsearch.h | 3 ++- lib/ts_bm.c | 2 +- lib/ts_fsm.c | 2 +- lib/ts_kmp.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 941f45ac117a..1a4990e448e9 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -158,7 +158,8 @@ extern unsigned int textsearch_find_continuous(struct ts_config *, #define TS_PRIV_ALIGNTO 8 #define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1)) -static inline struct ts_config *alloc_ts_config(size_t payload, int gfp_mask) +static inline struct ts_config *alloc_ts_config(size_t payload, + unsigned int __nocast gfp_mask) { struct ts_config *conf; diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 2cc79112ecc3..1b61fceef777 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -127,7 +127,7 @@ static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, } static struct ts_config *bm_init(const void *pattern, unsigned int len, - int gfp_mask) + unsigned int __nocast gfp_mask) { struct ts_config *conf; struct ts_bm *bm; diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index d27c0a072940..ef9779e00506 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -258,7 +258,7 @@ found_match: } static struct ts_config *fsm_init(const void *pattern, unsigned int len, - int gfp_mask) + unsigned int __nocast gfp_mask) { int i, err = -EINVAL; struct ts_config *conf; diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index 73266b975585..e45f0f0c2379 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -87,7 +87,7 @@ static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, } static struct ts_config *kmp_init(const void *pattern, unsigned int len, - int gfp_mask) + unsigned int __nocast gfp_mask) { struct ts_config *conf; struct ts_kmp *kmp; -- cgit v1.2.3 From 83fa3400ebcba307a60909824a251be984eb9567 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:45:35 -0700 Subject: [XFRM]: fix sparse gfp nocast warnings Fix implicit nocast warnings in xfrm code: net/xfrm/xfrm_policy.c:232:47: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_policy.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a9d0d8c5dfbf..7564b2ce449f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -875,7 +875,7 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig } #endif -struct xfrm_policy *xfrm_policy_alloc(int gfp); +struct xfrm_policy *xfrm_policy_alloc(unsigned int __nocast gfp); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fda737d77edc..c6a0d34fc295 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -225,7 +225,7 @@ expired: * SPD calls. 
*/ -struct xfrm_policy *xfrm_policy_alloc(int gfp) +struct xfrm_policy *xfrm_policy_alloc(unsigned int __nocast gfp) { struct xfrm_policy *policy; -- cgit v1.2.3 From 329d4dd72e5c3393a0c7aeebf3e62df77b196d71 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Oct 2005 08:36:02 +0100 Subject: [PATCH] fix the breakage in sparc headers If we switch extern inline to static inline, we'd better switch the pre-declarations we use to say that these puppies have __attribute_const__ on them. Otherwise we get extern declaration followed by static inline one. Which makes gcc unhappy, and for a good reason... Signed-off-by: Al Viro Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/asm-sparc/btfixup.h | 12 ++++++------ include/asm-sparc/pgtable.h | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/asm-sparc/btfixup.h b/include/asm-sparc/btfixup.h index 6b29503256fd..c2868d0f60b6 100644 --- a/include/asm-sparc/btfixup.h +++ b/include/asm-sparc/btfixup.h @@ -49,7 +49,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); /* Put bottom 13bits into some register variable */ #define BTFIXUPDEF_SIMM13(__name) \ - extern unsigned int ___sf_##__name(void) __attribute_const__; \ + static inline unsigned int ___sf_##__name(void) __attribute_const__; \ extern unsigned ___ss_##__name[2]; \ static inline unsigned int ___sf_##__name(void) { \ unsigned int ret; \ @@ -57,7 +57,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); return ret; \ } #define BTFIXUPDEF_SIMM13_INIT(__name,__val) \ - extern unsigned int ___sf_##__name(void) __attribute_const__; \ + static inline unsigned int ___sf_##__name(void) __attribute_const__; \ extern unsigned ___ss_##__name[2]; \ static inline unsigned int ___sf_##__name(void) { \ unsigned int ret; \ @@ -71,7 +71,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); */ #define BTFIXUPDEF_HALF(__name) \ - extern unsigned int ___af_##__name(void) __attribute_const__; \ + static inline unsigned int ___af_##__name(void) __attribute_const__; \ extern unsigned ___as_##__name[2]; \ static inline unsigned int ___af_##__name(void) { \ unsigned int ret; \ @@ -79,7 +79,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); return ret; \ } #define BTFIXUPDEF_HALF_INIT(__name,__val) \ - extern unsigned int ___af_##__name(void) __attribute_const__; \ + static inline unsigned int ___af_##__name(void) __attribute_const__; \ extern unsigned ___as_##__name[2]; \ static inline unsigned int ___af_##__name(void) { \ unsigned int ret; \ @@ -90,7 +90,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); /* Put upper 22 bits into some register variable */ #define BTFIXUPDEF_SETHI(__name) \ - extern unsigned int ___hf_##__name(void) __attribute_const__; \ + static inline unsigned int ___hf_##__name(void) __attribute_const__; \ extern unsigned ___hs_##__name[2]; \ static inline unsigned int ___hf_##__name(void) { \ unsigned int ret; \ @@ -98,7 +98,7 @@ extern unsigned int ___illegal_use_of_BTFIXUP_INT_in_module(void); return ret; \ } #define BTFIXUPDEF_SETHI_INIT(__name,__val) \ - extern unsigned int ___hf_##__name(void) __attribute_const__; \ + static inline unsigned int ___hf_##__name(void) __attribute_const__; \ extern unsigned ___hs_##__name[2]; \ static inline unsigned int ___hf_##__name(void) { \ unsigned int ret; \ diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h index ae883f295f1f..a14e98677500 100644 --- 
a/include/asm-sparc/pgtable.h +++ b/include/asm-sparc/pgtable.h @@ -194,19 +194,19 @@ BTFIXUPDEF_HALF(pte_writei) BTFIXUPDEF_HALF(pte_dirtyi) BTFIXUPDEF_HALF(pte_youngi) -extern int pte_write(pte_t pte) __attribute_const__; +static int pte_write(pte_t pte) __attribute_const__; static inline int pte_write(pte_t pte) { return pte_val(pte) & BTFIXUP_HALF(pte_writei); } -extern int pte_dirty(pte_t pte) __attribute_const__; +static int pte_dirty(pte_t pte) __attribute_const__; static inline int pte_dirty(pte_t pte) { return pte_val(pte) & BTFIXUP_HALF(pte_dirtyi); } -extern int pte_young(pte_t pte) __attribute_const__; +static int pte_young(pte_t pte) __attribute_const__; static inline int pte_young(pte_t pte) { return pte_val(pte) & BTFIXUP_HALF(pte_youngi); @@ -217,7 +217,7 @@ static inline int pte_young(pte_t pte) */ BTFIXUPDEF_HALF(pte_filei) -extern int pte_file(pte_t pte) __attribute_const__; +static int pte_file(pte_t pte) __attribute_const__; static inline int pte_file(pte_t pte) { return pte_val(pte) & BTFIXUP_HALF(pte_filei); @@ -229,19 +229,19 @@ BTFIXUPDEF_HALF(pte_wrprotecti) BTFIXUPDEF_HALF(pte_mkcleani) BTFIXUPDEF_HALF(pte_mkoldi) -extern pte_t pte_wrprotect(pte_t pte) __attribute_const__; +static pte_t pte_wrprotect(pte_t pte) __attribute_const__; static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~BTFIXUP_HALF(pte_wrprotecti)); } -extern pte_t pte_mkclean(pte_t pte) __attribute_const__; +static pte_t pte_mkclean(pte_t pte) __attribute_const__; static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~BTFIXUP_HALF(pte_mkcleani)); } -extern pte_t pte_mkold(pte_t pte) __attribute_const__; +static pte_t pte_mkold(pte_t pte) __attribute_const__; static inline pte_t pte_mkold(pte_t pte) { return __pte(pte_val(pte) & ~BTFIXUP_HALF(pte_mkoldi)); @@ -278,7 +278,7 @@ BTFIXUPDEF_CALL_CONST(pte_t, mk_pte_io, unsigned long, pgprot_t, int) BTFIXUPDEF_INT(pte_modify_mask) -extern pte_t pte_modify(pte_t pte, pgprot_t newprot) __attribute_const__; +static pte_t pte_modify(pte_t pte, pgprot_t newprot) __attribute_const__; static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & BTFIXUP_INT(pte_modify_mask)) | -- cgit v1.2.3 From 77d8d7a6848c81084f413e1ec4982123a56e2ccb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 5 Oct 2005 12:15:12 -0700 Subject: [IPSEC]: Document that policy direction is derived from the index. Here is a patch that adds a helper called xfrm_policy_id2dir to document the fact that the policy direction can be and is derived from the index. This is based on a patch by YOSHIFUJI Hideaki and 210313105@suda.edu.cn. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 5 +++++ net/key/af_key.c | 11 ++++++++--- net/xfrm/xfrm_policy.c | 4 ++-- 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7564b2ce449f..b6e72f890c6c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -931,4 +931,9 @@ static inline int xfrm_addr_cmp(xfrm_address_t *a, xfrm_address_t *b, } } +static inline int xfrm_policy_id2dir(u32 index) +{ + return index & 7; +} + #endif /* _NET_XFRM_H */ diff --git a/net/key/af_key.c b/net/key/af_key.c index 50d0a31c3ba6..bbf0f69181ba 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2154,6 +2154,7 @@ out: static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + unsigned int dir; int err; struct sadb_x_policy *pol; struct xfrm_policy *xp; @@ -2162,7 +2163,11 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) return -EINVAL; - xp = xfrm_policy_byid(0, pol->sadb_x_policy_id, + dir = xfrm_policy_id2dir(pol->sadb_x_policy_id); + if (dir >= XFRM_POLICY_MAX) + return -EINVAL; + + xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, hdr->sadb_msg_type == SADB_X_SPDDELETE2); if (xp == NULL) return -ENOENT; @@ -2174,9 +2179,9 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (hdr->sadb_msg_type == SADB_X_SPDDELETE2) { c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; - km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); + km_policy_notify(xp, dir, &c); } else { - err = key_pol_get_resp(sk, xp, hdr, pol->sadb_x_policy_dir-1); + err = key_pol_get_resp(sk, xp, hdr, dir); } xfrm_pol_put(xp); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c6a0d34fc295..061b44cc2451 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -163,7 +163,7 @@ static void xfrm_policy_timer(unsigned long data) if (xp->dead) goto out; - dir = xp->index & 7; + dir = xfrm_policy_id2dir(xp->index); if (xp->lft.hard_add_expires_seconds) { long tmo = xp->lft.hard_add_expires_seconds + @@ -417,7 +417,7 @@ struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) { + for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { if (pol->index == id) { xfrm_pol_hold(pol); if (delete) -- cgit v1.2.3 From 5fe467ee9787007dd9b263eb42dde3742deb743b Mon Sep 17 00:00:00 2001 From: Ivan Skytte Jørgensen Date: Thu, 6 Oct 2005 21:36:17 -0700 Subject: [SCTP] Fix sctp_get{pl}addrs() API to work with 32-bit apps on 64-bit kernels. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old socket options are marked with a _OLD suffix so that the existing 32-bit apps on 32-bit kernels do not break. Signed-off-by: Ivan Skytte Jørgensen Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/user.h | 23 +++-- net/sctp/socket.c | 252 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 243 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index f6328aeddcce..e81ab52755fb 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -103,16 +103,20 @@ enum sctp_optname { #define SCTP_SOCKOPT_BINDX_REM SCTP_SOCKOPT_BINDX_REM SCTP_SOCKOPT_PEELOFF, /* peel off association. 
*/ #define SCTP_SOCKOPT_PEELOFF SCTP_SOCKOPT_PEELOFF - SCTP_GET_PEER_ADDRS_NUM, /* Get number of peer addresss. */ -#define SCTP_GET_PEER_ADDRS_NUM SCTP_GET_PEER_ADDRS_NUM + SCTP_GET_PEER_ADDRS_NUM_OLD, /* Get number of peer addresss. */ +#define SCTP_GET_PEER_ADDRS_NUM_OLD SCTP_GET_PEER_ADDRS_NUM_OLD + SCTP_GET_PEER_ADDRS_OLD, /* Get all peer addresss. */ +#define SCTP_GET_PEER_ADDRS_OLD SCTP_GET_PEER_ADDRS_OLD + SCTP_GET_LOCAL_ADDRS_NUM_OLD, /* Get number of local addresss. */ +#define SCTP_GET_LOCAL_ADDRS_NUM_OLD SCTP_GET_LOCAL_ADDRS_NUM_OLD + SCTP_GET_LOCAL_ADDRS_OLD, /* Get all local addresss. */ +#define SCTP_GET_LOCAL_ADDRS_OLD SCTP_GET_LOCAL_ADDRS_OLD + SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */ +#define SCTP_SOCKOPT_CONNECTX SCTP_SOCKOPT_CONNECTX SCTP_GET_PEER_ADDRS, /* Get all peer addresss. */ #define SCTP_GET_PEER_ADDRS SCTP_GET_PEER_ADDRS - SCTP_GET_LOCAL_ADDRS_NUM, /* Get number of local addresss. */ -#define SCTP_GET_LOCAL_ADDRS_NUM SCTP_GET_LOCAL_ADDRS_NUM SCTP_GET_LOCAL_ADDRS, /* Get all local addresss. */ #define SCTP_GET_LOCAL_ADDRS SCTP_GET_LOCAL_ADDRS - SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */ -#define SCTP_SOCKOPT_CONNECTX SCTP_SOCKOPT_CONNECTX }; /* @@ -559,11 +563,16 @@ struct sctp_status { * SCTP_GET_LOCAL_ADDRS socket options used internally to implement * sctp_getpaddrs() and sctp_getladdrs() API. */ -struct sctp_getaddrs { +struct sctp_getaddrs_old { sctp_assoc_t assoc_id; int addr_num; struct sockaddr __user *addrs; }; +struct sctp_getaddrs { + sctp_assoc_t assoc_id; /*input*/ + __u32 addr_num; /*output*/ + __u8 addrs[0]; /*output, variable size*/ +}; /* These are bit fields for msghdr->msg_flags. See section 5.1. */ /* On user space Linux, these live in as an enum. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 91ec8c936913..02e068d3450d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3159,8 +3159,9 @@ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval return 0; } -static int sctp_getsockopt_peer_addrs_num(struct sock *sk, int len, - char __user *optval, int __user *optlen) +static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, + char __user *optval, + int __user *optlen) { sctp_assoc_t id; struct sctp_association *asoc; @@ -3185,23 +3186,28 @@ static int sctp_getsockopt_peer_addrs_num(struct sock *sk, int len, return cnt; } -static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, - char __user *optval, int __user *optlen) +/* + * Old API for getting list of peer addresses. 
Does not work for 32-bit + * programs running on a 64-bit kernel + */ +static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, + char __user *optval, + int __user *optlen) { struct sctp_association *asoc; struct list_head *pos; int cnt = 0; - struct sctp_getaddrs getaddrs; + struct sctp_getaddrs_old getaddrs; struct sctp_transport *from; void __user *to; union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; - if (len != sizeof(struct sctp_getaddrs)) + if (len != sizeof(struct sctp_getaddrs_old)) return -EINVAL; - if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) + if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs_old))) return -EFAULT; if (getaddrs.addr_num <= 0) return -EINVAL; @@ -3225,15 +3231,69 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, if (cnt >= getaddrs.addr_num) break; } getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs))) + if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) + return -EFAULT; + + return 0; +} + +static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_association *asoc; + struct list_head *pos; + int cnt = 0; + struct sctp_getaddrs getaddrs; + struct sctp_transport *from; + void __user *to; + union sctp_addr temp; + struct sctp_sock *sp = sctp_sk(sk); + int addrlen; + size_t space_left; + int bytes_copied; + + if (len < sizeof(struct sctp_getaddrs)) + return -EINVAL; + + if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) + return -EFAULT; + + /* For UDP-style sockets, id specifies the association to query. */ + asoc = sctp_id2assoc(sk, getaddrs.assoc_id); + if (!asoc) + return -EINVAL; + + to = optval + offsetof(struct sctp_getaddrs,addrs); + space_left = len - sizeof(struct sctp_getaddrs) - + offsetof(struct sctp_getaddrs,addrs); + + list_for_each(pos, &asoc->peer.transport_addr_list) { + from = list_entry(pos, struct sctp_transport, transports); + memcpy(&temp, &from->ipaddr, sizeof(temp)); + sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); + addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; + if(space_left < addrlen) + return -ENOMEM; + temp.v4.sin_port = htons(temp.v4.sin_port); + if (copy_to_user(to, &temp, addrlen)) + return -EFAULT; + to += addrlen; + cnt++; + space_left -= addrlen; + } + + if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) + return -EFAULT; + bytes_copied = ((char __user *)to) - optval; + if (put_user(bytes_copied, optlen)) return -EFAULT; return 0; } -static int sctp_getsockopt_local_addrs_num(struct sock *sk, int len, - char __user *optval, - int __user *optlen) +static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, + char __user *optval, + int __user *optlen) { sctp_assoc_t id; struct sctp_bind_addr *bp; @@ -3306,8 +3366,8 @@ done: /* Helper function that copies local addresses to user and returns the number * of addresses copied. 
*/ -static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, int max_addrs, - void __user *to) +static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs, + void __user *to) { struct list_head *pos; struct sctp_sockaddr_entry *addr; @@ -3341,14 +3401,54 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, int max_addrs, return cnt; } -static int sctp_getsockopt_local_addrs(struct sock *sk, int len, - char __user *optval, int __user *optlen) +static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, + void * __user *to, size_t space_left) +{ + struct list_head *pos; + struct sctp_sockaddr_entry *addr; + unsigned long flags; + union sctp_addr temp; + int cnt = 0; + int addrlen; + + sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); + list_for_each(pos, &sctp_local_addr_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + if ((PF_INET == sk->sk_family) && + (AF_INET6 == addr->a.sa.sa_family)) + continue; + memcpy(&temp, &addr->a, sizeof(temp)); + sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), + &temp); + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; + if(space_leftaddress_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - cnt = sctp_copy_laddrs_to_user(sk, bp->port, - getaddrs.addr_num, to); + cnt = sctp_copy_laddrs_to_user_old(sk, bp->port, + getaddrs.addr_num, + to); if (cnt < 0) { err = cnt; goto unlock; @@ -3419,7 +3520,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, copy_getaddrs: getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs))) + if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) err = -EFAULT; unlock: @@ -3427,6 +3528,99 @@ unlock: return err; } +static int sctp_getsockopt_local_addrs(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_bind_addr *bp; + struct sctp_association *asoc; + struct list_head *pos; + int cnt = 0; + struct sctp_getaddrs getaddrs; + struct sctp_sockaddr_entry *addr; + void __user *to; + union sctp_addr temp; + struct sctp_sock *sp = sctp_sk(sk); + int addrlen; + rwlock_t *addr_lock; + int err = 0; + size_t space_left; + int bytes_copied; + + if (len <= sizeof(struct sctp_getaddrs)) + return -EINVAL; + + if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) + return -EFAULT; + + /* + * For UDP-style sockets, id specifies the association to query. + * If the id field is set to the value '0' then the locally bound + * addresses are returned without regard to any particular + * association. + */ + if (0 == getaddrs.assoc_id) { + bp = &sctp_sk(sk)->ep->base.bind_addr; + addr_lock = &sctp_sk(sk)->ep->base.addr_lock; + } else { + asoc = sctp_id2assoc(sk, getaddrs.assoc_id); + if (!asoc) + return -EINVAL; + bp = &asoc->base.bind_addr; + addr_lock = &asoc->base.addr_lock; + } + + to = optval + offsetof(struct sctp_getaddrs,addrs); + space_left = len - sizeof(struct sctp_getaddrs) - + offsetof(struct sctp_getaddrs,addrs); + + sctp_read_lock(addr_lock); + + /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid + * addresses from the global local address list. 
+ */ + if (sctp_list_single_entry(&bp->address_list)) { + addr = list_entry(bp->address_list.next, + struct sctp_sockaddr_entry, list); + if (sctp_is_any(&addr->a)) { + cnt = sctp_copy_laddrs_to_user(sk, bp->port, + &to, space_left); + if (cnt < 0) { + err = cnt; + goto unlock; + } + goto copy_getaddrs; + } + } + + list_for_each(pos, &bp->address_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + memcpy(&temp, &addr->a, sizeof(temp)); + sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; + if(space_left < addrlen) + return -ENOMEM; /*fixme: right error?*/ + temp.v4.sin_port = htons(temp.v4.sin_port); + if (copy_to_user(to, &temp, addrlen)) { + err = -EFAULT; + goto unlock; + } + to += addrlen; + cnt ++; + space_left -= addrlen; + } + +copy_getaddrs: + if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) + return -EFAULT; + bytes_copied = ((char __user *)to) - optval; + if (put_user(bytes_copied, optlen)) + return -EFAULT; + +unlock: + sctp_read_unlock(addr_lock); + return err; +} + /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) * * Requests that the local SCTP stack use the enclosed peer address as @@ -3807,12 +4001,20 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; - case SCTP_GET_PEER_ADDRS_NUM: - retval = sctp_getsockopt_peer_addrs_num(sk, len, optval, + case SCTP_GET_PEER_ADDRS_NUM_OLD: + retval = sctp_getsockopt_peer_addrs_num_old(sk, len, optval, + optlen); + break; + case SCTP_GET_LOCAL_ADDRS_NUM_OLD: + retval = sctp_getsockopt_local_addrs_num_old(sk, len, optval, + optlen); + break; + case SCTP_GET_PEER_ADDRS_OLD: + retval = sctp_getsockopt_peer_addrs_old(sk, len, optval, optlen); break; - case SCTP_GET_LOCAL_ADDRS_NUM: - retval = sctp_getsockopt_local_addrs_num(sk, len, optval, + case SCTP_GET_LOCAL_ADDRS_OLD: + retval = sctp_getsockopt_local_addrs_old(sk, len, optval, optlen); break; case SCTP_GET_PEER_ADDRS: -- cgit v1.2.3 From 20c9c825b12fcb8526a29cf20a17a5a3fc581726 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 6 Oct 2005 21:37:01 -0700 Subject: [SCTP] Fix SCTP socket options to work with 32-bit apps on 64-bit kernels. Adds alignment attribute to a few structures used with SCTP socket options so that the sizes and offsets remain the same when built using either 32 or 64 bit tools. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. 
Miller --- include/net/sctp/user.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index e81ab52755fb..1c5f19f995ad 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -243,7 +243,7 @@ struct sctp_paddr_change { int spc_state; int spc_error; sctp_assoc_t spc_assoc_id; -}; +} __attribute__((packed, aligned(4))); /* * spc_state: 32 bits (signed integer) @@ -468,7 +468,7 @@ struct sctp_assocparams { struct sctp_setpeerprim { sctp_assoc_t sspp_assoc_id; struct sockaddr_storage sspp_addr; -}; +} __attribute__((packed, aligned(4))); /* * 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) @@ -481,7 +481,7 @@ struct sctp_setpeerprim { struct sctp_prim { sctp_assoc_t ssp_assoc_id; struct sockaddr_storage ssp_addr; -}; +} __attribute__((packed, aligned(4))); /* * 7.1.11 Set Adaption Layer Indicator (SCTP_ADAPTION_LAYER) @@ -508,7 +508,7 @@ struct sctp_paddrparams { struct sockaddr_storage spp_address; __u32 spp_hbinterval; __u16 spp_pathmaxrxt; -}; +} __attribute__((packed, aligned(4))); /* * 7.2.2 Peer Address Information @@ -527,7 +527,7 @@ struct sctp_paddrinfo { __u32 spinfo_srtt; __u32 spinfo_rto; __u32 spinfo_mtu; -}; +} __attribute__((packed, aligned(4))); /* Peer addresses's state. */ enum sctp_spinfo_state { -- cgit v1.2.3 From 0f21ba7cc3320d33459ecb3f538f1a42040c29cd Mon Sep 17 00:00:00 2001 From: Eric Kinzie Date: Thu, 6 Oct 2005 22:19:28 -0700 Subject: [ATM]: add support for LECS addresses learned from network From: Eric Kinzie Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- include/linux/atmdev.h | 10 ++++++++++ net/atm/addr.c | 51 +++++++++++++++++++++++++++++++++++++------------- net/atm/addr.h | 12 +++++++----- net/atm/resources.c | 20 +++++++++++++++----- 4 files changed, 70 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index f1fd849e5535..aca9b344bd35 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -76,6 +76,13 @@ struct atm_dev_stats { /* set interface ESI */ #define ATM_SETESIF _IOW('a',ATMIOC_ITF+13,struct atmif_sioc) /* force interface ESI */ +#define ATM_ADDLECSADDR _IOW('a', ATMIOC_ITF+14, struct atmif_sioc) + /* register a LECS address */ +#define ATM_DELLECSADDR _IOW('a', ATMIOC_ITF+15, struct atmif_sioc) + /* unregister a LECS address */ +#define ATM_GETLECSADDR _IOW('a', ATMIOC_ITF+16, struct atmif_sioc) + /* retrieve LECS address(es) */ + #define ATM_GETSTAT _IOW('a',ATMIOC_SARCOM+0,struct atmif_sioc) /* get AAL layer statistics */ #define ATM_GETSTATZ _IOW('a',ATMIOC_SARCOM+1,struct atmif_sioc) @@ -328,6 +335,8 @@ struct atm_dev_addr { struct list_head entry; /* next address */ }; +enum atm_addr_type_t { ATM_ADDR_LOCAL, ATM_ADDR_LECS }; + struct atm_dev { const struct atmdev_ops *ops; /* device operations; NULL if unused */ const struct atmphy_ops *phy; /* PHY operations, may be undefined */ @@ -338,6 +347,7 @@ struct atm_dev { void *phy_data; /* private PHY date */ unsigned long flags; /* device flags (ATM_DF_*) */ struct list_head local; /* local ATM addresses */ + struct list_head lecs; /* LECS ATM addresses learned via ILMI */ unsigned char esi[ESI_LEN]; /* ESI ("MAC" addr) */ struct atm_cirange ci_range; /* VPI/VCI range */ struct k_atm_dev_stats stats; /* statistics */ diff --git a/net/atm/addr.c b/net/atm/addr.c index a30d0bf48063..3060fd0ba4b9 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -44,31 +44,43 @@ static void notify_sigd(struct 
atm_dev *dev) sigd_enq(NULL, as_itf_notify, NULL, &pvc, NULL); } -void atm_reset_addr(struct atm_dev *dev) +void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this, *p; + struct list_head *head; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry_safe(this, p, &dev->local, entry) { + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry_safe(this, p, head, entry) { list_del(&this->entry); kfree(this); } spin_unlock_irqrestore(&dev->lock, flags); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); } -int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) +int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int error; error = check_addr(addr); if (error) return error; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) { + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) { if (identical(&this->addr, addr)) { spin_unlock_irqrestore(&dev->lock, flags); return -EEXIST; @@ -80,28 +92,36 @@ int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) return -ENOMEM; } this->addr = *addr; - list_add(&this->entry, &dev->local); + list_add(&this->entry, head); spin_unlock_irqrestore(&dev->lock, flags); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); return 0; } -int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) +int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int error; error = check_addr(addr); if (error) return error; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) { + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) { if (identical(&this->addr, addr)) { list_del(&this->entry); spin_unlock_irqrestore(&dev->lock, flags); kfree(this); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); return 0; } } @@ -110,22 +130,27 @@ int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) } int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user * buf, - size_t size) + size_t size, enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int total = 0, error; struct sockaddr_atmsvc *tmp_buf, *tmp_bufp; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) total += sizeof(struct sockaddr_atmsvc); tmp_buf = tmp_bufp = kmalloc(total, GFP_ATOMIC); if (!tmp_buf) { spin_unlock_irqrestore(&dev->lock, flags); return -ENOMEM; } - list_for_each_entry(this, &dev->local, entry) + list_for_each_entry(this, head, entry) memcpy(tmp_bufp++, &this->addr, sizeof(struct sockaddr_atmsvc)); spin_unlock_irqrestore(&dev->lock, flags); error = total > size ? 
-E2BIG : total; diff --git a/net/atm/addr.h b/net/atm/addr.h index 3099d21feeaa..f39433ad45da 100644 --- a/net/atm/addr.h +++ b/net/atm/addr.h @@ -9,10 +9,12 @@ #include #include - -void atm_reset_addr(struct atm_dev *dev); -int atm_add_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr); -int atm_del_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr); -int atm_get_addr(struct atm_dev *dev,struct sockaddr_atmsvc __user *buf,size_t size); +void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t type); +int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t type); +int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t type); +int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user *buf, + size_t size, enum atm_addr_type_t type); #endif diff --git a/net/atm/resources.c b/net/atm/resources.c index a57a9268bd24..415d2615d475 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -40,6 +40,7 @@ static struct atm_dev *__alloc_atm_dev(const char *type) dev->link_rate = ATM_OC3_PCR; spin_lock_init(&dev->lock); INIT_LIST_HEAD(&dev->local); + INIT_LIST_HEAD(&dev->lecs); return dev; } @@ -320,10 +321,12 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) error = -EPERM; goto done; } - atm_reset_addr(dev); + atm_reset_addr(dev, ATM_ADDR_LOCAL); break; case ATM_ADDADDR: case ATM_DELADDR: + case ATM_ADDLECSADDR: + case ATM_DELLECSADDR: if (!capable(CAP_NET_ADMIN)) { error = -EPERM; goto done; } @@ -335,14 +338,21 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) error = -EFAULT; goto done; } - if (cmd == ATM_ADDADDR) - error = atm_add_addr(dev, &addr); + if (cmd == ATM_ADDADDR || cmd == ATM_ADDLECSADDR) + error = atm_add_addr(dev, &addr, + (cmd == ATM_ADDADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); else - error = atm_del_addr(dev, &addr); + error = atm_del_addr(dev, &addr, + (cmd == ATM_DELADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); goto done; } case ATM_GETADDR: - error = atm_get_addr(dev, buf, len); + case ATM_GETLECSADDR: + error = atm_get_addr(dev, buf, len, + (cmd == ATM_GETADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); if (error < 0) goto done; size = error; -- cgit v1.2.3 From 72e75de2df9a7116d0afbcd5810b2a8fd4bf7559 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 16 Sep 2005 18:49:22 +0200 Subject: [ALSA] remove redundant assignment to the ac97 device structure AC97 Codec Don't use dev.platform_data to store a reference to the containing ac97_t structure. Such assignment is redundant since we can deduce the ac97_t structure location from the contained device structure. This sets platform_data free for other purposes.
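A minimal standalone sketch of the container_of() idiom this change relies on; the names struct fake_device and struct fake_codec are illustrative only, and the macro below is a simplified stand-in for the kernel's container_of():

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's container_of(): given a pointer to a
 * member, subtract the member's offset to get the enclosing structure. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_device {                    /* stands in for struct device */
        int id;
};

struct fake_codec {                     /* stands in for struct _snd_ac97 */
        int num;
        struct fake_device dev;         /* embedded, as in _snd_ac97 */
};

#define to_fake_codec(d) container_of(d, struct fake_codec, dev)

int main(void)
{
        struct fake_codec codec = { .num = 2, .dev = { .id = 7 } };
        struct fake_device *d = &codec.dev;   /* only the inner pointer is handed out */

        /* The enclosing object is recovered by pointer arithmetic alone,
         * so no platform_data back-pointer is needed. */
        printf("codec number = %d\n", to_fake_codec(d)->num);
        return 0;
}

Because dev sits at a fixed offset inside the enclosing structure, the conversion costs nothing at run time, which is why the extra assignment removed by this patch carried no information.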
Signed-off-by: Nicolas Pitre Signed-off-by: Takashi Iwai --- include/sound/ac97_codec.h | 2 ++ sound/pci/ac97/ac97_codec.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h index 2857cf0472df..d11f34832a97 100644 --- a/include/sound/ac97_codec.h +++ b/include/sound/ac97_codec.h @@ -527,6 +527,8 @@ struct _snd_ac97 { struct device dev; }; +#define to_ac97_t(d) container_of(d, struct _snd_ac97, dev) + /* conditions */ static inline int ac97_is_audio(ac97_t * ac97) { diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index f221eba5c32f..41fc290149ed 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -1828,7 +1828,6 @@ static int snd_ac97_dev_register(snd_device_t *device) ac97->dev.bus = &ac97_bus_type; ac97->dev.parent = ac97->bus->card->dev; - ac97->dev.platform_data = ac97; ac97->dev.release = ac97_device_release; snprintf(ac97->dev.bus_id, BUS_ID_SIZE, "card%d-%d", ac97->bus->card->number, ac97->num); if ((err = device_register(&ac97->dev)) < 0) { -- cgit v1.2.3 From f12aa40c9d76af5add413731d30565327219c41f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 Sep 2005 16:56:59 +0200 Subject: [ALSA] emu10k1 - Fix loading of SBLive Game board EMU10K1/EMU10K2 driver Fixed an error when loading the SBLive Game board (and possibly other models). The PCI SSIDs of this board conflict with the SB Live 5.1 Platinum, which has no AC97 chip. Signed-off-by: Takashi Iwai --- include/sound/emu10k1.h | 2 +- sound/pci/emu10k1/emu10k1_main.c | 5 ++++- sound/pci/emu10k1/emumixer.c | 10 ++++++++-- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 67bf3f18e96a..14cb2718cb77 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1059,7 +1059,7 @@ typedef struct { unsigned char spk71; /* Has 7.1 speakers */ unsigned char sblive51; /* SBLive! 5.1 - extout 0x11 -> center, 0x12 -> lfe */ unsigned char spdif_bug; /* Has Spdif phasing bug */ - unsigned char ac97_chip; /* Has an AC97 chip */ + unsigned char ac97_chip; /* Has an AC97 chip: 1 = mandatory, 2 = optional */ unsigned char ecard; /* APS EEPROM */ const char *driver; const char *name; diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index e87e8427f25f..e9cd8e054f25 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -756,9 +756,12 @@ static emu_chip_details_t emu_chip_details[] = { .sblive51 = 1} , /* Tested by alsa bugtrack user "hus" bug #1297 12th Aug 2005 */ {.vendor = 0x1102, .device = 0x0002, .subsystem = 0x80611102, - .driver = "EMU10K1", .name = "SBLive! Platinum 5.1 [SB0060]", + .driver = "EMU10K1", .name = "SBLive 5.1 [SB0060]", .id = "Live", .emu10k1_chip = 1, + .ac97_chip = 2, /* ac97 is optional; both SBLive 5.1 and platinum + * share the same IDs! + */ .sblive51 = 1} , {.vendor = 0x1102, .device = 0x0002, .subsystem = 0x80511102, .driver = "EMU10K1", .name = "SBLive!
Value [CT4850]", diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c index d71a72e84bcc..6994f90bb83a 100644 --- a/sound/pci/emu10k1/emumixer.c +++ b/sound/pci/emu10k1/emumixer.c @@ -802,8 +802,13 @@ int __devinit snd_emu10k1_mixer(emu10k1_t *emu, .read = snd_emu10k1_ac97_read, }; - if ((err = snd_ac97_bus(emu->card, 0, &ops, NULL, &pbus)) < 0) - return err; + if ((err = snd_ac97_bus(emu->card, 0, &ops, NULL, &pbus)) < 0) { + if (emu->card_capabilities->ac97_chip == 1) + return err; + snd_printd(KERN_INFO "emu10k1: AC97 is optional on this board\n"); + snd_printd(KERN_INFO" Proceeding without ac97 mixers...\n"); + goto no_ac97; /* FIXME: get rid of ugly gotos.. */ + } pbus->no_vra = 1; /* we don't need VRA */ memset(&ac97, 0, sizeof(ac97)); @@ -836,6 +841,7 @@ int __devinit snd_emu10k1_mixer(emu10k1_t *emu, for (; *c; c++) remove_ctl(card, *c); } else { + no_ac97: if (emu->card_capabilities->ecard) strcpy(emu->card->mixername, "EMU APS"); else if (emu->audigy) -- cgit v1.2.3 From 468ed2b0c85ec4310b429e60358213b6d077289e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Oct 2005 15:07:38 +0100 Subject: [PATCH] Keys: Split key permissions checking into a .c file The attached patch splits key permissions checking out of key-ui.h and moves it into a .c file. It's quite large and called quite a lot, and it's about to get bigger with the addition of LSM support for keys... key_any_permission() is also discarded as it's no longer used. Signed-Off-By: David Howells Signed-off-by: Linus Torvalds --- include/linux/key-ui.h | 91 +++------------------------------------------- security/keys/Makefile | 1 + security/keys/permission.c | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 86 deletions(-) create mode 100644 security/keys/permission.c (limited to 'include') diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h index 918c34a8347e..7a2e332067c3 100644 --- a/include/linux/key-ui.h +++ b/include/linux/key-ui.h @@ -38,97 +38,16 @@ struct keyring_list { struct key *keys[0]; }; - /* * check to see whether permission is granted to use a key in the desired way */ -static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) -{ - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) - kperm = key->perm >> 24; - else if (key->uid == current->fsuid) - kperm = key->perm >> 16; - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && - in_group_p(key->gid) - ) - kperm = key->perm >> 8; - else - kperm = key->perm; - - kperm = kperm & perm & KEY_ALL; - - return kperm == perm; -} - -/* - * check to see whether permission is granted to use a key in at least one of - * the desired ways - */ -static inline int key_any_permission(const key_ref_t key_ref, key_perm_t perm) -{ - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) - kperm = key->perm >> 24; - else if (key->uid == current->fsuid) - kperm = key->perm >> 16; - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && - in_group_p(key->gid) - ) - kperm = key->perm >> 8; - else - kperm = key->perm; +extern int key_task_permission(const key_ref_t key_ref, + struct task_struct *context, + key_perm_t perm); - kperm = kperm & perm & KEY_ALL; - - return kperm != 0; -} - -static inline int key_task_groups_search(struct task_struct *tsk, gid_t gid) -{ - int ret; - - task_lock(tsk); - ret = groups_search(tsk->group_info, gid); - task_unlock(tsk); - return ret; -} - -static inline int 
key_task_permission(const key_ref_t key_ref, - struct task_struct *context, - key_perm_t perm) +static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) { - kperm = key->perm >> 24; - } - else if (key->uid == context->fsuid) { - kperm = key->perm >> 16; - } - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && ( - key->gid == context->fsgid || - key_task_groups_search(context, key->gid) - ) - ) { - kperm = key->perm >> 8; - } - else { - kperm = key->perm; - } - - kperm = kperm & perm & KEY_ALL; - - return kperm == perm; - + return key_task_permission(key_ref, current, perm); } extern key_ref_t lookup_user_key(struct task_struct *context, diff --git a/security/keys/Makefile b/security/keys/Makefile index c392d750b208..5145adfb6a05 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -6,6 +6,7 @@ obj-y := \ key.o \ keyring.o \ keyctl.o \ + permission.o \ process_keys.o \ request_key.o \ request_key_auth.o \ diff --git a/security/keys/permission.c b/security/keys/permission.c new file mode 100644 index 000000000000..1c3651670ce9 --- /dev/null +++ b/security/keys/permission.c @@ -0,0 +1,70 @@ +/* permission.c: key permission determination + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include "internal.h" + +/*****************************************************************************/ +/* + * check to see whether permission is granted to use a key in the desired way, + * but permit the security modules to override + */ +int key_task_permission(const key_ref_t key_ref, + struct task_struct *context, + key_perm_t perm) +{ + struct key *key; + key_perm_t kperm; + int ret; + + key = key_ref_to_ptr(key_ref); + + /* use the top 8-bits of permissions for keys the caller possesses */ + if (is_key_possessed(key_ref)) { + kperm = key->perm >> 24; + goto use_these_perms; + } + + /* use the second 8-bits of permissions for keys the caller owns */ + if (key->uid == context->fsuid) { + kperm = key->perm >> 16; + goto use_these_perms; + } + + /* use the third 8-bits of permissions for keys the caller has a group + * membership in common with */ + if (key->gid != -1 && key->perm & KEY_GRP_ALL) { + if (key->gid == context->fsgid) { + kperm = key->perm >> 8; + goto use_these_perms; + } + + task_lock(context); + ret = groups_search(context->group_info, key->gid); + task_unlock(context); + + if (ret) { + kperm = key->perm >> 8; + goto use_these_perms; + } + } + + /* otherwise use the least-significant 8-bits */ + kperm = key->perm; + +use_these_perms: + kperm = kperm & perm & KEY_ALL; + + return kperm == perm; + +} /* end key_task_permission() */ + +EXPORT_SYMBOL(key_task_permission); -- cgit v1.2.3 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents 
what's going on far better. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/drivers/pci/dma.c | 2 +- arch/i386/kernel/pci-dma.c | 2 +- arch/ppc64/kernel/bpa_iommu.c | 2 +- arch/ppc64/kernel/dma.c | 2 +- arch/ppc64/kernel/iommu.c | 2 +- arch/ppc64/kernel/pci_direct_iommu.c | 2 +- arch/ppc64/kernel/pci_iommu.c | 2 +- arch/ppc64/kernel/vio.c | 2 +- drivers/atm/ambassador.c | 2 +- drivers/atm/firestream.c | 5 ++--- drivers/atm/fore200e.c | 2 +- drivers/base/dmapool.c | 5 ++--- drivers/block/pktcdvd.c | 4 ++-- drivers/bluetooth/bpa10x.c | 2 +- drivers/bluetooth/hci_usb.c | 2 +- drivers/connector/connector.c | 3 +-- drivers/ieee1394/raw1394.c | 2 +- drivers/infiniband/core/mad.c | 2 +- drivers/infiniband/core/sa_query.c | 6 +++--- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-io.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/multipath.c | 2 +- drivers/md/raid1.c | 4 ++-- drivers/md/raid10.c | 4 ++-- drivers/net/bonding/bond_main.c | 2 +- drivers/net/ns83820.c | 2 +- drivers/net/sungem.h | 2 +- drivers/s390/scsi/zfcp_aux.c | 2 +- fs/bio.c | 10 +++++----- fs/buffer.c | 2 +- fs/mpage.c | 2 +- fs/ntfs/malloc.h | 2 +- fs/posix_acl.c | 6 +++--- fs/xfs/linux-2.6/kmem.c | 10 +++++----- fs/xfs/linux-2.6/kmem.h | 13 ++++++------- include/asm-generic/dma-mapping.h | 4 ++-- include/asm-i386/dma-mapping.h | 2 +- include/asm-ppc/dma-mapping.h | 2 +- include/asm-ppc64/dma-mapping.h | 4 ++-- include/asm-ppc64/iommu.h | 2 +- include/linux/atmdev.h | 2 +- include/linux/bio.h | 6 +++--- include/linux/buffer_head.h | 2 +- include/linux/connector.h | 2 +- include/linux/cpuset.h | 5 ++--- include/linux/dmapool.h | 2 +- include/linux/gfp.h | 14 +++++++------- include/linux/jbd.h | 2 +- include/linux/kfifo.h | 4 ++-- include/linux/mempool.h | 9 ++++----- include/linux/netlink.h | 2 +- include/linux/pagemap.h | 2 +- include/linux/posix_acl.h | 6 +++--- include/linux/radix-tree.h | 2 +- include/linux/security.h | 6 ++---- include/linux/skbuff.h | 28 ++++++++++++++-------------- include/linux/slab.h | 19 +++++++++---------- include/linux/string.h | 2 +- include/linux/swap.h | 2 +- include/linux/textsearch.h | 2 +- include/linux/types.h | 4 ++++ include/linux/vmalloc.h | 4 ++-- include/net/bluetooth/bluetooth.h | 2 +- include/net/bluetooth/rfcomm.h | 2 +- include/net/dn_nsp.h | 8 ++++---- include/net/dn_route.h | 2 +- include/net/inet_connection_sock.h | 2 +- include/net/ip_vs.h | 2 +- include/net/llc_conn.h | 2 +- include/net/sctp/sctp.h | 2 +- include/net/sctp/sm.h | 10 +++++----- include/net/sctp/structs.h | 24 ++++++++++++------------ include/net/sctp/ulpevent.h | 16 ++++++++-------- include/net/sctp/ulpqueue.h | 11 ++++------- include/net/sock.h | 16 ++++++++-------- include/net/tcp.h | 3 +-- include/net/xfrm.h | 2 +- include/rdma/ib_mad.h | 2 +- include/rdma/ib_sa.h | 10 +++++----- include/rxrpc/call.h | 2 +- include/rxrpc/message.h | 2 +- include/sound/core.h | 8 ++++---- include/sound/driver.h | 2 +- kernel/audit.c | 2 +- kernel/cpuset.c | 2 +- kernel/kfifo.c | 4 ++-- kernel/signal.c | 2 +- lib/radix-tree.c | 2 +- lib/ts_bm.c | 2 +- lib/ts_fsm.c | 2 +- lib/ts_kmp.c | 2 +- mm/highmem.c | 2 +- mm/mempolicy.c | 8 ++++---- mm/mempool.c | 6 +++--- mm/nommu.c | 3 +-- mm/oom_kill.c | 2 +- mm/page_alloc.c | 12 ++++++------ mm/page_io.c | 2 +- mm/shmem.c | 3 +-- mm/slab.c | 34 ++++++++++++++++------------------ mm/swap_state.c | 2 +- mm/vmalloc.c | 4 ++-- net/atm/atm_misc.c | 2 +- net/bluetooth/l2cap.c | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- 
net/bluetooth/rfcomm/tty.c | 2 +- net/bluetooth/sco.c | 2 +- net/core/dev.c | 2 +- net/core/skbuff.c | 14 +++++++------- net/core/sock.c | 10 +++++----- net/dccp/ackvec.c | 2 +- net/dccp/ackvec.h | 4 ++-- net/dccp/ccids/lib/loss_interval.h | 2 +- net/dccp/ccids/lib/packet_history.h | 4 ++-- net/decnet/af_decnet.c | 6 ++---- net/decnet/dn_nsp_out.c | 20 +++++++++----------- net/ieee80211/ieee80211_tx.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ipvs/ip_vs_app.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/key/af_key.c | 6 +++--- net/llc/llc_conn.c | 3 +-- net/netfilter/nfnetlink.c | 3 +-- net/netlink/af_netlink.c | 4 ++-- net/rxrpc/call.c | 2 +- net/rxrpc/connection.c | 2 +- net/sctp/associola.c | 10 +++++----- net/sctp/bind_addr.c | 12 ++++++------ net/sctp/chunk.c | 2 +- net/sctp/endpointola.c | 5 ++--- net/sctp/protocol.c | 2 +- net/sctp/sm_make_chunk.c | 14 +++++++------- net/sctp/sm_sideeffect.c | 12 ++++++------ net/sctp/ssnmap.c | 2 +- net/sctp/transport.c | 4 ++-- net/sctp/ulpevent.c | 18 +++++++++--------- net/sctp/ulpqueue.c | 8 ++++---- net/sunrpc/sched.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- sound/core/memalloc.c | 2 +- sound/core/memory.c | 10 +++++----- sound/core/seq/instr/ainstr_iw.c | 2 +- sound/core/wrappers.c | 2 +- 145 files changed, 340 insertions(+), 360 deletions(-) (limited to 'include') diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index 10329306d23c..426b09878a05 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -24,7 +24,7 @@ struct dma_coherent_mem { }; void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast gfp) + dma_addr_t *dma_handle, gfp_t gfp) { void *ret; struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 1e51427cc9eb..25fe66853934 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -23,7 +23,7 @@ struct dma_coherent_mem { }; void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast gfp) + dma_addr_t *dma_handle, gfp_t gfp) { void *ret; struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; diff --git a/arch/ppc64/kernel/bpa_iommu.c b/arch/ppc64/kernel/bpa_iommu.c index 507eb9d0223f..5f2460090e03 100644 --- a/arch/ppc64/kernel/bpa_iommu.c +++ b/arch/ppc64/kernel/bpa_iommu.c @@ -310,7 +310,7 @@ static void bpa_map_iommu(void) static void *bpa_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { void *ret; diff --git a/arch/ppc64/kernel/dma.c b/arch/ppc64/kernel/dma.c index 4da8e31b2b61..7c3419656ccc 100644 --- a/arch/ppc64/kernel/dma.c +++ b/arch/ppc64/kernel/dma.c @@ -53,7 +53,7 @@ int dma_set_mask(struct device *dev, u64 dma_mask) EXPORT_SYMBOL(dma_set_mask); void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); diff --git a/arch/ppc64/kernel/iommu.c b/arch/ppc64/kernel/iommu.c index 9032b6bfe036..4d9b4388918b 100644 --- a/arch/ppc64/kernel/iommu.c +++ b/arch/ppc64/kernel/iommu.c @@ -519,7 +519,7 @@ void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, * to the dma address (mapping) of the first page. 
*/ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { void *ret = NULL; dma_addr_t mapping; diff --git a/arch/ppc64/kernel/pci_direct_iommu.c b/arch/ppc64/kernel/pci_direct_iommu.c index b8f7f58824f4..54055c81017a 100644 --- a/arch/ppc64/kernel/pci_direct_iommu.c +++ b/arch/ppc64/kernel/pci_direct_iommu.c @@ -31,7 +31,7 @@ #include "pci.h" static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { void *ret; diff --git a/arch/ppc64/kernel/pci_iommu.c b/arch/ppc64/kernel/pci_iommu.c index 14647e09c9cd..d9e33b7d4203 100644 --- a/arch/ppc64/kernel/pci_iommu.c +++ b/arch/ppc64/kernel/pci_iommu.c @@ -76,7 +76,7 @@ static inline struct iommu_table *devnode_table(struct device *dev) * to the dma address (mapping) of the first page. */ static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle, flag); diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index c90e1dd875ce..0e555b7a6587 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -218,7 +218,7 @@ static void vio_unmap_sg(struct device *dev, struct scatterlist *sglist, } static void *vio_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag) + dma_addr_t *dma_handle, gfp_t flag) { return iommu_alloc_coherent(to_vio_dev(dev)->iommu_table, size, dma_handle, flag); diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index d74a7c5e75dd..4b6bf19c39c0 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -795,7 +795,7 @@ static void drain_rx_pools (amb_dev * dev) { } static inline void fill_rx_pool (amb_dev * dev, unsigned char pool, - unsigned int __nocast priority) + gfp_t priority) { rx_in rx; amb_rxq * rxq; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 58219744f5db..7f7ec288824d 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1374,8 +1374,7 @@ static void reset_chip (struct fs_dev *dev) } } -static void __devinit *aligned_kmalloc (int size, unsigned int __nocast flags, - int alignment) +static void __devinit *aligned_kmalloc (int size, gfp_t flags, int alignment) { void *t; @@ -1466,7 +1465,7 @@ static inline int nr_buffers_in_freepool (struct fs_dev *dev, struct freepool *f working again after that... 
-- REW */ static void top_off_fp (struct fs_dev *dev, struct freepool *fp, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct FS_BPENTRY *qe, *ne; struct sk_buff *skb; diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 6f1a83c9d9e0..14f6a6201da3 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -178,7 +178,7 @@ fore200e_irq_itoa(int irq) static void* -fore200e_kmalloc(int size, unsigned int __nocast flags) +fore200e_kmalloc(int size, gfp_t flags) { void *chunk = kzalloc(size, flags); diff --git a/drivers/base/dmapool.c b/drivers/base/dmapool.c index 60a7ef6a201b..e2f64f91ed05 100644 --- a/drivers/base/dmapool.c +++ b/drivers/base/dmapool.c @@ -156,7 +156,7 @@ dma_pool_create (const char *name, struct device *dev, static struct dma_page * -pool_alloc_page (struct dma_pool *pool, unsigned int __nocast mem_flags) +pool_alloc_page (struct dma_pool *pool, gfp_t mem_flags) { struct dma_page *page; int mapsize; @@ -262,8 +262,7 @@ dma_pool_destroy (struct dma_pool *pool) * If such a memory block can't be allocated, null is returned. */ void * -dma_pool_alloc (struct dma_pool *pool, unsigned int __nocast mem_flags, - dma_addr_t *handle) +dma_pool_alloc (struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { unsigned long flags; struct dma_page *page; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7e22a58926b8..a280e679b1ca 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -229,7 +229,7 @@ static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets) return 1; } -static void *pkt_rb_alloc(unsigned int __nocast gfp_mask, void *data) +static void *pkt_rb_alloc(gfp_t gfp_mask, void *data) { return kmalloc(sizeof(struct pkt_rb_node), gfp_mask); } @@ -2082,7 +2082,7 @@ static int pkt_close(struct inode *inode, struct file *file) } -static void *psd_pool_alloc(unsigned int __nocast gfp_mask, void *data) +static void *psd_pool_alloc(gfp_t gfp_mask, void *data) { return kmalloc(sizeof(struct packet_stacked_data), gfp_mask); } diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index a1bf8f066c88..4fa85234d8b5 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -308,7 +308,7 @@ unlock: } static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe, - size_t size, unsigned int __nocast flags, void *data) + size_t size, gfp_t flags, void *data) { struct urb *urb; struct usb_ctrlrequest *cr; diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c index 57c48bbf6fe6..6756cb20b753 100644 --- a/drivers/bluetooth/hci_usb.c +++ b/drivers/bluetooth/hci_usb.c @@ -132,7 +132,7 @@ static struct usb_device_id blacklist_ids[] = { { } /* Terminating entry */ }; -static struct _urb *_urb_alloc(int isoc, unsigned int __nocast gfp) +static struct _urb *_urb_alloc(int isoc, gfp_t gfp) { struct _urb *_urb = kmalloc(sizeof(struct _urb) + sizeof(struct usb_iso_packet_descriptor) * isoc, gfp); diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 1422285d537c..505677fb3157 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -69,8 +69,7 @@ int cn_already_initialized = 0; * a new message. 
* */ -int cn_netlink_send(struct cn_msg *msg, u32 __group, - unsigned int __nocast gfp_mask) +int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask) { struct cn_callback_entry *__cbq; unsigned int size; diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 5fe4f2ba0979..315f5ca8bedb 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -98,7 +98,7 @@ static struct hpsb_address_ops arm_ops = { static void queue_complete_cb(struct pending_request *req); -static struct pending_request *__alloc_pending_request(unsigned int __nocast flags) +static struct pending_request *__alloc_pending_request(gfp_t flags) { struct pending_request *req; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a4a4d9c1eef3..a14ca87fda18 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -783,7 +783,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, struct ib_ah *ah, int rmpp_active, int hdr_len, int data_len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_buf *send_buf; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 78de2dd1a4f2..262618210c1c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -574,7 +574,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), @@ -676,7 +676,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), @@ -759,7 +759,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b82bc3150476..b6148f6f7836 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -96,7 +96,7 @@ static kmem_cache_t *_crypt_io_pool; /* * Mempool alloc and free functions for the page */ -static void *mempool_alloc_page(unsigned int __nocast gfp_mask, void *data) +static void *mempool_alloc_page(gfp_t gfp_mask, void *data) { return alloc_page(gfp_mask); } diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 9de000131a8a..4809b209fbb1 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -32,7 +32,7 @@ struct io { static unsigned _num_ios; static mempool_t *_io_pool; -static void *alloc_io(unsigned int __nocast gfp_mask, void *pool_data) +static void *alloc_io(gfp_t gfp_mask, void *pool_data) { return kmalloc(sizeof(struct io), gfp_mask); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 863282513753..2375709a392c 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -122,7 +122,7 @@ static inline sector_t region_to_sector(struct region_hash 
*rh, region_t region) /* FIXME move this */ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); -static void *region_alloc(unsigned int __nocast gfp_mask, void *pool_data) +static void *region_alloc(gfp_t gfp_mask, void *pool_data) { return kmalloc(sizeof(struct region), gfp_mask); } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 286342375fb7..1151c3ed3006 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -38,7 +38,7 @@ static mdk_personality_t multipath_personality; -static void *mp_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void *mp_pool_alloc(gfp_t gfp_flags, void *data) { struct multipath_bh *mpb; mpb = kmalloc(sizeof(*mpb), gfp_flags); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a93ca478142a..0e1f148dd41d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -52,7 +52,7 @@ static mdk_personality_t raid1_personality; static void unplug_slaves(mddev_t *mddev); -static void * r1bio_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) { struct pool_info *pi = data; r1bio_t *r1_bio; @@ -79,7 +79,7 @@ static void r1bio_pool_free(void *r1_bio, void *data) #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) #define RESYNC_WINDOW (2048*1024) -static void * r1buf_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) { struct pool_info *pi = data; struct page *page; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5bd1e9ec899d..28dd028415e4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -47,7 +47,7 @@ static void unplug_slaves(mddev_t *mddev); -static void * r10bio_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) { conf_t *conf = data; r10bio_t *r10_bio; @@ -81,7 +81,7 @@ static void r10bio_pool_free(void *r10_bio, void *data) * one for write (we recover only one drive per r10buf) * */ -static void * r10buf_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) { conf_t *conf = data; struct page *page; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f0a5b772a386..f264ff162979 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1290,7 +1290,7 @@ static void bond_mc_list_destroy(struct bonding *bond) * Copy all the Multicast addresses from src to the bonding device dst */ static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, - unsigned int __nocast gfp_flag) + gfp_t gfp_flag) { struct dev_mc_list *dmi, *new_dmi; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 83334db2921c..e4811b42a6b7 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -584,7 +584,7 @@ static inline int ns83820_add_rx_skb(struct ns83820 *dev, struct sk_buff *skb) return 0; } -static inline int rx_refill(struct net_device *ndev, unsigned int __nocast gfp) +static inline int rx_refill(struct net_device *ndev, gfp_t gfp) { struct ns83820 *dev = PRIV(ndev); unsigned i; diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h index 16edbb1a4a7a..13006d759ad8 100644 --- a/drivers/net/sungem.h +++ b/drivers/net/sungem.h @@ -1036,7 +1036,7 @@ struct gem { #define ALIGNED_RX_SKB_ADDR(addr) \ ((((unsigned long)(addr) + (64UL - 1UL)) & ~(64UL - 1UL)) - (unsigned long)(addr)) static __inline__ struct sk_buff 
*gem_alloc_skb(int size, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct sk_buff *skb = alloc_skb(size + 64, gfp_flags); diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 0b5087f7cabc..cab098556b44 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -833,7 +833,7 @@ zfcp_unit_dequeue(struct zfcp_unit *unit) } static void * -zfcp_mempool_alloc(unsigned int __nocast gfp_mask, void *size) +zfcp_mempool_alloc(gfp_t gfp_mask, void *size) { return kmalloc((size_t) size, gfp_mask); } diff --git a/fs/bio.c b/fs/bio.c index 83a349574567..7d81a93afd48 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -75,7 +75,7 @@ struct bio_set { */ static struct bio_set *fs_bio_set; -static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) +static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) { struct bio_vec *bvl; struct biovec_slab *bp; @@ -155,7 +155,7 @@ inline void bio_init(struct bio *bio) * allocate bio and iovecs from the memory pools specified by the * bio_set structure. **/ -struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, struct bio_set *bs) +struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); @@ -181,7 +181,7 @@ out: return bio; } -struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) +struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) { struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); @@ -277,7 +277,7 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src) * * Like __bio_clone, only also allocates the returned bio */ -struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask) +struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) { struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); @@ -1078,7 +1078,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) return bp; } -static void *bio_pair_alloc(unsigned int __nocast gfp_flags, void *data) +static void *bio_pair_alloc(gfp_t gfp_flags, void *data) { return kmalloc(sizeof(struct bio_pair), gfp_flags); } diff --git a/fs/buffer.c b/fs/buffer.c index 6cbfceabd95d..1216c0d3c8ce 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3045,7 +3045,7 @@ static void recalc_bh_state(void) buffer_heads_over_limit = (tot > max_buffer_heads); } -struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags) +struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); if (ret) { diff --git a/fs/mpage.c b/fs/mpage.c index bb9aebe93862..c5adcdddf3cc 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -102,7 +102,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio) static struct bio * mpage_alloc(struct block_device *bdev, sector_t first_sector, int nr_vecs, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct bio *bio; diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 006946efca8c..590887b943f5 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -40,7 +40,7 @@ * Depending on @gfp_mask the allocation may be guaranteed to succeed. 
*/ static inline void *__ntfs_malloc(unsigned long size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { if (likely(size <= PAGE_SIZE)) { BUG_ON(!size); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 296480e96dd5..6c8dcf7613fd 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL(posix_acl_permission); * Allocate a new ACL with the specified number of entries. */ struct posix_acl * -posix_acl_alloc(int count, unsigned int __nocast flags) +posix_acl_alloc(int count, gfp_t flags) { const size_t size = sizeof(struct posix_acl) + count * sizeof(struct posix_acl_entry); @@ -51,7 +51,7 @@ posix_acl_alloc(int count, unsigned int __nocast flags) * Clone an ACL. */ struct posix_acl * -posix_acl_clone(const struct posix_acl *acl, unsigned int __nocast flags) +posix_acl_clone(const struct posix_acl *acl, gfp_t flags) { struct posix_acl *clone = NULL; @@ -185,7 +185,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) * Create an ACL representing the file mode permission bits of an inode. */ struct posix_acl * -posix_acl_from_mode(mode_t mode, unsigned int __nocast flags) +posix_acl_from_mode(mode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 4b184559f231..d2653b589b1c 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -45,7 +45,7 @@ void * -kmem_alloc(size_t size, unsigned int __nocast flags) +kmem_alloc(size_t size, gfp_t flags) { int retries = 0; unsigned int lflags = kmem_flags_convert(flags); @@ -67,7 +67,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) } void * -kmem_zalloc(size_t size, unsigned int __nocast flags) +kmem_zalloc(size_t size, gfp_t flags) { void *ptr; @@ -90,7 +90,7 @@ kmem_free(void *ptr, size_t size) void * kmem_realloc(void *ptr, size_t newsize, size_t oldsize, - unsigned int __nocast flags) + gfp_t flags) { void *new; @@ -105,7 +105,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, } void * -kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags) { int retries = 0; unsigned int lflags = kmem_flags_convert(flags); @@ -124,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_zalloc(kmem_zone_t *zone, gfp_t flags) { void *ptr; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 109fcf27e256..ee7010f085bc 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -81,7 +81,7 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags) +static __inline unsigned int kmem_flags_convert(gfp_t flags) { unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ @@ -125,13 +125,12 @@ kmem_zone_destroy(kmem_zone_t *zone) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_zalloc(kmem_zone_t *, gfp_t); +extern void *kmem_zone_alloc(kmem_zone_t *, gfp_t); -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(void *, size_t, size_t, - unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void *kmem_alloc(size_t, gfp_t); +extern void *kmem_realloc(void *, size_t, size_t, gfp_t); +extern void *kmem_zalloc(size_t, 
gfp_t); extern void kmem_free(void *, size_t); typedef struct shrinker *kmem_shaker_t; diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 8cef663c5cd9..747d790295f3 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -35,7 +35,7 @@ dma_set_mask(struct device *dev, u64 dma_mask) static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned int __nocast flag) + gfp_t flag) { BUG_ON(dev->bus != &pci_bus_type); @@ -168,7 +168,7 @@ dma_set_mask(struct device *dev, u64 dma_mask) static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned int __nocast flag) + gfp_t flag) { BUG(); return NULL; diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h index 563964b2995b..e56c335f8ef9 100644 --- a/include/asm-i386/dma-mapping.h +++ b/include/asm-i386/dma-mapping.h @@ -11,7 +11,7 @@ #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag); + dma_addr_t *dma_handle, gfp_t flag); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); diff --git a/include/asm-ppc/dma-mapping.h b/include/asm-ppc/dma-mapping.h index 92b8ee78dcc2..061bfcac1bf1 100644 --- a/include/asm-ppc/dma-mapping.h +++ b/include/asm-ppc/dma-mapping.h @@ -61,7 +61,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t * dma_handle, - unsigned int __nocast gfp) + gfp_t gfp) { #ifdef CONFIG_NOT_COHERENT_CACHE return __dma_alloc_coherent(size, dma_handle, gfp); diff --git a/include/asm-ppc64/dma-mapping.h b/include/asm-ppc64/dma-mapping.h index 9ad8adee0067..fb68fa23bea8 100644 --- a/include/asm-ppc64/dma-mapping.h +++ b/include/asm-ppc64/dma-mapping.h @@ -19,7 +19,7 @@ extern int dma_supported(struct device *dev, u64 mask); extern int dma_set_mask(struct device *dev, u64 dma_mask); extern void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag); + dma_addr_t *dma_handle, gfp_t flag); extern void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle); extern dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, @@ -118,7 +118,7 @@ dma_cache_sync(void *vaddr, size_t size, */ struct dma_mapping_ops { void * (*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag); + dma_addr_t *dma_handle, gfp_t flag); void (*free_coherent)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); dma_addr_t (*map_single)(struct device *dev, void *ptr, diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h index 72dcf8116b04..c2f3b6e8a42f 100644 --- a/include/asm-ppc64/iommu.h +++ b/include/asm-ppc64/iommu.h @@ -122,7 +122,7 @@ extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems, enum dma_data_direction direction); extern void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast flag); + dma_addr_t *dma_handle, gfp_t flag); extern void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle); extern dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, diff --git a/include/linux/atmdev.h 
b/include/linux/atmdev.h index aca9b344bd35..e7d0593bb576 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -467,7 +467,7 @@ static inline void atm_dev_put(struct atm_dev *dev) int atm_charge(struct atm_vcc *vcc,int truesize); struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - unsigned int __nocast gfp_flags); + gfp_t gfp_flags); int atm_pcr_goal(struct atm_trafprm *tp); void vcc_release_async(struct atm_vcc *vcc, int reply); diff --git a/include/linux/bio.h b/include/linux/bio.h index 6e1c79c8b6bf..3344b4e8e43a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -276,8 +276,8 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int, int); extern void bioset_free(struct bio_set *); -extern struct bio *bio_alloc(unsigned int __nocast, int); -extern struct bio *bio_alloc_bioset(unsigned int __nocast, int, struct bio_set *); +extern struct bio *bio_alloc(gfp_t, int); +extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); @@ -287,7 +287,7 @@ extern int bio_phys_segments(struct request_queue *, struct bio *); extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); -extern struct bio *bio_clone(struct bio *, unsigned int __nocast); +extern struct bio *bio_clone(struct bio *, gfp_t); extern void bio_init(struct bio *); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 90828493791f..6a1d154c0825 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -172,7 +172,7 @@ void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, int size); struct buffer_head *__bread(struct block_device *, sector_t block, int size); -struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags); +struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void FASTCALL(unlock_buffer(struct buffer_head *bh)); void FASTCALL(__lock_buffer(struct buffer_head *bh)); diff --git a/include/linux/connector.h b/include/linux/connector.h index 96582c9911ac..95952cc1f525 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -149,7 +149,7 @@ struct cn_dev { int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, unsigned int __nocast); +int cn_netlink_send(struct cn_msg *, u32, gfp_t); int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 24062a1dbf61..6e2deef96b34 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -23,7 +23,7 @@ void cpuset_init_current_mems_allowed(void); void cpuset_update_current_mems_allowed(void); void cpuset_restrict_to_mems_allowed(unsigned long *nodes); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); -extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); +extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ 
-49,8 +49,7 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) return 1; } -static inline int cpuset_zone_allowed(struct zone *z, - unsigned int __nocast gfp_mask) +static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return 1; } diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 4932ee5c77f0..76f12f46db7f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -19,7 +19,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev, void dma_pool_destroy(struct dma_pool *pool); -void *dma_pool_alloc(struct dma_pool *pool, unsigned int __nocast mem_flags, +void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle); void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4dc990f3b5cc..3010e172394d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,9 +85,9 @@ static inline void arch_free_page(struct page *page, int order) { } #endif extern struct page * -FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); +FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *)); -static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask, +static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) @@ -98,17 +98,17 @@ static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_m } #ifdef CONFIG_NUMA -extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order); +extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order); static inline struct page * -alloc_pages(unsigned int __nocast gfp_mask, unsigned int order) +alloc_pages(gfp_t gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) return NULL; return alloc_pages_current(gfp_mask, order); } -extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, +extern struct page *alloc_page_vma(gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr); #else #define alloc_pages(gfp_mask, order) \ @@ -117,8 +117,8 @@ extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask)); +extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order)); +extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask)); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask),0) diff --git a/include/linux/jbd.h b/include/linux/jbd.h index de097269bd7f..ff853b3173c6 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -935,7 +935,7 @@ void journal_put_journal_head(struct journal_head *jh); */ extern kmem_cache_t *jbd_handle_cache; -static inline handle_t *jbd_alloc_handle(unsigned int __nocast gfp_flags) +static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { return kmem_cache_alloc(jbd_handle_cache, gfp_flags); } diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c27cd428d269..48eccd865bd8 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -35,8 +35,8 @@ struct kfifo { }; extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - unsigned int __nocast gfp_mask, spinlock_t *lock); -extern struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast 
gfp_mask, + gfp_t gfp_mask, spinlock_t *lock); +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock); extern void kfifo_free(struct kfifo *fifo); extern unsigned int __kfifo_put(struct kfifo *fifo, diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 796220ce47cc..f2427d7394b0 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -6,7 +6,7 @@ #include -typedef void * (mempool_alloc_t)(unsigned int __nocast gfp_mask, void *pool_data); +typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); typedef struct mempool_s { @@ -26,17 +26,16 @@ extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, int nid); -extern int mempool_resize(mempool_t *pool, int new_min_nr, - unsigned int __nocast gfp_mask); +extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); extern void mempool_destroy(mempool_t *pool); -extern void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask); +extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); extern void mempool_free(void *element, mempool_t *pool); /* * A mempool_alloc_t and mempool_free_t that get the memory from * a slab that is passed in through pool_data. */ -void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data); +void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); #endif /* _LINUX_MEMPOOL_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index bdebdc564506..ba25ca874c20 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,7 +131,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (* extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, - __u32 group, unsigned int __nocast allocation); + __u32 group, gfp_t allocation); extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9a25647a295..acbf31c154f8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -19,7 +19,7 @@ #define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ #define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ -static inline unsigned int __nocast mapping_gfp_mask(struct address_space * mapping) +static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->flags & __GFP_BITS_MASK; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 4caedddaa033..4bc241290c24 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -71,11 +71,11 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ -extern struct posix_acl *posix_acl_alloc(int, unsigned int __nocast); -extern struct posix_acl *posix_acl_clone(const struct posix_acl *, unsigned int __nocast); +extern struct posix_acl *posix_acl_alloc(int, gfp_t); +extern struct posix_acl *posix_acl_clone(const struct posix_acl *, gfp_t); extern int posix_acl_valid(const struct 
posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); -extern struct posix_acl *posix_acl_from_mode(mode_t, unsigned int __nocast); +extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); extern int posix_acl_create_masq(struct posix_acl *, mode_t *); extern int posix_acl_chmod_masq(struct posix_acl *, mode_t); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 9c51917b1cce..045d4761febc 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -50,7 +50,7 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -int radix_tree_preload(unsigned int __nocast gfp_mask); +int radix_tree_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, int tag); diff --git a/include/linux/security.h b/include/linux/security.h index 0e43460d374e..627382e74057 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2634,8 +2634,7 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return security_ops->socket_getpeersec(sock, optval, optlen, len); } -static inline int security_sk_alloc(struct sock *sk, int family, - unsigned int __nocast priority) +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return security_ops->sk_alloc_security(sk, family, priority); } @@ -2752,8 +2751,7 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, - unsigned int __nocast priority) +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 466c879f82b8..8f5d9e7f8734 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -302,37 +302,37 @@ struct sk_buff { extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, - unsigned int __nocast priority, int fclone); + gfp_t priority, int fclone); static inline struct sk_buff *alloc_skb(unsigned int size, - unsigned int __nocast priority) + gfp_t priority) { return __alloc_skb(size, priority, 0); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, - unsigned int __nocast priority) + gfp_t priority) { return __alloc_skb(size, priority, 1); } extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - unsigned int __nocast priority); + gfp_t priority); extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff *skb_clone(struct sk_buff *skb, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *pskb_copy(struct sk_buff *skb, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff * skb_pad(struct 
sk_buff *skb, int pad); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, @@ -484,7 +484,7 @@ static inline int skb_shared(const struct sk_buff *skb) * NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, - unsigned int __nocast pri) + gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_shared(skb)) { @@ -516,7 +516,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, * %NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, - unsigned int __nocast pri) + gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_cloned(skb)) { @@ -1017,7 +1017,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) * %NULL is returned in there is no free memory. */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); if (likely(skb)) @@ -1130,8 +1130,8 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ -extern int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp); -static inline int skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp) +extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); +static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) { return __skb_linearize(skb, gfp); } diff --git a/include/linux/slab.h b/include/linux/slab.h index 1f356f3bbc64..5fc04a16ecb0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -61,11 +61,11 @@ extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned lo void (*)(void *, kmem_cache_t *, unsigned long)); extern int kmem_cache_destroy(kmem_cache_t *); extern int kmem_cache_shrink(kmem_cache_t *); -extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); +extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags); +extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -74,9 +74,9 @@ struct cache_sizes { kmem_cache_t *cs_dmacachep; }; extern struct cache_sizes malloc_sizes[]; -extern void *__kmalloc(size_t, unsigned int __nocast); +extern void *__kmalloc(size_t, gfp_t); -static inline void *kmalloc(size_t size, unsigned int __nocast flags) +static inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { int i = 0; @@ -99,7 +99,7 @@ found: return __kmalloc(size, flags); } -extern void *kzalloc(size_t, unsigned int __nocast); +extern void *kzalloc(size_t, gfp_t); /** * kcalloc - allocate memory for an array. The memory is set to zero. @@ -107,7 +107,7 @@ extern void *kzalloc(size_t, unsigned int __nocast); * @size: element size. * @flags: the type of memory to allocate. 
 */
-static inline void *kcalloc(size_t n, size_t size, unsigned int __nocast flags)
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
 {
 	if (n != 0 && size > INT_MAX / n)
 		return NULL;
@@ -118,15 +118,14 @@ extern void kfree(const void *);
 extern unsigned int ksize(const void *);
 
 #ifdef CONFIG_NUMA
-extern void *kmem_cache_alloc_node(kmem_cache_t *,
-			unsigned int __nocast flags, int node);
-extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node);
+extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
+extern void *kmalloc_node(size_t size, gfp_t flags, int node);
 #else
 static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node)
 {
 	return kmem_cache_alloc(cachep, flags);
 }
-static inline void *kmalloc_node(size_t size, unsigned int __nocast flags, int node)
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return kmalloc(size, flags);
 }
diff --git a/include/linux/string.h b/include/linux/string.h
index dab2652acbd8..369be3264a55 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -88,7 +88,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
 
-extern char *kstrdup(const char *s, unsigned int __nocast gfp);
+extern char *kstrdup(const char *s, gfp_t gfp);
 
 #ifdef __cplusplus
 }
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3c9ff0048153..a7bf1a3b1496 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -147,7 +147,7 @@ struct swap_list_t {
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
 /* linux/mm/oom_kill.c */
-extern void out_of_memory(unsigned int __nocast gfp_mask, int order);
+extern void out_of_memory(gfp_t gfp_mask, int order);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h
index 1a4990e448e9..515046d1b2f4 100644
--- a/include/linux/textsearch.h
+++ b/include/linux/textsearch.h
@@ -159,7 +159,7 @@ extern unsigned int textsearch_find_continuous(struct ts_config *,
 #define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1))
 
 static inline struct ts_config *alloc_ts_config(size_t payload,
-						unsigned int __nocast gfp_mask)
+						gfp_t gfp_mask)
 {
 	struct ts_config *conf;
 
diff --git a/include/linux/types.h b/include/linux/types.h
index 2b678c22ca4a..0aee34f9da9f 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -165,6 +165,10 @@ typedef __u64 __bitwise __le64;
 typedef __u64 __bitwise __be64;
 #endif
 
+#ifdef __KERNEL__
+typedef unsigned __nocast gfp_t;
+#endif
+
 struct ustat {
 	__kernel_daddr_t	f_tfree;
 	__kernel_ino_t		f_tinode;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index b244f69ef682..3701a0673d2c 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -34,8 +34,8 @@ struct vm_struct {
 extern void *vmalloc(unsigned long size);
 extern void *vmalloc_exec(unsigned long size);
 extern void *vmalloc_32(unsigned long size);
-extern void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot);
-extern void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot);
+extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
+extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot);
 extern void vfree(void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
diff --git 
a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 6dfa4a61ffd0..210458624840 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -136,7 +136,7 @@ struct bt_skb_cb { }; #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) -static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how) +static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { struct sk_buff *skb; diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index ffea9d54071f..fbe557f7ea1d 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -230,7 +230,7 @@ int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, u8 xon_char, u8 xoff_char, u16 param_mask); /* ---- RFCOMM DLCs (channels) ---- */ -struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio); +struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio); void rfcomm_dlc_free(struct rfcomm_dlc *d); int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 8a0891e2e888..1ba03be0af3a 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -19,9 +19,9 @@ extern void dn_nsp_send_data_ack(struct sock *sk); extern void dn_nsp_send_oth_ack(struct sock *sk); extern void dn_nsp_delayed_ack(struct sock *sk); extern void dn_send_conn_ack(struct sock *sk); -extern void dn_send_conn_conf(struct sock *sk, unsigned int __nocast gfp); +extern void dn_send_conn_conf(struct sock *sk, gfp_t gfp); extern void dn_nsp_send_disc(struct sock *sk, unsigned char type, - unsigned short reason, unsigned int __nocast gfp); + unsigned short reason, gfp_t gfp); extern void dn_nsp_return_disc(struct sk_buff *skb, unsigned char type, unsigned short reason); extern void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval); @@ -29,14 +29,14 @@ extern void dn_nsp_send_conninit(struct sock *sk, unsigned char flags); extern void dn_nsp_output(struct sock *sk); extern int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum); -extern void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, unsigned int __nocast gfp, int oob); +extern void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, gfp_t gfp, int oob); extern unsigned long dn_nsp_persist(struct sock *sk); extern int dn_nsp_xmit_timeout(struct sock *sk); extern int dn_nsp_rx(struct sk_buff *); extern int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb); -extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, unsigned int __nocast pri); +extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); extern struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, long timeo, int *err); #define NSP_REASON_OK 0 /* No error */ diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 11fe973cf383..5122da3f2eb3 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -15,7 +15,7 @@ GNU General Public License for more details. 
*******************************************************************************/ -extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, unsigned int __nocast pri); +extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); extern int dn_route_output_sock(struct dst_entry **pprt, struct flowi *, struct sock *sk, int flags); extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); extern int dn_cache_getroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 651f824c1008..b0c99060b78d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -94,7 +94,7 @@ static inline void *inet_csk_ca(const struct sock *sk) extern struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const unsigned int __nocast priority); + const gfp_t priority); enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ecb2b061f597..3b5559a023a4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -832,7 +832,7 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff **pskb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff **pskb); -extern int ip_vs_skb_replace(struct sk_buff *skb, unsigned int __nocast pri, +extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, char *o_buf, int o_len, char *n_buf, int n_len); extern int ip_vs_app_init(void); extern void ip_vs_app_cleanup(void); diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 54852ff6033b..00730d21b522 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -93,7 +93,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) return skb->cb[sizeof(skb->cb) - 1]; } -extern struct sock *llc_sk_alloc(int family, unsigned int __nocast priority, +extern struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot); extern void llc_sk_free(struct sock *sk); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e1d5ec1c23c0..8f241216f46b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -125,7 +125,7 @@ */ extern struct sock *sctp_get_ctl_sock(void); extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, - sctp_scope_t, unsigned int __nocast gfp, + sctp_scope_t, gfp_t gfp, int flags); extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family); extern int sctp_register_pf(struct sctp_pf *, sa_family_t); diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 58462164d960..1eac3d0eb7a9 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -181,17 +181,17 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t, int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, - unsigned int __nocast gfp); + gfp_t gfp); __u32 sctp_generate_verification_tag(void); void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag); /* Prototypes for chunk-building functions. 
*/ struct sctp_chunk *sctp_make_init(const struct sctp_association *, const struct sctp_bind_addr *, - unsigned int __nocast gfp, int vparam_len); + gfp_t gfp, int vparam_len); struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *, const struct sctp_chunk *, - const unsigned int __nocast gfp, + const gfp_t gfp, const int unkparam_len); struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *, const struct sctp_chunk *); @@ -265,7 +265,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, struct sctp_endpoint *, struct sctp_association *asoc, void *event_arg, - unsigned int __nocast gfp); + gfp_t gfp); /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); @@ -276,7 +276,7 @@ void sctp_ootb_pkt_free(struct sctp_packet *); struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *, const struct sctp_association *, struct sctp_chunk *, - unsigned int __nocast gfp, int *err, + gfp_t gfp, int *err, struct sctp_chunk **err_chk_p); int sctp_addip_addr_config(struct sctp_association *, sctp_param_t, struct sockaddr_storage*, int); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 994009bbe3b4..9c385b6417c7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -446,7 +446,7 @@ struct sctp_ssnmap { }; struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, - unsigned int __nocast gfp); + gfp_t gfp); void sctp_ssnmap_free(struct sctp_ssnmap *map); void sctp_ssnmap_clear(struct sctp_ssnmap *map); @@ -947,7 +947,7 @@ struct sctp_transport { }; struct sctp_transport *sctp_transport_new(const union sctp_addr *, - unsigned int __nocast); + gfp_t); void sctp_transport_set_owner(struct sctp_transport *, struct sctp_association *); void sctp_transport_route(struct sctp_transport *, union sctp_addr *, @@ -1095,10 +1095,10 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); void sctp_bind_addr_free(struct sctp_bind_addr *); int sctp_bind_addr_copy(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - unsigned int __nocast gfp); + gfp_t gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); @@ -1108,9 +1108,9 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, struct sctp_sock *opt); union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int *addrs_len, - unsigned int __nocast gfp); + gfp_t gfp); int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, - __u16 port, unsigned int __nocast gfp); + __u16 port, gfp_t gfp); sctp_scope_t sctp_scope(const union sctp_addr *); int sctp_in_scope(const union sctp_addr *addr, const sctp_scope_t scope); @@ -1239,7 +1239,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) } /* These are function signatures for manipulating endpoints. 
*/ -struct sctp_endpoint *sctp_endpoint_new(struct sock *, unsigned int __nocast); +struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); void sctp_endpoint_hold(struct sctp_endpoint *); @@ -1260,7 +1260,7 @@ int sctp_verify_init(const struct sctp_association *asoc, sctp_cid_t, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, sctp_cid_t cid, const union sctp_addr *peer, - sctp_init_chunk_t *init, unsigned int __nocast gfp); + sctp_init_chunk_t *init, gfp_t gfp); __u32 sctp_generate_tag(const struct sctp_endpoint *); __u32 sctp_generate_tsn(const struct sctp_endpoint *); @@ -1723,7 +1723,7 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base) struct sctp_association * sctp_association_new(const struct sctp_endpoint *, const struct sock *, - sctp_scope_t scope, unsigned int __nocast gfp); + sctp_scope_t scope, gfp_t gfp); void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); @@ -1739,7 +1739,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, const union sctp_addr *laddr); struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *, const union sctp_addr *address, - const unsigned int __nocast gfp, + const gfp_t gfp, const int peer_state); void sctp_assoc_del_peer(struct sctp_association *asoc, const union sctp_addr *addr); @@ -1764,10 +1764,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned); void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *, - unsigned int __nocast); + gfp_t); int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *, struct sctp_cookie*, - unsigned int __nocast gfp); + gfp_t gfp); int sctp_cmp_addr_exact(const union sctp_addr *ss1, const union sctp_addr *ss2); diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 90fe4bf6754f..6c40cfc4832d 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -88,7 +88,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( __u16 error, __u16 outbound, __u16 inbound, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( const struct sctp_association *asoc, @@ -96,35 +96,35 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( int flags, int state, int error, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, struct sctp_chunk *chunk, __u16 flags, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_send_failed( const struct sctp_association *asoc, struct sctp_chunk *chunk, __u16 flags, __u32 error, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, __u16 flags, - unsigned int __nocast gfp); + gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, - __u32 indication, unsigned int __nocast gfp); + __u32 indication, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( - const struct sctp_association *asoc, unsigned int __nocast gfp); + const struct sctp_association *asoc, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, 
struct sctp_chunk *chunk, - unsigned int __nocast gfp); + gfp_t gfp); void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index 1a60c6d943c1..a43c8788b650 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -62,22 +62,19 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *, void sctp_ulpq_free(struct sctp_ulpq *); /* Add a new DATA chunk for processing. */ -int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, - unsigned int __nocast); +int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Add a new event for propagation to the ULP. */ int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev); /* Renege previously received chunks. */ -void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, - unsigned int __nocast); +void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Perform partial delivery. */ -void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, - unsigned int __nocast); +void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Abort the partial delivery. */ -void sctp_ulpq_abort_pd(struct sctp_ulpq *, unsigned int __nocast); +void sctp_ulpq_abort_pd(struct sctp_ulpq *, gfp_t); /* Clear the partial data delivery condition on this socket. */ int sctp_clear_pd(struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index b6440805c420..ecb75526cba0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -739,18 +739,18 @@ extern void FASTCALL(release_sock(struct sock *sk)); #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) extern struct sock *sk_alloc(int family, - unsigned int __nocast priority, + gfp_t priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); extern struct sock *sk_clone(const struct sock *sk, - const unsigned int __nocast priority); + const gfp_t priority); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority); + gfp_t priority); extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); @@ -766,7 +766,7 @@ extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, int noblock, int *errcode); extern void *sock_kmalloc(struct sock *sk, int size, - unsigned int __nocast priority); + gfp_t priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); extern void sk_send_sigurg(struct sock *sk); @@ -1201,7 +1201,7 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, int size, int mem, - unsigned int __nocast gfp) + gfp_t gfp) { struct sk_buff *skb; int hdr_len; @@ -1224,7 +1224,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, - unsigned int __nocast gfp) + gfp_t gfp) { return sk_stream_alloc_pskb(sk, size, 0, gfp); } @@ -1255,7 +1255,7 @@ static inline int sock_writeable(const struct sock *sk) return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2); } -static inline unsigned int __nocast gfp_any(void) +static inline gfp_t gfp_any(void) { return in_softirq() ? 
GFP_ATOMIC : GFP_KERNEL; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 97af77c4d096..c24339c4e310 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -460,8 +460,7 @@ extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); -extern void tcp_send_active_reset(struct sock *sk, - unsigned int __nocast priority); +extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); extern int tcp_send_synack(struct sock *); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b6e72f890c6c..5beae1ccd574 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -875,7 +875,7 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig } #endif -struct xfrm_policy *xfrm_policy_alloc(unsigned int __nocast gfp); +struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 0e293fe733b0..4172e6841e3d 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -596,7 +596,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, struct ib_ah *ah, int rmpp_active, int hdr_len, int data_len, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); /** * ib_free_send_mad - Returns data buffers used to send a MAD. diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index a7555c800ecf..f404fe21cc21 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -285,7 +285,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query); int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), @@ -296,7 +296,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), @@ -307,7 +307,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), @@ -342,7 +342,7 @@ static inline int ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), @@ -384,7 +384,7 @@ static inline int ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), diff --git 
a/include/rxrpc/call.h b/include/rxrpc/call.h index 8118731e7d96..b86f83743510 100644 --- a/include/rxrpc/call.h +++ b/include/rxrpc/call.h @@ -203,7 +203,7 @@ extern int rxrpc_call_write_data(struct rxrpc_call *call, size_t sioc, struct kvec *siov, uint8_t rxhdr_flags, - unsigned int __nocast alloc_flags, + gfp_t alloc_flags, int dup_data, size_t *size_sent); diff --git a/include/rxrpc/message.h b/include/rxrpc/message.h index 983d9f9eee1a..b318f273d4f2 100644 --- a/include/rxrpc/message.h +++ b/include/rxrpc/message.h @@ -63,7 +63,7 @@ extern int rxrpc_conn_newmsg(struct rxrpc_connection *conn, uint8_t type, int count, struct kvec *diov, - unsigned int __nocast alloc_flags, + gfp_t alloc_flags, struct rxrpc_message **_msg); extern int rxrpc_conn_sendmsg(struct rxrpc_connection *conn, struct rxrpc_message *msg); diff --git a/include/sound/core.h b/include/sound/core.h index 26160adcdffc..6d971a4c4ca0 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -290,13 +290,13 @@ void snd_memory_init(void); void snd_memory_done(void); int snd_memory_info_init(void); int snd_memory_info_done(void); -void *snd_hidden_kmalloc(size_t size, unsigned int __nocast flags); -void *snd_hidden_kzalloc(size_t size, unsigned int __nocast flags); -void *snd_hidden_kcalloc(size_t n, size_t size, unsigned int __nocast flags); +void *snd_hidden_kmalloc(size_t size, gfp_t flags); +void *snd_hidden_kzalloc(size_t size, gfp_t flags); +void *snd_hidden_kcalloc(size_t n, size_t size, gfp_t flags); void snd_hidden_kfree(const void *obj); void *snd_hidden_vmalloc(unsigned long size); void snd_hidden_vfree(void *obj); -char *snd_hidden_kstrdup(const char *s, unsigned int __nocast flags); +char *snd_hidden_kstrdup(const char *s, gfp_t flags); #define kmalloc(size, flags) snd_hidden_kmalloc(size, flags) #define kzalloc(size, flags) snd_hidden_kzalloc(size, flags) #define kcalloc(n, size, flags) snd_hidden_kcalloc(n, size, flags) diff --git a/include/sound/driver.h b/include/sound/driver.h index 0d12456ec3ae..1ec2fae050a6 100644 --- a/include/sound/driver.h +++ b/include/sound/driver.h @@ -51,7 +51,7 @@ #ifdef CONFIG_SND_DEBUG_MEMORY #include #include -void *snd_wrapper_kmalloc(size_t, unsigned int __nocast); +void *snd_wrapper_kmalloc(size_t, gfp_t); #undef kmalloc void snd_wrapper_kfree(const void *); #undef kfree diff --git a/kernel/audit.c b/kernel/audit.c index 83096b67510a..aefa73a8a586 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -560,7 +560,7 @@ static void audit_buffer_free(struct audit_buffer *ab) } static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx, - unsigned int __nocast gfp_mask, int type) + gfp_t gfp_mask, int type) { unsigned long flags; struct audit_buffer *ab = NULL; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 45a5719a0104..28176d083f7b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1670,7 +1670,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * GFP_USER - only nodes in current tasks mems allowed ok. **/ -int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) +int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 179baafcdd96..64ab045c3d9d 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -36,7 +36,7 @@ * struct kfifo with kfree(). 
*/ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - unsigned int __nocast gfp_mask, spinlock_t *lock) + gfp_t gfp_mask, spinlock_t *lock) { struct kfifo *fifo; @@ -64,7 +64,7 @@ EXPORT_SYMBOL(kfifo_init); * * The size will be rounded-up to a power of 2. */ -struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, spinlock_t *lock) +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) { unsigned char *buffer; struct kfifo *ret; diff --git a/kernel/signal.c b/kernel/signal.c index c135f5aa2c2d..cba193ceda0d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -262,7 +262,7 @@ next_signal(struct sigpending *pending, sigset_t *mask) return sig; } -static struct sigqueue *__sigqueue_alloc(struct task_struct *t, unsigned int __nocast flags, +static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) { struct sigqueue *q = NULL; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6a8bc6e06431..d1c057e71b68 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -110,7 +110,7 @@ radix_tree_node_free(struct radix_tree_node *node) * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. */ -int radix_tree_preload(unsigned int __nocast gfp_mask) +int radix_tree_preload(gfp_t gfp_mask) { struct radix_tree_preload *rtp; struct radix_tree_node *node; diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 1b61fceef777..8a8b3a16133e 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -127,7 +127,7 @@ static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, } static struct ts_config *bm_init(const void *pattern, unsigned int len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ts_config *conf; struct ts_bm *bm; diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index ef9779e00506..ca3211206eef 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -258,7 +258,7 @@ found_match: } static struct ts_config *fsm_init(const void *pattern, unsigned int len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { int i, err = -EINVAL; struct ts_config *conf; diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index e45f0f0c2379..7fd45451b44a 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -87,7 +87,7 @@ static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, } static struct ts_config *kmp_init(const void *pattern, unsigned int len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ts_config *conf; struct ts_kmp *kmp; diff --git a/mm/highmem.c b/mm/highmem.c index 400911599468..90e1861e2da0 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -30,7 +30,7 @@ static mempool_t *page_pool, *isa_page_pool; -static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data) +static void *page_pool_alloc(gfp_t gfp_mask, void *data) { unsigned int gfp = gfp_mask | (unsigned int) (long) data; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9033f0859aa8..37af443eb094 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -687,7 +687,7 @@ get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned lo } /* Return a zonelist representing a mempolicy */ -static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempolicy *policy) +static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy) { int nd; @@ -751,7 +751,7 @@ static unsigned offset_il_node(struct mempolicy *pol, /* Allocate a page in interleaved policy. Own path because it needs to do special accounting. 
*/ -static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned order, unsigned nid) +static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, unsigned nid) { struct zonelist *zl; struct page *page; @@ -789,7 +789,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or * Should be called with the mm_sem of the vma hold. */ struct page * -alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr) +alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = get_vma_policy(current, vma, addr); @@ -832,7 +832,7 @@ alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned l * 1) it's ok to take cpuset_sem (can WAIT), and * 2) allocating for current task (not interrupt). */ -struct page *alloc_pages_current(unsigned int __nocast gfp, unsigned order) +struct page *alloc_pages_current(gfp_t gfp, unsigned order) { struct mempolicy *pol = current->mempolicy; diff --git a/mm/mempool.c b/mm/mempool.c index 65f2957b8d51..9e377ea700b2 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -112,7 +112,7 @@ EXPORT_SYMBOL(mempool_create_node); * while this function is running. mempool_alloc() & mempool_free() * might be called (eg. from IRQ contexts) while this function executes. */ -int mempool_resize(mempool_t *pool, int new_min_nr, unsigned int __nocast gfp_mask) +int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) { void *element; void **new_elements; @@ -200,7 +200,7 @@ EXPORT_SYMBOL(mempool_destroy); * *never* fails when called from process contexts. (it might * fail if called from an IRQ context.) */ -void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask) +void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask) { void *element; unsigned long flags; @@ -276,7 +276,7 @@ EXPORT_SYMBOL(mempool_free); /* * A commonly used alloc and free fn. */ -void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data) +void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { kmem_cache_t *mem = (kmem_cache_t *) pool_data; return kmem_cache_alloc(mem, gfp_mask); diff --git a/mm/nommu.c b/mm/nommu.c index 064d70442895..0ef241ae3763 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -157,8 +157,7 @@ void vfree(void *addr) kfree(addr); } -void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, - pgprot_t prot) +void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { /* * kmalloc doesn't like __GFP_HIGHMEM for some reason diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ac3bf33e5370..d348b9035955 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -263,7 +263,7 @@ static struct mm_struct *oom_kill_process(struct task_struct *p) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -void out_of_memory(unsigned int __nocast gfp_mask, int order) +void out_of_memory(gfp_t gfp_mask, int order) { struct mm_struct *mm = NULL; task_t * p; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ae2903339e71..cc1fe2672a31 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -671,7 +671,7 @@ void fastcall free_cold_page(struct page *page) free_hot_cold_page(page, 1); } -static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags) +static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; @@ -686,7 +686,7 @@ static inline void prep_zero_page(struct page *page, int order, unsigned int __n * or two. 
*/ static struct page * -buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags) +buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) { unsigned long flags; struct page *page = NULL; @@ -761,7 +761,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, } static inline int -should_reclaim_zone(struct zone *z, unsigned int gfp_mask) +should_reclaim_zone(struct zone *z, gfp_t gfp_mask) { if (!z->reclaim_pages) return 0; @@ -774,7 +774,7 @@ should_reclaim_zone(struct zone *z, unsigned int gfp_mask) * This is the 'heart' of the zoned buddy allocator. */ struct page * fastcall -__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order, +__alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist) { const int wait = gfp_mask & __GFP_WAIT; @@ -977,7 +977,7 @@ EXPORT_SYMBOL(__alloc_pages); /* * Common helper functions. */ -fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order) +fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) { struct page * page; page = alloc_pages(gfp_mask, order); @@ -988,7 +988,7 @@ fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned EXPORT_SYMBOL(__get_free_pages); -fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask) +fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) { struct page * page; diff --git a/mm/page_io.c b/mm/page_io.c index 2e605a19ce57..330e00d6db00 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -19,7 +19,7 @@ #include #include -static struct bio *get_swap_bio(unsigned int __nocast gfp_flags, pgoff_t index, +static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index, struct page *page, bio_end_io_t end_io) { struct bio *bio; diff --git a/mm/shmem.c b/mm/shmem.c index 1f7aeb210c7b..ea064d89cda9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -921,8 +921,7 @@ shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) } static inline struct page * -shmem_alloc_page(unsigned int __nocast gfp,struct shmem_inode_info *info, - unsigned long idx) +shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) { return alloc_page(gfp | __GFP_ZERO); } diff --git a/mm/slab.c b/mm/slab.c index 5cbbdfa6dd0e..d05c678bceb3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -650,8 +650,7 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep) return cachep->array[smp_processor_id()]; } -static inline kmem_cache_t *__find_general_cachep(size_t size, - unsigned int __nocast gfpflags) +static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags) { struct cache_sizes *csizep = malloc_sizes; @@ -675,8 +674,7 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, return csizep->cs_cachep; } -kmem_cache_t *kmem_find_general_cachep(size_t size, - unsigned int __nocast gfpflags) +kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags) { return __find_general_cachep(size, gfpflags); } @@ -1185,7 +1183,7 @@ __initcall(cpucache_init); * did not request dmaable memory, we might get it, but that * would be relatively rare and ignorable. */ -static void *kmem_getpages(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid) { struct page *page; void *addr; @@ -2048,7 +2046,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); /* Get the memory for a slab management obj. 
*/ static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp, - int colour_off, unsigned int __nocast local_flags) + int colour_off, gfp_t local_flags) { struct slab *slabp; @@ -2149,7 +2147,7 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) * Grow (by 1) the number of slabs within a cache. This is called by * kmem_cache_alloc() when there are no active objs left in a cache. */ -static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) { struct slab *slabp; void *objp; @@ -2356,7 +2354,7 @@ bad: #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(kmem_cache_t *cachep, unsigned int __nocast flags) +static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) { int batchcount; struct kmem_list3 *l3; @@ -2456,7 +2454,7 @@ alloc_done: } static inline void -cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags) +cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags) { might_sleep_if(flags & __GFP_WAIT); #if DEBUG @@ -2467,7 +2465,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags) #if DEBUG static void * cache_alloc_debugcheck_after(kmem_cache_t *cachep, - unsigned int __nocast flags, void *objp, void *caller) + gfp_t flags, void *objp, void *caller) { if (!objp) return objp; @@ -2510,7 +2508,7 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) #endif -static inline void *____cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags) +static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) { void* objp; struct array_cache *ac; @@ -2528,7 +2526,7 @@ static inline void *____cache_alloc(kmem_cache_t *cachep, unsigned int __nocast return objp; } -static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags) +static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) { unsigned long save_flags; void* objp; @@ -2787,7 +2785,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) * Allocate an object from this cache. The flags are only relevant * if the cache has no available objects. */ -void *kmem_cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags) +void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags) { return __cache_alloc(cachep, flags); } @@ -2848,7 +2846,7 @@ out: * New and improved: it will now make sure that the object gets * put on the correct node list so that there is no false sharing. */ -void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) { unsigned long save_flags; void *ptr; @@ -2875,7 +2873,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i } EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) +void *kmalloc_node(size_t size, gfp_t flags, int node) { kmem_cache_t *cachep; @@ -2908,7 +2906,7 @@ EXPORT_SYMBOL(kmalloc_node); * platforms. For example, on i386, it means that the memory must come * from the first 16MB. */ -void *__kmalloc(size_t size, unsigned int __nocast flags) +void *__kmalloc(size_t size, gfp_t flags) { kmem_cache_t *cachep; @@ -2997,7 +2995,7 @@ EXPORT_SYMBOL(kmem_cache_free); * @size: how many bytes of memory are required. * @flags: the type of memory to allocate. 
*/ -void *kzalloc(size_t size, unsigned int __nocast flags) +void *kzalloc(size_t size, gfp_t flags) { void *ret = kmalloc(size, flags); if (ret) @@ -3603,7 +3601,7 @@ unsigned int ksize(const void *objp) * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory */ -char *kstrdup(const char *s, unsigned int __nocast gfp) +char *kstrdup(const char *s, gfp_t gfp) { size_t len; char *buf; diff --git a/mm/swap_state.c b/mm/swap_state.c index adbc2b426c2f..132164f7d0a7 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -68,7 +68,7 @@ void show_swap_cache_info(void) * but sets SwapCache flag and private instead of mapping and index. */ static int __add_to_swap_cache(struct page *page, swp_entry_t entry, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { int error; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 13c3d82968ae..1150229b6366 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -395,7 +395,7 @@ void *vmap(struct page **pages, unsigned int count, EXPORT_SYMBOL(vmap); -void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot) +void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) { struct page **pages; unsigned int nr_pages, array_size, i; @@ -446,7 +446,7 @@ fail: * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. */ -void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot) +void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { struct vm_struct *area; diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index 71abc99ec815..223c7ad5bd0f 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -25,7 +25,7 @@ int atm_charge(struct atm_vcc *vcc,int truesize) struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - unsigned int __nocast gfp_flags) + gfp_t gfp_flags) { struct sock *sk = sk_atm(vcc); int guess = atm_guess_pdu2truesize(pdu_size); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index d3d6bc547212..59b2dd36baa7 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -372,7 +372,7 @@ static struct proto l2cap_proto = { .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 173f46e8cdae..35adce6482b6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -229,7 +229,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) d->rx_credits = RFCOMM_DEFAULT_CREDITS; } -struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio) +struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) { struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); if (!d) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index f49e7e938bfb..a2b30f0aedb7 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -284,7 +284,7 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct rfcomm_dlc *d; struct sock *sk; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 1bca860a6109..158a9c46d863 100644 --- 
a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -286,7 +286,7 @@ static inline void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *de skb->destructor = rfcomm_wfree; } -static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, unsigned int __nocast priority) +static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, gfp_t priority) { if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) { struct sk_buff *skb = alloc_skb(size, priority); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ce7ab7dfa0b2..997e42df115c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -418,7 +418,7 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; diff --git a/net/core/dev.c b/net/core/dev.c index 9066c874e273..a44eeef24edf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1132,7 +1132,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #endif /* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) +int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) { unsigned int size; u8 *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0e9431b59fb2..af9b1516e21f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -130,7 +130,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask, +struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int fclone) { struct sk_buff *skb; @@ -198,7 +198,7 @@ nodata: */ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct sk_buff *skb; u8 *data; @@ -361,7 +361,7 @@ void __kfree_skb(struct sk_buff *skb) * %GFP_ATOMIC. */ -struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; @@ -500,7 +500,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) * header is going to be modified. Use pskb_copy() instead. */ -struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb->data - skb->head; /* @@ -539,7 +539,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_ma * The returned buffer has a reference count of 1. 
*/ -struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { /* * Allocate the copy buffer @@ -598,7 +598,7 @@ out: */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { int i; u8 *data; @@ -689,7 +689,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { /* * Allocate the copy buffer diff --git a/net/core/sock.c b/net/core/sock.c index 928d2a1d6d8e..1c52fe809eda 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -637,7 +637,7 @@ lenout: * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, unsigned int __nocast priority, +struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -704,7 +704,7 @@ void sk_free(struct sock *sk) module_put(owner); } -struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority) +struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); @@ -845,7 +845,7 @@ unsigned long sock_i_ino(struct sock *sk) * Allocate a skb from the socket's send buffer. */ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority) + gfp_t priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); @@ -861,7 +861,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, * Allocate a skb from the socket's receive buffer. */ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority) + gfp_t priority) { if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); @@ -876,7 +876,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, /* * Allocate a memory block from the socket's option memory buffer. 
*/ -void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority) +void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { if ((unsigned)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 6530283eafca..c9a62cca22fc 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -91,7 +91,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) } struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, - const unsigned int __nocast priority) + const gfp_t priority) { struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 8ca51c9191f7..d0fd6c60c574 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -74,7 +74,7 @@ struct sk_buff; #ifdef CONFIG_IP_DCCP_ACKVEC extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, - const unsigned int __nocast priority); + const gfp_t priority); extern void dccp_ackvec_free(struct dccp_ackvec *av); extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, @@ -93,7 +93,7 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) } #else /* CONFIG_IP_DCCP_ACKVEC */ static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, - const unsigned int __nocast priority) + const gfp_t priority) { return NULL; } diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 13ad47ba1420..417d9d82df3e 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -36,7 +36,7 @@ struct dccp_li_hist_entry { static inline struct dccp_li_hist_entry * dccp_li_hist_entry_new(struct dccp_li_hist *hist, - const unsigned int __nocast prio) + const gfp_t prio) { return kmem_cache_alloc(hist->dccplih_slab, prio); } diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index b375ebdb7dcf..122e96737ff6 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -86,7 +86,7 @@ extern struct dccp_rx_hist_entry * static inline struct dccp_tx_hist_entry * dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const unsigned int __nocast prio) + const gfp_t prio) { struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, prio); @@ -137,7 +137,7 @@ static inline struct dccp_rx_hist_entry * const struct sock *sk, const u32 ndp, const struct sk_buff *skb, - const unsigned int __nocast prio) + const gfp_t prio) { struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, prio); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 34d4128d56d5..1186dc44cdff 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -452,8 +452,7 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, - unsigned int __nocast gfp) +static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) { struct dn_scp *scp; struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); @@ -805,8 +804,7 @@ static int dn_auto_bind(struct socket *sock) return rv; } -static int dn_confirm_accept(struct sock *sk, long *timeo, - unsigned int __nocast allocation) +static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) { struct dn_scp *scp = DN_SK(sk); DEFINE_WAIT(wait); diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index cd08244aa10c..c96c767b1f74 100644 --- a/net/decnet/dn_nsp_out.c +++ 
b/net/decnet/dn_nsp_out.c @@ -117,8 +117,7 @@ try_again: * The eventual aim is for each socket to have a cached header size * for its outgoing packets, and to set hdr from this when sk != NULL. */ -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, - unsigned int __nocast pri) +struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri) { struct sk_buff *skb; int hdr = 64; @@ -212,7 +211,7 @@ static void dn_nsp_rtt(struct sock *sk, long rtt) * Returns: The number of times the packet has been sent previously */ static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, - unsigned int __nocast gfp) + gfp_t gfp) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sk_buff *skb2; @@ -353,7 +352,7 @@ static unsigned short *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *sk } void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, - unsigned int __nocast gfp, int oth) + gfp_t gfp, int oth) { struct dn_scp *scp = DN_SK(sk); struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -520,7 +519,7 @@ static int dn_nsp_retrans_conn_conf(struct sock *sk) return 0; } -void dn_send_conn_conf(struct sock *sk, unsigned int __nocast gfp) +void dn_send_conn_conf(struct sock *sk, gfp_t gfp) { struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb = NULL; @@ -552,7 +551,7 @@ void dn_send_conn_conf(struct sock *sk, unsigned int __nocast gfp) static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, unsigned int __nocast gfp, + unsigned short reason, gfp_t gfp, struct dst_entry *dst, int ddl, unsigned char *dd, __u16 rem, __u16 loc) { @@ -595,7 +594,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, unsigned int __nocast gfp) + unsigned short reason, gfp_t gfp) { struct dn_scp *scp = DN_SK(sk); int ddl = 0; @@ -616,7 +615,7 @@ void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg, { struct dn_skb_cb *cb = DN_SKB_CB(skb); int ddl = 0; - unsigned int __nocast gfp = GFP_ATOMIC; + gfp_t gfp = GFP_ATOMIC; dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb->dst, ddl, NULL, cb->src_port, cb->dst_port); @@ -628,7 +627,7 @@ void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval) struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb; unsigned char *ptr; - unsigned int __nocast gfp = GFP_ATOMIC; + gfp_t gfp = GFP_ATOMIC; if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL) return; @@ -663,8 +662,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) unsigned char menuver; struct dn_skb_cb *cb; unsigned char type = 1; - unsigned int __nocast allocation = - (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; + gfp_t allocation = (msgflg == NSP_CI) ? 
sk->sk_allocation : GFP_ATOMIC; struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation); if (!skb) diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index ecdf9f7a538f..eed07bbbe6b6 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -207,7 +207,7 @@ void ieee80211_txb_free(struct ieee80211_txb *txb) } static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ieee80211_txb *txb; int i; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fe3c6d3d0c91..94468a76c5b4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -494,7 +494,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const unsigned int __nocast priority) + const gfp_t priority) { struct sock *newsk = sk_clone(sk, priority); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index b942ff3c8860..fc6f95aaa969 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -604,7 +604,7 @@ static struct file_operations ip_vs_app_fops = { /* * Replace a segment of data with a new segment */ -int ip_vs_skb_replace(struct sk_buff *skb, unsigned int __nocast pri, +int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, char *o_buf, int o_len, char *n_buf, int n_len) { struct iphdr *iph; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c5b911f9b662..8225e4257258 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1610,7 +1610,7 @@ void tcp_send_fin(struct sock *sk) * was unread data in the receive queue. This behavior is recommended * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM */ -void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority) +void tcp_send_active_reset(struct sock *sk, gfp_t priority) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; diff --git a/net/key/af_key.c b/net/key/af_key.c index bbf0f69181ba..39031684b65c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -185,7 +185,7 @@ static int pfkey_release(struct socket *sock) } static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, - unsigned int __nocast allocation, struct sock *sk) + gfp_t allocation, struct sock *sk) { int err = -ENOBUFS; @@ -217,7 +217,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 -static int pfkey_broadcast(struct sk_buff *skb, unsigned int __nocast allocation, +static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk) { struct sock *sk; @@ -1417,7 +1417,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, } static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, - unsigned int __nocast allocation) + gfp_t allocation) { struct sk_buff *skb; struct sadb_msg *hdr; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 042b24a8ca4c..c761c15da421 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -867,8 +867,7 @@ static void llc_sk_init(struct sock* sk) * Allocates a LLC sock and initializes it. 
Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, unsigned int __nocast priority, - struct proto *prot) +struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) { struct sock *sk = sk_alloc(family, priority, prot, 1); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 34d671974a4d..1caaca06f698 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -195,8 +195,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { - unsigned int __nocast allocation = - in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; + gfp_t allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; int err = 0; NETLINK_CB(skb).dst_group = group; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a64e1d5ce3ca..678c3f2c0d0b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -758,7 +758,7 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb) } static inline struct sk_buff *netlink_trim(struct sk_buff *skb, - unsigned int __nocast allocation) + gfp_t allocation) { int delta; @@ -880,7 +880,7 @@ out: } int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, - u32 group, unsigned int __nocast allocation) + u32 group, gfp_t allocation) { struct netlink_broadcast_data info; struct hlist_node *node; diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c index 86f777052633..c4aeb7d40266 100644 --- a/net/rxrpc/call.c +++ b/net/rxrpc/call.c @@ -1923,7 +1923,7 @@ int rxrpc_call_write_data(struct rxrpc_call *call, size_t sioc, struct kvec *siov, u8 rxhdr_flags, - unsigned int __nocast alloc_flags, + gfp_t alloc_flags, int dup_data, size_t *size_sent) { diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c index be4b2be58956..2ba14a75dbbe 100644 --- a/net/rxrpc/connection.c +++ b/net/rxrpc/connection.c @@ -522,7 +522,7 @@ int rxrpc_conn_newmsg(struct rxrpc_connection *conn, uint8_t type, int dcount, struct kvec diov[], - unsigned int __nocast alloc_flags, + gfp_t alloc_flags, struct rxrpc_message **_msg) { struct rxrpc_message *msg; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5b24ae0650d3..12b0f582a66b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a const struct sctp_endpoint *ep, const struct sock *sk, sctp_scope_t scope, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sock *sp; int i; @@ -273,7 +273,7 @@ fail_init: struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk, sctp_scope_t scope, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; @@ -479,7 +479,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, /* Add a transport address to an association. */ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const union sctp_addr *addr, - const unsigned int __nocast gfp, + const gfp_t gfp, const int peer_state) { struct sctp_transport *peer; @@ -1231,7 +1231,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) * local endpoint and the remote peer. 
*/ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, - unsigned int __nocast gfp) + gfp_t gfp) { sctp_scope_t scope; int flags; @@ -1254,7 +1254,7 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, /* Build the association's bind address list from the cookie. */ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc, struct sctp_cookie *cookie, - unsigned int __nocast gfp) + gfp_t gfp) { int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length); int var_size3 = cookie->raw_addr_list_len; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index f71549710f2e..2b962627f631 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -53,7 +53,7 @@ /* Forward declarations for internal helpers. */ static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); @@ -64,7 +64,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); */ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags) { struct sctp_sockaddr_entry *addr; @@ -146,7 +146,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) /* Add an address to the bind address list in the SCTP_bind_addr structure. */ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sockaddr_entry *addr; @@ -200,7 +200,7 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) */ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int *addrs_len, - unsigned int __nocast gfp) + gfp_t gfp) { union sctp_params addrparms; union sctp_params retval; @@ -252,7 +252,7 @@ end_raw: * address parameters). */ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, - int addrs_len, __u16 port, unsigned int __nocast gfp) + int addrs_len, __u16 port, gfp_t gfp) { union sctp_addr_param *rawaddr; struct sctp_paramhdr *param; @@ -350,7 +350,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, /* Copy out addresses from the global local address list. */ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, union sctp_addr *addr, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags) { int error = 0; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 61da2937e641..83ef411772f4 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -62,7 +62,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) } /* Allocate and initialize datamsg. 
*/ -SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(unsigned int __nocast gfp) +SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) { struct sctp_datamsg *msg; msg = kmalloc(sizeof(struct sctp_datamsg), gfp); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index e22ccd655965..96984f7a2d69 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -68,7 +68,7 @@ static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep); */ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sock *sp = sctp_sk(sk); memset(ep, 0, sizeof(struct sctp_endpoint)); @@ -138,8 +138,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Create a sctp_endpoint with all that boring stuff initialized. * Returns NULL if there isn't enough memory. */ -struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, - unsigned int __nocast gfp) +struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) { struct sctp_endpoint *ep; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f01d1c9002a1..26de4d3e1bd9 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -219,7 +219,7 @@ static void sctp_free_local_addr_list(void) /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, - unsigned int __nocast gfp, int copy_flags) + gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 3868a8d70cc0..10e82ec2ebd3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -78,7 +78,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, - unsigned int __nocast gfp); + gfp_t gfp); /* What was the inbound interface for this chunk? */ int sctp_chunk_iif(const struct sctp_chunk *chunk) @@ -174,7 +174,7 @@ void sctp_init_cause(struct sctp_chunk *chunk, __u16 cause_code, */ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, - unsigned int __nocast gfp, int vparam_len) + gfp_t gfp, int vparam_len) { sctp_inithdr_t init; union sctp_params addrs; @@ -261,7 +261,7 @@ nodata: struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk, - unsigned int __nocast gfp, int unkparam_len) + gfp_t gfp, int unkparam_len) { sctp_inithdr_t initack; struct sctp_chunk *retval; @@ -1234,7 +1234,7 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk) /* Create a CLOSED association to use with an incoming packet. 
*/ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; struct sk_buff *skb; @@ -1349,7 +1349,7 @@ nodata: struct sctp_association *sctp_unpack_cookie( const struct sctp_endpoint *ep, const struct sctp_association *asoc, - struct sctp_chunk *chunk, unsigned int __nocast gfp, + struct sctp_chunk *chunk, gfp_t gfp, int *error, struct sctp_chunk **errp) { struct sctp_association *retval = NULL; @@ -1814,7 +1814,7 @@ int sctp_verify_init(const struct sctp_association *asoc, */ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, const union sctp_addr *peer_addr, - sctp_init_chunk_t *peer_init, unsigned int __nocast gfp) + sctp_init_chunk_t *peer_init, gfp_t gfp) { union sctp_params param; struct sctp_transport *transport; @@ -1985,7 +1985,7 @@ nomem: static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, - unsigned int __nocast gfp) + gfp_t gfp) { union sctp_addr addr; int i; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 39c970b5b198..f84173ea8ec1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -63,7 +63,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp); + gfp_t gfp); static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_state_t state, struct sctp_endpoint *ep, @@ -71,7 +71,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp); + gfp_t gfp); /******************************************************************** * Helper functions @@ -498,7 +498,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_init_chunk_t *peer_init, - unsigned int __nocast gfp) + gfp_t gfp) { int error; @@ -853,7 +853,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, - unsigned int __nocast gfp) + gfp_t gfp) { sctp_cmd_seq_t commands; const sctp_sm_table_entry_t *state_fn; @@ -898,7 +898,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp) + gfp_t gfp) { int error; @@ -986,7 +986,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp) + gfp_t gfp) { int error = 0; int force; diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 25037daf3fa0..cbe2513d2822 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -58,7 +58,7 @@ static inline size_t sctp_ssnmap_size(__u16 in, __u16 out) * Allocate room to store at least 'len' contiguous TSNs. */ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ssnmap *retval; int size; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index d2f04ebe5081..6bc27200e6ca 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -57,7 +57,7 @@ /* Initialize a new transport from provided memory. 
*/ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, const union sctp_addr *addr, - unsigned int __nocast gfp) + gfp_t gfp) { /* Copy in the address. */ peer->ipaddr = *addr; @@ -122,7 +122,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Allocate and initialize a new transport. */ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_transport *transport; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 0abd5101107c..057e7fac3af0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -74,7 +74,7 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags) /* Create a new sctp_ulpevent. */ SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sk_buff *skb; @@ -136,7 +136,7 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event) struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( const struct sctp_association *asoc, __u16 flags, __u16 state, __u16 error, __u16 outbound, - __u16 inbound, unsigned int __nocast gfp) + __u16 inbound, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_assoc_change *sac; @@ -237,7 +237,7 @@ fail: struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( const struct sctp_association *asoc, const struct sockaddr_storage *aaddr, - int flags, int state, int error, unsigned int __nocast gfp) + int flags, int state, int error, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_paddr_change *spc; @@ -350,7 +350,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, unsigned int __nocast gfp) + __u16 flags, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_remote_error *sre; @@ -448,7 +448,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, __u32 error, unsigned int __nocast gfp) + __u16 flags, __u32 error, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_send_failed *ssf; @@ -557,7 +557,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, - __u16 flags, unsigned int __nocast gfp) + __u16 flags, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_shutdown_event *sse; @@ -620,7 +620,7 @@ fail: * 5.3.1.6 SCTP_ADAPTION_INDICATION */ struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( - const struct sctp_association *asoc, unsigned int __nocast gfp) + const struct sctp_association *asoc, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_adaption_event *sai; @@ -657,7 +657,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event = NULL; struct sk_buff *skb; @@ -719,7 +719,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, __u32 indication, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_pdapi_event *pd; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ec2c857eae7f..2080b2d28c98 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -100,7 +100,7 @@ void sctp_ulpq_free(struct sctp_ulpq *ulpq) /* Process an incoming DATA chunk. 
*/ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sk_buff_head temp; sctp_data_chunk_t *hdr; @@ -792,7 +792,7 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) /* Partial deliver the first message as there is pressure on rwnd. */ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_association *asoc; @@ -816,7 +816,7 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, /* Renege some packets to make room for an incoming chunk. */ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; __u16 needed, freed; @@ -855,7 +855,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, /* Notify the application if an association is aborted and in * partial delivery mode. Send up any pending received messages. */ -void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, unsigned int __nocast gfp) +void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sock *sk; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ade730eaf401..54e60a657500 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -719,7 +719,7 @@ static void rpc_async_schedule(void *arg) void * rpc_malloc(struct rpc_task *task, size_t size) { - unsigned int __nocast gfp; + gfp_t gfp; if (task->tk_flags & RPC_TASK_SWAPPER) gfp = GFP_ATOMIC; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 061b44cc2451..cbb0ba34a600 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -225,7 +225,7 @@ expired: * SPD calls. 
*/ -struct xfrm_policy *xfrm_policy_alloc(unsigned int __nocast gfp) +struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) { struct xfrm_policy *policy; diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 91124ddbdda9..e72cec77f0db 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -106,7 +106,7 @@ struct snd_mem_list { static void *snd_dma_hack_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned int __nocast flags) + gfp_t flags) { void *ret; u64 dma_mask, coherent_dma_mask; diff --git a/sound/core/memory.c b/sound/core/memory.c index 8fa888fc53a0..7d8e2eebba51 100644 --- a/sound/core/memory.c +++ b/sound/core/memory.c @@ -89,7 +89,7 @@ void snd_memory_done(void) } } -static void *__snd_kmalloc(size_t size, unsigned int __nocast flags, void *caller) +static void *__snd_kmalloc(size_t size, gfp_t flags, void *caller) { unsigned long cpu_flags; struct snd_alloc_track *t; @@ -111,12 +111,12 @@ static void *__snd_kmalloc(size_t size, unsigned int __nocast flags, void *calle } #define _snd_kmalloc(size, flags) __snd_kmalloc((size), (flags), __builtin_return_address(0)); -void *snd_hidden_kmalloc(size_t size, unsigned int __nocast flags) +void *snd_hidden_kmalloc(size_t size, gfp_t flags) { return _snd_kmalloc(size, flags); } -void *snd_hidden_kzalloc(size_t size, unsigned int __nocast flags) +void *snd_hidden_kzalloc(size_t size, gfp_t flags) { void *ret = _snd_kmalloc(size, flags); if (ret) @@ -125,7 +125,7 @@ void *snd_hidden_kzalloc(size_t size, unsigned int __nocast flags) } EXPORT_SYMBOL(snd_hidden_kzalloc); -void *snd_hidden_kcalloc(size_t n, size_t size, unsigned int __nocast flags) +void *snd_hidden_kcalloc(size_t n, size_t size, gfp_t flags) { void *ret = NULL; if (n != 0 && size > INT_MAX / n) @@ -190,7 +190,7 @@ void snd_hidden_vfree(void *obj) snd_wrapper_vfree(obj); } -char *snd_hidden_kstrdup(const char *s, unsigned int __nocast flags) +char *snd_hidden_kstrdup(const char *s, gfp_t flags) { int len; char *buf; diff --git a/sound/core/seq/instr/ainstr_iw.c b/sound/core/seq/instr/ainstr_iw.c index b3cee092b1a4..67c24c8e8e7b 100644 --- a/sound/core/seq/instr/ainstr_iw.c +++ b/sound/core/seq/instr/ainstr_iw.c @@ -58,7 +58,7 @@ static int snd_seq_iwffff_copy_env_from_stream(__u32 req_stype, iwffff_xenv_t *ex, char __user **data, long *len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { __u32 stype; iwffff_env_record_t *rp, *rp_last; diff --git a/sound/core/wrappers.c b/sound/core/wrappers.c index 508e6d67ee19..296b716f1376 100644 --- a/sound/core/wrappers.c +++ b/sound/core/wrappers.c @@ -27,7 +27,7 @@ #include #ifdef CONFIG_SND_DEBUG_MEMORY -void *snd_wrapper_kmalloc(size_t size, unsigned int __nocast flags) +void *snd_wrapper_kmalloc(size_t size, gfp_t flags) { return kmalloc(size, flags); } -- cgit v1.2.3 From 79d13b62ef9da84d3ba551caac42d6107e57208f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 10 Oct 2005 10:17:43 +0100 Subject: [ARM] 2958/1: fix definition in imx-regs.h Patch from Sascha Hauer Fix PD7_AF_UART2_DTR definition Signed-off-by: Giancarlo Formicuccia Signed-off-by: Sascha Hauer Signed-off-by: Russell King --- include/asm-arm/arch-imx/imx-regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/arch-imx/imx-regs.h b/include/asm-arm/arch-imx/imx-regs.h index 229f7008d74f..a6912b3d8671 100644 --- a/include/asm-arm/arch-imx/imx-regs.h +++ b/include/asm-arm/arch-imx/imx-regs.h @@ -215,7 +215,7 @@ #define PC31_AOUT_UART3_RX ( GPIO_GIUS | GPIO_PORTC | 
GPIO_IN | 31) #define PD6_PF_LSCLK ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 6 ) #define PD7_PF_REV ( GPIO_PORTD | GPIO_PF | 7 ) -#define PD7_AF_UART2_DTR ( GPIO_PORTD | GPIO_IN | GPIO_AF | 7 ) +#define PD7_AF_UART2_DTR ( GPIO_GIUS | GPIO_PORTD | GPIO_IN | GPIO_AF | 7 ) #define PD7_AIN_SPI2_SCLK ( GPIO_GIUS | GPIO_PORTD | GPIO_AIN | 7 ) #define PD8_PF_CLS ( GPIO_PORTD | GPIO_PF | 8 ) #define PD8_AF_UART2_DCD ( GPIO_PORTD | GPIO_OUT | GPIO_AF | 8 ) -- cgit v1.2.3 From 7c3989885cfd37d237eca97832b712a7ffbbf40c Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Mon, 10 Oct 2005 10:20:06 +0100 Subject: [ARM] 2962/1: scoop: Allow GPIO pin suspend state to be specified Patch from Richard Purdie Allow the GPIO pin suspend states to be specified for SCOOP devices. This is needed for correct operation on the spitz platform. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/common/scoop.c | 20 ++++++++++++++++++-- include/asm-arm/hardware/scoop.h | 2 ++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index d3a04c2a2c85..9e5245c702de 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -26,6 +26,8 @@ struct scoop_pcmcia_dev *scoop_devs; struct scoop_dev { void *base; spinlock_t scoop_lock; + unsigned short suspend_clr; + unsigned short suspend_set; u32 scoop_gpwr; }; @@ -90,14 +92,24 @@ EXPORT_SYMBOL(reset_scoop); EXPORT_SYMBOL(read_scoop_reg); EXPORT_SYMBOL(write_scoop_reg); +static void check_scoop_reg(struct scoop_dev *sdev) +{ + unsigned short mcr; + + mcr = SCOOP_REG(sdev->base, SCOOP_MCR); + if ((mcr & 0x100) == 0) + SCOOP_REG(sdev->base, SCOOP_MCR) = 0x0101; +} + #ifdef CONFIG_PM static int scoop_suspend(struct device *dev, pm_message_t state, uint32_t level) { if (level == SUSPEND_POWER_DOWN) { struct scoop_dev *sdev = dev_get_drvdata(dev); - sdev->scoop_gpwr = SCOOP_REG(sdev->base,SCOOP_GPWR); - SCOOP_REG(sdev->base,SCOOP_GPWR) = 0; + check_scoop_reg(sdev); + sdev->scoop_gpwr = SCOOP_REG(sdev->base, SCOOP_GPWR); + SCOOP_REG(sdev->base, SCOOP_GPWR) = (sdev->scoop_gpwr & ~sdev->suspend_clr) | sdev->suspend_set; } return 0; } @@ -107,6 +119,7 @@ static int scoop_resume(struct device *dev, uint32_t level) if (level == RESUME_POWER_ON) { struct scoop_dev *sdev = dev_get_drvdata(dev); + check_scoop_reg(sdev); SCOOP_REG(sdev->base,SCOOP_GPWR) = sdev->scoop_gpwr; } return 0; @@ -151,6 +164,9 @@ int __init scoop_probe(struct device *dev) SCOOP_REG(devptr->base, SCOOP_GPCR) = inf->io_dir & 0xffff; SCOOP_REG(devptr->base, SCOOP_GPWR) = inf->io_out & 0xffff; + devptr->suspend_clr = inf->suspend_clr; + devptr->suspend_set = inf->suspend_set; + return 0; } diff --git a/include/asm-arm/hardware/scoop.h b/include/asm-arm/hardware/scoop.h index 527404b5a8df..a8f1013930e3 100644 --- a/include/asm-arm/hardware/scoop.h +++ b/include/asm-arm/hardware/scoop.h @@ -38,6 +38,8 @@ struct scoop_config { unsigned short io_out; unsigned short io_dir; + unsigned short suspend_clr; + unsigned short suspend_set; }; /* Structure for linking scoop devices to PCMCIA sockets */ -- cgit v1.2.3 From 3dd083255ddcfa87751fa8e32f61a9547a15a541 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 9 Oct 2005 21:19:40 +0200 Subject: [PATCH] x86_64: Set up safe page tables during resume The following patch makes swsusp avoid the possible temporary corruption of page translation tables during resume on x86-64. 
This is achieved by creating a copy of the relevant page tables that will not be modified by swsusp and can be safely used by it on resume. The problem is that during resume on x86-64 swsusp may temporarily corrupt the page tables used for the direct mapping of RAM. If that happens, a page fault occurs and cannot be handled properly, which leads to the solid hang of the affected system. This leads to the loss of the system's state from before suspend and may result in the loss of data or the corruption of filesystems, so it is a serious issue. Also, it appears to happen quite often (for me, as often as 50% of the time). The problem is related to the fact that (at least) one of the PMD entries used in the direct memory mapping (starting at PAGE_OFFSET) points to a page table the physical address of which is much greater than the physical address of the PMD entry itself. Moreover, unfortunately, the physical address of the page table before suspend (i.e. the one stored in the suspend image) happens to be different to the physical address of the corresponding page table used during resume (i.e. the one that is valid right before swsusp_arch_resume() in arch/x86_64/kernel/suspend_asm.S is executed). Thus while the image is restored, the "offending" PMD entry gets overwritten, so it does not point to the right physical address any more (i.e. there's no page table at the address pointed to by it, because it points to the address the page table has been at during suspend). Consequently, if the PMD entry is used later on, and it _is_ used in the process of copying the image pages, a page fault occurs, but it cannot be handled in the normal way and the system hangs. In principle we can call create_resume_mapping() from swsusp_arch_resume() (i.e. from suspend_asm.S), but then the memory allocations in create_resume_mapping(), resume_pud_mapping(), and resume_pmd_mapping() must be made carefully so that we use _only_ NosaveFree pages in them (the other pages are overwritten by the loop in swsusp_arch_resume()). Additionally, we are in atomic context at that time, so we cannot use GFP_KERNEL. Moreover, if one of the allocations fails, we should free all of the allocated pages, so we need to trace them somehow. All of this is done in the appended patch, except that the functions populating the page tables are located in arch/x86_64/kernel/suspend.c rather than in init.c. It may be done in a more elegant way in the future, with the help of some swsusp patches that are in the works now. [AK: move some externs into headers, renamed a function] Signed-off-by: Rafael J.
Wysocki Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/suspend.c | 127 +++++++++++++++++++++++++++++++++++++++ arch/x86_64/kernel/suspend_asm.S | 17 ++++-- include/linux/suspend.h | 2 + kernel/power/swsusp.c | 7 ++- 4 files changed, 144 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index ebb9abf3ce6d..f066c6ab3618 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include struct saved_context saved_context; @@ -140,4 +142,129 @@ void fix_processor_context(void) } +#ifdef CONFIG_SOFTWARE_SUSPEND +/* Defined in arch/x86_64/kernel/suspend_asm.S */ +extern int restore_image(void); +pgd_t *temp_level4_pgt; + +static void **pages; + +static inline void *__add_page(void) +{ + void **c; + + c = (void **)get_usable_page(GFP_ATOMIC); + if (c) { + *c = pages; + pages = c; + } + return c; +} + +static inline void *__next_page(void) +{ + void **c; + + c = pages; + if (c) { + pages = *c; + *c = NULL; + } + return c; +} + +/* + * Try to allocate as many usable pages as needed and daisy chain them. + * If one allocation fails, free the pages allocated so far + */ +static int alloc_usable_pages(unsigned long n) +{ + void *p; + + pages = NULL; + do + if (!__add_page()) + break; + while (--n); + if (n) { + p = __next_page(); + while (p) { + free_page((unsigned long)p); + p = __next_page(); + } + return -ENOMEM; + } + return 0; +} + +static void res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) +{ + long i, j; + + i = pud_index(address); + pud = pud + i; + for (; i < PTRS_PER_PUD; pud++, i++) { + unsigned long paddr; + pmd_t *pmd; + + paddr = address + i*PUD_SIZE; + if (paddr >= end) + break; + + pmd = (pmd_t *)__next_page(); + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { + unsigned long pe; + + if (paddr >= end) + break; + pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr; + pe &= __supported_pte_mask; + set_pmd(pmd, __pmd(pe)); + } + } +} + +static void set_up_temporary_mappings(void) +{ + unsigned long start, end, next; + + temp_level4_pgt = (pgd_t *)__next_page(); + + /* It is safe to reuse the original kernel mapping */ + set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), + init_level4_pgt[pgd_index(__START_KERNEL_map)]); + + /* Set up the direct mapping from scratch */ + start = (unsigned long)pfn_to_kaddr(0); + end = (unsigned long)pfn_to_kaddr(end_pfn); + + for (; start < end; start = next) { + pud_t *pud = (pud_t *)__next_page(); + next = start + PGDIR_SIZE; + if (next > end) + next = end; + res_phys_pud_init(pud, __pa(start), __pa(next)); + set_pgd(temp_level4_pgt + pgd_index(start), + mk_kernel_pgd(__pa(pud))); + } +} + +int swsusp_arch_resume(void) +{ + unsigned long n; + + n = ((end_pfn << PAGE_SHIFT) + PUD_SIZE - 1) >> PUD_SHIFT; + n += (n + PTRS_PER_PUD - 1) / PTRS_PER_PUD + 1; + pr_debug("swsusp_arch_resume(): pages needed = %lu\n", n); + if (alloc_usable_pages(n)) { + free_eaten_memory(); + return -ENOMEM; + } + /* We have got enough memory and from now on we cannot recover */ + set_up_temporary_mappings(); + restore_image(); + return 0; +} +#endif /* CONFIG_SOFTWARE_SUSPEND */ diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S index 4d659e97df10..320b6fb00cca 100644 --- a/arch/x86_64/kernel/suspend_asm.S +++ b/arch/x86_64/kernel/suspend_asm.S @@ -39,12 +39,13 @@ 
ENTRY(swsusp_arch_suspend) call swsusp_save ret -ENTRY(swsusp_arch_resume) - /* set up cr3 */ - leaq init_level4_pgt(%rip),%rax - subq $__START_KERNEL_map,%rax - movq %rax,%cr3 - +ENTRY(restore_image) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rdx + movq temp_level4_pgt(%rip), %rax + subq %rdx, %rax + movq %rax, %cr3 + /* Flush TLB */ movq mmu_cr4_features(%rip), %rax movq %rax, %rdx andq $~(1<<7), %rdx # PGE @@ -69,6 +70,10 @@ loop: movq pbe_next(%rdx), %rdx jmp loop done: + /* go back to the original page tables */ + leaq init_level4_pgt(%rip), %rax + subq $__START_KERNEL_map, %rax + movq %rax, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax movq %rax, %rdx diff --git a/include/linux/suspend.h b/include/linux/suspend.h index f2e96fdfaae0..ad15a54806d8 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -71,5 +71,7 @@ void restore_processor_state(void); struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); +extern unsigned long get_usable_page(unsigned gfp_mask); +extern void free_eaten_memory(void); #endif /* _LINUX_SWSUSP_H */ diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index acf79ac1cb6d..2d5c45676442 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1095,7 +1095,7 @@ static inline void eat_page(void *page) *eaten_memory = c; } -static unsigned long get_usable_page(unsigned gfp_mask) +unsigned long get_usable_page(unsigned gfp_mask) { unsigned long m; @@ -1109,7 +1109,7 @@ static unsigned long get_usable_page(unsigned gfp_mask) return m; } -static void free_eaten_memory(void) +void free_eaten_memory(void) { unsigned long m; void **c; @@ -1481,11 +1481,12 @@ static int read_suspend_image(void) /* Allocate memory for the image and read the data from swap */ error = check_pagedir(pagedir_nosave); - free_eaten_memory(); + if (!error) error = data_read(pagedir_nosave); if (error) { /* We fail cleanly */ + free_eaten_memory(); for_each_pbe (p, pagedir_nosave) if (p->address) { free_page(p->address); -- cgit v1.2.3 From 46113830a18847cff8da73005e57bc49c2f95a56 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 19:44:29 +0200 Subject: [PATCH] Fix signal sending in usbdevio on async URB completion If a process issues an URB from userspace and (starts to) terminate before the URB comes back, we run into the issue described above. This is because the urb saves a pointer to "current" when it is posted to the device, but there's no guarantee that this pointer is still valid afterwards. In fact, there are three separate issues: 1) the pointer to "current" can become invalid, since the task could be completely gone when the URB completion comes back from the device. 2) Even if the saved task pointer is still pointing to a valid task_struct, task_struct->sighand could have gone meanwhile. 3) Even if the process is perfectly fine, permissions may have changed, and we can no longer send it a signal. So what we do instead, is to save the PID and uid's of the process, and introduce a new kill_proc_info_as_uid() function. 
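
In short, the fix amounts to the following pattern, shown here as a minimal sketch rather than the actual driver change (the struct and helper names are invented for illustration; only kill_proc_info_as_uid(), SI_ASYNCIO and the 2.6-era current->pid/uid/euid fields are taken from the patch itself):

#include <linux/sched.h>
#include <linux/signal.h>

/* Hypothetical per-request bookkeeping, mirroring struct async in devio.c */
struct async_ctx {
	pid_t pid;		/* submitter's pid, recorded at submit time */
	uid_t uid, euid;	/* submitter's credentials at submit time */
	unsigned int signr;	/* signal requested by userspace, 0 = none */
	void __user *userurb;	/* cookie reported back in si_addr */
};

/* At submission: record who asked, while "current" is still the submitter. */
static void async_ctx_record_owner(struct async_ctx *ctx, unsigned int signr,
				   void __user *userurb)
{
	ctx->pid = current->pid;
	ctx->uid = current->uid;
	ctx->euid = current->euid;
	ctx->signr = signr;
	ctx->userurb = userurb;
}

/* At completion (possibly long after the submitter exited): signal by pid.
 * kill_proc_info_as_uid() looks the pid up and checks permissions under
 * tasklist_lock, so no stale task_struct pointer is ever dereferenced. */
static void async_ctx_notify(struct async_ctx *ctx, int status)
{
	struct siginfo sinfo;

	if (!ctx->signr)
		return;
	sinfo.si_signo = ctx->signr;
	sinfo.si_errno = status;
	sinfo.si_code = SI_ASYNCIO;
	sinfo.si_addr = ctx->userurb;
	kill_proc_info_as_uid(ctx->signr, &sinfo, ctx->pid,
			      ctx->uid, ctx->euid);
}
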
Signed-off-by: Harald Welte [ Fixed up types and added symbol exports ] Signed-off-by: Linus Torvalds --- drivers/usb/core/devio.c | 12 +++++++++--- include/linux/sched.h | 1 + kernel/signal.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index b4265aa7d45e..6c35dcbea664 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -30,6 +30,8 @@ * Revision history * 22.12.1999 0.1 Initial release (split from proc_usb.c) * 04.01.2000 0.2 Turned into its own filesystem + * 30.09.2005 0.3 Fix user-triggerable oops in async URB delivery + * (CAN-2005-3055) */ /*****************************************************************************/ @@ -58,7 +60,8 @@ static struct class *usb_device_class; struct async { struct list_head asynclist; struct dev_state *ps; - struct task_struct *task; + pid_t pid; + uid_t uid, euid; unsigned int signr; unsigned int ifnum; void __user *userbuffer; @@ -290,7 +293,8 @@ static void async_completed(struct urb *urb, struct pt_regs *regs) sinfo.si_errno = as->urb->status; sinfo.si_code = SI_ASYNCIO; sinfo.si_addr = as->userurb; - send_sig_info(as->signr, &sinfo, as->task); + kill_proc_info_as_uid(as->signr, &sinfo, as->pid, as->uid, + as->euid); } wake_up(&ps->wait); } @@ -988,7 +992,9 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, as->userbuffer = NULL; as->signr = uurb->signr; as->ifnum = ifnum; - as->task = current; + as->pid = current->pid; + as->uid = current->uid; + as->euid = current->euid; if (!(uurb->endpoint & USB_DIR_IN)) { if (copy_from_user(as->urb->transfer_buffer, uurb->buffer, as->urb->transfer_buffer_length)) { free_async(as); diff --git a/include/linux/sched.h b/include/linux/sched.h index c3ba31f210a9..27519df0f987 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1018,6 +1018,7 @@ extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp); extern int kill_pg_info(int, struct siginfo *, pid_t); extern int kill_proc_info(int, struct siginfo *, pid_t); +extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t); extern void do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); diff --git a/kernel/signal.c b/kernel/signal.c index cba193ceda0d..50c992643771 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1193,6 +1193,40 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) return error; } +/* like kill_proc_info(), but doesn't use uid/euid of "current" */ +int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid, + uid_t uid, uid_t euid) +{ + int ret = -EINVAL; + struct task_struct *p; + + if (!valid_signal(sig)) + return ret; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + if (!p) { + ret = -ESRCH; + goto out_unlock; + } + if ((!info || ((unsigned long)info != 1 && + (unsigned long)info != 2 && SI_FROMUSER(info))) + && (euid != p->suid) && (euid != p->uid) + && (uid != p->suid) && (uid != p->uid)) { + ret = -EPERM; + goto out_unlock; + } + if (sig && p->sighand) { + unsigned long flags; + spin_lock_irqsave(&p->sighand->siglock, flags); + ret = __group_send_sig_info(sig, info, p); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + } +out_unlock: + read_unlock(&tasklist_lock); + return ret; +} 
+EXPORT_SYMBOL_GPL(kill_proc_info_as_uid); /* * kill_something_info() interprets pid in interesting ways just like kill(2). -- cgit v1.2.3 From 421c7ce6d001fce28b1fa8fdd2e7ded0ed8a0ad5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 10 Oct 2005 22:32:45 +0200 Subject: [PATCH] x86_64: Allocate cpu local data for all possible CPUs CPU hotplug fills up the possible map to NR_CPUs, but it did that after setting up per CPU data. This lead to CPU data not getting allocated for all possible CPUs, which lead to various side effects. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup64.c | 4 ++++ arch/x86_64/kernel/smpboot.c | 6 +----- include/asm-x86_64/smp.h | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index bd33be24a386..79190891fbc5 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -87,6 +87,10 @@ void __init setup_per_cpu_areas(void) int i; unsigned long size; +#ifdef CONFIG_HOTPLUG_CPU + prefill_possible_map(); +#endif + /* Copy section for each CPU (we discard the original) */ size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); #ifdef CONFIG_MODULES diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index e12d7baeb33e..658a81b33f3b 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -892,7 +892,7 @@ static __init void disable_smp(void) * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. * - Ashok Raj */ -static void prefill_possible_map(void) +__init void prefill_possible_map(void) { int i; for (i = 0; i < NR_CPUS; i++) @@ -967,10 +967,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) current_cpu_data = boot_cpu_data; current_thread_info()->cpu = 0; /* needed? */ -#ifdef CONFIG_HOTPLUG_CPU - prefill_possible_map(); -#endif - if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 24e32611f0bf..c57ce4071342 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -81,6 +81,7 @@ static inline int hard_smp_processor_id(void) extern int safe_smp_processor_id(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); +extern void prefill_possible_map(void); #endif /* !ASSEMBLY */ -- cgit v1.2.3 From ebe0bbf06c9e03613bdcb6b5a704595a9344b7ff Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:19 -0700 Subject: [NETFILTER] nfnetlink: use highest bit of nfa_type to indicate nested TLV As Henrik Nordstrom pointed out, all our efforts with "split endian" (i.e. host byte order tags, net byte order values) are useless, unless a parser can determine whether an attribute is nested or not. This patch steals the highest bit of nfattr.nfa_type to indicate whether the data payload contains a nested nfattr (1) or not (0). This will break userspace compatibility, but luckily no kernel with nfnetlink was released so far. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink.h | 12 ++++++++---- net/netfilter/nfnetlink.c | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1d5b10ae2399..f08e870100f4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -41,11 +41,15 @@ enum nfnetlink_groups { struct nfattr { u_int16_t nfa_len; - u_int16_t nfa_type; + u_int16_t nfa_type; /* we use 15 bits for the type, and the highest + * bit to indicate whether the payload is nested */ } __attribute__ ((packed)); -/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time - * to put this in a generic file */ +/* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from + * rtnetlink.h, it's time to put this in a generic file */ + +#define NFNL_NFA_NEST 0x8000 +#define NFA_TYPE(attr) ((attr)->nfa_type & 0x7fff) #define NFA_ALIGNTO 4 #define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) @@ -59,7 +63,7 @@ struct nfattr #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) #define NFA_NEST(skb, type) \ ({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ - NFA_PUT(skb, type, 0, NULL); \ + NFA_PUT(skb, (NFNL_NFA_NEST | type), 0, NULL); \ __start; }) #define NFA_NEST_END(skb, start) \ ({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 1caaca06f698..4bc27a6334c1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -133,7 +133,7 @@ int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) memset(tb, 0, sizeof(struct nfattr *) * maxattr); while (NFA_OK(nfa, len)) { - unsigned flavor = nfa->nfa_type; + unsigned flavor = NFA_TYPE(nfa); if (flavor && flavor <= maxattr) tb[flavor-1] = nfa; nfa = NFA_NEXT(nfa, len); @@ -177,7 +177,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); while (NFA_OK(attr, attrlen)) { - unsigned flavor = attr->nfa_type; + unsigned flavor = NFA_TYPE(attr); if (flavor) { if (flavor > attr_count) return -EINVAL; -- cgit v1.2.3 From b3a91d037a2575040f9b6a483f60c407a3d80368 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:36 -0700 Subject: [NETFILTER] nat: remove bogus structure member When 'rustynat' was merged in 2.6.12, the use of the "helper" pointer of struct ipt_nat_info was obsoleted, but the pointer not removed from the struct. This patch removes the pointer, thereby yet again shrinking struct ip_conntrack. Discovered-by: Rusty Russell Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index e201ec6e9905..41a107de17cf 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -58,10 +58,6 @@ extern rwlock_t ip_nat_lock; struct ip_nat_info { struct list_head bysource; - - /* Helper (NULL if none). 
*/ - struct ip_nat_helper *helper; - struct ip_nat_seq seq[IP_CT_DIR_MAX]; }; -- cgit v1.2.3 From 5bbc243aafff9ad653dc7a9fa7bcaf0b4631355a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:54:01 -0700 Subject: [NETFILTER]: Add missing include to ip_conntrack_tuple.h Without this #include, __be16 is not defined and userspace programs will break. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 20e43f018b7c..3232db11a4e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -1,6 +1,8 @@ #ifndef _IP_CONNTRACK_TUPLE_H #define _IP_CONNTRACK_TUPLE_H +#include + /* A `tuple' is a structure containing the information to uniquely identify a connection. ie. if two packets have the same tuple, they are in the same connection; if not, they are not. -- cgit v1.2.3 From e1c73b78e3706bd3c336d4730a01dd4081dfb7ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 20:55:49 -0700 Subject: [NETFILTER] ctnetlink: add one nesting level for TCP state To keep consistency, the TCP private protocol information is nested attributes under CTA_PROTOINFO_TCP. This way the sequence of attributes to access the TCP state information looks like here below: CTA_PROTOINFO CTA_PROTOINFO_TCP CTA_PROTOINFO_TCP_STATE instead of: CTA_PROTOINFO CTA_PROTOINFO_TCP_STATE Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 9 ++++++++- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 5c55751c78e4..fb5511030185 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -70,11 +70,18 @@ enum ctattr_l4proto { enum ctattr_protoinfo { CTA_PROTOINFO_UNSPEC, - CTA_PROTOINFO_TCP_STATE, + CTA_PROTOINFO_TCP, __CTA_PROTOINFO_MAX }; #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) +enum ctattr_protoinfo_tcp { + CTA_PROTOINFO_TCP_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_TCP_MAX +}; +#define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1) + enum ctattr_counters { CTA_COUNTERS_UNSPEC, CTA_COUNTERS_PACKETS, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 121760d6cc50..75e27e65c28f 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -341,11 +341,15 @@ static int tcp_print_conntrack(struct seq_file *s, static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct) { + struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + read_lock_bh(&tcp_lock); NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), &ct->proto.tcp.state); read_unlock_bh(&tcp_lock); + NFA_NEST_END(skb, nest_parms); + return 0; nfattr_failure: -- cgit v1.2.3 From a051a8f7306476af0a74370ad56e793cb6c43bf7 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 21:21:10 -0700 Subject: [NETFILTER]: Use only 32bit counters for CONNTRACK_ACCT Initially we used 64bit counters for conntrack-based accounting, since we had no 
event mechanism to tell userspace that our counters are about to overflow. With nfnetlink_conntrack, we now have such a event mechanism and thus can save 16bytes per connection. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 6 ++++-- include/linux/netfilter_ipv4/ip_conntrack.h | 8 ++++++-- net/ipv4/netfilter/ip_conntrack_core.c | 13 ++++++++----- net/ipv4/netfilter/ip_conntrack_netlink.c | 8 ++++---- 4 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index fb5511030185..116fcaced909 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -84,8 +84,10 @@ enum ctattr_protoinfo_tcp { enum ctattr_counters { CTA_COUNTERS_UNSPEC, - CTA_COUNTERS_PACKETS, - CTA_COUNTERS_BYTES, + CTA_COUNTERS_PACKETS, /* old 64bit counters */ + CTA_COUNTERS_BYTES, /* old 64bit counters */ + CTA_COUNTERS32_PACKETS, + CTA_COUNTERS32_BYTES, __CTA_COUNTERS_MAX }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 4ced38736813..d078bb91d9e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -117,6 +117,10 @@ enum ip_conntrack_events /* NAT info */ IPCT_NATINFO_BIT = 10, IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), + + /* Counter highest bit has been set */ + IPCT_COUNTER_FILLING_BIT = 11, + IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), }; enum ip_conntrack_expect_events { @@ -192,8 +196,8 @@ do { \ struct ip_conntrack_counter { - u_int64_t packets; - u_int64_t bytes; + u_int32_t packets; + u_int32_t bytes; }; struct ip_conntrack_helper; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index ea65dd3e517a..07a80b56e8dc 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1119,7 +1119,7 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, unsigned long extra_jiffies, int do_acct) { - int do_event = 0; + int event = 0; IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); IP_NF_ASSERT(skb); @@ -1129,13 +1129,13 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - do_event = 1; + event = IPCT_REFRESH; } else { /* Need del_timer for race avoidance (may already be dying). 
*/ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - do_event = 1; + event = IPCT_REFRESH; } } @@ -1144,14 +1144,17 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, ct->counters[CTINFO2DIR(ctinfo)].packets++; ct->counters[CTINFO2DIR(ctinfo)].bytes += ntohs(skb->nh.iph->tot_len); + if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) + || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) + event |= IPCT_COUNTER_FILLING; } #endif write_unlock_bh(&ip_conntrack_lock); /* must be unlocked when calling event cache */ - if (do_event) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + if (event) + ip_conntrack_event_cache(event, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index eade2749915a..06ed91ee8ace 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, struct nfattr *nest_count = NFA_NEST(skb, type); u_int64_t tmp; - tmp = cpu_to_be64(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); - tmp = cpu_to_be64(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); NFA_NEST_END(skb, nest_count); -- cgit v1.2.3 From 339231537506846cb232a2f0cc4a2c662b2d5b07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 21:23:28 -0700 Subject: [NETFILTER] ctnetlink: allow userspace to change TCP state This patch adds the ability of changing the state a TCP connection. I know that this must be used with care but it's required to provide a complete conntrack creation via conntrack_netlink. So I'll document this aspect on the upcoming docs. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- .../linux/netfilter_ipv4/ip_conntrack_protocol.h | 3 +++ net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 23 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index b6b99be8632a..2c76b879e3dc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -52,6 +52,9 @@ struct ip_conntrack_protocol int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct); + /* convert nfnetlink attributes to protoinfo */ + int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct); + int (*tuple_to_nfattr)(struct sk_buff *skb, const struct ip_conntrack_tuple *t); int (*nfattr_to_tuple)(struct nfattr *tb[], diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 75e27e65c28f..d6701cafbcc2 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -356,6 +356,28 @@ nfattr_failure: read_unlock_bh(&tcp_lock); return -1; } + +static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; + +nfattr_failure: + return -1; +} #endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) @@ -1127,6 +1149,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, #endif -- cgit v1.2.3 From eeb2b8560676e454ad37ee30b49bc7d897edc9be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 10 Oct 2005 21:25:23 -0700 Subject: [TWSK]: Grab the module refcount for timewait sockets This is required to avoid unloading a module that has active timewait sockets, such as DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_timewait_sock.h | 3 +++ net/ipv4/inet_timewait_sock.c | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4ade56ef3a4d..28f7b2103505 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -193,11 +194,13 @@ static inline u32 inet_rcv_saddr(const struct sock *sk) static inline void inet_twsk_put(struct inet_timewait_sock *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { + struct module *owner = tw->tw_prot->owner; #ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif kmem_cache_free(tw->tw_prot->twsk_slab, tw); + module_put(owner); } } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index f9076ef3a1a8..a010e9a68811 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -111,6 +111,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_prot = sk->sk_prot_creator; atomic_set(&tw->tw_refcnt, 1); inet_twsk_dead_node_init(tw); + __module_get(tw->tw_prot->owner); } return tw; -- cgit v1.2.3 From cbd27b8ced4b1888c93f69b4dd108a69ac4d733f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 12 Oct 2005 11:39:33 +1000 Subject: [PATCH] ppc32: Fix timekeeping Interestingly enough, ppc32 had broken timekeeping for ages... It worked, but probably drifted a bit more than could be explained by the actual bad precision of the timebase calibration. We discovered that recently when somebody figured out that the common code was using CLOCK_TICK_RATE to correct the timekeeing, and ppc32 had a completely bogus value for it. This patch turns it into something saner. Probably not as good as doing something based on the actual timebase frequency precision but I'll leave that sort of math to others. This at least makes it better for the common HZ values. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- include/asm-powerpc/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/timex.h b/include/asm-powerpc/timex.h index 51c5b316be55..c02d15aced91 100644 --- a/include/asm-powerpc/timex.h +++ b/include/asm-powerpc/timex.h @@ -10,7 +10,7 @@ #include #include -#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define CLOCK_TICK_RATE 1024000 /* Underlying HZ */ typedef unsigned long cycles_t; -- cgit v1.2.3 From d8e998c58a870770905495a1d45ebf7285b5b1c5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 12 Oct 2005 14:22:50 +1000 Subject: [PATCH] ppc32: Tell userland about lack of standard TB Glibc is about to get some new high precision timer stuff that relies on the standard timebase of the PPC architecture. However, some (rare & old) CPUs do not have such timebase and it is a bit annoying to have your stuff just crash because you are running on the wrong CPU... This exposes to userland a CPU feature bit that tells that the current processor doesn't have a standard timebase. It's negative logic so that glibc will still "just work" on older kernels (it will just be unhappy on those old CPUs but that doesn't really matter as distro tend to update glibc & kernel at the same time). 
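[ Not part of the patch: a hedged userspace sketch of how the new bit could be consumed. It assumes a glibc recent enough to provide getauxval() (which post-dates this patch; contemporary code would have parsed the auxiliary vector or /proc/self/auxv by hand) and defines the bit value locally from the hunk below rather than relying on installed headers. ]

#include <stdio.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE_NO_TB
#define PPC_FEATURE_NO_TB	0x00100000	/* value added by this patch */
#endif

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	/* Negative logic: the bit is set only on CPUs *without* a standard
	 * timebase (601, 403GCX), so old kernels that never report it still
	 * look like "timebase present" and existing binaries keep working. */
	if (hwcap & PPC_FEATURE_NO_TB)
		printf("no standard timebase, use a fallback clock source\n");
	else
		printf("standard timebase available\n");
	return 0;
}
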
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/ppc/kernel/cputable.c | 5 +++-- include/asm-ppc/cputable.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c index 546e1ea4cafa..6b76cf58d9e0 100644 --- a/arch/ppc/kernel/cputable.c +++ b/arch/ppc/kernel/cputable.c @@ -91,7 +91,7 @@ struct cpu_spec cpu_specs[] = { .cpu_features = CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE, .cpu_user_features = COMMON_PPC | PPC_FEATURE_601_INSTR | - PPC_FEATURE_UNIFIED_CACHE, + PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_601 @@ -745,7 +745,8 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "403GCX", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, .icache_bsize = 16, .dcache_bsize = 16, }, diff --git a/include/asm-ppc/cputable.h b/include/asm-ppc/cputable.h index 41d8f8425c04..e17c492c870b 100644 --- a/include/asm-ppc/cputable.h +++ b/include/asm-ppc/cputable.h @@ -24,6 +24,7 @@ #define PPC_FEATURE_HAS_SPE 0x00800000 #define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 #define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 #ifdef __KERNEL__ -- cgit v1.2.3 From a451e28c7642830d8b066e5a13de46934151ce3a Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 12 Oct 2005 19:58:12 +0100 Subject: [ARM] 3003/1: SSP channel map register updates for pxa2xx Patch from Liam Girdwood This patch updates the pxa2xx channel map registers definitions in pxa-regs.h Changes:- o Added description for SSP2 registers o Added definitions for SSP3 registers Signed-off-by:Liam Girdwood Signed-off-by: Russell King --- include/asm-arm/arch-pxa/pxa-regs.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 939d9e5020a0..13fa2deb4ddd 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -126,8 +126,8 @@ #define DRCMR12 __REG(0x40000130) /* Request to Channel Map Register for AC97 audio transmit Request */ #define DRCMR13 __REG(0x40000134) /* Request to Channel Map Register for SSP receive Request */ #define DRCMR14 __REG(0x40000138) /* Request to Channel Map Register for SSP transmit Request */ -#define DRCMR15 __REG(0x4000013c) /* Reserved */ -#define DRCMR16 __REG(0x40000140) /* Reserved */ +#define DRCMR15 __REG(0x4000013c) /* Request to Channel Map Register for SSP2 receive Request */ +#define DRCMR16 __REG(0x40000140) /* Request to Channel Map Register for SSP2 transmit Request */ #define DRCMR17 __REG(0x40000144) /* Request to Channel Map Register for ICP receive Request */ #define DRCMR18 __REG(0x40000148) /* Request to Channel Map Register for ICP transmit Request */ #define DRCMR19 __REG(0x4000014c) /* Request to Channel Map Register for STUART receive Request */ @@ -151,7 +151,8 @@ #define DRCMR37 __REG(0x40000194) /* Request to Channel Map Register for USB endpoint 13 Request */ #define DRCMR38 __REG(0x40000198) /* Request to Channel Map Register for USB endpoint 14 Request */ #define DRCMR39 __REG(0x4000019C) /* Reserved */ - +#define DRCMR66 __REG(0x40001108) /* Request to Channel Map Register for SSP3 receive Request */ +#define DRCMR67 __REG(0x4000110C) /* Request to Channel Map Register for SSP3 
transmit Request */ #define DRCMR68 __REG(0x40001110) /* Request to Channel Map Register for Camera FIFO 0 Request */ #define DRCMR69 __REG(0x40001114) /* Request to Channel Map Register for Camera FIFO 1 Request */ #define DRCMR70 __REG(0x40001118) /* Request to Channel Map Register for Camera FIFO 2 Request */ -- cgit v1.2.3 From afb997c6163b33292d31a09d6aa5cbb03ffa5bf1 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 15:12:21 -0700 Subject: [NETPOLL]: wrong return for null netpoll_poll_lock() When netpoll is not being used, the macro that defines the removed routing netpoll_poll_lock defines the return as zero, but the real routine returns a `void *` Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/netpoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 5ade54a78dbb..ca5a8733000f 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -86,7 +86,7 @@ static inline void netpoll_poll_unlock(void *have) #else #define netpoll_rx(a) 0 -#define netpoll_poll_lock(a) 0 +#define netpoll_poll_lock(a) NULL #define netpoll_poll_unlock(a) #endif -- cgit v1.2.3 From 9153bd75f25ff3170f07fb9ac1fb0e718afc6fce Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 13 Oct 2005 16:46:35 +0100 Subject: [ARM] 3005/1: S3C2440 - add definition for s3c2440_set_dsc() call in hardware.h Patch from Ben Dooks include/asm-arm/arch-s3c2410/hardware.h was missing the definition for s3c2440_set_dsc() Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/hardware.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/asm-arm/arch-s3c2410/hardware.h b/include/asm-arm/arch-s3c2410/hardware.h index 48a39918a760..1c9de29cafef 100644 --- a/include/asm-arm/arch-s3c2410/hardware.h +++ b/include/asm-arm/arch-s3c2410/hardware.h @@ -92,6 +92,13 @@ extern unsigned int s3c2410_gpio_getpin(unsigned int pin); extern unsigned int s3c2410_modify_misccr(unsigned int clr, unsigned int chg); +#ifdef CONFIG_CPU_S3C2440 + +extern int s3c2440_set_dsc(unsigned int pin, unsigned int value); + +#endif /* CONFIG_CPU_S3C2440 */ + + #endif /* __ASSEMBLY__ */ #include -- cgit v1.2.3 From c8923c6b852d3a97c1faad0566e38fca330375a7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 14:41:23 -0700 Subject: [NETFILTER]: Fix OOPSes on machines with discontiguous cpu numbering. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original patch by Harald Welte, with feedback from Herbert Xu and testing by Sébastien Bernard. EBTABLES, ARP tables, and IP/IP6 tables all assume that cpus are numbered linearly. That is not necessarily true. This patch fixes that up by calculating the largest possible cpu number, and allocating enough per-cpu structure space given that. Signed-off-by: David S. 
Miller --- arch/cris/arch-v32/kernel/smp.c | 2 ++ arch/sh/kernel/smp.c | 3 +++ include/linux/cpumask.h | 12 ++++++++++++ net/bridge/netfilter/ebtables.c | 27 +++++++++++++++++---------- net/ipv4/netfilter/arp_tables.c | 14 +++++++++----- net/ipv4/netfilter/ip_tables.c | 17 +++++++++++------ net/ipv6/netfilter/ip6_tables.c | 16 +++++++++++----- 7 files changed, 65 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 2c5cae04a95c..957f551ba5ce 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -15,6 +15,7 @@ #include #include #include +#include #define IPI_SCHEDULE 1 #define IPI_CALL 2 @@ -28,6 +29,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; /* CPU masks */ cpumask_t cpu_online_map = CPU_MASK_NONE; cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +EXPORT_SYMBOL(phys_cpu_present_map); /* Variables used during SMP boot */ volatile int cpu_now_booting = 0; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 56a39d69e080..5ecefc02896a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,8 @@ struct sh_cpuinfo cpu_data[NR_CPUS]; extern void per_cpu_trap_init(void); cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + cpumask_t cpu_online_map; static atomic_t cpus_booted = ATOMIC_INIT(0); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b15826f6e3a2..fe9778301d07 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -392,4 +392,16 @@ extern cpumask_t cpu_present_map; #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) +/* Find the highest possible smp_processor_id() */ +static inline unsigned int highest_possible_processor_id(void) +{ + unsigned int cpu, highest = 0; + + for_each_cpu_mask(cpu, cpu_possible_map) + highest = cpu; + + return highest; +} + + #endif /* __LINUX_CPUMASK_H */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c4540144f0f4..f8ffbf6e2333 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -26,6 +26,7 @@ #include #include #include +#include #include /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -823,10 +824,11 @@ static int translate_table(struct ebt_replace *repl, /* this will get free'd in do_replace()/ebt_register_table() if an error occurs */ newinfo->chainstack = (struct ebt_chainstack **) - vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack)); + vmalloc((highest_possible_processor_id()+1) + * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack) return -ENOMEM; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { newinfo->chainstack[i] = vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack[i]) { @@ -895,9 +897,12 @@ static void get_counters(struct ebt_counter *oldcounters, /* counters of cpu 0 */ memcpy(counters, oldcounters, - sizeof(struct ebt_counter) * nentries); + sizeof(struct ebt_counter) * nentries); + /* add other counters to those of cpu 0 */ - for (cpu = 1; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { + if (cpu == 0) + continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) { counters[i].pcnt += counter_base[i].pcnt; @@ -929,7 +934,8 @@ static int 
do_replace(void __user *user, unsigned int len) BUGPRINT("Entries_size never zero\n"); return -EINVAL; } - countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(tmp.nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); if (!newinfo) @@ -1022,7 +1028,7 @@ static int do_replace(void __user *user, unsigned int len) vfree(table->entries); if (table->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->chainstack[i]); vfree(table->chainstack); } @@ -1040,7 +1046,7 @@ free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1132,7 +1138,8 @@ int ebt_register_table(struct ebt_table *table) return -EINVAL; } - countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(table->table->nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); ret = -ENOMEM; @@ -1186,7 +1193,7 @@ free_unlock: up(&ebt_mutex); free_chainstack: if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1209,7 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table) up(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->private->chainstack[i]); vfree(table->private->chainstack); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fa1634256680..a7969286e6e7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -716,8 +716,10 @@ static int translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -767,7 +769,7 @@ static void get_counters(const struct arpt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -885,7 +887,8 @@ static int do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1158,7 +1161,8 @@ int arpt_register_table(struct arpt_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) { ret = -ENOMEM; return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index eef99a1b5de6..75c27e92f6ab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -921,8 +922,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) 
{ - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -943,7 +946,7 @@ replace_table(struct ipt_table *table, struct ipt_entry *table_base; unsigned int i; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2da514b16d95..b03e90649eb5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -950,8 +951,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -973,6 +976,7 @@ replace_table(struct ip6t_table *table, unsigned int i; for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -1019,7 +1023,7 @@ get_counters(const struct ip6t_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1153,7 +1157,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1467,7 +1472,8 @@ int ip6t_register_table(struct ip6t_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; -- cgit v1.2.3 From 51e8513615ed8202b22ba9a43b0c7376ea4f6868 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 21:10:08 -0700 Subject: [SPARC64]: Consolidate common PCI IOMMU init code. All the PCI controller drivers were doing the same thing setting up the IOMMU software state, put it all in one spot. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/pci_iommu.c | 54 ++++++++++++++++++++++++++++++++++--- arch/sparc64/kernel/pci_psycho.c | 44 ++++--------------------------- arch/sparc64/kernel/pci_sabre.c | 39 +++------------------------ arch/sparc64/kernel/pci_schizo.c | 57 +++------------------------------------- include/asm-sparc64/pbm.h | 2 +- 5 files changed, 63 insertions(+), 133 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 425c60cfea19..cdee7d9fed72 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -80,13 +80,59 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } -void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize) +void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask) { - int i; + unsigned long i, tsbbase, order; + + /* Setup initial software IOMMU state. */ + spin_lock_init(&iommu->lock); + iommu->ctx_lowest_free = 1; + iommu->page_table_map_base = dma_offset; + iommu->dma_addr_mask = dma_addr_mask; + + switch (tsbsize / (8 * 1024)) { + case 64: + iommu->page_table_sz_bits = 16; + break; + case 128: + iommu->page_table_sz_bits = 17; + break; + default: + prom_printf("PCI_IOMMU: Illegal TSB size %d\n", + tsbsize / (8 * 1024)); + prom_halt(); + break; + }; + + iommu->lowest_consistent_map = + 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); + + for (i = 0; i < PBM_NCLUSTERS; i++) { + iommu->alloc_info[i].flush = 0; + iommu->alloc_info[i].next = 0; + } - tsbsize /= sizeof(iopte_t); + /* Allocate and initialize the dummy page which we + * set inactive IO PTEs to point to. + */ + iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); + if (!iommu->dummy_page) { + prom_printf("PCI_IOMMU: Error, gfp(dummy_page) failed.\n"); + prom_halt(); + } + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); + iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); + + /* Now allocate and setup the IOMMU page table itself. */ + order = get_order(tsbsize); + tsbbase = __get_free_pages(GFP_KERNEL, order); + if (!tsbbase) { + prom_printf("PCI_IOMMU: Error, gfp(tsb) failed.\n"); + prom_halt(); + } + iommu->page_table = (iopte_t *)tsbbase; - for (i = 0; i < tsbsize; i++) + for (i = 0; i < tsbsize / sizeof(iopte_t); i++) iopte_make_dummy(iommu, &iommu->page_table[i]); } diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index 6ed1ef25e0ac..c03ed5f49d31 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -1207,13 +1207,9 @@ static void psycho_scan_bus(struct pci_controller_info *p) static void psycho_iommu_init(struct pci_controller_info *p) { struct pci_iommu *iommu = p->pbm_A.iommu; - unsigned long tsbbase, i; + unsigned long i; u64 control; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses. 
*/ iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL; iommu->iommu_tsbbase = p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE; @@ -1240,40 +1236,10 @@ static void psycho_iommu_init(struct pci_controller_info *p) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - /* Using assumed page size 8K with 128K entries we need 1MB iommu page - * table (128K ioptes * 8 bytes per iopte). This is - * page order 7 on UltraSparc. - */ - tsbbase = __get_free_pages(GFP_KERNEL, get_order(IO_TSB_SIZE)); - if (!tsbbase) { - prom_printf("PSYCHO_IOMMU: Error, gfp(tsb) failed.\n"); - prom_halt(); - } - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_sz_bits = 17; - iommu->page_table_map_base = 0xc0000000; - iommu->dma_addr_mask = 0xffffffff; - pci_iommu_table_init(iommu, IO_TSB_SIZE); - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } - - psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, __pa(tsbbase)); + psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, + __pa(iommu->page_table)); control = psycho_read(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL); control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ); @@ -1281,7 +1247,7 @@ static void psycho_iommu_init(struct pci_controller_info *p) psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL, control); /* If necessary, hook us up for starfire IRQ translations. */ - if(this_is_starfire) + if (this_is_starfire) p->starfire_cookie = starfire_hookup(p->pbm_A.portid); else p->starfire_cookie = NULL; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index 0ee6bd5b9ac6..da8e1364194f 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -1267,13 +1267,9 @@ static void sabre_iommu_init(struct pci_controller_info *p, u32 dma_mask) { struct pci_iommu *iommu = p->pbm_A.iommu; - unsigned long tsbbase, i, order; + unsigned long i; u64 control; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses. 
*/ iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL; iommu->iommu_tsbbase = p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE; @@ -1295,26 +1291,10 @@ static void sabre_iommu_init(struct pci_controller_info *p, /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, tsbsize * 1024 * 8, dvma_offset, dma_mask); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - tsbbase = __get_free_pages(GFP_KERNEL, order = get_order(tsbsize * 1024 * 8)); - if (!tsbbase) { - prom_printf("SABRE_IOMMU: Error, gfp(tsb) failed.\n"); - prom_halt(); - } - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_map_base = dvma_offset; - iommu->dma_addr_mask = dma_mask; - pci_iommu_table_init(iommu, PAGE_SIZE << order); - - sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, __pa(tsbbase)); + sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, + __pa(iommu->page_table)); control = sabre_read(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL); control &= ~(SABRE_IOMMUCTRL_TSBSZ | SABRE_IOMMUCTRL_TBWSZ); @@ -1322,11 +1302,9 @@ static void sabre_iommu_init(struct pci_controller_info *p, switch(tsbsize) { case 64: control |= SABRE_IOMMU_TSBSZ_64K; - iommu->page_table_sz_bits = 16; break; case 128: control |= SABRE_IOMMU_TSBSZ_128K; - iommu->page_table_sz_bits = 17; break; default: prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); @@ -1334,15 +1312,6 @@ static void sabre_iommu_init(struct pci_controller_info *p, break; } sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL, control); - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } } static void pbm_register_toplevel_resources(struct pci_controller_info *p, diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index cae5b61fe2f0..d8c4e0919b4e 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -1765,7 +1765,7 @@ static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm) static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) { struct pci_iommu *iommu = pbm->iommu; - unsigned long tsbbase, i, tagbase, database, order; + unsigned long i, tagbase, database; u32 vdma[2], dma_mask; u64 control; int err, tsbsize; @@ -1800,10 +1800,6 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) prom_halt(); }; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses, SCHIZO has iommu ctx flushing. 
*/ iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL; iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE; @@ -1832,56 +1828,9 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - /* Using assumed page size 8K with 128K entries we need 1MB iommu page - * table (128K ioptes * 8 bytes per iopte). This is - * page order 7 on UltraSparc. - */ - order = get_order(tsbsize * 8 * 1024); - tsbbase = __get_free_pages(GFP_KERNEL, order); - if (!tsbbase) { - prom_printf("%s: Error, gfp(tsb) failed.\n", pbm->name); - prom_halt(); - } - - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_map_base = vdma[0]; - iommu->dma_addr_mask = dma_mask; - pci_iommu_table_init(iommu, PAGE_SIZE << order); - - switch (tsbsize) { - case 64: - iommu->page_table_sz_bits = 16; - break; - - case 128: - iommu->page_table_sz_bits = 17; - break; - - default: - prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); - prom_halt(); - break; - }; - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } - - schizo_write(iommu->iommu_tsbbase, __pa(tsbbase)); + schizo_write(iommu->iommu_tsbbase, __pa(iommu->page_table)); control = schizo_read(iommu->iommu_control); control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ); diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index 38bbbccb4068..c067407de0b0 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -102,7 +102,7 @@ struct pci_iommu { u32 dma_addr_mask; }; -extern void pci_iommu_table_init(struct pci_iommu *, int); +extern void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask); /* This describes a PCI bus module's streaming buffer. */ struct pci_strbuf { -- cgit v1.2.3 From 688cb30bdc3e398d97682a6a58f825821ee838c2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 22:15:24 -0700 Subject: [SPARC64]: Eliminate PCI IOMMU dma mapping size limit. The hairy fast allocator in the sparc64 PCI IOMMU code has a hard limit of 256 pages. Certain devices can exceed this when performing very large I/Os. So replace with a more simple allocator, based largely upon the arch/ppc64/kernel/iommu.c code. Signed-off-by: David S. Miller --- arch/sparc64/kernel/pci_iommu.c | 365 +++++++++++++++------------------------- include/asm-sparc64/pbm.h | 28 +-- 2 files changed, 145 insertions(+), 248 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index cdee7d9fed72..a11910be1013 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -49,12 +49,6 @@ static void __iommu_flushall(struct pci_iommu *iommu) /* Ensure completion of previous PIO writes. */ (void) pci_iommu_read(iommu->write_complete_reg); - - /* Now update everyone's flush point. 
*/ - for (entry = 0; entry < PBM_NCLUSTERS; entry++) { - iommu->alloc_info[entry].flush = - iommu->alloc_info[entry].next; - } } #define IOPTE_CONSISTENT(CTX) \ @@ -80,9 +74,61 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } +/* Based largely upon the ppc64 iommu allocator. */ +static long pci_arena_alloc(struct pci_iommu *iommu, unsigned long npages) +{ + struct pci_iommu_arena *arena = &iommu->arena; + unsigned long n, i, start, end, limit; + int pass; + + limit = arena->limit; + start = arena->hint; + pass = 0; + +again: + n = find_next_zero_bit(arena->map, limit, start); + end = n + npages; + if (unlikely(end >= limit)) { + if (likely(pass < 1)) { + limit = start; + start = 0; + __iommu_flushall(iommu); + pass++; + goto again; + } else { + /* Scanned the whole thing, give up. */ + return -1; + } + } + + for (i = n; i < end; i++) { + if (test_bit(i, arena->map)) { + start = i + 1; + goto again; + } + } + + for (i = n; i < end; i++) + __set_bit(i, arena->map); + + arena->hint = end; + + return n; +} + +static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages) +{ + unsigned long i; + + for (i = base; i < (base + npages); i++) + __clear_bit(i, arena->map); +} + void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask) { - unsigned long i, tsbbase, order; + unsigned long i, tsbbase, order, sz, num_tsb_entries; + + num_tsb_entries = tsbsize / sizeof(iopte_t); /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); @@ -90,27 +136,16 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, iommu->page_table_map_base = dma_offset; iommu->dma_addr_mask = dma_addr_mask; - switch (tsbsize / (8 * 1024)) { - case 64: - iommu->page_table_sz_bits = 16; - break; - case 128: - iommu->page_table_sz_bits = 17; - break; - default: - prom_printf("PCI_IOMMU: Illegal TSB size %d\n", - tsbsize / (8 * 1024)); + /* Allocate and initialize the free area map. */ + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kmalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); prom_halt(); - break; - }; - - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; } + memset(iommu->arena.map, 0, sz); + iommu->arena.limit = num_tsb_entries; /* Allocate and initialize the dummy page which we * set inactive IO PTEs to point to. 
@@ -132,114 +167,24 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, } iommu->page_table = (iopte_t *)tsbbase; - for (i = 0; i < tsbsize / sizeof(iopte_t); i++) + for (i = 0; i < num_tsb_entries; i++) iopte_make_dummy(iommu, &iommu->page_table[i]); } -static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages) +static inline iopte_t *alloc_npages(struct pci_iommu *iommu, unsigned long npages) { - iopte_t *iopte, *limit, *first; - unsigned long cnum, ent, flush_point; - - cnum = 0; - while ((1UL << cnum) < npages) - cnum++; - iopte = (iommu->page_table + - (cnum << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - - if (cnum == 0) - limit = (iommu->page_table + - iommu->lowest_consistent_map); - else - limit = (iopte + - (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); + long entry; - iopte += ((ent = iommu->alloc_info[cnum].next) << cnum); - flush_point = iommu->alloc_info[cnum].flush; - - first = iopte; - for (;;) { - if (IOPTE_IS_DUMMY(iommu, iopte)) { - if ((iopte + (1 << cnum)) >= limit) - ent = 0; - else - ent = ent + 1; - iommu->alloc_info[cnum].next = ent; - if (ent == flush_point) - __iommu_flushall(iommu); - break; - } - iopte += (1 << cnum); - ent++; - if (iopte >= limit) { - iopte = (iommu->page_table + - (cnum << - (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - ent = 0; - } - if (ent == flush_point) - __iommu_flushall(iommu); - if (iopte == first) - goto bad; - } - - /* I've got your streaming cluster right here buddy boy... */ - return iopte; + entry = pci_arena_alloc(iommu, npages); + if (unlikely(entry < 0)) + return NULL; -bad: - printk(KERN_EMERG "pci_iommu: alloc_streaming_cluster of npages(%ld) failed!\n", - npages); - return NULL; + return iommu->page_table + entry; } -static void free_streaming_cluster(struct pci_iommu *iommu, dma_addr_t base, - unsigned long npages, unsigned long ctx) +static inline void free_npages(struct pci_iommu *iommu, dma_addr_t base, unsigned long npages) { - unsigned long cnum, ent; - - cnum = 0; - while ((1UL << cnum) < npages) - cnum++; - - ent = (base << (32 - IO_PAGE_SHIFT + PBM_LOGCLUSTERS - iommu->page_table_sz_bits)) - >> (32 + PBM_LOGCLUSTERS + cnum - iommu->page_table_sz_bits); - - /* If the global flush might not have caught this entry, - * adjust the flush point such that we will flush before - * ever trying to reuse it. - */ -#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y))) - if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush)) - iommu->alloc_info[cnum].flush = ent; -#undef between -} - -/* We allocate consistent mappings from the end of cluster zero. 
*/ -static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long npages) -{ - iopte_t *iopte; - - iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)); - while (iopte > iommu->page_table) { - iopte--; - if (IOPTE_IS_DUMMY(iommu, iopte)) { - unsigned long tmp = npages; - - while (--tmp) { - iopte--; - if (!IOPTE_IS_DUMMY(iommu, iopte)) - break; - } - if (tmp == 0) { - u32 entry = (iopte - iommu->page_table); - - if (entry < iommu->lowest_consistent_map) - iommu->lowest_consistent_map = entry; - return iopte; - } - } - } - return NULL; + pci_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages); } static int iommu_alloc_ctx(struct pci_iommu *iommu) @@ -279,7 +224,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad struct pcidev_cookie *pcp; struct pci_iommu *iommu; iopte_t *iopte; - unsigned long flags, order, first_page, ctx; + unsigned long flags, order, first_page; void *ret; int npages; @@ -297,9 +242,10 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad iommu = pcp->pbm->iommu; spin_lock_irqsave(&iommu->lock, flags); - iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT); - if (iopte == NULL) { - spin_unlock_irqrestore(&iommu->lock, flags); + iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(iopte == NULL)) { free_pages(first_page, order); return NULL; } @@ -308,31 +254,15 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); ret = (void *) first_page; npages = size >> IO_PAGE_SHIFT; - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); first_page = __pa(first_page); while (npages--) { - iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) | + iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) | IOPTE_WRITE | (first_page & IOPTE_PAGE)); iopte++; first_page += IO_PAGE_SIZE; } - { - int i; - u32 daddr = *dma_addrp; - - npages = size >> IO_PAGE_SHIFT; - for (i = 0; i < npages; i++) { - pci_iommu_write(iommu->iommu_flush, daddr); - daddr += IO_PAGE_SIZE; - } - } - - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; } @@ -342,7 +272,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ struct pcidev_cookie *pcp; struct pci_iommu *iommu; iopte_t *iopte; - unsigned long flags, order, npages, i, ctx; + unsigned long flags, order, npages; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; pcp = pdev->sysdata; @@ -352,46 +282,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ spin_lock_irqsave(&iommu->lock, flags); - if ((iopte - iommu->page_table) == - iommu->lowest_consistent_map) { - iopte_t *walk = iopte + npages; - iopte_t *limit; - - limit = (iommu->page_table + - (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - while (walk < limit) { - if (!IOPTE_IS_DUMMY(iommu, walk)) - break; - walk++; - } - iommu->lowest_consistent_map = - (walk - iommu->page_table); - } - - /* Data for consistent mappings cannot enter the streaming - * buffers, so we only need to update the TSB. We flush - * the IOMMU here as well to prevent conflicts with the - * streaming mapping deferred tlb flush scheme. 
- */ - - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; - - for (i = 0; i < npages; i++, iopte++) - iopte_make_dummy(iommu, iopte); - - if (iommu->iommu_ctxflush) { - pci_iommu_write(iommu->iommu_ctxflush, ctx); - } else { - for (i = 0; i < npages; i++) { - u32 daddr = dvma + (i << IO_PAGE_SHIFT); - - pci_iommu_write(iommu->iommu_flush, daddr); - } - } - - iommu_free_ctx(iommu, ctx); + free_npages(iommu, dvma, npages); spin_unlock_irqrestore(&iommu->lock, flags); @@ -418,25 +309,27 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) + goto bad_no_ctx; oaddr = (unsigned long)ptr; npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); - base = alloc_streaming_cluster(iommu, npages); - if (base == NULL) + if (unlikely(!base)) goto bad; + bus_addr = (iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else @@ -447,12 +340,13 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE) iopte_val(*base) = iopte_protection | base_paddr; - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; bad: - spin_unlock_irqrestore(&iommu->lock, flags); + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); return PCI_DMA_ERROR_CODE; } @@ -527,10 +421,13 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int struct pci_iommu *iommu; struct pci_strbuf *strbuf; iopte_t *base; - unsigned long flags, npages, ctx; + unsigned long flags, npages, ctx, i; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + return; + } pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -556,13 +453,14 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int /* Step 1: Kick data out of streaming buffers if necessary. */ if (strbuf->strbuf_enabled) - pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, + npages, direction); - /* Step 2: Clear out first TSB entry. */ - iopte_make_dummy(iommu, base); + /* Step 2: Clear out TSB entries. 
*/ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, - npages, ctx); + free_npages(iommu, bus_addr - iommu->page_table_map_base, npages); iommu_free_ctx(iommu, ctx); @@ -667,6 +565,8 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int pci_map_single(pdev, (page_address(sglist->page) + sglist->offset), sglist->length, direction); + if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE)) + return 0; sglist->dma_length = sglist->length; return 1; } @@ -675,21 +575,29 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) + goto bad_no_ctx; /* Step 1: Prepare scatter list. */ npages = prepare_sg(sglist, nelems); - /* Step 2: Allocate a cluster. */ + /* Step 2: Allocate a cluster and context, if necessary. */ spin_lock_irqsave(&iommu->lock, flags); - base = alloc_streaming_cluster(iommu, npages); + base = alloc_npages(iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + + spin_unlock_irqrestore(&iommu->lock, flags); + if (base == NULL) goto bad; - dma_base = iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT); + + dma_base = iommu->page_table_map_base + + ((base - iommu->page_table) << IO_PAGE_SHIFT); /* Step 3: Normalize DMA addresses. */ used = nelems; @@ -702,30 +610,28 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int } used = nelems - used; - /* Step 4: Choose a context if necessary. */ - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); - - /* Step 5: Create the mappings. */ + /* Step 4: Create the mappings. */ if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else iopte_protection = IOPTE_CONSISTENT(ctx); if (direction != PCI_DMA_TODEVICE) iopte_protection |= IOPTE_WRITE; - fill_sg (base, sglist, used, nelems, iopte_protection); + + fill_sg(base, sglist, used, nelems, iopte_protection); + #ifdef VERIFY_SG verify_sglist(sglist, nelems, base, npages); #endif - spin_unlock_irqrestore(&iommu->lock, flags); - return used; bad: - spin_unlock_irqrestore(&iommu->lock, flags); - return PCI_DMA_ERROR_CODE; + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); + return 0; } /* Unmap a set of streaming mode DMA translations. */ @@ -738,8 +644,10 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, unsigned long flags, ctx, i, npages; u32 bus_addr; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + } pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -751,7 +659,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, if (sglist[i].dma_length == 0) break; i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - + bus_addr) >> IO_PAGE_SHIFT; base = iommu->page_table + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); @@ -772,11 +681,11 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, if (strbuf->strbuf_enabled) pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); - /* Step 2: Clear out first TSB entry. 
*/ - iopte_make_dummy(iommu, base); + /* Step 2: Clear out the TSB entries. */ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, - npages, ctx); + free_npages(iommu, bus_addr - iommu->page_table_map_base, npages); iommu_free_ctx(iommu, ctx); diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index c067407de0b0..dd35a2c7798a 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -27,23 +27,27 @@ * PCI bus. */ -#define PBM_LOGCLUSTERS 3 -#define PBM_NCLUSTERS (1 << PBM_LOGCLUSTERS) - struct pci_controller_info; /* This contains the software state necessary to drive a PCI * controller's IOMMU. */ +struct pci_iommu_arena { + unsigned long *map; + unsigned int hint; + unsigned int limit; +}; + struct pci_iommu { /* This protects the controller's IOMMU and all * streaming buffers underneath. */ spinlock_t lock; + struct pci_iommu_arena arena; + /* IOMMU page table, a linear array of ioptes. */ iopte_t *page_table; /* The page table itself. */ - int page_table_sz_bits; /* log2 of ow many pages does it map? */ /* Base PCI memory space address where IOMMU mappings * begin. @@ -62,12 +66,6 @@ struct pci_iommu { */ unsigned long write_complete_reg; - /* The lowest used consistent mapping entry. Since - * we allocate consistent maps out of cluster 0 this - * is relative to the beginning of closter 0. - */ - u32 lowest_consistent_map; - /* In order to deal with some buggy third-party PCI bridges that * do wrong prefetching, we never mark valid mappings as invalid. * Instead we point them at this dummy page. @@ -75,16 +73,6 @@ struct pci_iommu { unsigned long dummy_page; unsigned long dummy_page_pa; - /* If PBM_NCLUSTERS is ever decreased to 4 or lower, - * or if largest supported page_table_sz * 8K goes above - * 2GB, you must increase the size of the type of - * these counters. You have been duly warned. -DaveM - */ - struct { - u16 next; - u16 flush; - } alloc_info[PBM_NCLUSTERS]; - /* CTX allocation. */ unsigned long ctx_lowest_free; unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)]; -- cgit v1.2.3 From 6205d158d16d2619bf30f0aff47a8e09b07106e9 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 14 Oct 2005 12:24:24 +0100 Subject: [ARM] 3009/1: S3C2410 - io.h offsets too large for LDRH/STRH Patch from Ben Dooks The __inwc/__outwc calls are capable of creating LDRH and STRH instructions with offsets over 8bits as GCC does not have a constraint for an 8bit offset. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/io.h | 58 ++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-s3c2410/io.h b/include/asm-arm/arch-s3c2410/io.h index 418233a7ee6f..4bf272ed9add 100644 --- a/include/asm-arm/arch-s3c2410/io.h +++ b/include/asm-arm/arch-s3c2410/io.h @@ -9,7 +9,7 @@ * 06-Dec-1997 RMK Created. 
* 02-Sep-2003 BJD Modified for S3C2410 * 10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA - * + * 13-Oct-2005 BJD Fixed problems with LDRH/STRH offset range */ #ifndef __ASM_ARM_ARCH_IO_H @@ -97,7 +97,7 @@ DECLARE_IO(int,l,"") else \ __asm__ __volatile__( \ "strb %0, [%1, #0] @ outbc" \ - : : "r" (value), "r" ((port))); \ + : : "r" (value), "r" ((port))); \ }) #define __inbc(port) \ @@ -110,35 +110,61 @@ DECLARE_IO(int,l,"") else \ __asm__ __volatile__( \ "ldrb %0, [%1, #0] @ inbc" \ - : "=r" (result) : "r" ((port))); \ + : "=r" (result) : "r" ((port))); \ result; \ }) #define __outwc(value,port) \ ({ \ unsigned long v = value; \ - if (__PORT_PCIO((port))) \ - __asm__ __volatile__( \ - "strh %0, [%1, %2] @ outwc" \ - : : "r" (v), "r" (PCIO_BASE), "Jr" ((port))); \ - else \ + if (__PORT_PCIO((port))) { \ + if ((port) < 256 && (port) > -256) \ + __asm__ __volatile__( \ + "strh %0, [%1, %2] @ outwc" \ + : : "r" (v), "r" (PCIO_BASE), "Jr" ((port))); \ + else if ((port) > 0) \ + __asm__ __volatile__( \ + "strh %0, [%1, %2] @ outwc" \ + : : "r" (v), \ + "r" (PCIO_BASE + ((port) & ~0xff)), \ + "Jr" (((port) & 0xff))); \ + else \ + __asm__ __volatile__( \ + "strh %0, [%1, #0] @ outwc" \ + : : "r" (v), \ + "r" (PCIO_BASE + (port))); \ + } else \ __asm__ __volatile__( \ "strh %0, [%1, #0] @ outwc" \ - : : "r" (v), "r" ((port))); \ + : : "r" (v), "r" ((port))); \ }) #define __inwc(port) \ ({ \ unsigned short result; \ - if (__PORT_PCIO((port))) \ - __asm__ __volatile__( \ - "ldrh %0, [%1, %2] @ inwc" \ - : "=r" (result) : "r" (PCIO_BASE), "Jr" ((port))); \ - else \ + if (__PORT_PCIO((port))) { \ + if ((port) < 256 && (port) > -256 ) \ + __asm__ __volatile__( \ + "ldrh %0, [%1, %2] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE), \ + "Jr" ((port))); \ + else if ((port) > 0) \ + __asm__ __volatile__( \ + "ldrh %0, [%1, %2] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE + ((port) & ~0xff)), \ + "Jr" (((port) & 0xff))); \ + else \ + __asm__ __volatile__( \ + "ldrh %0, [%1, #0] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE + ((port)))); \ + } else \ __asm__ __volatile__( \ "ldrh %0, [%1, #0] @ inwc" \ - : "=r" (result) : "r" ((port))); \ - result; \ + : "=r" (result) : "r" ((port))); \ + result; \ }) #define __outlc(value,port) \ -- cgit v1.2.3 From cb38c569e5ecf9e922e66963b6da2751b4f13d81 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 14 Oct 2005 16:07:25 +0100 Subject: [ARM] 3011/1: pxafb: Add ability to set device parent + fix spitz compile error Patch from Richard Purdie Add a function to allow machines to set the parent of the pxa framebuffer device. This means the power up/down sequence can be controlled where required by the machine. Update spitz to use the new function, fixing a compile error. 
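A rough usage sketch of the new hook (the myboard_* names below are hypothetical placeholders; the only interfaces assumed are the set_pxa_fb_parent()/set_pxa_fb_info() calls shown in the diff that follows, and the ordering simply mirrors the spitz change):

/* Hypothetical machine init: parent the framebuffer device to the
 * board's SSP device so the driver core orders power up/down of the
 * panel relative to its controller.  Illustration only -- this is not
 * compilable outside a real board file.
 */
static void __init myboard_init(void)
{
	set_pxa_fb_parent(&myboard_ssp_device.dev);
	set_pxa_fb_info(&myboard_pxafb_info);
}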
Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/mach-pxa/generic.c | 5 +++++ arch/arm/mach-pxa/spitz.c | 2 +- include/asm-arm/arch-pxa/pxafb.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index d0660a8c4b70..d327c127eddb 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -208,6 +208,11 @@ static struct platform_device pxafb_device = { .resource = pxafb_resources, }; +void __init set_pxa_fb_parent(struct device *parent_dev) +{ + pxafb_device.dev.parent = parent_dev; +} + static struct platform_device ffuart_device = { .name = "pxa2xx-uart", .id = 0, diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 568afe3d6e1a..7eaeb24aae1e 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -328,7 +328,7 @@ static void __init common_init(void) platform_add_devices(devices, ARRAY_SIZE(devices)); pxa_set_mci_info(&spitz_mci_platform_data); - pxafb_device.dev.parent = &spitzssp_device.dev; + set_pxa_fb_parent(&spitzssp_device.dev); set_pxa_fb_info(&spitz_pxafb_info); } diff --git a/include/asm-arm/arch-pxa/pxafb.h b/include/asm-arm/arch-pxa/pxafb.h index 21c0e16dce5f..aba9b30f4249 100644 --- a/include/asm-arm/arch-pxa/pxafb.h +++ b/include/asm-arm/arch-pxa/pxafb.h @@ -66,4 +66,5 @@ struct pxafb_mach_info { }; void set_pxa_fb_info(struct pxafb_mach_info *hard_pxa_fb_info); +void set_pxa_fb_parent(struct device *parent_dev); unsigned long pxafb_get_hsync_time(struct device *dev); -- cgit v1.2.3 From e26148d934762b61133a64b6862f870624ff617d Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Fri, 14 Oct 2005 15:59:05 -0700 Subject: [PATCH] Fix copy-and-paste error in BSD accounting Fix copy and paste error in jiffies_to_AHZ conversion which leads to wrong BSD accounting information on alpha and ia64 when CONFIG_BSD_PROCESS_ACCT_V3 is turned on. Also update comment to match reorganised header files. Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acct.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acct.h b/include/linux/acct.h index 1993a3691768..19f70462b3be 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -162,13 +162,13 @@ typedef struct acct acct_t; #ifdef __KERNEL__ /* * Yet another set of HZ to *HZ helper functions. - * See for the original. + * See for the original. */ static inline u32 jiffies_to_AHZ(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0 - return x / (HZ / USER_HZ); + return x / (HZ / AHZ); #else u64 tmp = (u64)x * TICK_NSEC; do_div(tmp, (NSEC_PER_SEC / AHZ)); -- cgit v1.2.3 From 688ce17b8599abc548b406c00e4d18ae0dec954f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Oct 2005 00:17:33 -0700 Subject: [PATCH]: highest_possible_processor_id() has to be a macro ... otherwise, things like alpha and sparc64 break and break badly. They define cpu_possible_map to something else in smp.h *AFTER* having included cpumask.h. If that puppy is a macro, expansion will happen at the actual caller, when we'd already seen #define cpu_possible_map ... and we will get the right thing used. As an inline helper it will be tokenized before we get to that define and that's it; no matter what we define later, it won't affect anything. 
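A minimal standalone illustration of that expansion-order difference, using made-up names rather than the kernel's definitions (this compiles and runs as ordinary C):

#include <stdio.h>

static int real_map = 1;
static int generic_map = 2;

#define use_map_macro()	(generic_map)		/* expanded at each call site */

static inline int use_map_inline(void)
{
	return generic_map;			/* body tokenized right here */
}

#define generic_map real_map			/* a later header redefines the name */

int main(void)
{
	printf("macro:  %d\n", use_map_macro());	/* sees the #define above: prints 1 */
	printf("inline: %d\n", use_map_inline());	/* parsed before it: prints 2 */
	return 0;
}

The macro picks up whatever the name means at the point of use, which is exactly what the arch-specific overrides rely on.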
We get modules with dependency on cpu_possible_map instead of the right symbol (phys_cpu_present_map in case of sparc64), or outright link errors if they are built-in. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/cpumask.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe9778301d07..9bdba8169b41 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -393,15 +393,13 @@ extern cpumask_t cpu_present_map; #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) /* Find the highest possible smp_processor_id() */ -static inline unsigned int highest_possible_processor_id(void) -{ - unsigned int cpu, highest = 0; - - for_each_cpu_mask(cpu, cpu_possible_map) - highest = cpu; - - return highest; -} +#define highest_possible_processor_id() \ +({ \ + unsigned int cpu, highest = 0; \ + for_each_cpu_mask(cpu, cpu_possible_map) \ + highest = cpu; \ + highest; \ +}) #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From b24d18aa743dad0c42918157c5d717686269d3a8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 16 Oct 2005 20:29:20 -0700 Subject: [PATCH] list: add missing rcu_dereference on first element It seems that all the list_*_rcu primitives are missing a memory barrier on the very first dereference. For example, #define list_for_each_rcu(pos, head) \ for (pos = (head)->next; prefetch(pos->next), pos != (head); \ pos = rcu_dereference(pos->next)) It will go something like: pos = (head)->next prefetch(pos->next) pos != (head) do stuff We're missing a barrier here. pos = rcu_dereference(pos->next) fetch pos->next barrier given by rcu_dereference(pos->next) store pos Without the missing barrier, the pos->next value may turn out to be stale. In fact, if "do stuff" were also dereferencing pos and relying on list_for_each_rcu to provide the barrier then it may also break. So here is a patch to make sure that we have a barrier for the first element in the list. Signed-off-by: Herbert Xu Acked-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index e6ec59682274..084971f333fe 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -442,12 +442,14 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_rcu(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) #define __list_for_each_rcu(pos, head) \ - for (pos = (head)->next; pos != (head); \ - pos = rcu_dereference(pos->next)) + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) /** * list_for_each_safe_rcu - iterate over an rcu-protected list safe @@ -461,8 +463,9 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). 
*/ #define list_for_each_safe_rcu(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = rcu_dereference(n), n = pos->next) + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -474,11 +477,11 @@ static inline void list_splice_init(struct list_head *list, * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = rcu_dereference(list_entry(pos->member.next, \ - typeof(*pos), member))) +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -492,8 +495,9 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = (pos)->next; prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) /* * Double linked lists with a single pointer list head. @@ -696,8 +700,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, pos = n) #define hlist_for_each_rcu(pos, head) \ - for ((pos) = (head)->first; pos && ({ prefetch((pos)->next); 1; }); \ - (pos) = rcu_dereference((pos)->next)) + for ((pos) = (head)->first; \ + rcu_dereference((pos)) && ({ prefetch((pos)->next); 1; }); \ + (pos) = (pos)->next) /** * hlist_for_each_entry - iterate over list of given type @@ -762,9 +767,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1;}) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) + pos = pos->next) #else #warning "don't include kernel headers in userspace" -- cgit v1.2.3 From 5ee832dbc6770135ec8d63296af0a4374557bb79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2005 20:01:21 +0200 Subject: [PATCH] rcu: keep rcu callback event counter This makes call_rcu() keep track of how many events there are on the RCU list, and cause a reschedule event when the list gets too long. This helps keep RCU event lists down. 
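The bookkeeping pattern is simple enough to sketch as ordinary user-space C; all names and the threshold below are illustrative only, the real change is the rcupdate.h/rcupdate.c diff that follows:

#include <stdio.h>

#define PENDING_THRESHOLD 10000			/* same order of magnitude as the patch */

struct cb {
	struct cb *next;
	void (*func)(struct cb *);
};

struct cb_list {
	struct cb *head;
	struct cb **tail;
	long count;				/* # of queued callbacks */
};

static void backlog_too_long(void)
{
	/* stand-in for set_need_resched(): nudge the scheduler so the
	 * batch processing gets a chance to run and drain the list */
	printf("backlog over threshold\n");
}

static void enqueue_cb(struct cb_list *l, struct cb *c)
{
	c->next = NULL;
	*l->tail = c;
	l->tail = &c->next;
	if (++l->count > PENDING_THRESHOLD)
		backlog_too_long();
}

static void run_batch(struct cb_list *l, int maxbatch)
{
	while (l->head && maxbatch-- > 0) {
		struct cb *c = l->head;
		l->head = c->next;
		l->count--;			/* mirrors rdp->count-- in rcu_do_batch() */
		c->func(c);
	}
	if (!l->head)
		l->tail = &l->head;		/* list empty again: reset tail */
}

static void noop(struct cb *c) { (void)c; }

int main(void)
{
	struct cb nodes[3];
	struct cb_list l = { NULL, &l.head, 0 };
	int i;

	for (i = 0; i < 3; i++) {
		nodes[i].func = noop;
		enqueue_cb(&l, &nodes[i]);
	}
	run_batch(&l, 16);
	printf("pending after batch: %ld\n", l.count);	/* 0 */
	return 0;
}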
Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 1 + kernel/rcupdate.c | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4e65eb44adfd..70191a5a148f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -94,6 +94,7 @@ struct rcu_data { long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; struct rcu_head **nxttail; + long count; /* # of queued items */ struct rcu_head *curlist; struct rcu_head **curtail; struct rcu_head *donelist; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index dd99415b1551..2559d4b8f23f 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -109,6 +109,10 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + + if (unlikely(++rdp->count > 10000)) + set_need_resched(); + local_irq_restore(flags); } @@ -140,6 +144,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + rdp->count++; +/* + * Should we directly call rcu_do_batch() here ? + * if (unlikely(rdp->count > 10000)) + * rcu_do_batch(rdp); + */ local_irq_restore(flags); } @@ -157,6 +167,7 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; + rdp->count--; if (++count >= maxbatch) break; } -- cgit v1.2.3 From 4faa5285283fad081443e3612ca426a311bb6c7e Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2005 16:43:33 -0700 Subject: [PATCH] aio: revert lock_kiocb() lock_kiocb() was introduced to serialize retrying and cancellation. In the process of doing so it tried to sleep waiting for KIF_LOCKED while holding the ctx_lock spinlock. Recent fixes have ensured that multiple concurrent retries won't be attempted for a given iocb. Cancel has other problems and has no significant in-tree users that have been complaining about it. So for the immediate future we'll revert sleeping with the lock held and will address proper cancellation and retry serialization in the future. Signed-off-by: Zach Brown Acked-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 26 +------------------------- include/linux/aio.h | 7 ++++++- 2 files changed, 7 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/fs/aio.c b/fs/aio.c index d6b1551342b7..9fe7216457d8 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -398,7 +398,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) if (unlikely(!req)) return NULL; - req->ki_flags = 1 << KIF_LOCKED; + req->ki_flags = 0; req->ki_users = 2; req->ki_key = 0; req->ki_ctx = ctx; @@ -547,25 +547,6 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id) return ioctx; } -static int lock_kiocb_action(void *param) -{ - schedule(); - return 0; -} - -static inline void lock_kiocb(struct kiocb *iocb) -{ - wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action, - TASK_UNINTERRUPTIBLE); -} - -static inline void unlock_kiocb(struct kiocb *iocb) -{ - kiocbClearLocked(iocb); - smp_mb__after_clear_bit(); - wake_up_bit(&iocb->ki_flags, KIF_LOCKED); -} - /* * use_mm * Makes the calling kernel thread take on the specified @@ -796,9 +777,7 @@ static int __aio_run_iocbs(struct kioctx *ctx) * Hold an extra reference while retrying i/o. 
*/ iocb->ki_users++; /* grab extra reference */ - lock_kiocb(iocb); aio_run_iocb(iocb); - unlock_kiocb(iocb); if (__aio_put_req(ctx, iocb)) /* drop extra ref */ put_ioctx(ctx); } @@ -1542,7 +1521,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, spin_lock_irq(&ctx->ctx_lock); aio_run_iocb(req); - unlock_kiocb(req); if (!list_empty(&ctx->run_list)) { /* drain the run list */ while (__aio_run_iocbs(ctx)) @@ -1674,7 +1652,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (NULL != cancel) { struct io_event tmp; pr_debug("calling cancel\n"); - lock_kiocb(kiocb); memset(&tmp, 0, sizeof(tmp)); tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; tmp.data = kiocb->ki_user_data; @@ -1686,7 +1663,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (copy_to_user(result, &tmp, sizeof(tmp))) ret = -EFAULT; } - unlock_kiocb(kiocb); } else ret = -EINVAL; diff --git a/include/linux/aio.h b/include/linux/aio.h index 60def658b246..0decf66117c1 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -24,7 +24,12 @@ struct kioctx; #define KIOCB_SYNC_KEY (~0U) /* ki_flags bits */ -#define KIF_LOCKED 0 +/* + * This may be used for cancel/retry serialization in the future, but + * for now it's unused and we probably don't want modules to even + * think they can use it. + */ +/* #define KIF_LOCKED 0 */ #define KIF_KICKED 1 #define KIF_CANCELLED 2 -- cgit v1.2.3 From 9b15c6c4e22cbb381373fac3bee8cacb811147a9 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 18 Oct 2005 07:51:34 +0100 Subject: [ARM] 3019/1: fix wrong comments Patch from Nicolas Pitre Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- include/asm-arm/locks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-arm/locks.h b/include/asm-arm/locks.h index f08dc8447913..852220eecdbc 100644 --- a/include/asm-arm/locks.h +++ b/include/asm-arm/locks.h @@ -103,7 +103,7 @@ ({ \ smp_mb(); \ __asm__ __volatile__( \ - "@ up_op_read\n" \ + "@ up_op_write\n" \ "1: ldrex lr, [%0]\n" \ " adds lr, lr, %1\n" \ " strex ip, lr, [%0]\n" \ @@ -231,7 +231,7 @@ #define __up_op_write(ptr,wake) \ ({ \ __asm__ __volatile__( \ - "@ up_op_read\n" \ + "@ up_op_write\n" \ " mrs ip, cpsr\n" \ " orr lr, ip, #128\n" \ " msr cpsr_c, lr\n" \ -- cgit v1.2.3 From ad1b472bea1bbcd8dc7fd92f6952d8b2d8355edb Mon Sep 17 00:00:00 2001 From: Kenneth Tan Date: Tue, 18 Oct 2005 07:51:35 +0100 Subject: [ARM] 3020/1: Fixes typo error CONFIG_CPU_IXP465, which should be CONFIG_CPU_IXP46X Patch from Kenneth Tan The cpu_is_ixp465 macro in include/asm-arm/arch-ixp4xx/hardware.h is always returning 0 because #ifdef CONFIG_CPU_IXP465 is always false. 
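The failure mode is easy to reproduce outside the kernel; a tiny standalone example with made-up option names (nothing below is real kernel configuration):

#include <stdio.h>

#define CONFIG_FEATURE_46X 1		/* the option that really gets set */

#if defined(CONFIG_FEATURE_465)		/* misspelled: defined nowhere */
#define feature_enabled() 1
#else
#define feature_enabled() 0		/* so this fallback is always chosen */
#endif

int main(void)
{
	printf("feature_enabled() = %d\n", feature_enabled());	/* always prints 0 */
	return 0;
}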
Signed-off-by: Kenneth Tan Signed-off-by: Russell King --- include/asm-arm/arch-ixp4xx/hardware.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/arch-ixp4xx/hardware.h b/include/asm-arm/arch-ixp4xx/hardware.h index 4ac964b9078a..55d85eea8c1a 100644 --- a/include/asm-arm/arch-ixp4xx/hardware.h +++ b/include/asm-arm/arch-ixp4xx/hardware.h @@ -27,7 +27,7 @@ #define pcibios_assign_all_busses() 1 -#if defined(CONFIG_CPU_IXP465) && !defined(__ASSEMBLY__) +#if defined(CONFIG_CPU_IXP46X) && !defined(__ASSEMBLY__) extern unsigned int processor_id; #define cpu_is_ixp465() ((processor_id & 0xffffffc0) == 0x69054200) #else -- cgit v1.2.3 From 251b928cdff5f12e7da8f56e8933e2b58ba08456 Mon Sep 17 00:00:00 2001 From: Kenneth Tan Date: Tue, 18 Oct 2005 07:53:35 +0100 Subject: [ARM] 3021/1: Interrupt 0 bug fix for ixp4xx Patch from Kenneth Tan The get_irqnr_and_base subroutine of ixp4xx does not take interrupt 0 condition into account properly. We should not perform "subs" here. The Z flag will be set when interrupt 0 occur, which resulting "movne r1, sp" in the caller routine (irq_handler) not being executed. When interrupt 0 occur: o if CONFIG_CPU_IXP46X is not set, "subs" will set the Z flag and return o if CONFIG_CPU_IXP46X is set, codes in upper interrupt handling will be trigerred. But since this is not supper interrupt, the "cmp" in the upper interrupt handling portion will set the Z flag and return Signed-off-by: Kenneth Tan Signed-off-by: Russell King --- include/asm-arm/arch-ixp4xx/entry-macro.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-ixp4xx/entry-macro.S b/include/asm-arm/arch-ixp4xx/entry-macro.S index 455da64832de..323b0bc4a39c 100644 --- a/include/asm-arm/arch-ixp4xx/entry-macro.S +++ b/include/asm-arm/arch-ixp4xx/entry-macro.S @@ -15,25 +15,26 @@ ldr \irqstat, =(IXP4XX_INTC_BASE_VIRT+IXP4XX_ICIP_OFFSET) ldr \irqstat, [\irqstat] @ get interrupts cmp \irqstat, #0 - beq 1001f + beq 1001f @ upper IRQ? clz \irqnr, \irqstat mov \base, #31 - subs \irqnr, \base, \irqnr + sub \irqnr, \base, \irqnr + b 1002f @ lower IRQ being + @ handled 1001: /* * IXP465 has an upper IRQ status register */ #if defined(CONFIG_CPU_IXP46X) - bne 1002f ldr \irqstat, =(IXP4XX_INTC_BASE_VIRT+IXP4XX_ICIP2_OFFSET) ldr \irqstat, [\irqstat] @ get upper interrupts mov \irqnr, #63 clz \irqstat, \irqstat cmp \irqstat, #32 subne \irqnr, \irqnr, \irqstat -1002: #endif +1002: .endm -- cgit v1.2.3 From d1972efaf24e56c06b43c40c364f9377763c2e13 Mon Sep 17 00:00:00 2001 From: Paul Schulz Date: Tue, 18 Oct 2005 19:40:32 +0100 Subject: [ARM] 3023/1: pxa-regs: Typo in ARM pxa register definitions. Patch from Paul Schulz The following trivial patch is to fix what looks like a typo in the PXA register definitions. The correction comes directly from the definition in the Intel Documentation. http://www.intel.com/design/pca/applicationsprocessors/manuals/278693.htm Intel(R) PXA 255 Processor - Developers Manual - Jan 2004 - Page 12-33 Neither 'UDCCS_IO_ROF' or 'UDCCS_IO_DME' are currently used elseware in the main code (from grep of tree)... The current definitions have been in the code since at lease 2.4.7. 
Signed-off-by: Paul Schulz Signed-off-by: Russell King --- include/asm-arm/arch-pxa/pxa-regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 13fa2deb4ddd..3af7165ab0d7 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -653,7 +653,7 @@ #define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */ #define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_IO_ROF (1 << 3) /* Receive overflow */ +#define UDCCS_IO_ROF (1 << 2) /* Receive overflow */ #define UDCCS_IO_DME (1 << 3) /* DMA enable */ #define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty */ #define UDCCS_IO_RSP (1 << 7) /* Receive short packet */ -- cgit v1.2.3 From 3359b54c8c07338f3a863d1109b42eebccdcf379 Mon Sep 17 00:00:00 2001 From: "Seth, Rohit" Date: Tue, 18 Oct 2005 14:15:12 -0700 Subject: [PATCH] Handle spurious page fault for hugetlb region The hugetlb pages are currently pre-faulted. At the time of mmap of hugepages, we populate the new PTEs. It is possible that HW has already cached some of the unused PTEs internally. These stale entries never get a chance to be purged in existing control flow. This patch extends the check in page fault code for hugepages. Check if a faulted address falls with in size for the hugetlb file backing it. We return VM_FAULT_MINOR for these cases (assuming that the arch specific page-faulting code purges the stale entry for the archs that need it). Signed-off-by: Rohit Seth [ This is apparently arguably an ia64 port bug. But the code won't hurt, and for now it fixes a real problem on some ia64 machines ] Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 13 +++++++++++++ mm/memory.c | 14 ++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e670b0d13fe0..42cb7d70f9ac 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -155,11 +155,24 @@ static inline void set_file_hugepages(struct file *file) { file->f_op = &hugetlbfs_file_operations; } + +static inline int valid_hugetlb_file_off(struct vm_area_struct *vma, + unsigned long address) +{ + struct inode *inode = vma->vm_file->f_dentry->d_inode; + loff_t file_off = address - vma->vm_start; + + file_off += (vma->vm_pgoff << PAGE_SHIFT); + + return (file_off < inode->i_size); +} + #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() #define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS) +#define valid_hugetlb_file_off(vma, address) 0 #endif /* !CONFIG_HUGETLBFS */ diff --git a/mm/memory.c b/mm/memory.c index ae8161f1f459..8c88b973abc5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2045,8 +2045,18 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, inc_page_state(pgfault); - if (is_vm_hugetlb_page(vma)) - return VM_FAULT_SIGBUS; /* mapping truncation does this. */ + if (unlikely(is_vm_hugetlb_page(vma))) { + if (valid_hugetlb_file_off(vma, address)) + /* We get here only if there was a stale(zero) TLB entry + * (because of HW prefetching). + * Low-level arch code (if needed) should have already + * purged the stale entry as part of this fault handling. + * Here we just return. + */ + return VM_FAULT_MINOR; + else + return VM_FAULT_SIGBUS; /* mapping truncation does this. 
*/ + } /* * We need the page table lock to synchronize with kswapd -- cgit v1.2.3 From 281dd25cdc0d6903929b79183816d151ea626341 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Wed, 19 Oct 2005 15:52:18 -0700 Subject: [PATCH] swiotlb: make sure initial DMA allocations really are in DMA memory This introduces a limit parameter to the core bootmem allocator; The new parameter indicates that physical memory allocated by the bootmem allocator should be within the requested limit. We also introduce alloc_bootmem_low_pages_limit, alloc_bootmem_node_limit, alloc_bootmem_low_pages_node_limit apis, but alloc_bootmem_low_pages_limit is the only api used for swiotlb. The existing alloc_bootmem_low_pages() api could instead have been changed and made to pass right limit to the core allocator. But that would make the patch more intrusive for 2.6.14, as other arches use alloc_bootmem_low_pages(). We may be done that post 2.6.14 as a cleanup. With this, swiotlb gets memory within 4G for both x86_64 and ia64 arches. Signed-off-by: Yasunori Goto Cc: Ravikiran G Thirumalai Signed-off-by: Linus Torvalds --- arch/ia64/lib/swiotlb.c | 4 ++-- include/linux/bootmem.h | 32 ++++++++++++++++++++++++++++++-- mm/bootmem.c | 31 +++++++++++++++++++++---------- 3 files changed, 53 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c index dbc0b3e449c5..a604efc7f6c9 100644 --- a/arch/ia64/lib/swiotlb.c +++ b/arch/ia64/lib/swiotlb.c @@ -123,8 +123,8 @@ swiotlb_init_with_default_size (size_t default_size) /* * Get IO TLB memory from the low pages */ - io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * - (1 << IO_TLB_SHIFT)); + io_tlb_start = alloc_bootmem_low_pages_limit(io_tlb_nslabs * + (1 << IO_TLB_SHIFT), 0x100000000); if (!io_tlb_start) panic("Cannot allocate SWIOTLB buffer"); io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 82bd8842d11c..3b03b0b868dd 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -43,7 +43,7 @@ typedef struct bootmem_data { extern unsigned long __init bootmem_bootmap_pages (unsigned long); extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); extern void __init free_bootmem (unsigned long addr, unsigned long size); -extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); +extern void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE extern void __init reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ @@ -54,6 +54,16 @@ extern void __init reserve_bootmem (unsigned long addr, unsigned long size); __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem((x), PAGE_SIZE, 0) + +#define alloc_bootmem_limit(x, limit) \ + __alloc_bootmem_limit((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS), (limit)) +#define alloc_bootmem_low_limit(x, limit) \ + __alloc_bootmem_limit((x), SMP_CACHE_BYTES, 0, (limit)) +#define alloc_bootmem_pages_limit(x, limit) \ + __alloc_bootmem_limit((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS), (limit)) +#define alloc_bootmem_low_pages_limit(x, limit) \ + __alloc_bootmem_limit((x), PAGE_SIZE, 0, (limit)) + #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern unsigned long __init free_all_bootmem (void); @@ -61,7 +71,7 @@ extern unsigned long __init init_bootmem_node 
(pg_data_t *pgdat, unsigned long f extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); -extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +extern void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) @@ -69,6 +79,14 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) + +#define alloc_bootmem_node_limit(pgdat, x, limit) \ + __alloc_bootmem_node_limit((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS), (limit)) +#define alloc_bootmem_pages_node_limit(pgdat, x, limit) \ + __alloc_bootmem_node_limit((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS), (limit)) +#define alloc_bootmem_low_pages_node_limit(pgdat, x, limit) \ + __alloc_bootmem_node_limit((pgdat), (x), PAGE_SIZE, 0, (limit)) + #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP @@ -105,5 +123,15 @@ extern void *__init alloc_large_system_hash(const char *tablename, #endif extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? */ +static inline void *__alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) +{ + return __alloc_bootmem_limit(size, align, goal, 0); +} + +static inline void *__alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, + unsigned long goal) +{ + return __alloc_bootmem_node_limit(pgdat, size, align, goal, 0); +} #endif /* _LINUX_BOOTMEM_H */ diff --git a/mm/bootmem.c b/mm/bootmem.c index c1330cc19783..a58699b6579e 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -154,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, */ static void * __init __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, - unsigned long align, unsigned long goal) + unsigned long align, unsigned long goal, unsigned long limit) { unsigned long offset, remaining_size, areasize, preferred; - unsigned long i, start = 0, incr, eidx; + unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; void *ret; if(!size) { @@ -166,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, } BUG_ON(align & (align-1)); - eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + if (limit && bdata->node_boot_start >= limit) + return NULL; + + limit >>=PAGE_SHIFT; + if (limit && end_pfn > limit) + end_pfn = limit; + + eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); offset = 0; if (align && (bdata->node_boot_start & (align - 1UL)) != 0) @@ -178,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, * first, then we try to allocate lower pages. 
*/ if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { + ((goal >> PAGE_SHIFT) < end_pfn)) { preferred = goal - bdata->node_boot_start; if (bdata->last_success >= preferred) - preferred = bdata->last_success; + if (!limit || (limit && limit > bdata->last_success)) + preferred = bdata->last_success; } else preferred = 0; @@ -382,14 +390,15 @@ unsigned long __init free_all_bootmem (void) return(free_all_bootmem_core(NODE_DATA(0))); } -void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, + unsigned long limit) { pg_data_t *pgdat = pgdat_list; void *ptr; for_each_pgdat(pgdat) if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, - align, goal))) + align, goal, limit))) return(ptr); /* @@ -400,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned return NULL; } -void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) + +void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { void *ptr; - ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal); + ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit); if (ptr) return (ptr); - return __alloc_bootmem(size, align, goal); + return __alloc_bootmem_limit(size, align, goal, limit); } -- cgit v1.2.3 From ac9b9c667c2e1194e22ebe0a441ae1c37aaa9b90 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 20 Oct 2005 16:24:28 +0100 Subject: [PATCH] Fix handling spurious page fault for hugetlb region This reverts commit 3359b54c8c07338f3a863d1109b42eebccdcf379 and replaces it with a cleaner version that is purely based on page table operations, so that the synchronization between inode size and hugetlb mappings becomes moot. 
Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 16 +++------------- mm/hugetlb.c | 22 ++++++++++++++++++++++ mm/memory.c | 14 ++------------ 3 files changed, 27 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 42cb7d70f9ac..d664330d900e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -25,6 +25,8 @@ int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); struct page *alloc_huge_page(void); void free_huge_page(struct page *); +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access); extern unsigned long max_huge_pages; extern const unsigned long hugetlb_zero, hugetlb_infinity; @@ -99,6 +101,7 @@ static inline unsigned long hugetlb_total_pages(void) do { } while (0) #define alloc_huge_page() ({ NULL; }) #define free_huge_page(p) ({ (void)(p); BUG(); }) +#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) #ifndef HPAGE_MASK #define HPAGE_MASK 0 /* Keep the compiler happy */ @@ -155,24 +158,11 @@ static inline void set_file_hugepages(struct file *file) { file->f_op = &hugetlbfs_file_operations; } - -static inline int valid_hugetlb_file_off(struct vm_area_struct *vma, - unsigned long address) -{ - struct inode *inode = vma->vm_file->f_dentry->d_inode; - loff_t file_off = address - vma->vm_start; - - file_off += (vma->vm_pgoff << PAGE_SHIFT); - - return (file_off < inode->i_size); -} - #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() #define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS) -#define valid_hugetlb_file_off(vma, address) 0 #endif /* !CONFIG_HUGETLBFS */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a1b30d45459e..61d380678030 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -394,6 +394,28 @@ out: return ret; } +/* + * On ia64 at least, it is possible to receive a hugetlb fault from a + * stale zero entry left in the TLB from earlier hardware prefetching. + * Low-level arch code should already have flushed the stale entry as + * part of its fault handling, but we do need to accept this minor fault + * and return successfully. Whereas the "normal" case is that this is + * an access to a hugetlb page which has been truncated off since mmap. + */ +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access) +{ + int ret = VM_FAULT_SIGBUS; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pte = huge_pte_offset(mm, address); + if (pte && !pte_none(*pte)) + ret = VM_FAULT_MINOR; + spin_unlock(&mm->page_table_lock); + return ret; +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i) diff --git a/mm/memory.c b/mm/memory.c index 8c88b973abc5..1db40e935e55 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2045,18 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, inc_page_state(pgfault); - if (unlikely(is_vm_hugetlb_page(vma))) { - if (valid_hugetlb_file_off(vma, address)) - /* We get here only if there was a stale(zero) TLB entry - * (because of HW prefetching). - * Low-level arch code (if needed) should have already - * purged the stale entry as part of this fault handling. - * Here we just return. - */ - return VM_FAULT_MINOR; - else - return VM_FAULT_SIGBUS; /* mapping truncation does this. 
*/ - } + if (unlikely(is_vm_hugetlb_page(vma))) + return hugetlb_fault(mm, vma, address, write_access); /* * We need the page table lock to synchronize with kswapd -- cgit v1.2.3