From 00e4d116a7ef94eb910be037912b0b2fc09f608b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Fri, 22 Sep 2006 09:33:58 +0100
Subject: [DCCP]: Allow default/fallback service code.

This has been discussed on dccp@vger and removes the necessity for applications
to supply service codes in each and every case.

If an application does not want to provide a service code, that's fine, it will
be given 0. Otherwise, service codes can be set via socket options as before.

This patch has been tested using various client/server configurations
(including listening on multiple service codes).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 include/linux/dccp.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 2d7671c92c0b..29f9291e45c0 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -404,6 +404,7 @@ struct dccp_service_list {
 };
 
 #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
+#define DCCP_SERVICE_CODE_IS_ABSENT 		 0
 
 static inline int dccp_list_has_service(const struct dccp_service_list *sl,
 					const __be32 service)
@@ -484,11 +485,6 @@ static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
 	return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
 }
 
-static inline int dccp_service_not_initialized(const struct sock *sk)
-{
-	return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE;
-}
-
 static inline const char *dccp_role(const struct sock *sk)
 {
 	switch (dccp_sk(sk)->dccps_role) {
-- 
cgit v1.2.3


From b83eff641ed39bd631535b9a8971e161b056f541 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Fri, 22 Sep 2006 14:25:36 +1200
Subject: [DCCP]: Introduce constants for CCID numbers

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 include/linux/dccp.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 29f9291e45c0..d6f4ec467a4b 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -169,6 +169,12 @@ enum {
 	DCCPO_MAX_CCID_SPECIFIC = 255,
 };
 
+/* DCCP CCIDS */
+enum {
+	DCCPC_CCID2 = 2,
+	DCCPC_CCID3 = 3,
+};
+
 /* DCCP features */
 enum {
 	DCCPF_RESERVED = 0,
@@ -320,7 +326,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 /* initial values for each feature */
 #define DCCPF_INITIAL_SEQUENCE_WINDOW		100
 #define DCCPF_INITIAL_ACK_RATIO			2
-#define DCCPF_INITIAL_CCID			2
+#define DCCPF_INITIAL_CCID			DCCPC_CCID2
 #define DCCPF_INITIAL_SEND_ACK_VECTOR		1
 /* FIXME: for now we're default to 1 but it should really be 0 */
 #define DCCPF_INITIAL_SEND_NDP_COUNT		1
-- 
cgit v1.2.3


From 0d6c7ae22d4fadbc83677ea1a6144eea8911e319 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sun, 24 Sep 2006 19:28:47 -0700
Subject: [NETFILTER]: Add dscp,DSCP headers to header-y

This patch adds xt_dscp.h and xt_DSCP.h to the kernel headers which are
exported via 'make headers_install'. These are necessary for userspace
to add rules using dscp match and DSCP target.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/Kbuild | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9a285cecf249..312bd2ffee33 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -10,6 +10,8 @@ header-y += xt_connmark.h
 header-y += xt_CONNMARK.h
 header-y += xt_conntrack.h
 header-y += xt_dccp.h
+header-y += xt_dscp.h
+header-y += xt_DSCP.h
 header-y += xt_esp.h
 header-y += xt_helper.h
 header-y += xt_length.h
-- 
cgit v1.2.3


From a6d967a485c67ec8a1276261f39d81ace6a3e308 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Mon, 25 Sep 2006 15:33:09 -0400
Subject: [libata] No need for all those arch libata-portmap.h headers

They all contain the same thing.  Instead, have a single generic one in
include/asm-generic, and permit an arch to override as needed.

Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/asm-alpha/libata-portmap.h   | 1 -
 include/asm-frv/libata-portmap.h     | 1 -
 include/asm-i386/libata-portmap.h    | 1 -
 include/asm-ia64/libata-portmap.h    | 1 -
 include/asm-powerpc/libata-portmap.h | 1 -
 include/asm-sparc/libata-portmap.h   | 1 -
 include/asm-sparc64/libata-portmap.h | 1 -
 include/asm-x86_64/libata-portmap.h  | 1 -
 include/linux/libata.h               | 8 ++++++++
 9 files changed, 8 insertions(+), 8 deletions(-)
 delete mode 100644 include/asm-alpha/libata-portmap.h
 delete mode 100644 include/asm-frv/libata-portmap.h
 delete mode 100644 include/asm-i386/libata-portmap.h
 delete mode 100644 include/asm-ia64/libata-portmap.h
 delete mode 100644 include/asm-powerpc/libata-portmap.h
 delete mode 100644 include/asm-sparc/libata-portmap.h
 delete mode 100644 include/asm-sparc64/libata-portmap.h
 delete mode 100644 include/asm-x86_64/libata-portmap.h

(limited to 'include/linux')

diff --git a/include/asm-alpha/libata-portmap.h b/include/asm-alpha/libata-portmap.h
deleted file mode 100644
index 75484ef0c743..000000000000
--- a/include/asm-alpha/libata-portmap.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/libata-portmap.h>
diff --git a/include/asm-frv/libata-portmap.h b/include/asm-frv/libata-portmap.h
deleted file mode 100644
index 75484ef0c743..000000000000
--- a/include/asm-frv/libata-portmap.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/libata-portmap.h>
diff --git a/include/asm-i386/libata-portmap.h b/include/asm-i386/libata-portmap.h
deleted file mode 100644
index 75484ef0c743..000000000000
--- a/include/asm-i386/libata-portmap.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/libata-portmap.h>
diff --git a/include/asm-ia64/libata-portmap.h b/include/asm-ia64/libata-portmap.h
deleted file mode 100644
index 75484ef0c743..000000000000
--- a/include/asm-ia64/libata-portmap.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/libata-portmap.h>
diff --git a/include/asm-powerpc/libata-portmap.h b/include/asm-powerpc/libata-portmap.h
deleted file mode 100644
index 75484ef0c743..000000000000
--- a/include/asm-powerpc/libata-portmap.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/libata-portmap.h>
diff --git a/include/asm-sparc/libata-portmap.h b/include/asm-sparc/libata-portmap.h
deleted file mode 100644
index 75484ef0c743..000000000000
--- a/include/asm-sparc/libata-portmap.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/libata-portmap.h>
diff --git a/include/asm-sparc64/libata-portmap.h b/include/asm-sparc64/libata-portmap.h
deleted file mode 100644
index 75484ef0c743..000000000000
--- a/include/asm-sparc64/libata-portmap.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/libata-portmap.h>
diff --git a/include/asm-x86_64/libata-portmap.h b/include/asm-x86_64/libata-portmap.h
deleted file mode 100644
index 75484ef0c743..000000000000
--- a/include/asm-x86_64/libata-portmap.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/libata-portmap.h>
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1ef3d3901b47..d6a3d4b345fc 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -36,7 +36,15 @@
 #include <linux/workqueue.h>
 #include <scsi/scsi_host.h>
 
+/*
+ * Define if arch has non-standard setup.  This is a _PCI_ standard
+ * not a legacy or ISA standard.
+ */
+#ifdef CONFIG_ATA_NONSTANDARD
 #include <asm/libata-portmap.h>
+#else
+#include <asm-generic/libata-portmap.h>
+#endif
 
 /*
  * compile-time options: to be removed as soon as all the drivers are
-- 
cgit v1.2.3


From baef186519c69b11cf7e48c26e75feb1e6173baa Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 8 Sep 2006 16:04:05 -0400
Subject: [PATCH] WE-21 support (core API)

This is version 21 of the Wireless Extensions. Changelog :
	o finishes migrating the ESSID API (remove the +1)
	o netdev->get_wireless_stats is no more
	o long/short retry

This is a redacted version of a patch originally submitted by Jean
Tourrilhes.  I removed most of the additions, in order to minimize
future support requirements for nl80211 (or other WE successor).

CC: Jean Tourrilhes <jt@hpl.hp.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/netdevice.h |  1 -
 include/linux/wireless.h  | 24 ++++++++++++++---
 net/core/net-sysfs.c      |  5 +---
 net/core/wireless.c       | 67 +++++++++++++++++++++++++++++------------------
 4 files changed, 62 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 43289127b458..ac4f50213bc3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -334,7 +334,6 @@ struct net_device
 
 
 	struct net_device_stats* (*get_stats)(struct net_device *dev);
-	struct iw_statistics*	(*get_wireless_stats)(struct net_device *dev);
 
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
 	 * See <net/iw_handler.h> for details. Jean II */
diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 13588564b42b..a50a0130fd9e 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -1,7 +1,7 @@
 /*
  * This file define a set of standard wireless extensions
  *
- * Version :	20	17.2.06
+ * Version :	21	14.3.06
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
  * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved.
@@ -69,9 +69,14 @@
 
 /***************************** INCLUDES *****************************/
 
+/* This header is used in user-space, therefore need to be sanitised
+ * for that purpose. Those includes are usually not compatible with glibc.
+ * To know which includes to use in user-space, check iwlib.h. */
+#ifdef __KERNEL__
 #include <linux/types.h>		/* for "caddr_t" et al		*/
 #include <linux/socket.h>		/* for "struct sockaddr" et al	*/
 #include <linux/if.h>			/* for IFNAMSIZ and co... */
+#endif	/* __KERNEL__ */
 
 /***************************** VERSION *****************************/
 /*
@@ -80,7 +85,7 @@
  * (there is some stuff that will be added in the future...)
  * I just plan to increment with each new version.
  */
-#define WIRELESS_EXT	20
+#define WIRELESS_EXT	21
 
 /*
  * Changes :
@@ -208,6 +213,14 @@
  * V19 to V20
  * ----------
  *	- RtNetlink requests support (SET/GET)
+ *
+ * V20 to V21
+ * ----------
+ *	- Remove (struct net_device *)->get_wireless_stats()
+ *	- Change length in ESSID and NICK to strlen() instead of strlen()+1
+ *	- Add IW_RETRY_SHORT/IW_RETRY_LONG retry modifiers
+ *	- Power/Retry relative values no longer * 100000
+ *	- Add explicit flag to tell stats are in 802.11k RCPI : IW_QUAL_RCPI
  */
 
 /**************************** CONSTANTS ****************************/
@@ -448,6 +461,7 @@
 #define IW_QUAL_QUAL_INVALID	0x10	/* Driver doesn't provide value */
 #define IW_QUAL_LEVEL_INVALID	0x20
 #define IW_QUAL_NOISE_INVALID	0x40
+#define IW_QUAL_RCPI		0x80	/* Level + Noise are 802.11k RCPI */
 #define IW_QUAL_ALL_INVALID	0x70
 
 /* Frequency flags */
@@ -500,10 +514,12 @@
 #define IW_RETRY_TYPE		0xF000	/* Type of parameter */
 #define IW_RETRY_LIMIT		0x1000	/* Maximum number of retries*/
 #define IW_RETRY_LIFETIME	0x2000	/* Maximum duration of retries in us */
-#define IW_RETRY_MODIFIER	0x000F	/* Modify a parameter */
+#define IW_RETRY_MODIFIER	0x00FF	/* Modify a parameter */
 #define IW_RETRY_MIN		0x0001	/* Value is a minimum  */
 #define IW_RETRY_MAX		0x0002	/* Value is a maximum */
 #define IW_RETRY_RELATIVE	0x0004	/* Value is not in seconds/ms/us */
+#define IW_RETRY_SHORT		0x0010	/* Value is for short packets  */
+#define IW_RETRY_LONG		0x0020	/* Value is for long packets */
 
 /* Scanning request flags */
 #define IW_SCAN_DEFAULT		0x0000	/* Default scan of the driver */
@@ -1017,7 +1033,7 @@ struct	iw_range
 	/* Note : this frequency list doesn't need to fit channel numbers,
 	 * because each entry contain its channel index */
 
-	__u32		enc_capa; /* IW_ENC_CAPA_* bit field */
+	__u32		enc_capa;	/* IW_ENC_CAPA_* bit field */
 };
 
 /*
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 13472762b18b..f47f319bb7dc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -344,8 +344,6 @@ static ssize_t wireless_show(struct class_device *cd, char *buf,
 		if(dev->wireless_handlers &&
 		   dev->wireless_handlers->get_wireless_stats)
 			iw = dev->wireless_handlers->get_wireless_stats(dev);
-		else if (dev->get_wireless_stats)
-			iw = dev->get_wireless_stats(dev);
 		if (iw != NULL)
 			ret = (*format)(iw, buf);
 	}
@@ -465,8 +463,7 @@ int netdev_register_sysfs(struct net_device *net)
 		*groups++ = &netstat_group;
 
 #ifdef WIRELESS_EXT
-	if (net->get_wireless_stats
-	    || (net->wireless_handlers && net->wireless_handlers->get_wireless_stats))
+	if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
 		*groups++ = &wireless_group;
 #endif
 
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 3168fca312f7..ffff0da46c6e 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -68,6 +68,14 @@
  *
  * v8 - 17.02.06 - Jean II
  *	o RtNetlink requests support (SET/GET)
+ *
+ * v8b - 03.08.06 - Herbert Xu
+ *	o Fix Wireless Event locking issues.
+ *
+ * v9 - 14.3.06 - Jean II
+ *	o Change length in ESSID and NICK to strlen() instead of strlen()+1
+ *	o Make standard_ioctl_num and standard_event_num unsigned
+ *	o Remove (struct net_device *)->get_wireless_stats()
  */
 
 /***************************** INCLUDES *****************************/
@@ -234,24 +242,24 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 	[SIOCSIWESSID	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
-		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
 	[SIOCGIWESSID	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
-		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
 	[SIOCSIWNICKN	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
-		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE,
 	},
 	[SIOCGIWNICKN	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
-		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE,
 	},
 	[SIOCSIWRATE	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
@@ -338,8 +346,8 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.max_tokens	= sizeof(struct iw_pmksa),
 	},
 };
-static const int standard_ioctl_num = (sizeof(standard_ioctl) /
-				       sizeof(struct iw_ioctl_description));
+static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) /
+					    sizeof(struct iw_ioctl_description));
 
 /*
  * Meta-data about all the additional standard Wireless Extension events
@@ -389,8 +397,8 @@ static const struct iw_ioctl_description standard_event[] = {
 		.max_tokens	= sizeof(struct iw_pmkid_cand),
 	},
 };
-static const int standard_event_num = (sizeof(standard_event) /
-				       sizeof(struct iw_ioctl_description));
+static const unsigned standard_event_num = (sizeof(standard_event) /
+					    sizeof(struct iw_ioctl_description));
 
 /* Size (in bytes) of the various private data types */
 static const char iw_priv_type_size[] = {
@@ -465,17 +473,6 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
 	   (dev->wireless_handlers->get_wireless_stats != NULL))
 		return dev->wireless_handlers->get_wireless_stats(dev);
 
-	/* Old location, field to be removed in next WE */
-	if(dev->get_wireless_stats) {
-		static int printed_message;
-
-		if (!printed_message++)
-			printk(KERN_DEBUG "%s (WE) : Driver using old /proc/net/wireless support, please fix driver !\n",
-				dev->name);
-
-		return dev->get_wireless_stats(dev);
-	}
-
 	/* Not found */
 	return (struct iw_statistics *) NULL;
 }
@@ -1843,8 +1840,33 @@ int wireless_rtnetlink_set(struct net_device *	dev,
  */
 
 #ifdef WE_EVENT_RTNETLINK
+/* ---------------------------------------------------------------- */
+/*
+ * Locking...
+ * ----------
+ *
+ * Thanks to Herbert Xu <herbert@gondor.apana.org.au> for fixing
+ * the locking issue in here and implementing this code !
+ *
+ * The issue : wireless_send_event() is often called in interrupt context,
+ * while the Netlink layer can never be called in interrupt context.
+ * The fully formed RtNetlink events are queued, and then a tasklet is run
+ * to feed those to Netlink.
+ * The skb_queue is interrupt safe, and its lock is not held while calling
+ * Netlink, so there is no possibility of dealock.
+ * Jean II
+ */
+
 static struct sk_buff_head wireless_nlevent_queue;
 
+static int __init wireless_nlevent_init(void)
+{
+	skb_queue_head_init(&wireless_nlevent_queue);
+	return 0;
+}
+
+subsys_initcall(wireless_nlevent_init);
+
 static void wireless_nlevent_process(unsigned long data)
 {
 	struct sk_buff *skb;
@@ -1921,13 +1943,6 @@ static inline void rtmsg_iwinfo(struct net_device *	dev,
 	tasklet_schedule(&wireless_nlevent_tasklet);
 }
 
-static int __init wireless_nlevent_init(void)
-{
-	skb_queue_head_init(&wireless_nlevent_queue);
-	return 0;
-}
-
-subsys_initcall(wireless_nlevent_init);
 #endif	/* WE_EVENT_RTNETLINK */
 
 /* ---------------------------------------------------------------- */
-- 
cgit v1.2.3


From 0b680e753724d31a9c45f059d1aad29df54584a1 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Fri, 22 Sep 2006 21:54:10 -0700
Subject: [PATCH] bonding: Add priv_flag to avoid event mishandling

Add priv_flag to specifically identify bonding-involved devices.  Needed
because IFF_MASTER is an unreliable identifier (vlan interfaces above bonding
will inherit IFF_MASTER).  Misidentification of devices would cause
notifier events for other devices to be erroneously processed by bonding,
causing various havoc.

Bug discovered by Martin Papik <martin.papik@ipsec.info>; this patch is
modified from his original.

Signed-off-by: Martin Papik <martin.papik@ipsec.info>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/bonding/bond_main.c | 7 ++++++-
 include/linux/if.h              | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d2f460b0dbab..9e5a533a1622 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1371,6 +1371,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	new_slave->dev = slave_dev;
+	slave_dev->priv_flags |= IFF_BONDING;
 
 	if ((bond->params.mode == BOND_MODE_TLB) ||
 	    (bond->params.mode == BOND_MODE_ALB)) {
@@ -1784,7 +1785,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	dev_set_mac_address(slave_dev, &addr);
 
 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
-				   IFF_SLAVE_INACTIVE);
+				   IFF_SLAVE_INACTIVE | IFF_BONDING);
 
 	kfree(slave);
 
@@ -3216,6 +3217,9 @@ static int bond_netdev_event(struct notifier_block *this, unsigned long event, v
 		(event_dev ? event_dev->name : "None"),
 		event);
 
+	if (!(event_dev->priv_flags & IFF_BONDING))
+		return NOTIFY_DONE;
+
 	if (event_dev->flags & IFF_MASTER) {
 		dprintk("IFF_MASTER\n");
 		return bond_master_netdev_event(event, event_dev);
@@ -4185,6 +4189,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
 	/* Initialize the device options */
 	bond_dev->tx_queue_len = 0;
 	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
+	bond_dev->priv_flags |= IFF_BONDING;
 
 	/* At first, we block adding VLANs. That's the only way to
 	 * prevent problems that occur when adding VLANs over an
diff --git a/include/linux/if.h b/include/linux/if.h
index cd080d765324..a023ec1274fe 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -59,6 +59,7 @@
 #define IFF_SLAVE_INACTIVE	0x4	/* bonding slave not the curr. active */
 #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
 #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
+#define IFF_BONDING	0x20		/* bonding master or slave	*/
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
-- 
cgit v1.2.3


From f5b2b966f032f22d3a289045a5afd4afa09f09c6 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Fri, 22 Sep 2006 21:54:53 -0700
Subject: [PATCH] bonding: Validate probe replies in ARP monitor

	Add logic to check ARP request / reply packets used for ARP
monitor link integrity checking.

	The current method simply examines the slave device to see if it
has sent and received traffic; this can be fooled by extraneous traffic.
For example, if multiple hosts running bonding are behind a common
switch, the probe traffic from the multiple instances of bonding will
update the tx/rx times on each other's slave devices.

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 Documentation/networking/bonding.txt |  59 ++++++++++++
 drivers/net/bonding/bond_main.c      | 182 +++++++++++++++++++++++++++++++++--
 drivers/net/bonding/bond_sysfs.c     |  54 +++++++++++
 drivers/net/bonding/bonding.h        |  32 +++++-
 include/linux/if.h                   |   1 +
 include/linux/netdevice.h            |   7 +-
 6 files changed, 325 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index afac780445cd..dc942eaf490f 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -192,6 +192,17 @@ or, for backwards compatibility, the option value.  E.g.,
 arp_interval
 
 	Specifies the ARP link monitoring frequency in milliseconds.
+
+	The ARP monitor works by periodically checking the slave
+	devices to determine whether they have sent or received
+	traffic recently (the precise criteria depends upon the
+	bonding mode, and the state of the slave).  Regular traffic is
+	generated via ARP probes issued for the addresses specified by
+	the arp_ip_target option.
+
+	This behavior can be modified by the arp_validate option,
+	below.
+
 	If ARP monitoring is used in an etherchannel compatible mode
 	(modes 0 and 2), the switch should be configured in a mode
 	that evenly distributes packets across all links. If the
@@ -213,6 +224,54 @@ arp_ip_target
 	maximum number of targets that can be specified is 16.  The
 	default value is no IP addresses.
 
+arp_validate
+
+	Specifies whether or not ARP probes and replies should be
+	validated in the active-backup mode.  This causes the ARP
+	monitor to examine the incoming ARP requests and replies, and
+	only consider a slave to be up if it is receiving the
+	appropriate ARP traffic.
+
+	Possible values are:
+
+	none or 0
+
+		No validation is performed.  This is the default.
+
+	active or 1
+
+		Validation is performed only for the active slave.
+
+	backup or 2
+
+		Validation is performed only for backup slaves.
+
+	all or 3
+
+		Validation is performed for all slaves.
+
+	For the active slave, the validation checks ARP replies to
+	confirm that they were generated by an arp_ip_target.  Since
+	backup slaves do not typically receive these replies, the
+	validation performed for backup slaves is on the ARP request
+	sent out via the active slave.  It is possible that some
+	switch or network configurations may result in situations
+	wherein the backup slaves do not receive the ARP requests; in
+	such a situation, validation of backup slaves must be
+	disabled.
+
+	This option is useful in network configurations in which
+	multiple bonding hosts are concurrently issuing ARPs to one or
+	more targets beyond a common switch.  Should the link between
+	the switch and target fail (but not the switch itself), the
+	probe traffic generated by the multiple bonding instances will
+	fool the standard ARP monitor into considering the links as
+	still up.  Use of the arp_validate option can resolve this, as
+	the ARP monitor will only consider ARP requests and replies
+	associated with its own instance of bonding.
+
+	This option was added in bonding version 3.1.0.
+
 downdelay
 
 	Specifies the time, in milliseconds, to wait before disabling
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index bafe62f7c9b7..fd521b05db83 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -96,6 +96,7 @@ static char *lacp_rate	= NULL;
 static char *xmit_hash_policy = NULL;
 static int arp_interval = BOND_LINK_ARP_INTERV;
 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
+static char *arp_validate = NULL;
 struct bond_params bonding_defaults;
 
 module_param(max_bonds, int, 0);
@@ -127,6 +128,8 @@ module_param(arp_interval, int, 0);
 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
 module_param_array(arp_ip_target, charp, NULL, 0);
 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
+module_param(arp_validate, charp, 0);
+MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -170,6 +173,14 @@ struct bond_parm_tbl xmit_hashtype_tbl[] = {
 {	NULL,			-1},
 };
 
+struct bond_parm_tbl arp_validate_tbl[] = {
+{	"none",			BOND_ARP_VALIDATE_NONE},
+{	"active",		BOND_ARP_VALIDATE_ACTIVE},
+{	"backup",		BOND_ARP_VALIDATE_BACKUP},
+{	"all",			BOND_ARP_VALIDATE_ALL},
+{	NULL,			-1},
+};
+
 /*-------------------------- Forward declarations ---------------------------*/
 
 static void bond_send_gratuitous_arp(struct bonding *bond);
@@ -1424,6 +1435,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	bond_compute_features(bond);
 
+	new_slave->last_arp_rx = jiffies;
+
 	if (bond->params.miimon && !bond->params.use_carrier) {
 		link_reporting = bond_check_dev_link(bond, slave_dev, 1);
 
@@ -1785,7 +1798,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	dev_set_mac_address(slave_dev, &addr);
 
 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
-				   IFF_SLAVE_INACTIVE | IFF_BONDING);
+				   IFF_SLAVE_INACTIVE | IFF_BONDING |
+				   IFF_SLAVE_NEEDARP);
 
 	kfree(slave);
 
@@ -2298,6 +2312,25 @@ static int bond_has_ip(struct bonding *bond)
 	return 0;
 }
 
+static int bond_has_this_ip(struct bonding *bond, u32 ip)
+{
+	struct vlan_entry *vlan, *vlan_next;
+
+	if (ip == bond->master_ip)
+		return 1;
+
+	if (list_empty(&bond->vlan_list))
+		return 0;
+
+	list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
+				 vlan_list) {
+		if (ip == vlan->vlan_ip)
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * We go to the (large) trouble of VLAN tagging ARP frames because
  * switches in VLAN mode (especially if ports are configured as
@@ -2436,6 +2469,93 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
 	}
 }
 
+static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip, u32 tip)
+{
+	int i;
+	u32 *targets = bond->params.arp_targets;
+
+	targets = bond->params.arp_targets;
+	for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
+		dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] "
+			"%u.%u.%u.%u bhti(tip) %d\n",
+		       NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]),
+		       bond_has_this_ip(bond, tip));
+		if (sip == targets[i]) {
+			if (bond_has_this_ip(bond, tip))
+				slave->last_arp_rx = jiffies;
+			return;
+		}
+	}
+}
+
+static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct arphdr *arp;
+	struct slave *slave;
+	struct bonding *bond;
+	unsigned char *arp_ptr;
+	u32 sip, tip;
+
+	if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
+		goto out;
+
+	bond = dev->priv;
+	read_lock(&bond->lock);
+
+	dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
+		bond->dev->name, skb->dev ? skb->dev->name : "NULL",
+		orig_dev ? orig_dev->name : "NULL");
+
+	slave = bond_get_slave_by_dev(bond, orig_dev);
+	if (!slave || !slave_do_arp_validate(bond, slave))
+		goto out_unlock;
+
+	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
+	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+				 (2 * dev->addr_len) +
+				 (2 * sizeof(u32)))))
+		goto out_unlock;
+
+	arp = skb->nh.arph;
+	if (arp->ar_hln != dev->addr_len ||
+	    skb->pkt_type == PACKET_OTHERHOST ||
+	    skb->pkt_type == PACKET_LOOPBACK ||
+	    arp->ar_hrd != htons(ARPHRD_ETHER) ||
+	    arp->ar_pro != htons(ETH_P_IP) ||
+	    arp->ar_pln != 4)
+		goto out_unlock;
+
+	arp_ptr = (unsigned char *)(arp + 1);
+	arp_ptr += dev->addr_len;
+	memcpy(&sip, arp_ptr, 4);
+	arp_ptr += 4 + dev->addr_len;
+	memcpy(&tip, arp_ptr, 4);
+
+	dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u"
+		" tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name,
+		slave->state, bond->params.arp_validate,
+		slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip));
+
+	/*
+	 * Backup slaves won't see the ARP reply, but do come through
+	 * here for each ARP probe (so we swap the sip/tip to validate
+	 * the probe).  In a "redundant switch, common router" type of
+	 * configuration, the ARP probe will (hopefully) travel from
+	 * the active, through one switch, the router, then the other
+	 * switch before reaching the backup.
+	 */
+	if (slave->state == BOND_STATE_ACTIVE)
+		bond_validate_arp(bond, slave, sip, tip);
+	else
+		bond_validate_arp(bond, slave, tip, sip);
+
+out_unlock:
+	read_unlock(&bond->lock);
+out:
+	dev_kfree_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
 /*
  * this function is called regularly to monitor each slave's link
  * ensuring that traffic is being sent and received when arp monitoring
@@ -2600,7 +2720,8 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
 	 */
 	bond_for_each_slave(bond, slave, i) {
 		if (slave->link != BOND_LINK_UP) {
-			if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) {
+			if ((jiffies - slave_last_rx(bond, slave)) <=
+			     delta_in_ticks) {
 
 				slave->link = BOND_LINK_UP;
 
@@ -2645,7 +2766,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
 
 			if ((slave != bond->curr_active_slave) &&
 			    (!bond->current_arp_slave) &&
-			    (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) &&
+			    (((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) &&
 			     bond_has_ip(bond))) {
 				/* a backup slave has gone down; three times
 				 * the delta allows the current slave to be
@@ -2692,7 +2813,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
 		 * if it is up and needs to take over as the curr_active_slave
 		 */
 		if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
-	    (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
+	    (((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) &&
 	     bond_has_ip(bond))) &&
 		    ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) {
 
@@ -3315,6 +3436,21 @@ static void bond_unregister_lacpdu(struct bonding *bond)
 	dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
 }
 
+void bond_register_arp(struct bonding *bond)
+{
+	struct packet_type *pt = &bond->arp_mon_pt;
+
+	pt->type = htons(ETH_P_ARP);
+	pt->dev = NULL; /*bond->dev;XXX*/
+	pt->func = bond_arp_rcv;
+	dev_add_pack(pt);
+}
+
+void bond_unregister_arp(struct bonding *bond)
+{
+	dev_remove_pack(&bond->arp_mon_pt);
+}
+
 /*---------------------------- Hashing Policies -----------------------------*/
 
 /*
@@ -3401,6 +3537,9 @@ static int bond_open(struct net_device *bond_dev)
 		} else {
 			arp_timer->function = (void *)&bond_loadbalance_arp_mon;
 		}
+		if (bond->params.arp_validate)
+			bond_register_arp(bond);
+
 		add_timer(arp_timer);
 	}
 
@@ -3428,6 +3567,9 @@ static int bond_close(struct net_device *bond_dev)
 		bond_unregister_lacpdu(bond);
 	}
 
+	if (bond->params.arp_validate)
+		bond_unregister_arp(bond);
+
 	write_lock_bh(&bond->lock);
 
 
@@ -4281,6 +4423,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)
 
 static int bond_check_params(struct bond_params *params)
 {
+	int arp_validate_value;
+
 	/*
 	 * Convert string parameters.
 	 */
@@ -4484,6 +4628,29 @@ static int bond_check_params(struct bond_params *params)
 		arp_interval = 0;
 	}
 
+	if (arp_validate) {
+		if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
+			printk(KERN_ERR DRV_NAME
+	       ": arp_validate only supported in active-backup mode\n");
+			return -EINVAL;
+		}
+		if (!arp_interval) {
+			printk(KERN_ERR DRV_NAME
+			       ": arp_validate requires arp_interval\n");
+			return -EINVAL;
+		}
+
+		arp_validate_value = bond_parse_parm(arp_validate,
+						     arp_validate_tbl);
+		if (arp_validate_value == -1) {
+			printk(KERN_ERR DRV_NAME
+			       ": Error: invalid arp_validate \"%s\"\n",
+			       arp_validate == NULL ? "NULL" : arp_validate);
+			return -EINVAL;
+		}
+	} else
+		arp_validate_value = 0;
+
 	if (miimon) {
 		printk(KERN_INFO DRV_NAME
 		       ": MII link monitoring set to %d ms\n",
@@ -4492,8 +4659,10 @@ static int bond_check_params(struct bond_params *params)
 		int i;
 
 		printk(KERN_INFO DRV_NAME
-		       ": ARP monitoring set to %d ms with %d target(s):",
-		       arp_interval, arp_ip_count);
+		       ": ARP monitoring set to %d ms, validate %s, with %d target(s):",
+		       arp_interval,
+		       arp_validate_tbl[arp_validate_value].modename,
+		       arp_ip_count);
 
 		for (i = 0; i < arp_ip_count; i++)
 			printk (" %s", arp_ip_target[i]);
@@ -4527,6 +4696,7 @@ static int bond_check_params(struct bond_params *params)
 	params->xmit_policy = xmit_hashtype;
 	params->miimon = miimon;
 	params->arp_interval = arp_interval;
+	params->arp_validate = arp_validate_value;
 	params->updelay = updelay;
 	params->downdelay = downdelay;
 	params->use_carrier = use_carrier;
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 15b6a29bb4d4..ced9ed8f995a 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -51,6 +51,7 @@ extern struct bond_params bonding_defaults;
 extern struct bond_parm_tbl bond_mode_tbl[];
 extern struct bond_parm_tbl bond_lacp_tbl[];
 extern struct bond_parm_tbl xmit_hashtype_tbl[];
+extern struct bond_parm_tbl arp_validate_tbl[];
 
 static int expected_refcount = -1;
 static struct class *netdev_class;
@@ -502,6 +503,53 @@ out:
 }
 static CLASS_DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR, bonding_show_xmit_hash, bonding_store_xmit_hash);
 
+/*
+ * Show and set arp_validate.
+ */
+static ssize_t bonding_show_arp_validate(struct class_device *cd, char *buf)
+{
+	struct bonding *bond = to_bond(cd);
+
+	return sprintf(buf, "%s %d\n",
+		       arp_validate_tbl[bond->params.arp_validate].modename,
+		       bond->params.arp_validate) + 1;
+}
+
+static ssize_t bonding_store_arp_validate(struct class_device *cd, const char *buf, size_t count)
+{
+	int new_value;
+	struct bonding *bond = to_bond(cd);
+
+	new_value = bond_parse_parm((char *)buf, arp_validate_tbl);
+	if (new_value < 0) {
+		printk(KERN_ERR DRV_NAME
+		       ": %s: Ignoring invalid arp_validate value %s\n",
+		       bond->dev->name, buf);
+		return -EINVAL;
+	}
+	if (new_value && (bond->params.mode != BOND_MODE_ACTIVEBACKUP)) {
+		printk(KERN_ERR DRV_NAME
+		       ": %s: arp_validate only supported in active-backup mode.\n",
+		       bond->dev->name);
+		return -EINVAL;
+	}
+	printk(KERN_INFO DRV_NAME ": %s: setting arp_validate to %s (%d).\n",
+	       bond->dev->name, arp_validate_tbl[new_value].modename,
+	       new_value);
+
+	if (!bond->params.arp_validate && new_value) {
+		bond_register_arp(bond);
+	} else if (bond->params.arp_validate && !new_value) {
+		bond_unregister_arp(bond);
+	}
+
+	bond->params.arp_validate = new_value;
+
+	return count;
+}
+
+static CLASS_DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate);
+
 /*
  * Show and set the arp timer interval.  There are two tricky bits
  * here.  First, if ARP monitoring is activated, then we must disable
@@ -914,6 +962,11 @@ static ssize_t bonding_store_miimon(struct class_device *cd, const char *buf, si
 			       "ARP monitoring. Disabling ARP monitoring...\n",
 			       bond->dev->name);
 			bond->params.arp_interval = 0;
+			if (bond->params.arp_validate) {
+				bond_unregister_arp(bond);
+				bond->params.arp_validate =
+					BOND_ARP_VALIDATE_NONE;
+			}
 			/* Kill ARP timer, else it brings bond's link down */
 			if (bond->mii_timer.function) {
 				printk(KERN_INFO DRV_NAME
@@ -1273,6 +1326,7 @@ static CLASS_DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, N
 static struct attribute *per_bond_attrs[] = {
 	&class_device_attr_slaves.attr,
 	&class_device_attr_mode.attr,
+	&class_device_attr_arp_validate.attr,
 	&class_device_attr_arp_interval.attr,
 	&class_device_attr_arp_ip_target.attr,
 	&class_device_attr_downdelay.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 17caafe58247..db16fee40a5f 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -22,8 +22,8 @@
 #include "bond_3ad.h"
 #include "bond_alb.h"
 
-#define DRV_VERSION	"3.0.3"
-#define DRV_RELDATE	"March 23, 2006"
+#define DRV_VERSION	"3.1.0-test"
+#define DRV_RELDATE	"September 9, 2006"
 #define DRV_NAME	"bonding"
 #define DRV_DESCRIPTION	"Ethernet Channel Bonding Driver"
 
@@ -126,6 +126,7 @@ struct bond_params {
 	int xmit_policy;
 	int miimon;
 	int arp_interval;
+	int arp_validate;
 	int use_carrier;
 	int updelay;
 	int downdelay;
@@ -151,6 +152,7 @@ struct slave {
 	struct slave *prev;
 	int    delay;
 	u32    jiffies;
+	u32    last_arp_rx;
 	s8     link;    /* one of BOND_LINK_XXXX */
 	s8     state;   /* one of BOND_STATE_XXXX */
 	u32    original_flags;
@@ -198,6 +200,7 @@ struct bonding {
 	struct   bond_params params;
 	struct   list_head vlan_list;
 	struct   vlan_group *vlgrp;
+	struct   packet_type arp_mon_pt;
 };
 
 /**
@@ -228,6 +231,25 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
 	return (struct bonding *)slave->dev->master->priv;
 }
 
+#define BOND_ARP_VALIDATE_NONE		0
+#define BOND_ARP_VALIDATE_ACTIVE	(1 << BOND_STATE_ACTIVE)
+#define BOND_ARP_VALIDATE_BACKUP	(1 << BOND_STATE_BACKUP)
+#define BOND_ARP_VALIDATE_ALL		(BOND_ARP_VALIDATE_ACTIVE | \
+					 BOND_ARP_VALIDATE_BACKUP)
+
+extern inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave)
+{
+	return bond->params.arp_validate & (1 << slave->state);
+}
+
+extern inline u32 slave_last_rx(struct bonding *bond, struct slave *slave)
+{
+	if (slave_do_arp_validate(bond, slave))
+		return slave->last_arp_rx;
+
+	return slave->dev->last_rx;
+}
+
 static inline void bond_set_slave_inactive_flags(struct slave *slave)
 {
 	struct bonding *bond = slave->dev->master->priv;
@@ -235,12 +257,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
 	    bond->params.mode != BOND_MODE_ALB)
 		slave->state = BOND_STATE_BACKUP;
 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
+	if (slave_do_arp_validate(bond, slave))
+		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
 }
 
 static inline void bond_set_slave_active_flags(struct slave *slave)
 {
 	slave->state = BOND_STATE_ACTIVE;
-	slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
+	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
 }
 
 static inline void bond_set_master_3ad_flags(struct bonding *bond)
@@ -284,6 +308,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl);
 const char *bond_mode_name(int mode);
 void bond_select_active_slave(struct bonding *bond);
 void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
+void bond_register_arp(struct bonding *);
+void bond_unregister_arp(struct bonding *);
 
 #endif /* _LINUX_BONDING_H */
 
diff --git a/include/linux/if.h b/include/linux/if.h
index a023ec1274fe..8018c2e22c0c 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -60,6 +60,7 @@
 #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
 #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
 #define IFF_BONDING	0x20		/* bonding master or slave	*/
+#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 43289127b458..afd80eff2725 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1016,7 +1016,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 }
 
 /* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW and alb non-mcast/bcast.
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
  */
 static inline int skb_bond_should_drop(struct sk_buff *skb)
 {
@@ -1025,6 +1026,10 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 
 	if (master &&
 	    (dev->priv_flags & IFF_SLAVE_INACTIVE)) {
+		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+		    skb->protocol == __constant_htons(ETH_P_ARP))
+			return 0;
+
 		if (master->priv_flags & IFF_MASTER_ALB) {
 			if (skb->pkt_type != PACKET_BROADCAST &&
 			    skb->pkt_type != PACKET_MULTICAST)
-- 
cgit v1.2.3


From ddd5d35a8f7c1924049e8b6877b3177c1787e6a3 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@insightbb.com>
Date: Thu, 10 Aug 2006 01:18:18 -0400
Subject: class_device_create(): make fmt argument 'const char *'

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c   | 3 ++-
 include/linux/device.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 46336f1b93b6..75057aaab809 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -679,7 +679,8 @@ int class_device_register(struct class_device *class_dev)
 struct class_device *class_device_create(struct class *cls,
 					 struct class_device *parent,
 					 dev_t devt,
-					 struct device *device, char *fmt, ...)
+					 struct device *device,
+					 const char *fmt, ...)
 {
 	va_list args;
 	struct class_device *class_dev = NULL;
diff --git a/include/linux/device.h b/include/linux/device.h
index 1e5f30da98bc..1fec28546525 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -277,7 +277,7 @@ extern struct class_device *class_device_create(struct class *cls,
 						struct class_device *parent,
 						dev_t devt,
 						struct device *device,
-						char *fmt, ...)
+						const char *fmt, ...)
 					__attribute__((format(printf,5,6)));
 extern void class_device_destroy(struct class *cls, dev_t devt);
 
-- 
cgit v1.2.3


From 5cbe5f8a5897470698222ac9924429056e57d84c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 10 Aug 2006 01:19:19 -0400
Subject: device_create(): make fmt argument 'const char *'

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 2 +-
 include/linux/device.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 04d089fd4f76..5d4b7e04471e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -583,7 +583,7 @@ static void device_create_release(struct device *dev)
  * been created with a call to class_create().
  */
 struct device *device_create(struct class *class, struct device *parent,
-			     dev_t devt, char *fmt, ...)
+			     dev_t devt, const char *fmt, ...)
 {
 	va_list args;
 	struct device *dev = NULL;
diff --git a/include/linux/device.h b/include/linux/device.h
index 1fec28546525..8d92013f9ce1 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -384,7 +384,7 @@ extern void device_reprobe(struct device *dev);
  * Easy functions for dynamically creating devices on the fly
  */
 extern struct device *device_create(struct class *cls, struct device *parent,
-				    dev_t devt, char *fmt, ...)
+				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
 
-- 
cgit v1.2.3


From ab7d7371acc68fa9130b079a9ba879191202035f Mon Sep 17 00:00:00 2001
From: Miguel Ojeda Sandonis <maxextreme@gmail.com>
Date: Wed, 13 Sep 2006 15:34:05 +0200
Subject: Driver core: add const to class_create

Adds const to class_create second parameter, because:

struct class {
	const char * name;

	/*...*/
}

Signed-off-by: Miguel Ojeda Sandonis <maxextreme@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c   | 2 +-
 include/linux/device.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 75057aaab809..e078bc21d52e 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -197,7 +197,7 @@ static int class_device_create_uevent(struct class_device *class_dev,
  * Note, the pointer created here is to be destroyed when finished by
  * making a call to class_destroy().
  */
-struct class *class_create(struct module *owner, char *name)
+struct class *class_create(struct module *owner, const char *name)
 {
 	struct class *cls;
 	int retval;
diff --git a/include/linux/device.h b/include/linux/device.h
index 8d92013f9ce1..8a648cd94fa9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -271,7 +271,7 @@ struct class_interface {
 extern int class_interface_register(struct class_interface *);
 extern void class_interface_unregister(struct class_interface *);
 
-extern struct class *class_create(struct module *owner, char *name);
+extern struct class *class_create(struct module *owner, const char *name);
 extern void class_destroy(struct class *cls);
 extern struct class_device *class_device_create(struct class *cls,
 						struct class_device *parent,
-- 
cgit v1.2.3


From 7c8265f51073bc8632a99de78d5fd19117ed78b7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Sat, 24 Jun 2006 14:50:29 -0700
Subject: Suspend infrastructure cleanup and extension

Allow devices to participate in the suspend process more intimately,
in particular, allow the final phase (with interrupts disabled) to
also be open to normal devices, not just system devices.

Also, allow classes to participate in device suspend.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/power/resume.c  |  28 ++++++++--
 drivers/base/power/suspend.c | 122 ++++++++++++++++++++++++++++++++-----------
 include/linux/device.h       |  11 +++-
 include/linux/pm.h           |   1 +
 kernel/power/main.c          |   4 ++
 5 files changed, 130 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/resume.c b/drivers/base/power/resume.c
index 826093ef4c7e..48e3d49d7b65 100644
--- a/drivers/base/power/resume.c
+++ b/drivers/base/power/resume.c
@@ -38,13 +38,35 @@ int resume_device(struct device * dev)
 		dev_dbg(dev,"resuming\n");
 		error = dev->bus->resume(dev);
 	}
+	if (dev->class && dev->class->resume) {
+		dev_dbg(dev,"class resume\n");
+		error = dev->class->resume(dev);
+	}
 	up(&dev->sem);
 	TRACE_RESUME(error);
 	return error;
 }
 
 
+static int resume_device_early(struct device * dev)
+{
+	int error = 0;
 
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
+	if (dev->bus && dev->bus->resume_early) {
+		dev_dbg(dev,"EARLY resume\n");
+		error = dev->bus->resume_early(dev);
+	}
+	TRACE_RESUME(error);
+	return error;
+}
+
+/*
+ * Resume the devices that have either not gone through
+ * the late suspend, or that did go through it but also
+ * went through the early resume
+ */
 void dpm_resume(void)
 {
 	down(&dpm_list_sem);
@@ -99,10 +121,8 @@ void dpm_power_up(void)
 		struct list_head * entry = dpm_off_irq.next;
 		struct device * dev = to_device(entry);
 
-		get_device(dev);
-		list_move_tail(entry, &dpm_active);
-		resume_device(dev);
-		put_device(dev);
+		list_move_tail(entry, &dpm_off);
+		resume_device_early(dev);
 	}
 }
 
diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c
index 69509e02f703..10e8032aee1a 100644
--- a/drivers/base/power/suspend.c
+++ b/drivers/base/power/suspend.c
@@ -65,7 +65,19 @@ int suspend_device(struct device * dev, pm_message_t state)
 
 	dev->power.prev_state = dev->power.power_state;
 
-	if (dev->bus && dev->bus->suspend && !dev->power.power_state.event) {
+	if (dev->class && dev->class->suspend && !dev->power.power_state.event) {
+		dev_dbg(dev, "class %s%s\n",
+			suspend_verb(state.event),
+			((state.event == PM_EVENT_SUSPEND)
+					&& device_may_wakeup(dev))
+				? ", may wakeup"
+				: ""
+			);
+		error = dev->class->suspend(dev, state);
+		suspend_report_result(dev->class->suspend, error);
+	}
+
+	if (!error && dev->bus && dev->bus->suspend && !dev->power.power_state.event) {
 		dev_dbg(dev, "%s%s\n",
 			suspend_verb(state.event),
 			((state.event == PM_EVENT_SUSPEND)
@@ -81,15 +93,74 @@ int suspend_device(struct device * dev, pm_message_t state)
 }
 
 
+/*
+ * This is called with interrupts off, only a single CPU
+ * running. We can't do down() on a semaphore (and we don't
+ * need the protection)
+ */
+static int suspend_device_late(struct device *dev, pm_message_t state)
+{
+	int error = 0;
+
+	if (dev->power.power_state.event) {
+		dev_dbg(dev, "PM: suspend_late %d-->%d\n",
+			dev->power.power_state.event, state.event);
+	}
+
+	if (dev->bus && dev->bus->suspend_late && !dev->power.power_state.event) {
+		dev_dbg(dev, "LATE %s%s\n",
+			suspend_verb(state.event),
+			((state.event == PM_EVENT_SUSPEND)
+					&& device_may_wakeup(dev))
+				? ", may wakeup"
+				: ""
+			);
+		error = dev->bus->suspend_late(dev, state);
+		suspend_report_result(dev->bus->suspend_late, error);
+	}
+	return error;
+}
+
+/**
+ *	device_prepare_suspend - save state and prepare to suspend
+ *
+ *	NOTE! Devices cannot detach at this point - not only do we
+ *	hold the device list semaphores over the whole prepare, but
+ *	the whole point is to do non-invasive preparatory work, not
+ *	the actual suspend.
+ */
+int device_prepare_suspend(pm_message_t state)
+{
+	int error = 0;
+	struct device * dev;
+
+	down(&dpm_sem);
+	down(&dpm_list_sem);
+	list_for_each_entry_reverse(dev, &dpm_active, power.entry) {
+		if (!dev->bus || !dev->bus->suspend_prepare)
+			continue;
+		error = dev->bus->suspend_prepare(dev, state);
+		if (error)
+			break;
+	}
+	up(&dpm_list_sem);
+	up(&dpm_sem);
+	return error;
+}
+
 /**
  *	device_suspend - Save state and stop all devices in system.
  *	@state:		Power state to put each device in.
  *
  *	Walk the dpm_active list, call ->suspend() for each device, and move
- *	it to dpm_off.
- *	Check the return value for each. If it returns 0, then we move the
- *	the device to the dpm_off list. If it returns -EAGAIN, we move it to
- *	the dpm_off_irq list. If we get a different error, try and back out.
+ *	it to the dpm_off list.
+ *
+ *	(For historical reasons, if it returns -EAGAIN, that used to mean
+ *	that the device would be called again with interrupts disabled.
+ *	These days, we use the "suspend_late()" callback for that, so we
+ *	print a warning and consider it an error).
+ *
+ *	If we get a different error, try and back out.
  *
  *	If we hit a failure with any of the devices, call device_resume()
  *	above to bring the suspended devices back to life.
@@ -115,39 +186,27 @@ int device_suspend(pm_message_t state)
 
 		/* Check if the device got removed */
 		if (!list_empty(&dev->power.entry)) {
-			/* Move it to the dpm_off or dpm_off_irq list */
+			/* Move it to the dpm_off list */
 			if (!error)
 				list_move(&dev->power.entry, &dpm_off);
-			else if (error == -EAGAIN) {
-				list_move(&dev->power.entry, &dpm_off_irq);
-				error = 0;
-			}
 		}
 		if (error)
 			printk(KERN_ERR "Could not suspend device %s: "
-				"error %d\n", kobject_name(&dev->kobj), error);
+				"error %d%s\n",
+				kobject_name(&dev->kobj), error,
+				error == -EAGAIN ? " (please convert to suspend_late)" : "");
 		put_device(dev);
 	}
 	up(&dpm_list_sem);
-	if (error) {
-		/* we failed... before resuming, bring back devices from
-		 * dpm_off_irq list back to main dpm_off list, we do want
-		 * to call resume() on them, in case they partially suspended
-		 * despite returning -EAGAIN
-		 */
-		while (!list_empty(&dpm_off_irq)) {
-			struct list_head * entry = dpm_off_irq.next;
-			list_move(entry, &dpm_off);
-		}
+	if (error)
 		dpm_resume();
-	}
+
 	up(&dpm_sem);
 	return error;
 }
 
 EXPORT_SYMBOL_GPL(device_suspend);
 
-
 /**
  *	device_power_down - Shut down special devices.
  *	@state:		Power state to enter.
@@ -162,14 +221,17 @@ int device_power_down(pm_message_t state)
 	int error = 0;
 	struct device * dev;
 
-	list_for_each_entry_reverse(dev, &dpm_off_irq, power.entry) {
-		if ((error = suspend_device(dev, state)))
-			break;
+	while (!list_empty(&dpm_off)) {
+		struct list_head * entry = dpm_off.prev;
+
+		dev = to_device(entry);
+		error = suspend_device_late(dev, state);
+		if (error)
+			goto Error;
+		list_move(&dev->power.entry, &dpm_off_irq);
 	}
-	if (error)
-		goto Error;
-	if ((error = sysdev_suspend(state)))
-		goto Error;
+
+	error = sysdev_suspend(state);
  Done:
 	return error;
  Error:
diff --git a/include/linux/device.h b/include/linux/device.h
index 8a648cd94fa9..b40be6fca6fa 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -51,8 +51,12 @@ struct bus_type {
 	int		(*probe)(struct device * dev);
 	int		(*remove)(struct device * dev);
 	void		(*shutdown)(struct device * dev);
-	int		(*suspend)(struct device * dev, pm_message_t state);
-	int		(*resume)(struct device * dev);
+
+	int (*suspend_prepare)(struct device * dev, pm_message_t state);
+	int (*suspend)(struct device * dev, pm_message_t state);
+	int (*suspend_late)(struct device * dev, pm_message_t state);
+	int (*resume_early)(struct device * dev);
+	int (*resume)(struct device * dev);
 };
 
 extern int bus_register(struct bus_type * bus);
@@ -154,6 +158,9 @@ struct class {
 
 	void	(*release)(struct class_device *dev);
 	void	(*class_release)(struct class *class);
+
+	int	(*suspend)(struct device *, pm_message_t state);
+	int	(*resume)(struct device *);
 };
 
 extern int class_register(struct class *);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 658c1b93d5bb..096fb6f754cf 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -190,6 +190,7 @@ extern void device_resume(void);
 extern suspend_disk_method_t pm_disk_mode;
 
 extern int device_suspend(pm_message_t state);
+extern int device_prepare_suspend(pm_message_t state);
 
 #define device_set_wakeup_enable(dev,val) \
 	((dev)->power.should_wakeup = !!(val))
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6d295c776794..0c3ed6ac938e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -57,6 +57,10 @@ static int suspend_prepare(suspend_state_t state)
 	if (!pm_ops || !pm_ops->enter)
 		return -EPERM;
 
+	error = device_prepare_suspend(PMSG_SUSPEND);
+	if (error)
+		return error;
+
 	pm_prepare_console();
 
 	disable_nonboot_cpus();
-- 
cgit v1.2.3


From cbd69dbbf1adfce6e048f15afc8629901ca9dae5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Sat, 24 Jun 2006 14:50:29 -0700
Subject: Suspend changes for PCI core

Changes the PCI core to use the new suspend infrastructure changes.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/pci-driver.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/pci.h      |  3 +++
 2 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 474e9cd0e9e4..9e7d6ceb3805 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -264,6 +264,19 @@ static int pci_device_remove(struct device * dev)
 	return 0;
 }
 
+static int pci_device_suspend_prepare(struct device * dev, pm_message_t state)
+{
+	struct pci_dev * pci_dev = to_pci_dev(dev);
+	struct pci_driver * drv = pci_dev->driver;
+	int i = 0;
+
+	if (drv && drv->suspend_prepare) {
+		i = drv->suspend_prepare(pci_dev, state);
+		suspend_report_result(drv->suspend_prepare, i);
+	}
+	return i;
+}
+
 static int pci_device_suspend(struct device * dev, pm_message_t state)
 {
 	struct pci_dev * pci_dev = to_pci_dev(dev);
@@ -279,6 +292,18 @@ static int pci_device_suspend(struct device * dev, pm_message_t state)
 	return i;
 }
 
+static int pci_device_suspend_late(struct device * dev, pm_message_t state)
+{
+	struct pci_dev * pci_dev = to_pci_dev(dev);
+	struct pci_driver * drv = pci_dev->driver;
+	int i = 0;
+
+	if (drv && drv->suspend_late) {
+		i = drv->suspend_late(pci_dev, state);
+		suspend_report_result(drv->suspend_late, i);
+	}
+	return i;
+}
 
 /*
  * Default resume method for devices that have no driver provided resume,
@@ -313,6 +338,17 @@ static int pci_device_resume(struct device * dev)
 	return error;
 }
 
+static int pci_device_resume_early(struct device * dev)
+{
+	int error = 0;
+	struct pci_dev * pci_dev = to_pci_dev(dev);
+	struct pci_driver * drv = pci_dev->driver;
+
+	if (drv && drv->resume_early)
+		error = drv->resume_early(pci_dev);
+	return error;
+}
+
 static void pci_device_shutdown(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
@@ -508,9 +544,12 @@ struct bus_type pci_bus_type = {
 	.uevent		= pci_uevent,
 	.probe		= pci_device_probe,
 	.remove		= pci_device_remove,
+	.suspend_prepare= pci_device_suspend_prepare,
 	.suspend	= pci_device_suspend,
-	.shutdown	= pci_device_shutdown,
+	.suspend_late	= pci_device_suspend_late,
+	.resume_early	= pci_device_resume_early,
 	.resume		= pci_device_resume,
+	.shutdown	= pci_device_shutdown,
 	.dev_attrs	= pci_dev_attrs,
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8565b81d7fbc..4b2e629467c7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -345,7 +345,10 @@ struct pci_driver {
 	const struct pci_device_id *id_table;	/* must be non-NULL for probe to be called */
 	int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);	/* New device inserted */
 	void (*remove) (struct pci_dev *dev);	/* Device removed (NULL if not a hot-plug capable driver) */
+	int  (*suspend_prepare) (struct pci_dev *dev, pm_message_t state);
 	int  (*suspend) (struct pci_dev *dev, pm_message_t state);	/* Device suspended */
+	int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
+	int  (*resume_early) (struct pci_dev *dev);
 	int  (*resume) (struct pci_dev *dev);	                /* Device woken up */
 	int  (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable);   /* Enable wake event */
 	void (*shutdown) (struct pci_dev *dev);
-- 
cgit v1.2.3


From 82bb67f2c1f9ef438c56ac24e7dca027fe7289b5 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Mon, 14 Aug 2006 23:11:04 -0700
Subject: PM: define PM_EVENT_PRETHAW

This adds a new pm_message_t event type to use when preparing to restore a
swsusp snapshot.  Devices that have been initialized by Linux after resume
(rather than left in power-up-reset state) may need to be reset; this new
event type give drivers the chance to do that.

The drivers that will care about this are those which understand more hardware
states than just "on" and "reset", relying on hardware state during resume()
methods to be either the state left by the preceding suspend(), or a
power-lost reset.  The best current example of this class of drivers are USB
host controller drivers, which currently do not work through swsusp when
they're statically linked.

When the swsusp freeze/thaw mechanism kicks in, a troublesome third state
could exist: one state set up by a different kernel instance, before a
snapshot image is resumed.  This mechanism lets drivers prevent that state.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pm.h | 62 +++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 096fb6f754cf..6b27e07aef19 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -142,29 +142,61 @@ typedef struct pm_message {
 } pm_message_t;
 
 /*
- * There are 4 important states driver can be in:
- * ON     -- driver is working
- * FREEZE -- stop operations and apply whatever policy is applicable to a
- *           suspended driver of that class, freeze queues for block like IDE
- *           does, drop packets for ethernet, etc... stop DMA engine too etc...
- *           so a consistent image can be saved; but do not power any hardware
- *           down.
- * SUSPEND - like FREEZE, but hardware is doing as much powersaving as
- *           possible. Roughly pci D3.
+ * Several driver power state transitions are externally visible, affecting
+ * the state of pending I/O queues and (for drivers that touch hardware)
+ * interrupts, wakeups, DMA, and other hardware state.  There may also be
+ * internal transitions to various low power modes, which are transparent
+ * to the rest of the driver stack (such as a driver that's ON gating off
+ * clocks which are not in active use).
  *
- * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3
- * (SUSPEND).  We'll need to fix the drivers. So yes, putting 3 to all different
- * defines is intentional, and will go away as soon as drivers are fixed.  Also
- * note that typedef is neccessary, we'll probably want to switch to
- *   typedef struct pm_message_t { int event; int flags; } pm_message_t
- * or something similar soon.
+ * One transition is triggered by resume(), after a suspend() call; the
+ * message is implicit:
+ *
+ * ON		Driver starts working again, responding to hardware events
+ * 		and software requests.  The hardware may have gone through
+ * 		a power-off reset, or it may have maintained state from the
+ * 		previous suspend() which the driver will rely on while
+ * 		resuming.  On most platforms, there are no restrictions on
+ * 		availability of resources like clocks during resume().
+ *
+ * Other transitions are triggered by messages sent using suspend().  All
+ * these transitions quiesce the driver, so that I/O queues are inactive.
+ * That commonly entails turning off IRQs and DMA; there may be rules
+ * about how to quiesce that are specific to the bus or the device's type.
+ * (For example, network drivers mark the link state.)  Other details may
+ * differ according to the message:
+ *
+ * SUSPEND	Quiesce, enter a low power device state appropriate for
+ * 		the upcoming system state (such as PCI_D3hot), and enable
+ * 		wakeup events as appropriate.
+ *
+ * FREEZE	Quiesce operations so that a consistent image can be saved;
+ * 		but do NOT otherwise enter a low power device state, and do
+ * 		NOT emit system wakeup events.
+ *
+ * PRETHAW	Quiesce as if for FREEZE; additionally, prepare for restoring
+ * 		the system from a snapshot taken after an earlier FREEZE.
+ * 		Some drivers will need to reset their hardware state instead
+ * 		of preserving it, to ensure that it's never mistaken for the
+ * 		state which that earlier snapshot had set up.
+ *
+ * A minimally power-aware driver treats all messages as SUSPEND, fully
+ * reinitializes its device during resume() -- whether or not it was reset
+ * during the suspend/resume cycle -- and can't issue wakeup events.
+ *
+ * More power-aware drivers may also use low power states at runtime as
+ * well as during system sleep states like PM_SUSPEND_STANDBY.  They may
+ * be able to use wakeup events to exit from runtime low-power states,
+ * or from system low-power states such as standby or suspend-to-RAM.
  */
 
 #define PM_EVENT_ON 0
 #define PM_EVENT_FREEZE 1
 #define PM_EVENT_SUSPEND 2
+#define PM_EVENT_PRETHAW 3
 
 #define PMSG_FREEZE	((struct pm_message){ .event = PM_EVENT_FREEZE, })
+#define PMSG_PRETHAW	((struct pm_message){ .event = PM_EVENT_PRETHAW, })
 #define PMSG_SUSPEND	((struct pm_message){ .event = PM_EVENT_SUSPEND, })
 #define PMSG_ON		((struct pm_message){ .event = PM_EVENT_ON, })
 
-- 
cgit v1.2.3


From 1d3a82af45428c5e8deaa119cdeb79611ae46371 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 30 Aug 2006 14:09:47 -0700
Subject: PM: no suspend_prepare() phase

Remove the new suspend_prepare() phase.  It doesn't seem very usable,
has never been tested, doesn't address fault cleanup, and would need
a sibling resume_complete(); plus there are no real use cases.  It
could be restored later if those issues get resolved.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/power/suspend.c | 27 ---------------------------
 drivers/pci/pci-driver.c     | 14 --------------
 include/linux/device.h       |  1 -
 include/linux/pci.h          |  1 -
 kernel/power/main.c          |  4 ----
 5 files changed, 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c
index e86db83746ac..6453c25103d2 100644
--- a/drivers/base/power/suspend.c
+++ b/drivers/base/power/suspend.c
@@ -117,33 +117,6 @@ static int suspend_device_late(struct device *dev, pm_message_t state)
 	return error;
 }
 
-/**
- *	device_prepare_suspend - save state and prepare to suspend
- *
- *	NOTE! Devices cannot detach at this point - not only do we
- *	hold the device list semaphores over the whole prepare, but
- *	the whole point is to do non-invasive preparatory work, not
- *	the actual suspend.
- */
-int device_prepare_suspend(pm_message_t state)
-{
-	int error = 0;
-	struct device * dev;
-
-	down(&dpm_sem);
-	down(&dpm_list_sem);
-	list_for_each_entry_reverse(dev, &dpm_active, power.entry) {
-		if (!dev->bus || !dev->bus->suspend_prepare)
-			continue;
-		error = dev->bus->suspend_prepare(dev, state);
-		if (error)
-			break;
-	}
-	up(&dpm_list_sem);
-	up(&dpm_sem);
-	return error;
-}
-
 /**
  *	device_suspend - Save state and stop all devices in system.
  *	@state:		Power state to put each device in.
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 9e7d6ceb3805..8948ac9ab681 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -264,19 +264,6 @@ static int pci_device_remove(struct device * dev)
 	return 0;
 }
 
-static int pci_device_suspend_prepare(struct device * dev, pm_message_t state)
-{
-	struct pci_dev * pci_dev = to_pci_dev(dev);
-	struct pci_driver * drv = pci_dev->driver;
-	int i = 0;
-
-	if (drv && drv->suspend_prepare) {
-		i = drv->suspend_prepare(pci_dev, state);
-		suspend_report_result(drv->suspend_prepare, i);
-	}
-	return i;
-}
-
 static int pci_device_suspend(struct device * dev, pm_message_t state)
 {
 	struct pci_dev * pci_dev = to_pci_dev(dev);
@@ -544,7 +531,6 @@ struct bus_type pci_bus_type = {
 	.uevent		= pci_uevent,
 	.probe		= pci_device_probe,
 	.remove		= pci_device_remove,
-	.suspend_prepare= pci_device_suspend_prepare,
 	.suspend	= pci_device_suspend,
 	.suspend_late	= pci_device_suspend_late,
 	.resume_early	= pci_device_resume_early,
diff --git a/include/linux/device.h b/include/linux/device.h
index b40be6fca6fa..b3646462d6dc 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -52,7 +52,6 @@ struct bus_type {
 	int		(*remove)(struct device * dev);
 	void		(*shutdown)(struct device * dev);
 
-	int (*suspend_prepare)(struct device * dev, pm_message_t state);
 	int (*suspend)(struct device * dev, pm_message_t state);
 	int (*suspend_late)(struct device * dev, pm_message_t state);
 	int (*resume_early)(struct device * dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4b2e629467c7..9514bbfe96e2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -345,7 +345,6 @@ struct pci_driver {
 	const struct pci_device_id *id_table;	/* must be non-NULL for probe to be called */
 	int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);	/* New device inserted */
 	void (*remove) (struct pci_dev *dev);	/* Device removed (NULL if not a hot-plug capable driver) */
-	int  (*suspend_prepare) (struct pci_dev *dev, pm_message_t state);
 	int  (*suspend) (struct pci_dev *dev, pm_message_t state);	/* Device suspended */
 	int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
 	int  (*resume_early) (struct pci_dev *dev);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 0c3ed6ac938e..6d295c776794 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -57,10 +57,6 @@ static int suspend_prepare(suspend_state_t state)
 	if (!pm_ops || !pm_ops->enter)
 		return -EPERM;
 
-	error = device_prepare_suspend(PMSG_SUSPEND);
-	if (error)
-		return error;
-
 	pm_prepare_console();
 
 	disable_nonboot_cpus();
-- 
cgit v1.2.3


From 386415d88b1ae50304f9c61aa3e0db082fa90428 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Sun, 3 Sep 2006 13:16:45 -0700
Subject: PM: platform_bus and late_suspend/early_resume

Teach platform_bus about the new suspend_late/resume_early PM calls,
issued with IRQs off.  Do we really need sysdev and friends any more,
or can janitors start switching its users over to platform_device so
we can do a minor code-ectomy?

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 30 ++++++++++++++++++++++++++++--
 include/linux/platform_device.h |  2 ++
 2 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 2b8755db76c6..940ce41f1887 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -505,12 +505,36 @@ static int platform_match(struct device * dev, struct device_driver * drv)
 	return (strncmp(pdev->name, drv->name, BUS_ID_SIZE) == 0);
 }
 
-static int platform_suspend(struct device * dev, pm_message_t state)
+static int platform_suspend(struct device *dev, pm_message_t mesg)
 {
 	int ret = 0;
 
 	if (dev->driver && dev->driver->suspend)
-		ret = dev->driver->suspend(dev, state);
+		ret = dev->driver->suspend(dev, mesg);
+
+	return ret;
+}
+
+static int platform_suspend_late(struct device *dev, pm_message_t mesg)
+{
+	struct platform_driver *drv = to_platform_driver(dev->driver);
+	struct platform_device *pdev = container_of(dev, struct platform_device, dev);
+	int ret = 0;
+
+	if (dev->driver && drv->suspend_late)
+		ret = drv->suspend_late(pdev, mesg);
+
+	return ret;
+}
+
+static int platform_resume_early(struct device *dev)
+{
+	struct platform_driver *drv = to_platform_driver(dev->driver);
+	struct platform_device *pdev = container_of(dev, struct platform_device, dev);
+	int ret = 0;
+
+	if (dev->driver && drv->resume_early)
+		ret = drv->resume_early(pdev);
 
 	return ret;
 }
@@ -531,6 +555,8 @@ struct bus_type platform_bus_type = {
 	.match		= platform_match,
 	.uevent		= platform_uevent,
 	.suspend	= platform_suspend,
+	.suspend_late	= platform_suspend_late,
+	.resume_early	= platform_resume_early,
 	.resume		= platform_resume,
 };
 EXPORT_SYMBOL_GPL(platform_bus_type);
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 782090c68932..29cd6dee13db 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -49,6 +49,8 @@ struct platform_driver {
 	int (*remove)(struct platform_device *);
 	void (*shutdown)(struct platform_device *);
 	int (*suspend)(struct platform_device *, pm_message_t state);
+	int (*suspend_late)(struct platform_device *, pm_message_t state);
+	int (*resume_early)(struct platform_device *);
 	int (*resume)(struct platform_device *);
 	struct device_driver driver;
 };
-- 
cgit v1.2.3


From de0ff00d723fd821d372496e2c084805644aa5e1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 27 Jun 2006 00:06:09 -0700
Subject: Driver core: add groups support to struct device

This is needed for the network class devices in order to be able to
convert over to use struct device.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 34 ++++++++++++++++++++++++++++++++++
 include/linux/device.h |  1 +
 2 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 5d4b7e04471e..641c0c42bb48 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -197,6 +197,35 @@ static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
+static int device_add_groups(struct device *dev)
+{
+	int i;
+	int error = 0;
+
+	if (dev->groups) {
+		for (i = 0; dev->groups[i]; i++) {
+			error = sysfs_create_group(&dev->kobj, dev->groups[i]);
+			if (error) {
+				while (--i >= 0)
+					sysfs_remove_group(&dev->kobj, dev->groups[i]);
+				goto out;
+			}
+		}
+	}
+out:
+	return error;
+}
+
+static void device_remove_groups(struct device *dev)
+{
+	int i;
+	if (dev->groups) {
+		for (i = 0; dev->groups[i]; i++) {
+			sysfs_remove_group(&dev->kobj, dev->groups[i]);
+		}
+	}
+}
+
 static ssize_t show_dev(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
@@ -350,6 +379,8 @@ int device_add(struct device *dev)
 		sysfs_create_link(&dev->parent->kobj, &dev->kobj, class_name);
 	}
 
+	if ((error = device_add_groups(dev)))
+		goto GroupError;
 	if ((error = device_pm_add(dev)))
 		goto PMError;
 	if ((error = bus_add_device(dev)))
@@ -376,6 +407,8 @@ int device_add(struct device *dev)
  BusError:
 	device_pm_remove(dev);
  PMError:
+	device_remove_groups(dev);
+ GroupError:
 	if (dev->devt_attr) {
 		device_remove_file(dev, dev->devt_attr);
 		kfree(dev->devt_attr);
@@ -470,6 +503,7 @@ void device_del(struct device * dev)
 		up(&dev->class->sem);
 	}
 	device_remove_file(dev, &dev->uevent_attr);
+	device_remove_groups(dev);
 
 	/* Notify the platform of the removal, in case they
 	 * need to do anything...
diff --git a/include/linux/device.h b/include/linux/device.h
index b3646462d6dc..994d3ebd53f4 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -344,6 +344,7 @@ struct device {
 	struct list_head	node;
 	struct class		*class;		/* optional*/
 	dev_t			devt;		/* dev_t, creates the sysfs "dev" */
+	struct attribute_group	**groups;	/* optional groups */
 
 	void	(*release)(struct device * dev);
 };
-- 
cgit v1.2.3


From 2620efef7029bb040430f50f0fc148f2d5e002ad Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 28 Jun 2006 16:19:58 -0700
Subject: Driver core: add ability for classes to handle devices properly

This adds two new callbacks to the class structure:
	int	(*dev_uevent)(struct device *dev, char **envp, int num_envp,
			char *buffer, int buffer_size);
	void	(*dev_release)(struct device *dev);

And one pointer:
	struct device_attribute		* dev_attrs;

which all corrispond with the same thing as the "normal" class devices
do, yet this is for when a struct device is bound to a class.

Someday soon, struct class_device will go away, and then the other
fields in this structure can be removed too.  But this is necessary in
order to get the transition to work properly.

Tested out on a network core patch that converted it to use struct
device instead of struct class_device.


Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/device.h |  4 ++++
 2 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 5c91d0d81e78..f1228f25efe0 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -94,6 +94,8 @@ static void device_release(struct kobject * kobj)
 
 	if (dev->release)
 		dev->release(dev);
+	else if (dev->class && dev->class->dev_release)
+		dev->class->dev_release(dev);
 	else {
 		printk(KERN_ERR "Device '%s' does not have a release() function, "
 			"it is broken and must be fixed.\n",
@@ -183,6 +185,15 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, char **envp,
 		}
 	}
 
+	if (dev->class && dev->class->dev_uevent) {
+		/* have the class specific function add its stuff */
+		retval = dev->class->dev_uevent(dev, envp, num_envp, buffer, buffer_size);
+			if (retval) {
+				pr_debug("%s - dev_uevent() returned %d\n",
+					 __FUNCTION__, retval);
+		}
+	}
+
 	return retval;
 }
 
@@ -228,6 +239,43 @@ static void device_remove_groups(struct device *dev)
 	}
 }
 
+static int device_add_attrs(struct device *dev)
+{
+	struct class *class = dev->class;
+	int error = 0;
+	int i;
+
+	if (!class)
+		return 0;
+
+	if (class->dev_attrs) {
+		for (i = 0; attr_name(class->dev_attrs[i]); i++) {
+			error = device_create_file(dev, &class->dev_attrs[i]);
+			if (error)
+				break;
+		}
+	}
+	if (error)
+		while (--i >= 0)
+			device_remove_file(dev, &class->dev_attrs[i]);
+	return error;
+}
+
+static void device_remove_attrs(struct device *dev)
+{
+	struct class *class = dev->class;
+	int i;
+
+	if (!class)
+		return;
+
+	if (class->dev_attrs) {
+		for (i = 0; attr_name(class->dev_attrs[i]); i++)
+			device_remove_file(dev, &class->dev_attrs[i]);
+	}
+}
+
+
 static ssize_t show_dev(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
@@ -382,6 +430,8 @@ int device_add(struct device *dev)
 		}
 	}
 
+	if ((error = device_add_attrs(dev)))
+		goto AttrsError;
 	if ((error = device_add_groups(dev)))
 		goto GroupError;
 	if ((error = device_pm_add(dev)))
@@ -412,6 +462,8 @@ int device_add(struct device *dev)
  PMError:
 	device_remove_groups(dev);
  GroupError:
+ 	device_remove_attrs(dev);
+ AttrsError:
 	if (dev->devt_attr) {
 		device_remove_file(dev, dev->devt_attr);
 		kfree(dev->devt_attr);
@@ -509,6 +561,7 @@ void device_del(struct device * dev)
 	}
 	device_remove_file(dev, &dev->uevent_attr);
 	device_remove_groups(dev);
+	device_remove_attrs(dev);
 
 	/* Notify the platform of the removal, in case they
 	 * need to do anything...
diff --git a/include/linux/device.h b/include/linux/device.h
index 994d3ebd53f4..3122bd25ce42 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -151,12 +151,16 @@ struct class {
 
 	struct class_attribute		* class_attrs;
 	struct class_device_attribute	* class_dev_attrs;
+	struct device_attribute		* dev_attrs;
 
 	int	(*uevent)(struct class_device *dev, char **envp,
 			   int num_envp, char *buffer, int buffer_size);
+	int	(*dev_uevent)(struct device *dev, char **envp, int num_envp,
+				char *buffer, int buffer_size);
 
 	void	(*release)(struct class_device *dev);
 	void	(*class_release)(struct class *class);
+	void	(*dev_release)(struct device *dev);
 
 	int	(*suspend)(struct device *, pm_message_t state);
 	int	(*resume)(struct device *);
-- 
cgit v1.2.3


From a2de48cace5d0993da6cfa28b276ae724dc3569b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 3 Jul 2006 14:31:12 -0700
Subject: Driver core: add device_rename function

The network layer needs this to convert to using struct device instead
of a struct class_device.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/device.h |  1 +
 2 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index f1228f25efe0..0db47561331e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -736,3 +736,58 @@ void device_destroy(struct class *class, dev_t devt)
 		device_unregister(dev);
 }
 EXPORT_SYMBOL_GPL(device_destroy);
+
+/**
+ * device_rename - renames a device
+ * @dev: the pointer to the struct device to be renamed
+ * @new_name: the new name of the device
+ */
+int device_rename(struct device *dev, char *new_name)
+{
+	char *old_class_name = NULL;
+	char *new_class_name = NULL;
+	char *old_symlink_name = NULL;
+	int error;
+
+	dev = get_device(dev);
+	if (!dev)
+		return -EINVAL;
+
+	pr_debug("DEVICE: renaming '%s' to '%s'\n", dev->bus_id, new_name);
+
+	if ((dev->class) && (dev->parent))
+		old_class_name = make_class_name(dev->class->name, &dev->kobj);
+
+	if (dev->class) {
+		old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL);
+		if (!old_symlink_name)
+			return -ENOMEM;
+		strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE);
+	}
+
+	strlcpy(dev->bus_id, new_name, BUS_ID_SIZE);
+
+	error = kobject_rename(&dev->kobj, new_name);
+
+	if (old_class_name) {
+		new_class_name = make_class_name(dev->class->name, &dev->kobj);
+		if (new_class_name) {
+			sysfs_create_link(&dev->parent->kobj, &dev->kobj,
+					  new_class_name);
+			sysfs_remove_link(&dev->parent->kobj, old_class_name);
+		}
+	}
+	if (dev->class) {
+		sysfs_remove_link(&dev->class->subsys.kset.kobj,
+				  old_symlink_name);
+		sysfs_create_link(&dev->class->subsys.kset.kobj, &dev->kobj,
+				  dev->bus_id);
+	}
+	put_device(dev);
+
+	kfree(old_class_name);
+	kfree(new_class_name);
+	kfree(old_symlink_name);
+
+	return error;
+}
diff --git a/include/linux/device.h b/include/linux/device.h
index 3122bd25ce42..3400e09bf458 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -380,6 +380,7 @@ extern int device_add(struct device * dev);
 extern void device_del(struct device * dev);
 extern int device_for_each_child(struct device *, void *,
 		     int (*fn)(struct device *, void *));
+extern int device_rename(struct device *dev, char *new_name);
 
 /*
  * Manual binding of a device to driver. See drivers/base/bus.c
-- 
cgit v1.2.3


From c205ef4880273d2de4ee5388d4e52227ff688cc4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 7 Aug 2006 22:19:37 -0700
Subject: Driver core: create devices/virtual/ tree

This change creates a devices/virtual/CLASS_NAME tree for struct devices
that belong to a class, yet do not have a "real" struct device for a
parent.  It automatically creates the directories on the fly as needed.


Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c   | 17 +++++++++++++++++
 drivers/base/core.c    |  7 +++++++
 include/linux/device.h |  5 ++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index e078bc21d52e..cbdf47c0c60d 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -19,6 +19,8 @@
 #include <linux/slab.h>
 #include "base.h"
 
+extern struct subsystem devices_subsys;
+
 #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr)
 #define to_class(obj) container_of(obj, struct class, subsys.kset.kobj)
 
@@ -878,7 +880,22 @@ void class_interface_unregister(struct class_interface *class_intf)
 	class_put(parent);
 }
 
+int virtual_device_parent(struct device *dev)
+{
+	if (!dev->class)
+		return -ENODEV;
+
+	if (!dev->class->virtual_dir) {
+		static struct kobject *virtual_dir = NULL;
 
+		if (!virtual_dir)
+			virtual_dir = kobject_add_dir(&devices_subsys.kset.kobj, "virtual");
+		dev->class->virtual_dir = kobject_add_dir(virtual_dir, dev->class->name);
+	}
+
+	dev->kobj.parent = dev->class->virtual_dir;
+	return 0;
+}
 
 int __init classes_init(void)
 {
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 0db47561331e..e21a65fc043e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -378,6 +378,13 @@ int device_add(struct device *dev)
 	if (!dev || !strlen(dev->bus_id))
 		goto Error;
 
+	/* if this is a class device, and has no parent, create one */
+	if ((dev->class) && (dev->parent == NULL)) {
+		error = virtual_device_parent(dev);
+		if (error)
+			goto Error;
+	}
+
 	parent = get_device(dev->parent);
 
 	pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id);
diff --git a/include/linux/device.h b/include/linux/device.h
index 3400e09bf458..bbb0d6b5d232 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -149,6 +149,8 @@ struct class {
 	struct list_head	interfaces;
 	struct semaphore	sem;	/* locks both the children and interfaces lists */
 
+	struct kobject		*virtual_dir;
+
 	struct class_attribute		* class_attrs;
 	struct class_device_attribute	* class_dev_attrs;
 	struct device_attribute		* dev_attrs;
@@ -291,7 +293,6 @@ extern struct class_device *class_device_create(struct class *cls,
 					__attribute__((format(printf,5,6)));
 extern void class_device_destroy(struct class *cls, dev_t devt);
 
-
 /* interface for exporting device attributes */
 struct device_attribute {
 	struct attribute	attr;
@@ -400,6 +401,8 @@ extern struct device *device_create(struct class *cls, struct device *parent,
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
 
+extern int virtual_device_parent(struct device *dev);
+
 /*
  * Platform "fixup" functions - allow the platform to have their say
  * about devices and actions that the general device layer doesn't
-- 
cgit v1.2.3


From c47ed219ba81632595e9f02e27318151fec16c9e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 13 Sep 2006 15:34:05 +0200
Subject: Class: add support for class interfaces for devices

When moving class_device usage over to device, we need to handle
class_interfaces properly with devices.  This patch adds that support.


Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c   | 10 ++++++++++
 drivers/base/core.c    | 14 +++++++++++++-
 include/linux/device.h |  2 ++
 3 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index cbdf47c0c60d..b06b0e2b9c62 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -842,6 +842,7 @@ int class_interface_register(struct class_interface *class_intf)
 {
 	struct class *parent;
 	struct class_device *class_dev;
+	struct device *dev;
 
 	if (!class_intf || !class_intf->class)
 		return -ENODEV;
@@ -856,6 +857,10 @@ int class_interface_register(struct class_interface *class_intf)
 		list_for_each_entry(class_dev, &parent->children, node)
 			class_intf->add(class_dev, class_intf);
 	}
+	if (class_intf->add_dev) {
+		list_for_each_entry(dev, &parent->devices, node)
+			class_intf->add_dev(dev, class_intf);
+	}
 	up(&parent->sem);
 
 	return 0;
@@ -865,6 +870,7 @@ void class_interface_unregister(struct class_interface *class_intf)
 {
 	struct class * parent = class_intf->class;
 	struct class_device *class_dev;
+	struct device *dev;
 
 	if (!parent)
 		return;
@@ -875,6 +881,10 @@ void class_interface_unregister(struct class_interface *class_intf)
 		list_for_each_entry(class_dev, &parent->children, node)
 			class_intf->remove(class_dev, class_intf);
 	}
+	if (class_intf->remove_dev) {
+		list_for_each_entry(dev, &parent->devices, node)
+			class_intf->remove_dev(dev, class_intf);
+	}
 	up(&parent->sem);
 
 	class_put(parent);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index e21a65fc043e..1d3d3582fcca 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -372,6 +372,7 @@ int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
 	char *class_name = NULL;
+	struct class_interface *class_intf;
 	int error = -EINVAL;
 
 	dev = get_device(dev);
@@ -451,9 +452,14 @@ int device_add(struct device *dev)
 		klist_add_tail(&dev->knode_parent, &parent->klist_children);
 
 	if (dev->class) {
-		/* tie the class to the device */
 		down(&dev->class->sem);
+		/* tie the class to the device */
 		list_add_tail(&dev->node, &dev->class->devices);
+
+		/* notify any interfaces that the device is here */
+		list_for_each_entry(class_intf, &dev->class->interfaces, node)
+			if (class_intf->add_dev)
+				class_intf->add_dev(dev, class_intf);
 		up(&dev->class->sem);
 	}
 
@@ -548,6 +554,7 @@ void device_del(struct device * dev)
 {
 	struct device * parent = dev->parent;
 	char *class_name = NULL;
+	struct class_interface *class_intf;
 
 	if (parent)
 		klist_del(&dev->knode_parent);
@@ -563,6 +570,11 @@ void device_del(struct device * dev)
 		}
 		kfree(class_name);
 		down(&dev->class->sem);
+		/* notify any interfaces that the device is now gone */
+		list_for_each_entry(class_intf, &dev->class->interfaces, node)
+			if (class_intf->remove_dev)
+				class_intf->remove_dev(dev, class_intf);
+		/* remove the device from the class list */
 		list_del_init(&dev->node);
 		up(&dev->class->sem);
 	}
diff --git a/include/linux/device.h b/include/linux/device.h
index bbb0d6b5d232..e0fae0e76fa9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -278,6 +278,8 @@ struct class_interface {
 
 	int (*add)	(struct class_device *, struct class_interface *);
 	void (*remove)	(struct class_device *, struct class_interface *);
+	int (*add_dev)		(struct device *, struct class_interface *);
+	void (*remove_dev)	(struct device *, struct class_interface *);
 };
 
 extern int class_interface_register(struct class_interface *);
-- 
cgit v1.2.3


From 2589f1887b0bf9f08ec3d7f3c5705ccb7c628076 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 19 Sep 2006 09:39:19 -0700
Subject: Driver core: add ability for devices to create and remove bin files

Makes it easier for devices to create and remove binary attribute files
so they don't have to call directly into sysfs.  This is needed to help
with the conversion from struct class_device to struct device.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 26 ++++++++++++++++++++++++++
 include/linux/device.h |  4 ++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 1d3d3582fcca..bc9f35c81691 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -319,6 +319,32 @@ void device_remove_file(struct device * dev, struct device_attribute * attr)
 	}
 }
 
+/**
+ * device_create_bin_file - create sysfs binary attribute file for device.
+ * @dev: device.
+ * @attr: device binary attribute descriptor.
+ */
+int device_create_bin_file(struct device *dev, struct bin_attribute *attr)
+{
+	int error = -EINVAL;
+	if (dev)
+		error = sysfs_create_bin_file(&dev->kobj, attr);
+	return error;
+}
+EXPORT_SYMBOL_GPL(device_create_bin_file);
+
+/**
+ * device_remove_bin_file - remove sysfs binary attribute file
+ * @dev: device.
+ * @attr: device binary attribute descriptor.
+ */
+void device_remove_bin_file(struct device *dev, struct bin_attribute *attr)
+{
+	if (dev)
+		sysfs_remove_bin_file(&dev->kobj, attr);
+}
+EXPORT_SYMBOL_GPL(device_remove_bin_file);
+
 static void klist_children_get(struct klist_node *n)
 {
 	struct device *dev = container_of(n, struct device, knode_parent);
diff --git a/include/linux/device.h b/include/linux/device.h
index e0fae0e76fa9..7d447d7271c5 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -309,6 +309,10 @@ struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
 extern int device_create_file(struct device *device, struct device_attribute * entry);
 extern void device_remove_file(struct device * dev, struct device_attribute * attr);
+extern int __must_check device_create_bin_file(struct device *dev,
+					       struct bin_attribute *attr);
+extern void device_remove_bin_file(struct device *dev,
+				   struct bin_attribute *attr);
 struct device {
 	struct klist		klist_children;
 	struct klist_node	knode_parent;		/* node in sibling list */
-- 
cgit v1.2.3


From 995982ca79d9262869513948ec7c540f32035491 Mon Sep 17 00:00:00 2001
From: "Randy.Dunlap" <rdunlap@xenotime.net>
Date: Mon, 10 Jul 2006 23:05:25 -0700
Subject: sysfs_remove_bin_file: no return value, dump_stack on error

Make sysfs_remove_bin_file() void.  If it detects an error,
printk the file name and call dump_stack().

sysfs_hash_and_remove() now returns an error code indicating
its success or failure so that sysfs_remove_bin_file() can
know success/failure.

Convert the only driver that checked the return value of
sysfs_remove_bin_file().

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/hotplug/acpiphp_ibm.c |  4 +---
 fs/sysfs/bin.c                    | 13 ++++++++-----
 fs/sysfs/inode.c                  | 11 ++++++++---
 fs/sysfs/sysfs.h                  |  2 +-
 include/linux/sysfs.h             |  2 +-
 5 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index 317457dd4014..d0a07d9ab30c 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -487,9 +487,7 @@ static void __exit ibm_acpiphp_exit(void)
 	if (ACPI_FAILURE(status))
 		err("%s: Notification handler removal failed\n", __FUNCTION__);
 	/* remove the /sys entries */
-	if (sysfs_remove_bin_file(sysdir, &ibm_apci_table_attr))
-		err("%s: removal of sysfs file apci_table failed\n",
-				__FUNCTION__);
+	sysfs_remove_bin_file(sysdir, &ibm_apci_table_attr);
 }
 
 module_init(ibm_acpiphp_init);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index c16a93c353c0..98022e41cda1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -10,6 +10,7 @@
 
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/kernel.h>
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -176,7 +177,6 @@ const struct file_operations bin_fops = {
  *	sysfs_create_bin_file - create binary file for object.
  *	@kobj:	object.
  *	@attr:	attribute descriptor.
- *
  */
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
@@ -191,13 +191,16 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
  *	sysfs_remove_bin_file - remove binary file for object.
  *	@kobj:	object.
  *	@attr:	attribute descriptor.
- *
  */
 
-int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-	sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
-	return 0;
+	if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) {
+		printk(KERN_ERR "%s: "
+			"bad dentry or inode or no such file: \"%s\"\n",
+			__FUNCTION__, attr->attr.name);
+		dump_stack();
+	}
 }
 
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 9889e54e1f13..fd7cd5f843d2 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -12,6 +12,7 @@
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
 #include <linux/capability.h>
+#include <linux/errno.h>
 #include "sysfs.h"
 
 extern struct super_block * sysfs_sb;
@@ -234,17 +235,18 @@ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
 	}
 }
 
-void sysfs_hash_and_remove(struct dentry * dir, const char * name)
+int sysfs_hash_and_remove(struct dentry * dir, const char * name)
 {
 	struct sysfs_dirent * sd;
 	struct sysfs_dirent * parent_sd;
+	int found = 0;
 
 	if (!dir)
-		return;
+		return -ENOENT;
 
 	if (dir->d_inode == NULL)
 		/* no inode means this hasn't been made visible yet */
-		return;
+		return -ENOENT;
 
 	parent_sd = dir->d_fsdata;
 	mutex_lock(&dir->d_inode->i_mutex);
@@ -255,8 +257,11 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
 			list_del_init(&sd->s_sibling);
 			sysfs_drop_dentry(sd, dir);
 			sysfs_put(sd);
+			found = 1;
 			break;
 		}
 	}
 	mutex_unlock(&dir->d_inode->i_mutex);
+
+	return found ? 0 : -ENOENT;
 }
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3651ffb5ec09..6f3d6bd52887 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -10,7 +10,7 @@ extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
 				umode_t, int);
 
 extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
-extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
+extern int sysfs_hash_and_remove(struct dentry * dir, const char * name);
 extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
 
 extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 1ea5d3cda6ae..95f6db54d8d2 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -114,7 +114,7 @@ extern void
 sysfs_remove_link(struct kobject *, const char * name);
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr);
-int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr);
+void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
 
 int sysfs_create_group(struct kobject *, const struct attribute_group *);
 void sysfs_remove_group(struct kobject *, const struct attribute_group *);
-- 
cgit v1.2.3


From 4a7fb6363f2d1a6c09a10253937f672f3c7929e1 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 14 Aug 2006 22:43:17 -0700
Subject: add __must_check to device management code

We're getting a lot of crashes in the sysfs/kobject/device/bus/class code and
they're very hard to diagnose.

I'm suspecting that in some cases this is because drivers aren't checking
return values and aren't handling errors correctly.  So the code blithely
blunders on and crashes later in very obscure ways.

There's just no reason to ignore errors which can and do occur.  So the patch
sprinkles __must_check all over these APIs.

Causes 1,513 new warnings.  Heh.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h  | 52 ++++++++++++++++++++++++++-----------------------
 include/linux/kobject.h | 16 ++++++++-------
 include/linux/pci.h     | 34 +++++++++++++++++---------------
 include/linux/sysfs.h   | 19 ++++++++++--------
 4 files changed, 66 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 7d447d7271c5..ad4db72f5733 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -15,6 +15,7 @@
 #include <linux/kobject.h>
 #include <linux/klist.h>
 #include <linux/list.h>
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/pm.h>
@@ -58,7 +59,7 @@ struct bus_type {
 	int (*resume)(struct device * dev);
 };
 
-extern int bus_register(struct bus_type * bus);
+extern int __must_check bus_register(struct bus_type * bus);
 extern void bus_unregister(struct bus_type * bus);
 
 extern void bus_rescan_devices(struct bus_type * bus);
@@ -70,9 +71,9 @@ int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data,
 struct device * bus_find_device(struct bus_type *bus, struct device *start,
 				void *data, int (*match)(struct device *, void *));
 
-int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, 
-		     void * data, int (*fn)(struct device_driver *, void *));
-
+int __must_check bus_for_each_drv(struct bus_type *bus,
+		struct device_driver *start, void *data,
+		int (*fn)(struct device_driver *, void *));
 
 /* driverfs interface for exporting bus attributes */
 
@@ -85,7 +86,8 @@ struct bus_attribute {
 #define BUS_ATTR(_name,_mode,_show,_store)	\
 struct bus_attribute bus_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
-extern int bus_create_file(struct bus_type *, struct bus_attribute *);
+extern int __must_check bus_create_file(struct bus_type *,
+					struct bus_attribute *);
 extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
 
 struct device_driver {
@@ -107,14 +109,13 @@ struct device_driver {
 };
 
 
-extern int driver_register(struct device_driver * drv);
+extern int __must_check driver_register(struct device_driver * drv);
 extern void driver_unregister(struct device_driver * drv);
 
 extern struct device_driver * get_driver(struct device_driver * drv);
 extern void put_driver(struct device_driver * drv);
 extern struct device_driver *driver_find(const char *name, struct bus_type *bus);
 
-
 /* driverfs interface for exporting driver attributes */
 
 struct driver_attribute {
@@ -126,16 +127,17 @@ struct driver_attribute {
 #define DRIVER_ATTR(_name,_mode,_show,_store)	\
 struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
-extern int driver_create_file(struct device_driver *, struct driver_attribute *);
+extern int __must_check driver_create_file(struct device_driver *,
+					struct driver_attribute *);
 extern void driver_remove_file(struct device_driver *, struct driver_attribute *);
 
-extern int driver_for_each_device(struct device_driver * drv, struct device * start,
-				  void * data, int (*fn)(struct device *, void *));
+extern int __must_check driver_for_each_device(struct device_driver * drv,
+		struct device *start, void *data,
+		int (*fn)(struct device *, void *));
 struct device * driver_find_device(struct device_driver *drv,
 				   struct device *start, void *data,
 				   int (*match)(struct device *, void *));
 
-
 /*
  * device classes
  */
@@ -168,7 +170,7 @@ struct class {
 	int	(*resume)(struct device *);
 };
 
-extern int class_register(struct class *);
+extern int __must_check class_register(struct class *);
 extern void class_unregister(struct class *);
 
 
@@ -181,7 +183,8 @@ struct class_attribute {
 #define CLASS_ATTR(_name,_mode,_show,_store)			\
 struct class_attribute class_attr_##_name = __ATTR(_name,_mode,_show,_store) 
 
-extern int class_create_file(struct class *, const struct class_attribute *);
+extern int __must_check class_create_file(struct class *,
+					const struct class_attribute *);
 extern void class_remove_file(struct class *, const struct class_attribute *);
 
 struct class_device_attribute {
@@ -194,7 +197,7 @@ struct class_device_attribute {
 struct class_device_attribute class_device_attr_##_name = 	\
 	__ATTR(_name,_mode,_show,_store)
 
-extern int class_device_create_file(struct class_device *,
+extern int __must_check class_device_create_file(struct class_device *,
 				    const struct class_device_attribute *);
 
 /**
@@ -254,10 +257,10 @@ class_set_devdata (struct class_device *dev, void *data)
 }
 
 
-extern int class_device_register(struct class_device *);
+extern int __must_check class_device_register(struct class_device *);
 extern void class_device_unregister(struct class_device *);
 extern void class_device_initialize(struct class_device *);
-extern int class_device_add(struct class_device *);
+extern int __must_check class_device_add(struct class_device *);
 extern void class_device_del(struct class_device *);
 
 extern int class_device_rename(struct class_device *, char *);
@@ -267,7 +270,7 @@ extern void class_device_put(struct class_device *);
 
 extern void class_device_remove_file(struct class_device *, 
 				     const struct class_device_attribute *);
-extern int class_device_create_bin_file(struct class_device *,
+extern int __must_check class_device_create_bin_file(struct class_device *,
 					struct bin_attribute *);
 extern void class_device_remove_bin_file(struct class_device *,
 					 struct bin_attribute *);
@@ -282,7 +285,7 @@ struct class_interface {
 	void (*remove_dev)	(struct device *, struct class_interface *);
 };
 
-extern int class_interface_register(struct class_interface *);
+extern int __must_check class_interface_register(struct class_interface *);
 extern void class_interface_unregister(struct class_interface *);
 
 extern struct class *class_create(struct module *owner, const char *name);
@@ -307,7 +310,8 @@ struct device_attribute {
 #define DEVICE_ATTR(_name,_mode,_show,_store) \
 struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
-extern int device_create_file(struct device *device, struct device_attribute * entry);
+extern int __must_check device_create_file(struct device *device,
+					struct device_attribute * entry);
 extern void device_remove_file(struct device * dev, struct device_attribute * attr);
 extern int __must_check device_create_bin_file(struct device *dev,
 					       struct bin_attribute *attr);
@@ -380,12 +384,12 @@ static inline int device_is_registered(struct device *dev)
 /*
  * High level routines for use by the bus drivers
  */
-extern int device_register(struct device * dev);
+extern int __must_check device_register(struct device * dev);
 extern void device_unregister(struct device * dev);
 extern void device_initialize(struct device * dev);
-extern int device_add(struct device * dev);
+extern int __must_check device_add(struct device * dev);
 extern void device_del(struct device * dev);
-extern int device_for_each_child(struct device *, void *,
+extern int __must_check device_for_each_child(struct device *, void *,
 		     int (*fn)(struct device *, void *));
 extern int device_rename(struct device *dev, char *new_name);
 
@@ -395,7 +399,7 @@ extern int device_rename(struct device *dev, char *new_name);
  */
 extern void device_bind_driver(struct device * dev);
 extern void device_release_driver(struct device * dev);
-extern int  device_attach(struct device * dev);
+extern int  __must_check device_attach(struct device * dev);
 extern void driver_attach(struct device_driver * drv);
 extern void device_reprobe(struct device *dev);
 
@@ -433,7 +437,7 @@ extern void device_shutdown(void);
 
 
 /* drivers/base/firmware.c */
-extern int firmware_register(struct subsystem *);
+extern int __must_check firmware_register(struct subsystem *);
 extern void firmware_unregister(struct subsystem *);
 
 /* debugging and troubleshooting/diagnostic helpers. */
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 2d229327959e..bcd9cd173c2c 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -20,6 +20,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/sysfs.h>
+#include <linux/compiler.h>
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
 #include <linux/kref.h>
@@ -71,12 +72,12 @@ static inline const char * kobject_name(const struct kobject * kobj)
 extern void kobject_init(struct kobject *);
 extern void kobject_cleanup(struct kobject *);
 
-extern int kobject_add(struct kobject *);
+extern int __must_check kobject_add(struct kobject *);
 extern void kobject_del(struct kobject *);
 
-extern int kobject_rename(struct kobject *, const char *new_name);
+extern int __must_check kobject_rename(struct kobject *, const char *new_name);
 
-extern int kobject_register(struct kobject *);
+extern int __must_check kobject_register(struct kobject *);
 extern void kobject_unregister(struct kobject *);
 
 extern struct kobject * kobject_get(struct kobject *);
@@ -128,8 +129,8 @@ struct kset {
 
 
 extern void kset_init(struct kset * k);
-extern int kset_add(struct kset * k);
-extern int kset_register(struct kset * k);
+extern int __must_check kset_add(struct kset * k);
+extern int __must_check kset_register(struct kset * k);
 extern void kset_unregister(struct kset * k);
 
 static inline struct kset * to_kset(struct kobject * kobj)
@@ -239,7 +240,7 @@ extern struct subsystem hypervisor_subsys;
 	(obj)->subsys.kset.kobj.kset = &(_subsys).kset
 
 extern void subsystem_init(struct subsystem *);
-extern int subsystem_register(struct subsystem *);
+extern int __must_check subsystem_register(struct subsystem *);
 extern void subsystem_unregister(struct subsystem *);
 
 static inline struct subsystem * subsys_get(struct subsystem * s)
@@ -258,7 +259,8 @@ struct subsys_attribute {
 	ssize_t (*store)(struct subsystem *, const char *, size_t); 
 };
 
-extern int subsys_create_file(struct subsystem * , struct subsys_attribute *);
+extern int __must_check subsys_create_file(struct subsystem * ,
+					struct subsys_attribute *);
 
 #if defined(CONFIG_HOTPLUG)
 void kobject_uevent(struct kobject *kobj, enum kobject_action action);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9514bbfe96e2..3ec72551ac31 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -49,6 +49,7 @@
 #include <linux/types.h>
 #include <linux/ioport.h>
 #include <linux/list.h>
+#include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/device.h>
 
@@ -403,7 +404,7 @@ extern struct list_head pci_root_buses;	/* list of all known PCI buses */
 extern struct list_head pci_devices;	/* list of all devices */
 
 void pcibios_fixup_bus(struct pci_bus *);
-int pcibios_enable_device(struct pci_dev *, int mask);
+int __must_check pcibios_enable_device(struct pci_dev *, int mask);
 char *pcibios_setup (char *str);
 
 /* Used only when drivers/pci/setup.c is used */
@@ -490,19 +491,19 @@ static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val
 	return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val);
 }
 
-int pci_enable_device(struct pci_dev *dev);
-int pci_enable_device_bars(struct pci_dev *dev, int mask);
+int __must_check pci_enable_device(struct pci_dev *dev);
+int __must_check pci_enable_device_bars(struct pci_dev *dev, int mask);
 void pci_disable_device(struct pci_dev *dev);
 void pci_set_master(struct pci_dev *dev);
 #define HAVE_PCI_SET_MWI
-int pci_set_mwi(struct pci_dev *dev);
+int __must_check pci_set_mwi(struct pci_dev *dev);
 void pci_clear_mwi(struct pci_dev *dev);
 void pci_intx(struct pci_dev *dev, int enable);
 int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
 int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask);
 void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
-int pci_assign_resource(struct pci_dev *dev, int i);
-int pci_assign_resource_fixed(struct pci_dev *dev, int i);
+int __must_check pci_assign_resource(struct pci_dev *dev, int i);
+int __must_check pci_assign_resource_fixed(struct pci_dev *dev, int i);
 void pci_restore_bars(struct pci_dev *dev);
 
 /* ROM control related routines */
@@ -528,23 +529,24 @@ void pdev_sort_resources(struct pci_dev *, struct resource_list *);
 void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
 		    int (*)(struct pci_dev *, u8, u8));
 #define HAVE_PCI_REQ_REGIONS	2
-int pci_request_regions(struct pci_dev *, const char *);
+int __must_check pci_request_regions(struct pci_dev *, const char *);
 void pci_release_regions(struct pci_dev *);
-int pci_request_region(struct pci_dev *, int, const char *);
+int __must_check pci_request_region(struct pci_dev *, int, const char *);
 void pci_release_region(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
-int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
-			   resource_size_t size, resource_size_t align,
-			   resource_size_t min, unsigned int type_mask,
-			   void (*alignf)(void *, struct resource *,
-					  resource_size_t, resource_size_t),
-			   void *alignf_data);
+int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
+			struct resource *res, resource_size_t size,
+			resource_size_t align, resource_size_t min,
+			unsigned int type_mask,
+			void (*alignf)(void *, struct resource *,
+				resource_size_t, resource_size_t),
+			void *alignf_data);
 void pci_enable_bridges(struct pci_bus *bus);
 
 /* Proper probing supporting hot-pluggable devices */
-int __pci_register_driver(struct pci_driver *, struct module *);
-static inline int pci_register_driver(struct pci_driver *driver)
+int __must_check __pci_register_driver(struct pci_driver *, struct module *);
+static inline int __must_check pci_register_driver(struct pci_driver *driver)
 {
 	return __pci_register_driver(driver, THIS_MODULE);
 }
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 95f6db54d8d2..02ad790921fd 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -10,6 +10,7 @@
 #ifndef _SYSFS_H_
 #define _SYSFS_H_
 
+#include <linux/compiler.h>
 #include <asm/atomic.h>
 
 struct kobject;
@@ -86,37 +87,39 @@ struct sysfs_dirent {
 
 #ifdef CONFIG_SYSFS
 
-extern int
+extern int __must_check
 sysfs_create_dir(struct kobject *);
 
 extern void
 sysfs_remove_dir(struct kobject *);
 
-extern int
+extern int __must_check
 sysfs_rename_dir(struct kobject *, const char *new_name);
 
-extern int
+extern int __must_check
 sysfs_create_file(struct kobject *, const struct attribute *);
 
-extern int
+extern int __must_check
 sysfs_update_file(struct kobject *, const struct attribute *);
 
-extern int
+extern int __must_check
 sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode);
 
 extern void
 sysfs_remove_file(struct kobject *, const struct attribute *);
 
-extern int
+extern int __must_check
 sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name);
 
 extern void
 sysfs_remove_link(struct kobject *, const char * name);
 
-int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr);
+int __must_check sysfs_create_bin_file(struct kobject *kobj,
+					struct bin_attribute *attr);
 void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
 
-int sysfs_create_group(struct kobject *, const struct attribute_group *);
+int __must_check sysfs_create_group(struct kobject *,
+					const struct attribute_group *);
 void sysfs_remove_group(struct kobject *, const struct attribute_group *);
 void sysfs_notify(struct kobject * k, char *dir, char *attr);
 
-- 
cgit v1.2.3


From cebc04ba9aeb3a646cc746300421fc0e5aa4f253 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 14 Aug 2006 22:43:18 -0700
Subject: add CONFIG_ENABLE_MUST_CHECK

Those 1500 warnings can be a bit of a pain.  Add a config option to shut them
up.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/compiler.h | 5 +++++
 lib/Kconfig.debug        | 7 +++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 9b4f11094937..060b96112ec6 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -99,6 +99,11 @@ extern void __chk_io_ptr(void __iomem *);
 #define __must_check
 #endif
 
+#ifndef CONFIG_ENABLE_MUST_CHECK
+#undef __must_check
+#define __must_check
+#endif
+
 /*
  * Allow us to avoid 'defined but not used' warnings on functions and data,
  * as well as force them to be emitted to the assembly file.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 554ee688a9f8..5c114c3f03c5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -8,6 +8,13 @@ config PRINTK_TIME
 	  operations.  This is useful for identifying long delays
 	  in kernel startup.
 
+config ENABLE_MUST_CHECK
+	bool "Enable __must_check logic"
+	default y
+	help
+	  Enable the __must_check logic in the kernel build.  Disable this to
+	  suppress the "warning: ignoring return value of 'foo', declared with
+	  attribute warn_unused_result" messages.
 
 config MAGIC_SYSRQ
 	bool "Magic SysRq key"
-- 
cgit v1.2.3


From f86db396ff455ed586751d21816a1ebd431264e5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 14 Aug 2006 22:43:20 -0700
Subject: drivers/base: check errors

Add lots of return-value checking.

<pcornelia.huck@de.ibm.com>: fix bus_rescan_devices()]
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/base.h    |   2 +-
 drivers/base/bus.c     | 107 +++++++++++++++++++++++++++++++++----------------
 drivers/base/dd.c      |  37 ++++++++++++-----
 include/linux/device.h |   8 ++--
 4 files changed, 104 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index c3b8dc98b8a7..d26644a59537 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -16,7 +16,7 @@ extern int cpu_dev_init(void);
 extern int attribute_container_init(void);
 
 extern int bus_add_device(struct device * dev);
-extern void bus_attach_device(struct device * dev);
+extern int bus_attach_device(struct device * dev);
 extern void bus_remove_device(struct device * dev);
 extern struct bus_type *get_bus(struct bus_type * bus);
 extern void put_bus(struct bus_type * bus);
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 4d22a1d10a1c..aa685a20b649 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -371,12 +371,20 @@ int bus_add_device(struct device * dev)
 	if (bus) {
 		pr_debug("bus %s: add device %s\n", bus->name, dev->bus_id);
 		error = device_add_attrs(bus, dev);
-		if (!error) {
-			sysfs_create_link(&bus->devices.kobj, &dev->kobj, dev->bus_id);
-			sysfs_create_link(&dev->kobj, &dev->bus->subsys.kset.kobj, "subsystem");
-			sysfs_create_link(&dev->kobj, &dev->bus->subsys.kset.kobj, "bus");
-		}
+		if (error)
+			goto out;
+		error = sysfs_create_link(&bus->devices.kobj,
+						&dev->kobj, dev->bus_id);
+		if (error)
+			goto out;
+		error = sysfs_create_link(&dev->kobj,
+				&dev->bus->subsys.kset.kobj, "subsystem");
+		if (error)
+			goto out;
+		error = sysfs_create_link(&dev->kobj,
+				&dev->bus->subsys.kset.kobj, "bus");
 	}
+out:
 	return error;
 }
 
@@ -386,14 +394,19 @@ int bus_add_device(struct device * dev)
  *
  *	- Try to attach to driver.
  */
-void bus_attach_device(struct device * dev)
+int bus_attach_device(struct device * dev)
 {
-	struct bus_type * bus = dev->bus;
+	struct bus_type *bus = dev->bus;
+	int ret = 0;
 
 	if (bus) {
-		device_attach(dev);
-		klist_add_tail(&dev->knode_bus, &bus->klist_devices);
+		ret = device_attach(dev);
+		if (ret >= 0) {
+			klist_add_tail(&dev->knode_bus, &bus->klist_devices);
+			ret = 0;
+		}
 	}
+	return ret;
 }
 
 /**
@@ -455,10 +468,17 @@ static void driver_remove_attrs(struct bus_type * bus, struct device_driver * dr
  * Thanks to drivers making their tables __devinit, we can't allow manual
  * bind and unbind from userspace unless CONFIG_HOTPLUG is enabled.
  */
-static void add_bind_files(struct device_driver *drv)
+static int __must_check add_bind_files(struct device_driver *drv)
 {
-	driver_create_file(drv, &driver_attr_unbind);
-	driver_create_file(drv, &driver_attr_bind);
+	int ret;
+
+	ret = driver_create_file(drv, &driver_attr_unbind);
+	if (ret == 0) {
+		ret = driver_create_file(drv, &driver_attr_bind);
+		if (ret)
+			driver_remove_file(drv, &driver_attr_unbind);
+	}
+	return ret;
 }
 
 static void remove_bind_files(struct device_driver *drv)
@@ -476,7 +496,7 @@ static inline void remove_bind_files(struct device_driver *drv) {}
  *	@drv:	driver.
  *
  */
-int bus_add_driver(struct device_driver * drv)
+int bus_add_driver(struct device_driver *drv)
 {
 	struct bus_type * bus = get_bus(drv->bus);
 	int error = 0;
@@ -484,27 +504,39 @@ int bus_add_driver(struct device_driver * drv)
 	if (bus) {
 		pr_debug("bus %s: add driver %s\n", bus->name, drv->name);
 		error = kobject_set_name(&drv->kobj, "%s", drv->name);
-		if (error) {
-			put_bus(bus);
-			return error;
-		}
+		if (error)
+			goto out_put_bus;
 		drv->kobj.kset = &bus->drivers;
-		if ((error = kobject_register(&drv->kobj))) {
-			put_bus(bus);
-			return error;
-		}
+		if ((error = kobject_register(&drv->kobj)))
+			goto out_put_bus;
 
-		driver_attach(drv);
+		error = driver_attach(drv);
+		if (error)
+			goto out_unregister;
 		klist_add_tail(&drv->knode_bus, &bus->klist_drivers);
 		module_add_driver(drv->owner, drv);
 
-		driver_add_attrs(bus, drv);
-		add_bind_files(drv);
+		error = driver_add_attrs(bus, drv);
+		if (error) {
+			/* How the hell do we get out of this pickle? Give up */
+			printk(KERN_ERR "%s: driver_add_attrs(%s) failed\n",
+				__FUNCTION__, drv->name);
+		}
+		error = add_bind_files(drv);
+		if (error) {
+			/* Ditto */
+			printk(KERN_ERR "%s: add_bind_files(%s) failed\n",
+				__FUNCTION__, drv->name);
+		}
 	}
 	return error;
+out_unregister:
+	kobject_unregister(&drv->kobj);
+out_put_bus:
+	put_bus(bus);
+	return error;
 }
 
-
 /**
  *	bus_remove_driver - delete driver from bus's knowledge.
  *	@drv:	driver.
@@ -530,16 +562,21 @@ void bus_remove_driver(struct device_driver * drv)
 
 
 /* Helper for bus_rescan_devices's iter */
-static int bus_rescan_devices_helper(struct device *dev, void *data)
+static int __must_check bus_rescan_devices_helper(struct device *dev,
+						void *data)
 {
+	int ret = 0;
+
 	if (!dev->driver) {
 		if (dev->parent)	/* Needed for USB */
 			down(&dev->parent->sem);
-		device_attach(dev);
+		ret = device_attach(dev);
 		if (dev->parent)
 			up(&dev->parent->sem);
+		if (ret > 0)
+			ret = 0;
 	}
-	return 0;
+	return ret < 0 ? ret : 0;
 }
 
 /**
@@ -550,9 +587,9 @@ static int bus_rescan_devices_helper(struct device *dev, void *data)
  * attached and rescan it against existing drivers to see if it matches
  * any by calling device_attach() for the unbound devices.
  */
-void bus_rescan_devices(struct bus_type * bus)
+int bus_rescan_devices(struct bus_type * bus)
 {
-	bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper);
+	return bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper);
 }
 
 /**
@@ -564,7 +601,7 @@ void bus_rescan_devices(struct bus_type * bus)
  * to use if probing criteria changed during a devices lifetime and
  * driver attachment should change accordingly.
  */
-void device_reprobe(struct device *dev)
+int device_reprobe(struct device *dev)
 {
 	if (dev->driver) {
 		if (dev->parent)        /* Needed for USB */
@@ -573,14 +610,14 @@ void device_reprobe(struct device *dev)
 		if (dev->parent)
 			up(&dev->parent->sem);
 	}
-
-	bus_rescan_devices_helper(dev, NULL);
+	return bus_rescan_devices_helper(dev, NULL);
 }
 EXPORT_SYMBOL_GPL(device_reprobe);
 
-struct bus_type * get_bus(struct bus_type * bus)
+struct bus_type *get_bus(struct bus_type *bus)
 {
-	return bus ? container_of(subsys_get(&bus->subsys), struct bus_type, subsys) : NULL;
+	return bus ? container_of(subsys_get(&bus->subsys),
+				struct bus_type, subsys) : NULL;
 }
 
 void put_bus(struct bus_type * bus)
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 889c71111239..9f6f11ca0ab6 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -38,17 +38,29 @@
  *
  *	This function must be called with @dev->sem held.
  */
-void device_bind_driver(struct device * dev)
+int device_bind_driver(struct device *dev)
 {
-	if (klist_node_attached(&dev->knode_driver))
-		return;
+	int ret;
+
+	if (klist_node_attached(&dev->knode_driver)) {
+		printk(KERN_WARNING "%s: device %s already bound\n",
+			__FUNCTION__, kobject_name(&dev->kobj));
+		return 0;
+	}
 
 	pr_debug("bound device '%s' to driver '%s'\n",
 		 dev->bus_id, dev->driver->name);
 	klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices);
-	sysfs_create_link(&dev->driver->kobj, &dev->kobj,
+	ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj,
 			  kobject_name(&dev->kobj));
-	sysfs_create_link(&dev->kobj, &dev->driver->kobj, "driver");
+	if (ret == 0) {
+		ret = sysfs_create_link(&dev->kobj, &dev->driver->kobj,
+					"driver");
+		if (ret)
+			sysfs_remove_link(&dev->driver->kobj,
+					kobject_name(&dev->kobj));
+	}
+	return ret;
 }
 
 /**
@@ -91,7 +103,11 @@ int driver_probe_device(struct device_driver * drv, struct device * dev)
 			goto ProbeFailed;
 		}
 	}
-	device_bind_driver(dev);
+	if (device_bind_driver(dev)) {
+		printk(KERN_ERR "%s: device_bind_driver(%s) failed\n",
+			__FUNCTION__, dev->bus_id);
+		/* How does undo a ->probe?  We're screwed. */
+	}
 	ret = 1;
 	pr_debug("%s: Bound Device %s to Driver %s\n",
 		 drv->bus->name, dev->bus_id, drv->name);
@@ -139,8 +155,9 @@ int device_attach(struct device * dev)
 
 	down(&dev->sem);
 	if (dev->driver) {
-		device_bind_driver(dev);
-		ret = 1;
+		ret = device_bind_driver(dev);
+		if (ret == 0)
+			ret = 1;
 	} else
 		ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
 	up(&dev->sem);
@@ -182,9 +199,9 @@ static int __driver_attach(struct device * dev, void * data)
  *	returns 0 and the @dev->driver is set, we've found a
  *	compatible pair.
  */
-void driver_attach(struct device_driver * drv)
+int driver_attach(struct device_driver * drv)
 {
-	bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
+	return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
 }
 
 /**
diff --git a/include/linux/device.h b/include/linux/device.h
index ad4db72f5733..b3da9a870cfc 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -62,7 +62,7 @@ struct bus_type {
 extern int __must_check bus_register(struct bus_type * bus);
 extern void bus_unregister(struct bus_type * bus);
 
-extern void bus_rescan_devices(struct bus_type * bus);
+extern int __must_check bus_rescan_devices(struct bus_type * bus);
 
 /* iterator helpers for buses */
 
@@ -397,11 +397,11 @@ extern int device_rename(struct device *dev, char *new_name);
  * Manual binding of a device to driver. See drivers/base/bus.c
  * for information on use.
  */
-extern void device_bind_driver(struct device * dev);
+extern int __must_check device_bind_driver(struct device *dev);
 extern void device_release_driver(struct device * dev);
 extern int  __must_check device_attach(struct device * dev);
-extern void driver_attach(struct device_driver * drv);
-extern void device_reprobe(struct device *dev);
+extern int __must_check driver_attach(struct device_driver *drv);
+extern int __must_check device_reprobe(struct device *dev);
 
 /*
  * Easy functions for dynamically creating devices on the fly
-- 
cgit v1.2.3


From f20a9ead0d005fbeeae3fc21a96f9bf197ac1c1c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 14 Aug 2006 22:43:23 -0700
Subject: sysfs: add proper sysfs_init() prototype

Don't be crufty.  Mark it __must_check too.

Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/namespace.c        | 10 +---------
 include/linux/sysfs.h |  7 +++++++
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index fa7ed6a9fc2d..36d180858136 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -17,6 +17,7 @@
 #include <linux/acct.h>
 #include <linux/capability.h>
 #include <linux/module.h>
+#include <linux/sysfs.h>
 #include <linux/seq_file.h>
 #include <linux/namespace.h>
 #include <linux/namei.h>
@@ -28,15 +29,6 @@
 
 extern int __init init_rootfs(void);
 
-#ifdef CONFIG_SYSFS
-extern int __init sysfs_init(void);
-#else
-static inline int sysfs_init(void)
-{
-	return 0;
-}
-#endif
-
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 02ad790921fd..6d5c43d31dec 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -123,6 +123,8 @@ int __must_check sysfs_create_group(struct kobject *,
 void sysfs_remove_group(struct kobject *, const struct attribute_group *);
 void sysfs_notify(struct kobject * k, char *dir, char *attr);
 
+extern int __must_check sysfs_init(void);
+
 #else /* CONFIG_SYSFS */
 
 static inline int sysfs_create_dir(struct kobject * k)
@@ -194,6 +196,11 @@ static inline void sysfs_notify(struct kobject * k, char *dir, char *attr)
 {
 }
 
+static inline int __must_check sysfs_init(void)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SYSFS */
 
 #endif /* _SYSFS_H_ */
-- 
cgit v1.2.3


From d779249ed4cb3b50690de6de8448829d65a1cd08 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 18 Jul 2006 10:59:59 -0700
Subject: Driver Core: add ability for drivers to do a threaded probe

This adds the infrastructure for drivers to do a threaded probe, and
waits at init time for all currently outstanding probes to complete.

A new kernel thread will be created when the probe() function for the
driver is called, if the multithread_probe bit is set in the driver
saying it can support this kind of operation.

I have tested this with USB and PCI, and it works, and shaves off a lot
of time in the boot process, but there are issues with finding root boot
disks, and some USB drivers assume that this can never happen, so it is
currently not enabled for any bus type.  Individual drivers can enable
this right now if they wish, and bus authors can selectivly turn it on
as well, once they determine that their subsystem will work properly
with it.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/dd.c      | 108 ++++++++++++++++++++++++++++++++++++-------------
 include/linux/device.h |   3 ++
 init/do_mounts.c       |   5 +++
 3 files changed, 89 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 9f6f11ca0ab6..319a73be4180 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -17,6 +17,7 @@
 
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -63,44 +64,35 @@ int device_bind_driver(struct device *dev)
 	return ret;
 }
 
-/**
- *	driver_probe_device - attempt to bind device & driver.
- *	@drv:	driver.
- *	@dev:	device.
- *
- *	First, we call the bus's match function, if one present, which
- *	should compare the device IDs the driver supports with the
- *	device IDs of the device. Note we don't do this ourselves
- *	because we don't know the format of the ID structures, nor what
- *	is to be considered a match and what is not.
- *
- *	This function returns 1 if a match is found, an error if one
- *	occurs (that is not -ENODEV or -ENXIO), and 0 otherwise.
- *
- *	This function must be called with @dev->sem held.  When called
- *	for a USB interface, @dev->parent->sem must be held as well.
- */
-int driver_probe_device(struct device_driver * drv, struct device * dev)
+struct stupid_thread_structure {
+	struct device_driver *drv;
+	struct device *dev;
+};
+
+static atomic_t probe_count = ATOMIC_INIT(0);
+static int really_probe(void *void_data)
 {
+	struct stupid_thread_structure *data = void_data;
+	struct device_driver *drv = data->drv;
+	struct device *dev = data->dev;
 	int ret = 0;
 
-	if (drv->bus->match && !drv->bus->match(dev, drv))
-		goto Done;
+	atomic_inc(&probe_count);
+	pr_debug("%s: Probing driver %s with device %s\n",
+		 drv->bus->name, drv->name, dev->bus_id);
 
-	pr_debug("%s: Matched Device %s with Driver %s\n",
-		 drv->bus->name, dev->bus_id, drv->name);
 	dev->driver = drv;
 	if (dev->bus->probe) {
 		ret = dev->bus->probe(dev);
 		if (ret) {
 			dev->driver = NULL;
-			goto ProbeFailed;
+			goto probe_failed;
 		}
 	} else if (drv->probe) {
 		ret = drv->probe(dev);
 		if (ret) {
 			dev->driver = NULL;
-			goto ProbeFailed;
+			goto probe_failed;
 		}
 	}
 	if (device_bind_driver(dev)) {
@@ -111,9 +103,9 @@ int driver_probe_device(struct device_driver * drv, struct device * dev)
 	ret = 1;
 	pr_debug("%s: Bound Device %s to Driver %s\n",
 		 drv->bus->name, dev->bus_id, drv->name);
-	goto Done;
+	goto done;
 
- ProbeFailed:
+probe_failed:
 	if (ret == -ENODEV || ret == -ENXIO) {
 		/* Driver matched, but didn't support device
 		 * or device not found.
@@ -126,7 +118,69 @@ int driver_probe_device(struct device_driver * drv, struct device * dev)
 		       "%s: probe of %s failed with error %d\n",
 		       drv->name, dev->bus_id, ret);
 	}
- Done:
+done:
+	kfree(data);
+	atomic_dec(&probe_count);
+	return ret;
+}
+
+/**
+ * driver_probe_done
+ * Determine if the probe sequence is finished or not.
+ *
+ * Should somehow figure out how to use a semaphore, not an atomic variable...
+ */
+int driver_probe_done(void)
+{
+	pr_debug("%s: probe_count = %d\n", __FUNCTION__,
+		 atomic_read(&probe_count));
+	if (atomic_read(&probe_count))
+		return -EBUSY;
+	return 0;
+}
+
+/**
+ * driver_probe_device - attempt to bind device & driver together
+ * @drv: driver to bind a device to
+ * @dev: device to try to bind to the driver
+ *
+ * First, we call the bus's match function, if one present, which should
+ * compare the device IDs the driver supports with the device IDs of the
+ * device. Note we don't do this ourselves because we don't know the
+ * format of the ID structures, nor what is to be considered a match and
+ * what is not.
+ *
+ * This function returns 1 if a match is found, an error if one occurs
+ * (that is not -ENODEV or -ENXIO), and 0 otherwise.
+ *
+ * This function must be called with @dev->sem held.  When called for a
+ * USB interface, @dev->parent->sem must be held as well.
+ */
+int driver_probe_device(struct device_driver * drv, struct device * dev)
+{
+	struct stupid_thread_structure *data;
+	struct task_struct *probe_task;
+	int ret = 0;
+
+	if (drv->bus->match && !drv->bus->match(dev, drv))
+		goto done;
+
+	pr_debug("%s: Matched Device %s with Driver %s\n",
+		 drv->bus->name, dev->bus_id, drv->name);
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	data->drv = drv;
+	data->dev = dev;
+
+	if (drv->multithread_probe) {
+		probe_task = kthread_run(really_probe, data,
+					 "probe-%s", dev->bus_id);
+		if (IS_ERR(probe_task))
+			ret = PTR_ERR(probe_task);
+	} else
+		ret = really_probe(data);
+
+done:
 	return ret;
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index b3da9a870cfc..74246efba931 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -106,6 +106,8 @@ struct device_driver {
 	void	(*shutdown)	(struct device * dev);
 	int	(*suspend)	(struct device * dev, pm_message_t state);
 	int	(*resume)	(struct device * dev);
+
+	unsigned int multithread_probe:1;
 };
 
 
@@ -115,6 +117,7 @@ extern void driver_unregister(struct device_driver * drv);
 extern struct device_driver * get_driver(struct device_driver * drv);
 extern void put_driver(struct device_driver * drv);
 extern struct device_driver *driver_find(const char *name, struct bus_type *bus);
+extern int driver_probe_done(void);
 
 /* driverfs interface for exporting driver attributes */
 
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 94aeec7aa917..b290aadb1d3f 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -8,6 +8,7 @@
 #include <linux/security.h>
 #include <linux/delay.h>
 #include <linux/mount.h>
+#include <linux/device.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs_sb.h>
@@ -403,6 +404,10 @@ void __init prepare_namespace(void)
 		ssleep(root_delay);
 	}
 
+	/* wait for the known devices to complete their probing */
+	while (driver_probe_done() != 0)
+		msleep(100);
+
 	md_run_setup();
 
 	if (saved_root_name[0]) {
-- 
cgit v1.2.3


From f2eaae197f4590c4d96f31b09b0ee9067421a95c Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 18 Sep 2006 16:22:34 -0400
Subject: Driver core: Fix potential deadlock in driver core

There is a potential deadlock in the driver core.  It boils down to
the fact that bus_remove_device() calls klist_remove() instead of
klist_del(), thereby waiting until the reference count of the
klist_node in the bus's klist of devices drops to 0.  The refcount
can't reach 0 so long as a modprobe process is trying to bind a new
driver to the device being removed, by calling __driver_attach().  The
problem is that __driver_attach() tries to acquire the device's
parent's semaphore, but the caller of bus_remove_device() is quite
likely to own that semaphore already.

It isn't sufficient just to replace klist_remove() with klist_del().
Doing so runs the risk that the device would remain on the bus's klist
of devices for some time, and so could be bound to another driver even
after it was unregistered.  What's needed is a new way to distinguish
whether or not a device is registered, based on a criterion other than
whether its klist_node is linked into the bus's klist of devices.  That
way driver binding can fail when the device is unregistered, even if
it is still linked into the klist.

This patch (as782) implements the solution, by adding a new bitflag to
indiate when a struct device is registered, by testing the flag before
allowing a driver to bind a device, and by changing the definition of
the device_is_registered() inline.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/bus.c     | 8 ++++++--
 drivers/base/dd.c      | 2 ++
 include/linux/device.h | 3 ++-
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index aa685a20b649..636af538a2b5 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -392,6 +392,7 @@ out:
  *	bus_attach_device - add device to bus
  *	@dev:	device tried to attach to a driver
  *
+ *	- Add device to bus's list of devices.
  *	- Try to attach to driver.
  */
 int bus_attach_device(struct device * dev)
@@ -400,11 +401,13 @@ int bus_attach_device(struct device * dev)
 	int ret = 0;
 
 	if (bus) {
+		dev->is_registered = 1;
 		ret = device_attach(dev);
 		if (ret >= 0) {
 			klist_add_tail(&dev->knode_bus, &bus->klist_devices);
 			ret = 0;
-		}
+		} else
+			dev->is_registered = 0;
 	}
 	return ret;
 }
@@ -425,7 +428,8 @@ void bus_remove_device(struct device * dev)
 		sysfs_remove_link(&dev->kobj, "bus");
 		sysfs_remove_link(&dev->bus->devices.kobj, dev->bus_id);
 		device_remove_attrs(dev->bus, dev);
-		klist_remove(&dev->knode_bus);
+		dev->is_registered = 0;
+		klist_del(&dev->knode_bus);
 		pr_debug("bus %s: remove device %s\n", dev->bus->name, dev->bus_id);
 		device_release_driver(dev);
 		put_bus(dev->bus);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 319a73be4180..b5f43c3e44fa 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -162,6 +162,8 @@ int driver_probe_device(struct device_driver * drv, struct device * dev)
 	struct task_struct *probe_task;
 	int ret = 0;
 
+	if (!device_is_registered(dev))
+		return -ENODEV;
 	if (drv->bus->match && !drv->bus->match(dev, drv))
 		goto done;
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 74246efba931..662e6a10144e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -329,6 +329,7 @@ struct device {
 
 	struct kobject kobj;
 	char	bus_id[BUS_ID_SIZE];	/* position on parent bus */
+	unsigned		is_registered:1;
 	struct device_attribute uevent_attr;
 	struct device_attribute *devt_attr;
 
@@ -381,7 +382,7 @@ dev_set_drvdata (struct device *dev, void *data)
 
 static inline int device_is_registered(struct device *dev)
 {
-	return klist_node_attached(&dev->knode_bus);
+	return dev->is_registered;
 }
 
 /*
-- 
cgit v1.2.3


From 407984f1af259b31957c7c05075a454a751bb801 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Tue, 26 Sep 2006 10:52:27 +0200
Subject: [PATCH] x86: Add abilty to enable/disable nmi watchdog with sysctl

Adds a new /proc/sys/kernel/nmi call that will enable/disable the nmi
watchdog.

Signed-off-by:  Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/i386/kernel/nmi.c   | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86_64/kernel/nmi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-i386/nmi.h   |  1 +
 include/asm-x86_64/nmi.h |  1 +
 include/linux/sysctl.h   |  1 +
 kernel/sysctl.c          | 11 ++++++++++
 6 files changed, 114 insertions(+)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index acd3fdea2a21..28065d0b71a9 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -846,6 +846,58 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 	return 0;
 }
 
+/*
+ * proc handler for /proc/sys/kernel/nmi_watchdog
+ */
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+			void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int old_state;
+
+	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+	old_state = nmi_watchdog_enabled;
+	proc_dointvec(table, write, file, buffer, length, ppos);
+	if (!!old_state == !!nmi_watchdog_enabled)
+		return 0;
+
+	if (atomic_read(&nmi_active) < 0) {
+		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
+		return -EINVAL;
+	}
+
+	if (nmi_watchdog == NMI_DEFAULT) {
+		if (nmi_known_cpu() > 0)
+			nmi_watchdog = NMI_LOCAL_APIC;
+		else
+			nmi_watchdog = NMI_IO_APIC;
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+	{
+		if (nmi_watchdog_enabled)
+			enable_lapic_nmi_watchdog();
+		else
+			disable_lapic_nmi_watchdog();
+	} else if (nmi_watchdog == NMI_IO_APIC) {
+		/* FIXME
+		 * for some reason these functions don't work
+		 */
+		printk("Can not enable/disable NMI on IO APIC\n");
+		return -EINVAL;
+#if 0
+		if (nmi_watchdog_enabled)
+			enable_timer_nmi_watchdog();
+		else
+			disable_timer_nmi_watchdog();
+#endif
+	} else {
+		printk( KERN_WARNING
+			"NMI watchdog doesn't know what hardware to touch\n");
+		return -EIO;
+	}
+	return 0;
+}
+
 #endif
 
 EXPORT_SYMBOL(nmi_active);
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 9d175dcf3a2d..3a17411a9a19 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -750,6 +750,54 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 	return 0;
 }
 
+/*
+ * proc handler for /proc/sys/kernel/nmi
+ */
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+			void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int old_state;
+
+	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+	old_state = nmi_watchdog_enabled;
+	proc_dointvec(table, write, file, buffer, length, ppos);
+	if (!!old_state == !!nmi_watchdog_enabled)
+		return 0;
+
+	if (atomic_read(&nmi_active) < 0) {
+		printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
+		return -EINVAL;
+	}
+
+	/* if nmi_watchdog is not set yet, then set it */
+	nmi_watchdog_default();
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+	{
+		if (nmi_watchdog_enabled)
+			enable_lapic_nmi_watchdog();
+		else
+			disable_lapic_nmi_watchdog();
+	} else if (nmi_watchdog == NMI_IO_APIC) {
+		/* FIXME
+		 * for some reason these functions don't work
+		 */
+		printk("Can not enable/disable NMI on IO APIC\n");
+		return -EIO;
+#if 0
+		if (nmi_watchdog_enabled)
+			enable_timer_nmi_watchdog();
+		else
+			disable_timer_nmi_watchdog();
+#endif
+	} else {
+		printk(KERN_WARNING
+			"NMI watchdog doesn't know what hardware to touch\n");
+		return -EIO;
+	}
+	return 0;
+}
+
 #endif
 
 EXPORT_SYMBOL(nmi_active);
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h
index 34d6bf063b6e..13b5d8311bf7 100644
--- a/include/asm-i386/nmi.h
+++ b/include/asm-i386/nmi.h
@@ -14,6 +14,7 @@
  */
 int do_nmi_callback(struct pt_regs *regs, int cpu);
 
+extern int nmi_watchdog_enabled;
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int avail_to_resrv_perfctr_nmi(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h
index 8818c39d34e0..2c23b0df87d2 100644
--- a/include/asm-x86_64/nmi.h
+++ b/include/asm-x86_64/nmi.h
@@ -43,6 +43,7 @@ extern void die_nmi(char *str, struct pt_regs *regs);
 
 extern int panic_on_timeout;
 extern int unknown_nmi_panic;
+extern int nmi_watchdog_enabled;
 
 extern int check_nmi_watchdog(void);
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed917a4f8..ecb79ba52ae1 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -150,6 +150,7 @@ enum
 	KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
 	KERN_COMPAT_LOG=73,	/* int: print compat layer  messages */
 	KERN_MAX_LOCK_DEPTH=74,
+	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
 };
 
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 83f168361624..040de6bd74dd 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -76,6 +76,9 @@ extern int compat_log;
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
+int nmi_watchdog_enabled;
+extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
+			void __user *, size_t *, loff_t *);
 #endif
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -628,6 +631,14 @@ static ctl_table kern_table[] = {
 		.mode           = 0644,
 		.proc_handler   = &proc_dointvec,
 	},
+	{
+		.ctl_name       = KERN_NMI_WATCHDOG,
+		.procname       = "nmi_watchdog",
+		.data           = &nmi_watchdog_enabled,
+		.maxlen         = sizeof (int),
+		.mode           = 0644,
+		.proc_handler   = &proc_nmi_enabled,
+	},
 #endif
 #if defined(CONFIG_X86)
 	{
-- 
cgit v1.2.3


From 8da5adda91df3d2fcc5300e68da491694c9af019 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Tue, 26 Sep 2006 10:52:27 +0200
Subject: [PATCH] x86: Allow users to force a panic on NMI

To quote Alan Cox:

The default Linux behaviour on an NMI of either memory or unknown is to
continue operation. For many environments such as scientific computing
it is preferable that the box is taken out and the error dealt with than
an uncorrected parity/ECC error get propogated.

A small number of systems do generate NMI's for bizarre random reasons
such as power management so the default is unchanged. In other respects
the new proc/sys entry works like the existing panic controls already in
that directory.

This is separate to the edac support - EDAC allows supported chipsets to
handle ECC errors well, this change allows unsupported cases to at least
panic rather than cause problems further down the line.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/i386/kernel/traps.c   | 6 ++++++
 arch/x86_64/kernel/traps.c | 6 ++++++
 include/linux/kernel.h     | 1 +
 include/linux/sysctl.h     | 1 +
 kernel/panic.c             | 1 +
 kernel/sysctl.c            | 8 ++++++++
 6 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 7db664d0b25c..2f6cb8276480 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -635,6 +635,8 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
 			"to continue\n");
 	printk(KERN_EMERG "You probably have a hardware problem with your RAM "
 			"chips\n");
+	if (panic_on_unrecovered_nmi)
+                panic("NMI: Not continuing");
 
 	/* Clear and disable the memory parity error line. */
 	clear_mem_error(reason);
@@ -670,6 +672,10 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
 		reason, smp_processor_id());
 	printk("Dazed and confused, but trying to continue\n");
 	printk("Do you have a strange power saving mode enabled?\n");
+
+	if (panic_on_unrecovered_nmi)
+                panic("NMI: Not continuing");
+
 }
 
 static DEFINE_SPINLOCK(nmi_print_lock);
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 42bc070fdf11..b18829db2a6a 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -732,6 +732,8 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
 {
 	printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
 	printk("You probably have a hardware problem with your RAM chips\n");
+	if (panic_on_unrecovered_nmi)
+               panic("NMI: Not continuing");
 
 	/* Clear and disable the memory parity error line. */
 	reason = (reason & 0xf) | 4;
@@ -757,6 +759,10 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
 {	printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
 	printk("Dazed and confused, but trying to continue\n");
 	printk("Do you have a strange power saving mode enabled?\n");
+
+	if (panic_on_unrecovered_nmi)
+                panic("NMI: Not continuing");
+
 }
 
 /* Runs on IST stack. This code must keep interrupts off all the time.
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b2ae4fdce8b..1ff9609300b4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -186,6 +186,7 @@ extern void bust_spinlocks(int yes);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
 extern int panic_timeout;
 extern int panic_on_oops;
+extern int panic_on_unrecovered_nmi;
 extern int tainted;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index ecb79ba52ae1..432778446ad2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -151,6 +151,7 @@ enum
 	KERN_COMPAT_LOG=73,	/* int: print compat layer  messages */
 	KERN_MAX_LOCK_DEPTH=74,
 	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
+	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
 };
 
 
diff --git a/kernel/panic.c b/kernel/panic.c
index 8010b9b17aca..d2db3e2209e0 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -21,6 +21,7 @@
 #include <linux/debug_locks.h>
 
 int panic_on_oops;
+int panic_on_unrecovered_nmi;
 int tainted;
 static int pause_on_oops;
 static int pause_on_oops_flag;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 040de6bd74dd..220e20564124 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -641,6 +641,14 @@ static ctl_table kern_table[] = {
 	},
 #endif
 #if defined(CONFIG_X86)
+	{
+		.ctl_name	= KERN_PANIC_ON_NMI,
+		.procname	= "panic_on_unrecovered_nmi",
+		.data		= &panic_on_unrecovered_nmi,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{
 		.ctl_name	= KERN_BOOTLOADER_TYPE,
 		.procname	= "bootloader_type",
-- 
cgit v1.2.3


From c08c820508233b424deab3302bc404bbecc6493a Mon Sep 17 00:00:00 2001
From: Vojtech Pavlik <vojtech@suse.cz>
Date: Tue, 26 Sep 2006 10:52:28 +0200
Subject: [PATCH] Add the vgetcpu vsyscall

This patch adds a vgetcpu vsyscall, which depending on the CPU RDTSCP
capability uses either the RDTSCP or CPUID to obtain a CPU and node
numbers and pass them to the program.

AK: Lots of changes over Vojtech's original code:
Better prototype for vgetcpu()
It's better to pass the cpu / node numbers as separate arguments
to avoid mistakes when going from SMP to NUMA.
Also add a fast time stamp based cache using a user supplied
argument to speed things more up.
Use fast method from Chuck Ebbert to retrieve node/cpu from
GDT limit instead of CPUID
Made sure RDTSCP init is always executed after node is known.
Drop printk

Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/head.S        |  2 +-
 arch/x86_64/kernel/time.c        | 13 ++++---
 arch/x86_64/kernel/vmlinux.lds.S |  3 ++
 arch/x86_64/kernel/vsyscall.c    | 84 +++++++++++++++++++++++++++++++++++++++-
 include/asm-x86_64/segment.h     |  5 ++-
 include/asm-x86_64/smp.h         | 12 ++++--
 include/asm-x86_64/vsyscall.h    |  9 +++++
 include/linux/getcpu.h           | 16 ++++++++
 8 files changed, 130 insertions(+), 14 deletions(-)
 create mode 100644 include/linux/getcpu.h

(limited to 'include/linux')

diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index c9739ca81d06..505ec4a57506 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table)
 	.quad	0,0			/* TSS */
 	.quad	0,0			/* LDT */
 	.quad   0,0,0			/* three TLS descriptors */ 
-	.quad	0			/* unused */
+	.quad	0x0000f40000000000	/* node/CPU stored in limit */
 gdt_end:	
 	/* asm/segment.h:GDT_ENTRIES must match this */	
 	/* This should be a multiple of the cache line size */
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 97b9e46d1992..560ed944dc0e 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -899,12 +899,8 @@ static int __cpuinit
 time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
 	unsigned cpu = (unsigned long) hcpu;
-	if (action == CPU_ONLINE &&
-		cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
-		unsigned p;
-		p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
-		write_rdtscp_aux(p);
-	}
+	if (action == CPU_ONLINE)
+		vsyscall_set_cpu(cpu);
 	return NOTIFY_DONE;
 }
 
@@ -993,6 +989,11 @@ void time_init_gtod(void)
 	if (unsynchronized_tsc())
 		notsc = 1;
 
+ 	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
+		vgetcpu_mode = VGETCPU_RDTSCP;
+	else
+		vgetcpu_mode = VGETCPU_LSL;
+
 	if (vxtime.hpet_address && notsc) {
 		timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
 		if (hpet_use_timer)
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 7c4de31471d4..8d5a5149bb3a 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -99,6 +99,9 @@ SECTIONS
   .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
   vxtime = VVIRT(.vxtime);
 
+  .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
+  vgetcpu_mode = VVIRT(.vgetcpu_mode);
+
   .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
   wall_jiffies = VVIRT(.wall_jiffies);
 
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index f603037df162..902783bc4d53 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -26,6 +26,7 @@
 #include <linux/seqlock.h>
 #include <linux/jiffies.h>
 #include <linux/sysctl.h>
+#include <linux/getcpu.h>
 
 #include <asm/vsyscall.h>
 #include <asm/pgtable.h>
@@ -33,11 +34,15 @@
 #include <asm/fixmap.h>
 #include <asm/errno.h>
 #include <asm/io.h>
+#include <asm/segment.h>
+#include <asm/desc.h>
+#include <asm/topology.h>
 
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
 
 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+int __vgetcpu_mode __section_vgetcpu_mode;
 
 #include <asm/unistd.h>
 
@@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t)
 	return __xtime.tv_sec;
 }
 
-long __vsyscall(2) venosys_0(void)
+/* Fast way to get current CPU and node.
+   This helps to do per node and per CPU caches in user space.
+   The result is not guaranteed without CPU affinity, but usually
+   works out because the scheduler tries to keep a thread on the same
+   CPU.
+
+   tcache must point to a two element sized long array.
+   All arguments can be NULL. */
+long __vsyscall(2)
+vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 {
-	return -ENOSYS;
+	unsigned int dummy, p;
+	unsigned long j = 0;
+
+	/* Fast cache - only recompute value once per jiffies and avoid
+	   relatively costly rdtscp/cpuid otherwise.
+	   This works because the scheduler usually keeps the process
+	   on the same CPU and this syscall doesn't guarantee its
+	   results anyways.
+	   We do this here because otherwise user space would do it on
+	   its own in a likely inferior way (no access to jiffies).
+	   If you don't like it pass NULL. */
+	if (tcache && tcache->t0 == (j = __jiffies)) {
+		p = tcache->t1;
+	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
+		/* Load per CPU data from RDTSCP */
+		rdtscp(dummy, dummy, p);
+	} else {
+		/* Load per CPU data from GDT */
+		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+	}
+	if (tcache) {
+		tcache->t0 = j;
+		tcache->t1 = p;
+	}
+	if (cpu)
+		*cpu = p & 0xfff;
+	if (node)
+		*node = p >> 12;
+	return 0;
 }
 
 long __vsyscall(3) venosys_1(void)
@@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = {
 
 #endif
 
+static void __cpuinit write_rdtscp_cb(void *info)
+{
+	write_rdtscp_aux((unsigned long)info);
+}
+
+void __cpuinit vsyscall_set_cpu(int cpu)
+{
+	unsigned long *d;
+	unsigned long node = 0;
+#ifdef CONFIG_NUMA
+	node = cpu_to_node[cpu];
+#endif
+	if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
+		void *info = (void *)((node << 12) | cpu);
+		/* Can happen on preemptive kernel */
+		if (get_cpu() == cpu)
+			write_rdtscp_cb(info);
+#ifdef CONFIG_SMP
+		else {
+			/* the notifier is unfortunately not executed on the
+			   target CPU */
+			smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
+		}
+#endif
+		put_cpu();
+	}
+
+	/* Store cpu number in limit so that it can be loaded quickly
+	   in user space in vgetcpu.
+	   12 bits for the CPU and 8 bits for the node. */
+	d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
+	*d = 0x0f40000000000ULL;
+	*d |= cpu;
+	*d |= (node & 0xf) << 12;
+	*d |= (node >> 4) << 48;
+}
+
 static void __init map_vsyscall(void)
 {
 	extern char __vsyscall_0;
@@ -214,6 +293,7 @@ static int __init vsyscall_init(void)
 			VSYSCALL_ADDR(__NR_vgettimeofday)));
 	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
 	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
+	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
 	map_vsyscall();
 #ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2, 0);
diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h
index d4bed33fb32c..334ddcdd8f92 100644
--- a/include/asm-x86_64/segment.h
+++ b/include/asm-x86_64/segment.h
@@ -20,15 +20,16 @@
 #define __USER_CS     0x33   /* 6*8+3 */ 
 #define __USER32_DS	__USER_DS 
 
-#define GDT_ENTRY_TLS 1
 #define GDT_ENTRY_TSS 8	/* needs two entries */
 #define GDT_ENTRY_LDT 10 /* needs two entries */
 #define GDT_ENTRY_TLS_MIN 12
 #define GDT_ENTRY_TLS_MAX 14
-/* 15 free */
 
 #define GDT_ENTRY_TLS_ENTRIES 3
 
+#define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
+#define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
+
 /* TLS indexes for 64bit - hardcoded in arch_prctl */
 #define FS_TLS 0	
 #define GS_TLS 1	
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 6805e1feb300..d61547fd833b 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -133,13 +133,19 @@ static __inline int logical_smp_processor_id(void)
 	/* we don't want to mark this access volatile - bad code generation */
 	return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
 }
-#endif
 
 #ifdef CONFIG_SMP
 #define cpu_physical_id(cpu)		x86_cpu_to_apicid[cpu]
 #else
 #define cpu_physical_id(cpu)		boot_cpu_id
-#endif
-
+static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
+				void *info, int retry, int wait)
+{
+	/* Disable interrupts here? */
+	func(info);
+	return 0;
+}
+#endif /* !CONFIG_SMP */
+#endif /* !__ASSEMBLY */
 #endif
 
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
index 146b24402a5f..2281e9399b96 100644
--- a/include/asm-x86_64/vsyscall.h
+++ b/include/asm-x86_64/vsyscall.h
@@ -4,6 +4,7 @@
 enum vsyscall_num {
 	__NR_vgettimeofday,
 	__NR_vtime,
+	__NR_vgetcpu,
 };
 
 #define VSYSCALL_START (-10UL << 20)
@@ -15,6 +16,7 @@ enum vsyscall_num {
 #include <linux/seqlock.h>
 
 #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
+#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
 #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
 #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
 #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
@@ -26,6 +28,9 @@ enum vsyscall_num {
 #define VXTIME_HPET	2
 #define VXTIME_PMTMR	3
 
+#define VGETCPU_RDTSCP	1
+#define VGETCPU_LSL	2
+
 struct vxtime_data {
 	long hpet_address;	/* HPET base address */
 	int last;
@@ -40,6 +45,7 @@ struct vxtime_data {
 
 /* vsyscall space (readonly) */
 extern struct vxtime_data __vxtime;
+extern int __vgetcpu_mode;
 extern struct timespec __xtime;
 extern volatile unsigned long __jiffies;
 extern unsigned long __wall_jiffies;
@@ -48,6 +54,7 @@ extern seqlock_t __xtime_lock;
 
 /* kernel space (writeable) */
 extern struct vxtime_data vxtime;
+extern int vgetcpu_mode;
 extern unsigned long wall_jiffies;
 extern struct timezone sys_tz;
 extern int sysctl_vsyscall;
@@ -55,6 +62,8 @@ extern seqlock_t xtime_lock;
 
 extern int sysctl_vsyscall;
 
+extern void vsyscall_set_cpu(int cpu);
+
 #define ARCH_HAVE_XTIME_LOCK 1
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h
new file mode 100644
index 000000000000..031ed3780e45
--- /dev/null
+++ b/include/linux/getcpu.h
@@ -0,0 +1,16 @@
+#ifndef _LINUX_GETCPU_H
+#define _LINUX_GETCPU_H 1
+
+/* Cache for getcpu() to speed it up. Results might be upto a jiffie
+   out of date, but will be faster.
+   User programs should not refer to the contents of this structure.
+   It is only a cache for vgetcpu(). It might change in future kernels.
+   The user program must store this information per thread (__thread)
+   If you want 100% accurate information pass NULL instead. */
+struct getcpu_cache {
+	unsigned long t0;
+	unsigned long t1;
+	unsigned long res[4];
+};
+
+#endif
-- 
cgit v1.2.3


From 3cfc348bf90ffaa777c188652aa297f04eb94de8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 26 Sep 2006 10:52:28 +0200
Subject: [PATCH] x86: Add portable getcpu call

For NUMA optimization and some other algorithms it is useful to have a fast
to get the current CPU and node numbers in user space.

x86-64 added a fast way to do this in a vsyscall. This adds a generic
syscall for other architectures to make it a generic portable facility.

I expect some of them will also implement it as a faster vsyscall.

The cache is an optimization for the x86-64 vsyscall optimization. Since
what the syscall returns is an approximation anyways and user space
often wants very fast results it can be cached for some time.  The norma
methods to get this information in user space are relatively slow

The vsyscall is in a better position to manage the cache because it has direct
access to a fast time stamp (jiffies). For the generic syscall optimization
it doesn't help much, but enforce a valid argument to keep programs
portable

I only added an i386 syscall entry for now. Other architectures can follow
as needed.

AK: Also added some cleanups from Andrew Morton

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/i386/kernel/syscall_table.S |  1 +
 arch/x86_64/ia32/ia32entry.S     |  1 +
 include/asm-i386/unistd.h        |  3 ++-
 include/linux/syscalls.h         |  2 ++
 kernel/sys.c                     | 31 +++++++++++++++++++++++++++++++
 5 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index dd63d4775398..7e639f78b0b9 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -317,3 +317,4 @@ ENTRY(sys_call_table)
 	.long sys_tee			/* 315 */
 	.long sys_vmsplice
 	.long sys_move_pages
+	.long sys_getcpu
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 30ed0f6f4a2b..32fd32bea07c 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -713,4 +713,5 @@ ia32_sys_call_table:
 	.quad sys_tee
 	.quad compat_sys_vmsplice
 	.quad compat_sys_move_pages
+	.quad sys_getcpu
 ia32_syscall_end:		
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index fc1c8ddae149..565d0897b205 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -323,10 +323,11 @@
 #define __NR_tee		315
 #define __NR_vmsplice		316
 #define __NR_move_pages		317
+#define __NR_getcpu		318
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 318
+#define NR_syscalls 319
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 008f04c56737..3f0f716225ec 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -53,6 +53,7 @@ struct mq_attr;
 struct compat_stat;
 struct compat_timeval;
 struct robust_list_head;
+struct getcpu_cache;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -596,5 +597,6 @@ asmlinkage long sys_get_robust_list(int pid,
 				    size_t __user *len_ptr);
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
+asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache);
 
 #endif
diff --git a/kernel/sys.c b/kernel/sys.c
index e236f98f7ec5..3f894775488d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -28,6 +28,7 @@
 #include <linux/tty.h>
 #include <linux/signal.h>
 #include <linux/cn_proc.h>
+#include <linux/getcpu.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -2062,3 +2063,33 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 	}
 	return error;
 }
+
+asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
+	   		   struct getcpu_cache __user *cache)
+{
+	int err = 0;
+	int cpu = raw_smp_processor_id();
+	if (cpup)
+		err |= put_user(cpu, cpup);
+	if (nodep)
+		err |= put_user(cpu_to_node(cpu), nodep);
+	if (cache) {
+		/*
+		 * The cache is not needed for this implementation,
+		 * but make sure user programs pass something
+		 * valid. vsyscall implementations can instead make
+		 * good use of the cache. Only use t0 and t1 because
+		 * these are available in both 32bit and 64bit ABI (no
+		 * need for a compat_getcpu). 32bit has enough
+		 * padding
+		 */
+		unsigned long t0, t1;
+		get_user(t0, &cache->t0);
+		get_user(t1, &cache->t1);
+		t0++;
+		t1++;
+		put_user(t0, &cache->t0);
+		put_user(t1, &cache->t1);
+	}
+	return err ? -EFAULT : 0;
+}
-- 
cgit v1.2.3


From 5a1b3999d6cb7ab87f1f3b1700bc91839fd6fa29 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 26 Sep 2006 10:52:34 +0200
Subject: [PATCH] x86: Some preparationary cleanup for stack trace

- Remove unused all_contexts parameter
No caller used it
- Move skip argument into the structure (needed for
followon patches)

Cc: mingo@elte.hu

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/i386/kernel/stacktrace.c   | 11 +++--------
 arch/s390/kernel/stacktrace.c   | 17 ++++++++---------
 arch/x86_64/kernel/stacktrace.c | 14 +++++---------
 include/linux/stacktrace.h      |  7 ++++---
 kernel/lockdep.c                |  5 ++++-
 5 files changed, 24 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c
index e62a037ab399..ae3c32a87add 100644
--- a/arch/i386/kernel/stacktrace.c
+++ b/arch/i386/kernel/stacktrace.c
@@ -61,12 +61,8 @@ save_context_stack(struct stack_trace *trace, unsigned int skip,
 
 /*
  * Save stack-backtrace addresses into a stack_trace buffer.
- * If all_contexts is set, all contexts (hardirq, softirq and process)
- * are saved. If not set then only the current context is saved.
  */
-void save_stack_trace(struct stack_trace *trace,
-		      struct task_struct *task, int all_contexts,
-		      unsigned int skip)
+void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
 {
 	unsigned long ebp;
 	unsigned long *stack = &ebp;
@@ -85,10 +81,9 @@ void save_stack_trace(struct stack_trace *trace,
 		struct thread_info *context = (struct thread_info *)
 				((unsigned long)stack & (~(THREAD_SIZE - 1)));
 
-		ebp = save_context_stack(trace, skip, context, stack, ebp);
+		ebp = save_context_stack(trace, trace->skip, context, stack, ebp);
 		stack = (unsigned long *)context->previous_esp;
-		if (!all_contexts || !stack ||
-				trace->nr_entries >= trace->max_entries)
+		if (!stack || trace->nr_entries >= trace->max_entries)
 			break;
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 		if (trace->nr_entries >= trace->max_entries)
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index de83f38288d0..d9428a0fc8fb 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -59,9 +59,7 @@ static inline unsigned long save_context_stack(struct stack_trace *trace,
 	}
 }
 
-void save_stack_trace(struct stack_trace *trace,
-		      struct task_struct *task, int all_contexts,
-		      unsigned int skip)
+void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
 {
 	register unsigned long sp asm ("15");
 	unsigned long orig_sp;
@@ -69,22 +67,23 @@ void save_stack_trace(struct stack_trace *trace,
 	sp &= PSW_ADDR_INSN;
 	orig_sp = sp;
 
-	sp = save_context_stack(trace, &skip, sp,
+	sp = save_context_stack(trace, &trace->skip, sp,
 				S390_lowcore.panic_stack - PAGE_SIZE,
 				S390_lowcore.panic_stack);
-	if ((sp != orig_sp) && !all_contexts)
+	if ((sp != orig_sp) && !trace->all_contexts)
 		return;
-	sp = save_context_stack(trace, &skip, sp,
+	sp = save_context_stack(trace, &trace->skip, sp,
 				S390_lowcore.async_stack - ASYNC_SIZE,
 				S390_lowcore.async_stack);
-	if ((sp != orig_sp) && !all_contexts)
+	if ((sp != orig_sp) && !trace->all_contexts)
 		return;
 	if (task)
-		save_context_stack(trace, &skip, sp,
+		save_context_stack(trace, &trace->skip, sp,
 				   (unsigned long) task_stack_page(task),
 				   (unsigned long) task_stack_page(task) + THREAD_SIZE);
 	else
-		save_context_stack(trace, &skip, sp, S390_lowcore.thread_info,
+		save_context_stack(trace, &trace->skip, sp,
+				   S390_lowcore.thread_info,
 				   S390_lowcore.thread_info + THREAD_SIZE);
 	return;
 }
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
index 32cf55eb9af8..1c022af8fe1e 100644
--- a/arch/x86_64/kernel/stacktrace.c
+++ b/arch/x86_64/kernel/stacktrace.c
@@ -109,9 +109,10 @@ out_restore:
  * Save stack-backtrace addresses into a stack_trace buffer:
  */
 static inline unsigned long
-save_context_stack(struct stack_trace *trace, unsigned int skip,
+save_context_stack(struct stack_trace *trace,
 		   unsigned long stack, unsigned long stack_end)
 {
+	int skip = trace->skip;
 	unsigned long addr;
 
 #ifdef CONFIG_FRAME_POINTER
@@ -159,12 +160,8 @@ save_context_stack(struct stack_trace *trace, unsigned int skip,
 
 /*
  * Save stack-backtrace addresses into a stack_trace buffer.
- * If all_contexts is set, all contexts (hardirq, softirq and process)
- * are saved. If not set then only the current context is saved.
  */
-void save_stack_trace(struct stack_trace *trace,
-		      struct task_struct *task, int all_contexts,
-		      unsigned int skip)
+void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
 {
 	unsigned long stack = (unsigned long)&stack;
 	int i, nr_stacks = 0, stacks_done[MAX_STACKS];
@@ -207,9 +204,8 @@ void save_stack_trace(struct stack_trace *trace,
 				return;
 		stacks_done[nr_stacks] = stack_end;
 
-		stack = save_context_stack(trace, skip, stack, stack_end);
-		if (!all_contexts || !stack ||
-				trace->nr_entries >= trace->max_entries)
+		stack = save_context_stack(trace, stack, stack_end);
+		if (!stack || trace->nr_entries >= trace->max_entries)
 			return;
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 		if (trace->nr_entries >= trace->max_entries)
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 9cc81e572224..50e2b01e517c 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -5,15 +5,16 @@
 struct stack_trace {
 	unsigned int nr_entries, max_entries;
 	unsigned long *entries;
+	int skip;	/* input argument: How many entries to skip */
+	int all_contexts; /* input argument: if true do than one stack */
 };
 
 extern void save_stack_trace(struct stack_trace *trace,
-			     struct task_struct *task, int all_contexts,
-			     unsigned int skip);
+			     struct task_struct *task);
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
 #else
-# define save_stack_trace(trace, task, all, skip)	do { } while (0)
+# define save_stack_trace(trace, task)			do { } while (0)
 # define print_stack_trace(trace)			do { } while (0)
 #endif
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9bad17884513..900b4cb1a024 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -224,7 +224,10 @@ static int save_trace(struct stack_trace *trace)
 	trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
 	trace->entries = stack_trace + nr_stack_trace_entries;
 
-	save_stack_trace(trace, NULL, 0, 3);
+	trace->skip = 3;
+	trace->all_contexts = 0;
+
+	save_stack_trace(trace, NULL);
 
 	trace->max_entries = trace->nr_entries;
 
-- 
cgit v1.2.3


From d28c4393a7bf558538e9def269c1caeab6ec056f Mon Sep 17 00:00:00 2001
From: "Prasanna S.P" <prasanna@in.ibm.com>
Date: Tue, 26 Sep 2006 10:52:34 +0200
Subject: [PATCH] x86: error_code is not safe for kprobes

This patch moves the entry.S:error_entry to .kprobes.text section,
since code marked unsafe for kprobes jumps directly to entry.S::error_entry,
that must be marked unsafe as well.
This patch also moves all the ".previous.text" asm directives to ".previous"
for kprobes section.

AK: Following a similar i386 patch from Chuck Ebbert
AK: Also merged Jeremy's fix in.

+From: Jeremy Fitzhardinge <jeremy@goop.org>

KPROBE_ENTRY does a .section .kprobes.text, and expects its users to
do a .previous at the end of the function.

Unfortunately, if any code within the function switches sections, for
example .fixup, then the .previous ends up putting all subsequent code
into .fixup.  Worse, any subsequent .fixup code gets intermingled with
the code its supposed to be fixing (which is also in .fixup).  It's
surprising this didn't cause more havok.

The fix is to use .pushsection/.popsection, so this stuff nests
properly.  A further cleanup would be to get rid of all
.section/.previous pairs, since they're inherently fragile.

+From: Chuck Ebbert <76306.1226@compuserve.com>

Because code marked unsafe for kprobes jumps directly to
entry.S::error_code, that must be marked unsafe as well.
The easiest way to do that is to move the page fault entry
point to just before error_code and let it inherit the same
section.

Also moved all the ".previous" asm directives for kprobes
sections to column 1 and removed ".text" from them.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/i386/kernel/entry.S   | 25 +++++++++++++------------
 arch/x86_64/kernel/entry.S | 19 +++++++------------
 include/linux/linkage.h    |  6 +++++-
 3 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 87f9f60b803b..ba22ec8fab54 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -591,11 +591,9 @@ ENTRY(name)				\
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
-ENTRY(divide_error)
-	RING0_INT_FRAME
-	pushl $0			# no error code
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_divide_error
+KPROBE_ENTRY(page_fault)
+	RING0_EC_FRAME
+	pushl $do_page_fault
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 error_code:
@@ -645,6 +643,7 @@ error_code:
 	call *%edi
 	jmp ret_from_exception
 	CFI_ENDPROC
+KPROBE_END(page_fault)
 
 ENTRY(coprocessor_error)
 	RING0_INT_FRAME
@@ -720,7 +719,8 @@ debug_stack_correct:
 	call do_debug
 	jmp ret_from_exception
 	CFI_ENDPROC
-	.previous .text
+KPROBE_END(debug)
+
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
  * a debug fault, and the debug fault hasn't yet been able to
@@ -816,7 +816,7 @@ KPROBE_ENTRY(int3)
 	call do_int3
 	jmp ret_from_exception
 	CFI_ENDPROC
-	.previous .text
+KPROBE_END(int3)
 
 ENTRY(overflow)
 	RING0_INT_FRAME
@@ -881,7 +881,7 @@ KPROBE_ENTRY(general_protection)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
-	.previous .text
+KPROBE_END(general_protection)
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
@@ -890,13 +890,14 @@ ENTRY(alignment_check)
 	jmp error_code
 	CFI_ENDPROC
 
-KPROBE_ENTRY(page_fault)
-	RING0_EC_FRAME
-	pushl $do_page_fault
+ENTRY(divide_error)
+	RING0_INT_FRAME
+	pushl $0			# no error code
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl $do_divide_error
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
-	.previous .text
 
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 2092f565aa87..780f9b26169f 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -819,7 +819,7 @@ paranoid_schedule\trace:
  * Exception entry point. This expects an error code/orig_rax on the stack
  * and the exception handler in %rax.	
  */ 		  				
-ENTRY(error_entry)
+KPROBE_ENTRY(error_entry)
 	_frame RDI
 	/* rdi slot contains rax, oldrax contains error code */
 	cld	
@@ -903,7 +903,7 @@ error_kernelspace:
 	cmpq $gs_change,RIP(%rsp)
         je   error_swapgs
 	jmp  error_sti
-END(error_entry)
+KPROBE_END(error_entry)
 	
        /* Reload gs selector with exception handling */
        /* edi:  new selector */ 
@@ -1025,8 +1025,7 @@ ENDPROC(execve)
 
 KPROBE_ENTRY(page_fault)
 	errorentry do_page_fault
-END(page_fault)
-	.previous .text
+KPROBE_END(page_fault)
 
 ENTRY(coprocessor_error)
 	zeroentry do_coprocessor_error
@@ -1047,8 +1046,7 @@ KPROBE_ENTRY(debug)
 	CFI_ADJUST_CFA_OFFSET 8		
 	paranoidentry do_debug, DEBUG_STACK
 	paranoidexit
-END(debug)
-	.previous .text
+KPROBE_END(debug)
 
 	/* runs on exception stack */	
 KPROBE_ENTRY(nmi)
@@ -1062,8 +1060,7 @@ KPROBE_ENTRY(nmi)
 	jmp paranoid_exit1
  	CFI_ENDPROC
 #endif
-END(nmi)
-	.previous .text
+KPROBE_END(nmi)
 
 KPROBE_ENTRY(int3)
  	INTR_FRAME
@@ -1072,8 +1069,7 @@ KPROBE_ENTRY(int3)
  	paranoidentry do_int3, DEBUG_STACK
  	jmp paranoid_exit1
  	CFI_ENDPROC
-END(int3)
-	.previous .text
+KPROBE_END(int3)
 
 ENTRY(overflow)
 	zeroentry do_overflow
@@ -1121,8 +1117,7 @@ END(stack_segment)
 
 KPROBE_ENTRY(general_protection)
 	errorentry do_general_protection
-END(general_protection)
-	.previous .text
+KPROBE_END(general_protection)
 
 ENTRY(alignment_check)
 	errorentry do_alignment_check
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 932021f872d5..6c9873f88287 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -35,9 +35,13 @@
 #endif
 
 #define KPROBE_ENTRY(name) \
-  .section .kprobes.text, "ax"; \
+  .pushsection .kprobes.text, "ax"; \
   ENTRY(name)
 
+#define KPROBE_END(name) \
+  END(name);		 \
+  .popsection
+
 #ifndef END
 #define END(name) \
   .size name, .-name
-- 
cgit v1.2.3


From e07e23e1fd3000289fc7ccc6c71879070d3b19e0 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Tue, 26 Sep 2006 10:52:36 +0200
Subject: [PATCH] non lazy "sleazy" fpu implementation

Right now the kernel on x86-64 has a 100% lazy fpu behavior: after *every*
context switch a trap is taken for the first FPU use to restore the FPU
context lazily.  This is of course great for applications that have very
sporadic or no FPU use (since then you avoid doing the expensive
save/restore all the time).  However for very frequent FPU users...  you
take an extra trap every context switch.

The patch below adds a simple heuristic to this code: After 5 consecutive
context switches of FPU use, the lazy behavior is disabled and the context
gets restored every context switch.  If the app indeed uses the FPU, the
trap is avoided.  (the chance of the 6th time slice using FPU after the
previous 5 having done so are quite high obviously).

After 256 switches, this is reset and lazy behavior is returned (until
there are 5 consecutive ones again).  The reason for this is to give apps
that do longer bursts of FPU use still the lazy behavior back after some
time.

[akpm@osdl.org: place new task_struct field next to jit_keyring to save space]
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/process.c | 10 ++++++++++
 arch/x86_64/kernel/traps.c   |  1 +
 include/asm-x86_64/i387.h    |  5 ++++-
 include/linux/sched.h        |  9 +++++++++
 4 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 6fbd19564e4e..9e9a70e50c72 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -552,6 +552,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();  
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
+	/* we're going to use this soon, after a few expensive things */
+	if (next_p->fpu_counter>5)
+		prefetch(&next->i387.fxsave);
+
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
@@ -629,6 +633,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	    || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	/* If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	if (next_p->fpu_counter>5)
+		math_state_restore();
 	return prev_p;
 }
 
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 28e53342f294..ffc40cff1e07 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -1136,6 +1136,7 @@ asmlinkage void math_state_restore(void)
 		init_fpu(me);
 	restore_fpu_checking(&me->thread.i387.fxsave);
 	task_thread_info(me)->status |= TS_USEDFPU;
+	me->fpu_counter++;
 }
 
 void __init trap_init(void)
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h
index cba8a3b0cded..60c0f4853fdb 100644
--- a/include/asm-x86_64/i387.h
+++ b/include/asm-x86_64/i387.h
@@ -24,6 +24,7 @@ extern unsigned int mxcsr_feature_mask;
 extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
 extern int save_i387(struct _fpstate __user *buf);
+extern asmlinkage void math_state_restore(void);
 
 /*
  * FPU lazy state save handling...
@@ -31,7 +32,9 @@ extern int save_i387(struct _fpstate __user *buf);
 
 #define unlazy_fpu(tsk) do { \
 	if (task_thread_info(tsk)->status & TS_USEDFPU) \
-		save_init_fpu(tsk); \
+		save_init_fpu(tsk); 			\
+	else						\
+		tsk->fpu_counter = 0;			\
 } while (0)
 
 /* Ignore delayed exceptions from user space */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 34ed0d99b1bd..807556c5bcd2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -865,6 +865,15 @@ struct task_struct {
 	struct key *thread_keyring;	/* keyring private to this thread */
 	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
 #endif
+	/*
+	 * fpu_counter contains the number of consecutive context switches
+	 * that the FPU is used. If this is over a threshold, the lazy fpu
+	 * saving becomes unlazy to save the trap. This is an unsigned char
+	 * so that after 256 times the counter wraps and the behavior turns
+	 * lazy again; this to deal with bursty apps that only use FPU for
+	 * a short time
+	 */
+	unsigned char fpu_counter;
 	int oomkilladj; /* OOM kill score adjustment (bit shift). */
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
-- 
cgit v1.2.3


From 1bb4996bcebca1cde49d964b4e012699ce180e61 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 26 Sep 2006 10:52:37 +0200
Subject: [PATCH] Move compiler check for modules to ia64 only

Apparently IA64 needs it, but i386/x86-64 don't anymore
since gcc 2.95 support was dropped.  Nobody else on linux-arch
requested keeping it generically

Cc: tony.luck@intel.com
Cc: kaos@sgi.com

Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/asm-ia64/module.h | 3 ++-
 include/linux/vermagic.h  | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-ia64/module.h b/include/asm-ia64/module.h
index 85c82bd819f2..d2da61e4c49b 100644
--- a/include/asm-ia64/module.h
+++ b/include/asm-ia64/module.h
@@ -28,7 +28,8 @@ struct mod_arch_specific {
 #define Elf_Ehdr	Elf64_Ehdr
 
 #define MODULE_PROC_FAMILY	"ia64"
-#define MODULE_ARCH_VERMAGIC	MODULE_PROC_FAMILY
+#define MODULE_ARCH_VERMAGIC	MODULE_PROC_FAMILY \
+	"gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)
 
 #define ARCH_SHF_SMALL	SHF_IA_64_SHORT
 
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 46919f9f5eb3..4d0909e53595 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -24,5 +24,5 @@
 #define VERMAGIC_STRING 						\
 	UTS_RELEASE " "							\
 	MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT 			\
-	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC 		\
-	"gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)
+	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC
+
-- 
cgit v1.2.3


From 575400d1b483fbe9e03c68758059bfaf4e4768d1 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 26 Sep 2006 10:52:38 +0200
Subject: [PATCH] i386: Fix the EDD code misparsing the command line

The EDD code would scan the command line as a fixed array, without
taking account of either whitespace, null-termination, the old
command-line protocol, late overrides early, or the fact that the
command line may not be reachable from INITSEG.

This should fix those problems, and enable us to use a longer command
line.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/i386/boot/edd.S | 97 ++++++++++++++++++++++++++++++++++++++++------------
 include/linux/edd.h  |  1 +
 2 files changed, 76 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S
index 4b84ea216f2b..34321368011a 100644
--- a/arch/i386/boot/edd.S
+++ b/arch/i386/boot/edd.S
@@ -15,42 +15,95 @@
 #include <asm/setup.h>
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+
+# It is assumed that %ds == INITSEG here
+
 	movb	$0, (EDD_MBR_SIG_NR_BUF)
 	movb	$0, (EDDNR)
 
-# Check the command line for two options:
+# Check the command line for options:
 # edd=of  disables EDD completely  (edd=off)
 # edd=sk  skips the MBR test    (edd=skipmbr)
+# edd=on  re-enables EDD (edd=on)
+
 	pushl	%esi
-    	cmpl	$0, %cs:cmd_line_ptr
-	jz	done_cl
+	movw	$edd_mbr_sig_start, %di	# Default to edd=on
+
 	movl	%cs:(cmd_line_ptr), %esi
-# ds:esi has the pointer to the command line now
-	movl	$(COMMAND_LINE_SIZE-7), %ecx
-# loop through kernel command line one byte at a time
-cl_loop:
-	cmpl	$EDD_CL_EQUALS, (%si)
+	andl	%esi, %esi
+	jz	old_cl			# Old boot protocol?
+
+# Convert to a real-mode pointer in fs:si
+	movl	%esi, %eax
+	shrl	$4, %eax
+	movw	%ax, %fs
+	andw	$0xf, %si
+	jmp	have_cl_pointer
+
+# Old-style boot protocol?
+old_cl:
+	push	%ds			# aka INITSEG
+	pop	%fs
+
+	cmpw	$0xa33f, (0x20)
+	jne	done_cl			# No command line at all?
+	movw	(0x22), %si		# Pointer relative to INITSEG
+
+# fs:si has the pointer to the command line now
+have_cl_pointer:
+
+# Loop through kernel command line one byte at a time.  Just in
+# case the loader is buggy and failed to null-terminate the command line
+# terminate if we get close enough to the end of the segment that we
+# cannot fit "edd=XX"...
+cl_atspace:
+	cmpw	$-5, %si		# Watch for segment wraparound
+	jae	done_cl
+	movl	%fs:(%si), %eax
+	andb	%al, %al		# End of line?
+	jz	done_cl
+	cmpl	$EDD_CL_EQUALS, %eax
 	jz	found_edd_equals
-	incl	%esi
-	loop	cl_loop
-	jmp	done_cl
+	cmpb	$0x20, %al		# <= space consider whitespace
+	ja	cl_skipword
+	incw	%si
+	jmp	cl_atspace
+
+cl_skipword:
+	cmpw	$-5, %si		# Watch for segment wraparound
+	jae	done_cl
+	movb	%fs:(%si), %al		# End of string?
+	andb	%al, %al
+	jz	done_cl
+	cmpb	$0x20, %al
+	jbe	cl_atspace
+	incw	%si
+	jmp	cl_skipword
+
 found_edd_equals:
 # only looking at first two characters after equals
-    	addl	$4, %esi
-	cmpw	$EDD_CL_OFF, (%si)	# edd=of
-	jz	do_edd_off
-	cmpw	$EDD_CL_SKIP, (%si)	# edd=sk
-	jz	do_edd_skipmbr
-	jmp	done_cl
+# late overrides early on the command line, so keep going after finding something
+	movw	%fs:4(%si), %ax
+	cmpw	$EDD_CL_OFF, %ax	# edd=of
+	je	do_edd_off
+	cmpw	$EDD_CL_SKIP, %ax	# edd=sk
+	je	do_edd_skipmbr
+	cmpw	$EDD_CL_ON, %ax		# edd=on
+	je	do_edd_on
+	jmp	cl_skipword
 do_edd_skipmbr:
-    	popl	%esi
-	jmp	edd_start
+	movw	$edd_start, %di
+	jmp	cl_skipword
 do_edd_off:
-	popl	%esi
-	jmp	edd_done
+	movw	$edd_done, %di
+	jmp	cl_skipword
+do_edd_on:
+	movw	$edd_mbr_sig_start, %di
+	jmp	cl_skipword
+
 done_cl:
 	popl	%esi
-
+	jmpw	*%di
 
 # Read the first sector of each BIOS disk device and store the 4-byte signature
 edd_mbr_sig_start:
diff --git a/include/linux/edd.h b/include/linux/edd.h
index 162512b886f7..b2b3e68aa512 100644
--- a/include/linux/edd.h
+++ b/include/linux/edd.h
@@ -52,6 +52,7 @@
 #define EDD_CL_EQUALS   0x3d646465     /* "edd=" */
 #define EDD_CL_OFF      0x666f         /* "of" for off  */
 #define EDD_CL_SKIP     0x6b73         /* "sk" for skipmbr */
+#define EDD_CL_ON       0x6e6f	       /* "on" for on */
 
 #ifndef __ASSEMBLY__
 
-- 
cgit v1.2.3


From 0a4254058037eb172758961d0a5b94f4320a1425 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Tue, 26 Sep 2006 10:52:38 +0200
Subject: [PATCH] Add the canary field to the PDA area and the task struct

This patch adds the per thread cookie field to the task struct and the PDA.
Also it makes sure that the PDA value gets the new cookie value at context
switch, and that a new task gets a new cookie at task creation time.

Signed-off-by: Arjan van Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andi Kleen <ak@suse.de>
CC: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/process.c | 8 ++++++++
 include/asm-x86_64/pda.h     | 7 ++++++-
 include/linux/sched.h        | 5 +++++
 kernel/fork.c                | 5 +++++
 4 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 9e9a70e50c72..fba8dfeda67c 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -625,6 +625,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	unlazy_fpu(prev_p);
 	write_pda(kernelstack,
 		  task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
+#ifdef CONFIG_CC_STACKPROTECTOR
+	write_pda(stack_canary, next_p->stack_canary);
+	/*
+	 * Build time only check to make sure the stack_canary is at
+	 * offset 40 in the pda; this is a gcc ABI requirement
+	 */
+	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
+#endif
 
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h
index 6794ffaae433..e7773e0af865 100644
--- a/include/asm-x86_64/pda.h
+++ b/include/asm-x86_64/pda.h
@@ -16,7 +16,12 @@ struct x8664_pda {
 	unsigned long oldrsp; 	    /* 24 user rsp for system call */
         int irqcount;		    /* 32 Irq nesting counter. Starts with -1 */
 	int cpunumber;		    /* 36 Logical CPU number */
-	char *irqstackptr;	/* 40 top of irqstack */
+#ifdef CONFIG_CC_STACKPROTECTOR
+	unsigned long stack_canary;	/* 40 stack canary value */
+					/* gcc-ABI: this canary MUST be at
+					   offset 40!!! */
+#endif
+	char *irqstackptr;
 	int nodenumber;		    /* number of current node */
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 807556c5bcd2..9d4aa7f95bc8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -819,6 +819,11 @@ struct task_struct {
 	unsigned did_exec:1;
 	pid_t pid;
 	pid_t tgid;
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+	/* Canary value for the -fstack-protector gcc feature */
+	unsigned long stack_canary;
+#endif
 	/* 
 	 * pointers to (original) parent process, youngest child, younger sibling,
 	 * older sibling, respectively.  (p->father can be replaced with 
diff --git a/kernel/fork.c b/kernel/fork.c
index f9b014e3e700..a0dad84567c9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -45,6 +45,7 @@
 #include <linux/cn_proc.h>
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
+#include <linux/random.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -175,6 +176,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	tsk->thread_info = ti;
 	setup_thread_stack(tsk, orig);
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	tsk->stack_canary = get_random_int();
+#endif
+
 	/* One for us, one for whoever does the "release_task()" (usually parent) */
 	atomic_set(&tsk->usage,2);
 	atomic_set(&tsk->fs_excl, 0);
-- 
cgit v1.2.3


From 3b171672831b9633c2ed8fa94805255cd4d5af19 Mon Sep 17 00:00:00 2001
From: Dmitriy Zavin <dmitriyz@google.com>
Date: Tue, 26 Sep 2006 10:52:42 +0200
Subject: [PATCH] Add 64bit jiffies compares (for use with get_jiffies_64)

The current time_before/time_after macros will fail typechecks
when passed u64 values (as returned by get_jiffies_64()). On 64bit
systems, this will just result in a warning about mismatching types
without explicit casts, but since unsigned long and u64
(unsigned long long) are of same size, it will still work.
On 32bit systems, a long is 32bits, so the value from get_jiffies_64()
will be truncated by the cast and thus lose all the precision gained by
64bit jiffies.

Signed-off-by: Dmitriy Zavin <dmitriyz@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/jiffies.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 329ebcffa106..c8d5f207c3d4 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -115,6 +115,21 @@ static inline u64 get_jiffies_64(void)
 	 ((long)(a) - (long)(b) >= 0))
 #define time_before_eq(a,b)	time_after_eq(b,a)
 
+/* Same as above, but does so with platform independent 64bit types.
+ * These must be used when utilizing jiffies_64 (i.e. return value of
+ * get_jiffies_64() */
+#define time_after64(a,b)	\
+	(typecheck(__u64, a) &&	\
+	 typecheck(__u64, b) && \
+	 ((__s64)(b) - (__s64)(a) < 0))
+#define time_before64(a,b)	time_after64(b,a)
+
+#define time_after_eq64(a,b)	\
+	(typecheck(__u64, a) && \
+	 typecheck(__u64, b) && \
+	 ((__s64)(a) - (__s64)(b) >= 0))
+#define time_before_eq64(a,b)	time_after_eq64(b,a)
+
 /*
  * Have the 32 bit jiffies value wrap 5 minutes after boot
  * so jiffies wrap bugs show up earlier.
-- 
cgit v1.2.3


From 632bbfeee4f042c05bc65150b4433a297d3fe387 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Mon, 25 Sep 2006 23:30:53 -0700
Subject: [PATCH] trigger a syntax error if percpu macros are incorrectly used

get_cpu_var()/per_cpu()/__get_cpu_var() arguments must be simple
identifiers.  Otherwise the arch dependent implementations might break.

This patch enforces the correct usage of the macros by producing a syntax
error if the variable is not a simple identifier.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-generic/percpu.h |  4 +++-
 include/asm-s390/percpu.h    | 20 +++++++++++---------
 include/asm-x86_64/percpu.h  | 12 +++++++++---
 include/linux/percpu.h       | 10 ++++++++--
 4 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index e160e04290fb..6d45ee5472af 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -14,7 +14,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
     __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
 
 /* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define per_cpu(var, cpu) (*({				\
+	extern int simple_indentifier_##var(void);	\
+	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
 #define __get_cpu_var(var) per_cpu(var, smp_processor_id())
 #define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id())
 
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index 28b3517e787c..495ad99c7635 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -15,18 +15,20 @@
  */
 #if defined(__s390x__) && defined(MODULE)
 
-#define __reloc_hide(var,offset) \
-  (*({ unsigned long *__ptr; \
-       asm ( "larl %0,per_cpu__"#var"@GOTENT" \
-             : "=a" (__ptr) : "X" (per_cpu__##var) ); \
-       (typeof(&per_cpu__##var))((*__ptr) + (offset)); }))
+#define __reloc_hide(var,offset) (*({			\
+	extern int simple_indentifier_##var(void);	\
+	unsigned long *__ptr;				\
+	asm ( "larl %0,per_cpu__"#var"@GOTENT"		\
+	    : "=a" (__ptr) : "X" (per_cpu__##var) );	\
+	(typeof(&per_cpu__##var))((*__ptr) + (offset));	}))
 
 #else
 
-#define __reloc_hide(var, offset) \
-  (*({ unsigned long __ptr; \
-       asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \
-       (typeof(&per_cpu__##var)) (__ptr + (offset)); }))
+#define __reloc_hide(var, offset) (*({				\
+	extern int simple_indentifier_##var(void);		\
+	unsigned long __ptr;					\
+	asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) );	\
+	(typeof(&per_cpu__##var)) (__ptr + (offset)); }))
 
 #endif
 
diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index 08dd9f9dda81..bffb2f886a51 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -21,9 +21,15 @@
     __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
 
 /* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
-#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
-#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
+#define per_cpu(var, cpu) (*({				\
+	extern int simple_indentifier_##var(void);	\
+	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); }))
+#define __get_cpu_var(var) (*({				\
+	extern int simple_indentifier_##var(void);	\
+	RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
+#define __raw_get_cpu_var(var) (*({			\
+	extern int simple_indentifier_##var(void);	\
+	RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); }))
 
 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)			\
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cb9039a21f2a..f926490a7d8b 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -11,8 +11,14 @@
 #define PERCPU_ENOUGH_ROOM 32768
 #endif
 
-/* Must be an lvalue. */
-#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); }))
+/*
+ * Must be an lvalue. Since @var must be a simple identifier,
+ * we force a syntax error here if it isn't.
+ */
+#define get_cpu_var(var) (*({				\
+	extern int simple_indentifier_##var(void);	\
+	preempt_disable();				\
+	&__get_cpu_var(var); }))
 #define put_cpu_var(var) preempt_enable()
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From a6ca1b99ed434f3fb41bbed647ed36c0420501e5 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@SteelEye.com>
Date: Mon, 25 Sep 2006 23:30:55 -0700
Subject: [PATCH] update to the kernel kmap/kunmap API

Give non-highmem architectures access to the kmap API for the purposes of
overriding (this is what the attached patch does).

The proposal is that we should now require all architectures with coherence
issues to manage data coherence via the kmap/kunmap API.  Thus driver
writers never have to write code like

    kmap(page)
    modify data in page
    flush_kernel_dcache_page(page)
    kunmap(page)

instead, kmap/kunmap will manage the coherence and driver (and filesystem)
writers don't need to worry about how to flush between kmap and kunmap.

For most architectures, the page only needs to be flushed if it was
actually written to *and* there are user mappings of it, so the best
implementation looks to be: clear the page dirty pte bit in the kernel page
tables on kmap and on kunmap, check page->mappings for user maps, and then
the dirty bit, and only flush if it both has user mappings and is dirty.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/highmem.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 85ce7ef9a512..42620e723abb 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -29,6 +29,7 @@ unsigned int nr_free_highpages(void);
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
+#ifndef ARCH_HAS_KMAP
 static inline void *kmap(struct page *page)
 {
 	might_sleep();
@@ -41,6 +42,7 @@ static inline void *kmap(struct page *page)
 #define kunmap_atomic(addr, idx)	do { } while (0)
 #define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
 #define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
+#endif
 
 #endif /* CONFIG_HIGHMEM */
 
-- 
cgit v1.2.3


From 725d704ecaca4a43f067092c140d4f3271cf2856 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 25 Sep 2006 23:30:55 -0700
Subject: [PATCH] mm: VM_BUG_ON

Introduce a VM_BUG_ON, which is turned on with CONFIG_DEBUG_VM.  Use this
in the lightweight, inline refcounting functions; PageLRU and PageActive
checks in vmscan, because they're pretty well confined to vmscan.  And in
page allocate/free fastpaths which can be the hottest parts of the kernel
for kbuilds.

Unlike BUG_ON, VM_BUG_ON must not be used to execute statements with
side-effects, and should not be used outside core mm code.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 10 +++++++++-
 mm/internal.h      |  4 ++--
 mm/page_alloc.c    | 23 +++++++++++------------
 mm/swap.c          | 12 ++++++------
 mm/vmscan.c        | 16 ++++++++--------
 5 files changed, 36 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 224178a000d2..7d20b25c58fc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -278,6 +278,12 @@ struct page {
  */
 #include <linux/page-flags.h>
 
+#ifdef CONFIG_DEBUG_VM
+#define VM_BUG_ON(cond) BUG_ON(cond)
+#else
+#define VM_BUG_ON(condition) do { } while(0)
+#endif
+
 /*
  * Methods to modify the page usage count.
  *
@@ -297,7 +303,7 @@ struct page {
  */
 static inline int put_page_testzero(struct page *page)
 {
-	BUG_ON(atomic_read(&page->_count) == 0);
+	VM_BUG_ON(atomic_read(&page->_count) == 0);
 	return atomic_dec_and_test(&page->_count);
 }
 
@@ -307,6 +313,7 @@ static inline int put_page_testzero(struct page *page)
  */
 static inline int get_page_unless_zero(struct page *page)
 {
+	VM_BUG_ON(PageCompound(page));
 	return atomic_inc_not_zero(&page->_count);
 }
 
@@ -323,6 +330,7 @@ static inline void get_page(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
+	VM_BUG_ON(atomic_read(&page->_count) == 0);
 	atomic_inc(&page->_count);
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index d20e3cc4aef0..d527b80b292f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,8 +24,8 @@ static inline void set_page_count(struct page *page, int v)
  */
 static inline void set_page_refcounted(struct page *page)
 {
-	BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page);
-	BUG_ON(atomic_read(&page->_count));
+	VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page);
+	VM_BUG_ON(atomic_read(&page->_count));
 	set_page_count(page, 1);
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8a52ba9fe693..4b33878e9488 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -127,7 +127,6 @@ static int bad_range(struct zone *zone, struct page *page)
 
 	return 0;
 }
-
 #else
 static inline int bad_range(struct zone *zone, struct page *page)
 {
@@ -218,12 +217,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 {
 	int i;
 
-	BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+	VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
 	/*
 	 * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
 	 * and __GFP_HIGHMEM from hard or soft interrupt context.
 	 */
-	BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
+	VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
 	for (i = 0; i < (1 << order); i++)
 		clear_highpage(page + i);
 }
@@ -347,8 +346,8 @@ static inline void __free_one_page(struct page *page,
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
-	BUG_ON(page_idx & (order_size - 1));
-	BUG_ON(bad_range(zone, page));
+	VM_BUG_ON(page_idx & (order_size - 1));
+	VM_BUG_ON(bad_range(zone, page));
 
 	zone->free_pages += order_size;
 	while (order < MAX_ORDER-1) {
@@ -421,7 +420,7 @@ static void free_pages_bulk(struct zone *zone, int count,
 	while (count--) {
 		struct page *page;
 
-		BUG_ON(list_empty(list));
+		VM_BUG_ON(list_empty(list));
 		page = list_entry(list->prev, struct page, lru);
 		/* have to delete it as __free_one_page list manipulates */
 		list_del(&page->lru);
@@ -512,7 +511,7 @@ static inline void expand(struct zone *zone, struct page *page,
 		area--;
 		high--;
 		size >>= 1;
-		BUG_ON(bad_range(zone, &page[size]));
+		VM_BUG_ON(bad_range(zone, &page[size]));
 		list_add(&page[size].lru, &area->free_list);
 		area->nr_free++;
 		set_page_order(&page[size], high);
@@ -761,8 +760,8 @@ void split_page(struct page *page, unsigned int order)
 {
 	int i;
 
-	BUG_ON(PageCompound(page));
-	BUG_ON(!page_count(page));
+	VM_BUG_ON(PageCompound(page));
+	VM_BUG_ON(!page_count(page));
 	for (i = 1; i < (1 << order); i++)
 		set_page_refcounted(page + i);
 }
@@ -809,7 +808,7 @@ again:
 	local_irq_restore(flags);
 	put_cpu();
 
-	BUG_ON(bad_range(zone, page));
+	VM_BUG_ON(bad_range(zone, page));
 	if (prep_new_page(page, order, gfp_flags))
 		goto again;
 	return page;
@@ -1083,7 +1082,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
 	 * get_zeroed_page() returns a 32-bit address, which cannot represent
 	 * a highmem page
 	 */
-	BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
 
 	page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
 	if (page)
@@ -1116,7 +1115,7 @@ EXPORT_SYMBOL(__free_pages);
 fastcall void free_pages(unsigned long addr, unsigned int order)
 {
 	if (addr != 0) {
-		BUG_ON(!virt_addr_valid((void *)addr));
+		VM_BUG_ON(!virt_addr_valid((void *)addr));
 		__free_pages(virt_to_page((void *)addr), order);
 	}
 }
diff --git a/mm/swap.c b/mm/swap.c
index 687686a61f7c..600235e43704 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -233,7 +233,7 @@ void fastcall __page_cache_release(struct page *page)
 		struct zone *zone = page_zone(page);
 
 		spin_lock_irqsave(&zone->lru_lock, flags);
-		BUG_ON(!PageLRU(page));
+		VM_BUG_ON(!PageLRU(page));
 		__ClearPageLRU(page);
 		del_page_from_lru(zone, page);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
@@ -284,7 +284,7 @@ void release_pages(struct page **pages, int nr, int cold)
 				zone = pagezone;
 				spin_lock_irq(&zone->lru_lock);
 			}
-			BUG_ON(!PageLRU(page));
+			VM_BUG_ON(!PageLRU(page));
 			__ClearPageLRU(page);
 			del_page_from_lru(zone, page);
 		}
@@ -337,7 +337,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
 
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 		if (put_page_testzero(page))
 			pagevec_add(&pages_to_free, page);
 	}
@@ -364,7 +364,7 @@ void __pagevec_lru_add(struct pagevec *pvec)
 			zone = pagezone;
 			spin_lock_irq(&zone->lru_lock);
 		}
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 		add_page_to_inactive_list(zone, page);
 	}
@@ -391,9 +391,9 @@ void __pagevec_lru_add_active(struct pagevec *pvec)
 			zone = pagezone;
 			spin_lock_irq(&zone->lru_lock);
 		}
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		BUG_ON(PageActive(page));
+		VM_BUG_ON(PageActive(page));
 		SetPageActive(page);
 		add_page_to_active_list(zone, page);
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5d4c4d02254d..41a3da3d6ccc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -440,7 +440,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (TestSetPageLocked(page))
 			goto keep;
 
-		BUG_ON(PageActive(page));
+		VM_BUG_ON(PageActive(page));
 
 		sc->nr_scanned++;
 
@@ -564,7 +564,7 @@ keep_locked:
 		unlock_page(page);
 keep:
 		list_add(&page->lru, &ret_pages);
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
@@ -603,7 +603,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
-		BUG_ON(!PageLRU(page));
+		VM_BUG_ON(!PageLRU(page));
 
 		list_del(&page->lru);
 		target = src;
@@ -674,7 +674,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 */
 		while (!list_empty(&page_list)) {
 			page = lru_to_page(&page_list);
-			BUG_ON(PageLRU(page));
+			VM_BUG_ON(PageLRU(page));
 			SetPageLRU(page);
 			list_del(&page->lru);
 			if (PageActive(page))
@@ -797,9 +797,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	while (!list_empty(&l_inactive)) {
 		page = lru_to_page(&l_inactive);
 		prefetchw_prev_lru_page(page, &l_inactive, flags);
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		BUG_ON(!PageActive(page));
+		VM_BUG_ON(!PageActive(page));
 		ClearPageActive(page);
 
 		list_move(&page->lru, &zone->inactive_list);
@@ -827,9 +827,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	while (!list_empty(&l_active)) {
 		page = lru_to_page(&l_active);
 		prefetchw_prev_lru_page(page, &l_active, flags);
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		BUG_ON(!PageActive(page));
+		VM_BUG_ON(!PageActive(page));
 		list_move(&page->lru, &zone->active_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
-- 
cgit v1.2.3


From d08b3851da41d0ee60851f2c75b118e1f7a5fc89 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 Sep 2006 23:30:57 -0700
Subject: [PATCH] mm: tracking shared dirty pages

Tracking of dirty pages in shared writeable mmap()s.

The idea is simple: write protect clean shared writeable pages, catch the
write-fault, make writeable and set dirty.  On page write-back clean all the
PTE dirty bits and write protect them once again.

The implementation is a tad harder, mainly because the default
backing_dev_info capabilities were too loosely maintained.  Hence it is not
enough to test the backing_dev_info for cap_account_dirty.

The current heuristic is as follows, a VMA is eligible when:
 - its shared writeable
    (vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)
 - it is not a 'special' mapping
    (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) == 0
 - the backing_dev_info is cap_account_dirty
    mapping_cap_account_dirty(vma->vm_file->f_mapping)
 - f_op->mmap() didn't change the default page protection

Page from remap_pfn_range() are explicitly excluded because their COW
semantics are already horrid enough (see vm_normal_page() in do_wp_page()) and
because they don't have a backing store anyway.

mprotect() is taught about the new behaviour as well.  However it overrides
the last condition.

Cleaning the pages on write-back is done with page_mkclean() a new rmap call.
It can be called on any page, but is currently only implemented for mapped
pages, if the page is found the be of a VMA that accounts dirty pages it will
also wrprotect the PTE.

Finally, in fs/buffers.c:try_to_free_buffers(); remove clear_page_dirty() from
under ->private_lock.  This seems to be safe, since ->private_lock is used to
serialize access to the buffers, not the page itself.  This is needed because
clear_page_dirty() will call into page_mkclean() and would thereby violate
locking order.

[dhowells@redhat.com: Provide a page_mkclean() implementation for NOMMU]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c          |  2 +-
 include/linux/mm.h   | 34 +++++++++++++++++++++++++++
 include/linux/rmap.h | 14 +++++++++++
 mm/memory.c          | 29 ++++++++++++++++++-----
 mm/mmap.c            | 10 ++++----
 mm/mprotect.c        | 21 +++++++----------
 mm/page-writeback.c  | 17 ++++++++++----
 mm/rmap.c            | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 162 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 71649ef9b658..3b6d701073e7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct page *page)
 
 	spin_lock(&mapping->private_lock);
 	ret = drop_buffers(page, &buffers_to_free);
+	spin_unlock(&mapping->private_lock);
 	if (ret) {
 		/*
 		 * If the filesystem writes its buffers by hand (eg ext3)
@@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page)
 		 */
 		clear_page_dirty(page);
 	}
-	spin_unlock(&mapping->private_lock);
 out:
 	if (buffers_to_free) {
 		struct buffer_head *bh = buffers_to_free;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d20b25c58fc..449841413cf1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
+#include <linux/backing-dev.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -810,6 +811,39 @@ struct shrinker;
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
+/*
+ * Some shared mappigns will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+static inline int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+	unsigned int vm_flags = vma->vm_flags;
+
+	/* If it was private or non-writable, the write bit is already clear */
+	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+		return 0;
+
+	/* The backer wishes to know when pages are first written to? */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+		return 1;
+
+	/* The open routine did something to the protections already? */
+	if (pgprot_val(vma->vm_page_prot) !=
+	    pgprot_val(protection_map[vm_flags &
+		    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+		return 0;
+
+	/* Specialty mapping? */
+	if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+		return 0;
+
+	/* Can the mapping track the dirty pages? */
+	return vma->vm_file && vma->vm_file->f_mapping &&
+		mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
 extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
 
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b0900014..db2c1df4fef9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
  */
 unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init()		do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 #define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page, refs) SWAP_FAIL
 
+static inline int page_mkclean(struct page *page)
+{
+	return 0;
+}
+
+
 #endif	/* CONFIG_MMU */
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..fa941b169071 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1458,14 +1458,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse, ret = VM_FAULT_MINOR;
+	int reuse = 0, ret = VM_FAULT_MINOR;
+	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
-				(VM_SHARED|VM_WRITE))) {
+	/*
+	 * Only catch write-faults on shared writable pages, read-only
+	 * shared pages can get COWed by get_user_pages(.write=1, .force=1).
+	 */
+	if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+					(VM_WRITE|VM_SHARED))) {
 		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 			/*
 			 * Notify the address space that the page is about to
@@ -1494,13 +1499,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (!pte_same(*page_table, orig_pte))
 				goto unlock;
 		}
-
+		dirty_page = old_page;
+		get_page(dirty_page);
 		reuse = 1;
 	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
 		reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
-	} else {
-		reuse = 0;
 	}
 
 	if (reuse) {
@@ -1566,6 +1570,10 @@ gotten:
 		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	if (old_page)
@@ -2098,6 +2106,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page *dirty_page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2201,10 @@ retry:
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
+			if (write_access) {
+				dirty_page = new_page;
+				get_page(dirty_page);
+			}
 		}
 	} else {
 		/* One of our sibling threads was faster, back out. */
@@ -2204,6 +2217,10 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	page_cache_release(new_page);
diff --git a/mm/mmap.c b/mm/mmap.c
index d799d896d74a..8507ee9cd573 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1105,12 +1105,6 @@ munmap_back:
 			goto free_vma;
 	}
 
-	/* Don't make the VMA automatically writable if it's shared, but the
-	 * backer wishes to know when pages are first written to */
-	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
-		vma->vm_page_prot =
-			protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
-
 	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
 	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
 	 * that memory reservation must be checked; but that reservation
@@ -1128,6 +1122,10 @@ munmap_back:
 	pgoff = vma->vm_pgoff;
 	vm_flags = vma->vm_flags;
 
+	if (vma_wants_writenotify(vma))
+		vma->vm_page_prot =
+			protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
+
 	if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
 			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
 		file = vma->vm_file;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 638edabaff71..367b7f6c0637 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -123,8 +123,6 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
 	unsigned long charged = 0;
-	unsigned int mask;
-	pgprot_t newprot;
 	pgoff_t pgoff;
 	int error;
 
@@ -176,24 +174,21 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	}
 
 success:
-	/* Don't make the VMA automatically writable if it's shared, but the
-	 * backer wishes to know when pages are first written to */
-	mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED;
-	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
-		mask &= ~VM_SHARED;
-
-	newprot = protection_map[newflags & mask];
-
 	/*
 	 * vm_flags and vm_page_prot are protected by the mmap_sem
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	vma->vm_page_prot = newprot;
+	vma->vm_page_prot = protection_map[newflags &
+		(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	if (vma_wants_writenotify(vma))
+		vma->vm_page_prot = protection_map[newflags &
+			(VM_READ|VM_WRITE|VM_EXEC)];
+
 	if (is_vm_hugetlb_page(vma))
-		hugetlb_change_protection(vma, start, end, newprot);
+		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
 	else
-		change_protection(vma, start, end, newprot);
+		change_protection(vma, start, end, vma->vm_page_prot);
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
 	return 0;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 77a0bc4e261a..1c87430b7a25 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -23,6 +23,7 @@
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/mpage.h>
+#include <linux/rmap.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
@@ -550,7 +551,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		return 0;
 	wbc->for_writepages = 1;
 	if (mapping->a_ops->writepages)
-		ret =  mapping->a_ops->writepages(mapping, wbc);
+		ret = mapping->a_ops->writepages(mapping, wbc);
 	else
 		ret = generic_writepages(mapping, wbc);
 	wbc->for_writepages = 0;
@@ -712,9 +713,15 @@ int test_clear_page_dirty(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-			if (mapping_cap_account_dirty(mapping))
-				__dec_zone_page_state(page, NR_FILE_DIRTY);
 			write_unlock_irqrestore(&mapping->tree_lock, flags);
+			/*
+			 * We can continue to use `mapping' here because the
+			 * page is locked, which pins the address_space
+			 */
+			if (mapping_cap_account_dirty(mapping)) {
+				page_mkclean(page);
+				dec_zone_page_state(page, NR_FILE_DIRTY);
+			}
 			return 1;
 		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -744,8 +751,10 @@ int clear_page_dirty_for_io(struct page *page)
 
 	if (mapping) {
 		if (TestClearPageDirty(page)) {
-			if (mapping_cap_account_dirty(mapping))
+			if (mapping_cap_account_dirty(mapping)) {
+				page_mkclean(page);
 				dec_zone_page_state(page, NR_FILE_DIRTY);
+			}
 			return 1;
 		}
 		return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index 40158b59729e..e2155d791d99 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -434,6 +434,71 @@ int page_referenced(struct page *page, int is_locked)
 	return referenced;
 }
 
+static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t *pte, entry;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	address = vma_address(page, vma);
+	if (address == -EFAULT)
+		goto out;
+
+	pte = page_check_address(page, mm, address, &ptl);
+	if (!pte)
+		goto out;
+
+	if (!pte_dirty(*pte) && !pte_write(*pte))
+		goto unlock;
+
+	entry = ptep_get_and_clear(mm, address, pte);
+	entry = pte_mkclean(entry);
+	entry = pte_wrprotect(entry);
+	ptep_establish(vma, address, pte, entry);
+	lazy_mmu_prot_update(entry);
+	ret = 1;
+
+unlock:
+	pte_unmap_unlock(pte, ptl);
+out:
+	return ret;
+}
+
+static int page_mkclean_file(struct address_space *mapping, struct page *page)
+{
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	int ret = 0;
+
+	BUG_ON(PageAnon(page));
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		if (vma->vm_flags & VM_SHARED)
+			ret += page_mkclean_one(page, vma);
+	}
+	spin_unlock(&mapping->i_mmap_lock);
+	return ret;
+}
+
+int page_mkclean(struct page *page)
+{
+	int ret = 0;
+
+	BUG_ON(!PageLocked(page));
+
+	if (page_mapped(page)) {
+		struct address_space *mapping = page_mapping(page);
+		if (mapping)
+			ret = page_mkclean_file(mapping, page);
+	}
+
+	return ret;
+}
+
 /**
  * page_set_anon_rmap - setup new anonymous rmap
  * @page:	the page to add the mapping to
-- 
cgit v1.2.3


From edc79b2a46ed854595e40edcf3f8b37f9f14aa3f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 Sep 2006 23:30:58 -0700
Subject: [PATCH] mm: balance dirty pages

Now that we can detect writers of shared mappings, throttle them.  Avoids OOM
by surprise.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/writeback.h |  1 +
 mm/memory.c               |  5 +++--
 mm/page-writeback.c       | 10 ++++++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0422036af4eb..56a23a0e7f2e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
 			loff_t pos, loff_t count);
 int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
 			   loff_t pos, loff_t count);
+void set_page_dirty_balance(struct page *page);
 
 /* pdflush.c */
 extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
diff --git a/mm/memory.c b/mm/memory.c
index fa941b169071..dd7d7fc5ed60 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -49,6 +49,7 @@
 #include <linux/module.h>
 #include <linux/delayacct.h>
 #include <linux/init.h>
+#include <linux/writeback.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -1571,7 +1572,7 @@ gotten:
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (dirty_page) {
-		set_page_dirty(dirty_page);
+		set_page_dirty_balance(dirty_page);
 		put_page(dirty_page);
 	}
 	return ret;
@@ -2218,7 +2219,7 @@ retry:
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (dirty_page) {
-		set_page_dirty(dirty_page);
+		set_page_dirty_balance(dirty_page);
 		put_page(dirty_page);
 	}
 	return ret;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1c87430b7a25..b9f4c6f1be86 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -244,6 +244,16 @@ static void balance_dirty_pages(struct address_space *mapping)
 		pdflush_operation(background_writeout, 0);
 }
 
+void set_page_dirty_balance(struct page *page)
+{
+	if (set_page_dirty(page)) {
+		struct address_space *mapping = page_mapping(page);
+
+		if (mapping)
+			balance_dirty_pages_ratelimited(mapping);
+	}
+}
+
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
  * @mapping: address_space which was dirtied
-- 
cgit v1.2.3


From b221385bc41d6789edde3d2fa0cb20d5045730eb Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 25 Sep 2006 23:31:02 -0700
Subject: [PATCH] mm/: make functions static

This patch makes the following needlessly global functions static:
 - slab.c: kmem_find_general_cachep()
 - swap.c: __page_cache_release()
 - vmalloc.c: __vmalloc_node()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h      |  2 --
 include/linux/slab.h    |  2 --
 include/linux/vmalloc.h |  2 --
 mm/slab.c               |  3 +--
 mm/swap.c               | 39 +++++++++++++++++++--------------------
 mm/vmalloc.c            |  8 +++++---
 6 files changed, 25 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 449841413cf1..45678b036955 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -318,8 +318,6 @@ static inline int get_page_unless_zero(struct page *page)
 	return atomic_inc_not_zero(&page->_count);
 }
 
-extern void FASTCALL(__page_cache_release(struct page *));
-
 static inline int page_count(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 45ad55b70d1c..193c03c547ec 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 extern const char *kmem_cache_name(kmem_cache_t *);
-extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags);
 
 /* Size description struct for general caches. */
 struct cache_sizes {
@@ -223,7 +222,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
 /* SLOB allocator routines */
 
 void kmem_cache_init(void);
-struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
 struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
 	unsigned long,
 	void (*)(void *, struct kmem_cache *, unsigned long),
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 71b6363caaaf..dee88c6b6fa7 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size);
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
 extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
 				pgprot_t prot);
-extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
-				pgprot_t prot, int node);
 extern void vfree(void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
diff --git a/mm/slab.c b/mm/slab.c
index 21ba06035700..5870bcbd33cf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -768,11 +768,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
 	return csizep->cs_cachep;
 }
 
-struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
+static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
 {
 	return __find_general_cachep(size, gfpflags);
 }
-EXPORT_SYMBOL(kmem_find_general_cachep);
 
 static size_t slab_mgmt_size(size_t nr_objs, size_t align)
 {
diff --git a/mm/swap.c b/mm/swap.c
index 600235e43704..2e0e871f542f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,6 +34,25 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
+/*
+ * This path almost never happens for VM activity - pages are normally
+ * freed via pagevecs.  But it gets used by networking.
+ */
+static void fastcall __page_cache_release(struct page *page)
+{
+	if (PageLRU(page)) {
+		unsigned long flags;
+		struct zone *zone = page_zone(page);
+
+		spin_lock_irqsave(&zone->lru_lock, flags);
+		VM_BUG_ON(!PageLRU(page));
+		__ClearPageLRU(page);
+		del_page_from_lru(zone, page);
+		spin_unlock_irqrestore(&zone->lru_lock, flags);
+	}
+	free_hot_page(page);
+}
+
 static void put_compound_page(struct page *page)
 {
 	page = (struct page *)page_private(page);
@@ -222,26 +241,6 @@ int lru_add_drain_all(void)
 }
 #endif
 
-/*
- * This path almost never happens for VM activity - pages are normally
- * freed via pagevecs.  But it gets used by networking.
- */
-void fastcall __page_cache_release(struct page *page)
-{
-	if (PageLRU(page)) {
-		unsigned long flags;
-		struct zone *zone = page_zone(page);
-
-		spin_lock_irqsave(&zone->lru_lock, flags);
-		VM_BUG_ON(!PageLRU(page));
-		__ClearPageLRU(page);
-		del_page_from_lru(zone, page);
-		spin_unlock_irqrestore(&zone->lru_lock, flags);
-	}
-	free_hot_page(page);
-}
-EXPORT_SYMBOL(__page_cache_release);
-
 /*
  * Batched page_cache_release().  Decrement the reference count on all the
  * passed pages.  If it fell to zero then remove the page from the LRU and
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 266162d2ba28..9aad8b0cc6ee 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -24,6 +24,9 @@
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
 
+static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+			    int node);
+
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 {
 	pte_t *pte;
@@ -478,8 +481,8 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
  *	allocator with @gfp_mask flags.  Map them into contiguous
  *	kernel virtual space, using a pagetable protection of @prot.
  */
-void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
-			int node)
+static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+			    int node)
 {
 	struct vm_struct *area;
 
@@ -493,7 +496,6 @@ void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 
 	return __vmalloc_area_node(area, gfp_mask, prot, node);
 }
-EXPORT_SYMBOL(__vmalloc_node);
 
 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 {
-- 
cgit v1.2.3


From 2d1a07d487d8b36658404839cdf03a974968cefd Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:03 -0700
Subject: [PATCH] bootmem: remove useless __init in header file

__init in headers is pretty useless because the compiler doesn't check it, and
they get out of sync relatively frequently.  So if you see an __init in a
header file, it's quite unreliable and you need to check the definition
anyway.

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index e319c649e4fd..c7124d4ce7cf 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -41,23 +41,23 @@ typedef struct bootmem_data {
 	struct list_head list;
 } bootmem_data_t;
 
-extern unsigned long __init bootmem_bootmap_pages (unsigned long);
-extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
-extern void __init free_bootmem (unsigned long addr, unsigned long size);
-extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_low(unsigned long size,
+extern unsigned long bootmem_bootmap_pages (unsigned long);
+extern unsigned long init_bootmem (unsigned long addr, unsigned long memend);
+extern void free_bootmem (unsigned long addr, unsigned long size);
+extern void * __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __alloc_bootmem_low(unsigned long size,
 					 unsigned long align,
 					 unsigned long goal);
-extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat,
+extern void * __alloc_bootmem_low_node(pg_data_t *pgdat,
 					      unsigned long size,
 					      unsigned long align,
 					      unsigned long goal);
-extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata,
+extern void * __alloc_bootmem_core(struct bootmem_data *bdata,
 		unsigned long size, unsigned long align, unsigned long goal,
 		unsigned long limit);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
 	__alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
@@ -67,12 +67,12 @@ extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low((x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-extern unsigned long __init free_all_bootmem (void);
-extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
-extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
-extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
-extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
-extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
+extern unsigned long free_all_bootmem (void);
+extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
+extern unsigned long init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
+extern void reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
+extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
+extern unsigned long free_all_bootmem_node (pg_data_t *pgdat);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
@@ -94,14 +94,14 @@ static inline void *alloc_remap(int nid, unsigned long size)
 extern unsigned long __meminitdata nr_kernel_pages;
 extern unsigned long nr_all_pages;
 
-extern void *__init alloc_large_system_hash(const char *tablename,
-					    unsigned long bucketsize,
-					    unsigned long numentries,
-					    int scale,
-					    int flags,
-					    unsigned int *_hash_shift,
-					    unsigned int *_hash_mask,
-					    unsigned long limit);
+extern void * alloc_large_system_hash(const char *tablename,
+				      unsigned long bucketsize,
+				      unsigned long numentries,
+				      int scale,
+				      int flags,
+				      unsigned int *_hash_shift,
+				      unsigned int *_hash_mask,
+				      unsigned long limit);
 
 #define HASH_HIGHMEM	0x00000001	/* Consider highmem? */
 #define HASH_EARLY	0x00000002	/* Allocating during early boot? */
-- 
cgit v1.2.3


From 71fb2e8f8753b66b1f4295aa264a2eb4e69381e8 Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:05 -0700
Subject: [PATCH] bootmem: remove useless parentheses in bootmem header file

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index c7124d4ce7cf..fa2523f47628 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -59,13 +59,13 @@ extern void * __alloc_bootmem_core(struct bootmem_data *bdata,
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 extern void reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
-	__alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
-	__alloc_bootmem_low((x), SMP_CACHE_BYTES, 0)
+	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
 #define alloc_bootmem_pages(x) \
-	__alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
-	__alloc_bootmem_low((x), PAGE_SIZE, 0)
+	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 extern unsigned long free_all_bootmem (void);
 extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
@@ -75,11 +75,11 @@ extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned lo
 extern unsigned long free_all_bootmem_node (pg_data_t *pgdat);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
-	__alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0)
+	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
-- 
cgit v1.2.3


From bb0923a66820718f636736b22ce47372f79e3400 Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:05 -0700
Subject: [PATCH] bootmem: limit to 80 columns width

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 42 +++++++++++++++++++++++++++++-------------
 mm/bootmem.c            | 28 ++++++++++++++++++----------
 2 files changed, 47 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index fa2523f47628..1e8dddfb3083 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -44,18 +44,24 @@ typedef struct bootmem_data {
 extern unsigned long bootmem_bootmap_pages (unsigned long);
 extern unsigned long init_bootmem (unsigned long addr, unsigned long memend);
 extern void free_bootmem (unsigned long addr, unsigned long size);
-extern void * __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __alloc_bootmem (unsigned long size,
+			       unsigned long align,
+			       unsigned long goal);
+extern void * __alloc_bootmem_nopanic (unsigned long size,
+				       unsigned long align,
+				       unsigned long goal);
 extern void * __alloc_bootmem_low(unsigned long size,
-					 unsigned long align,
-					 unsigned long goal);
+				  unsigned long align,
+				  unsigned long goal);
 extern void * __alloc_bootmem_low_node(pg_data_t *pgdat,
-					      unsigned long size,
-					      unsigned long align,
-					      unsigned long goal);
+				       unsigned long size,
+				       unsigned long align,
+				       unsigned long goal);
 extern void * __alloc_bootmem_core(struct bootmem_data *bdata,
-		unsigned long size, unsigned long align, unsigned long goal,
-		unsigned long limit);
+				   unsigned long size,
+				   unsigned long align,
+				   unsigned long goal,
+				   unsigned long limit);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 extern void reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
@@ -68,10 +74,20 @@ extern void reserve_bootmem (unsigned long addr, unsigned long size);
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 extern unsigned long free_all_bootmem (void);
-extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
-extern unsigned long init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
-extern void reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
-extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
+extern void * __alloc_bootmem_node (pg_data_t *pgdat,
+				    unsigned long size,
+				    unsigned long align,
+				    unsigned long goal);
+extern unsigned long init_bootmem_node (pg_data_t *pgdat,
+					unsigned long freepfn,
+					unsigned long startpfn,
+					unsigned long endpfn);
+extern void reserve_bootmem_node (pg_data_t *pgdat,
+				  unsigned long physaddr,
+				  unsigned long size);
+extern void free_bootmem_node (pg_data_t *pgdat,
+			       unsigned long addr,
+			       unsigned long size);
 extern unsigned long free_all_bootmem_node (pg_data_t *pgdat);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 70f1528a3c8a..9083f5029673 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -102,7 +102,8 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
  * might be used for boot-time allocations - or it might get added
  * to the free page pool later on.
  */
-static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
+static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
+					unsigned long size)
 {
 	unsigned long i;
 	/*
@@ -127,7 +128,8 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add
 		}
 }
 
-static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
+static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
+				     unsigned long size)
 {
 	unsigned long i;
 	unsigned long start;
@@ -355,17 +357,20 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 	return total;
 }
 
-unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn)
+unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn,
+				unsigned long startpfn, unsigned long endpfn)
 {
 	return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn));
 }
 
-void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
+void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr,
+				  unsigned long size)
 {
 	reserve_bootmem_core(pgdat->bdata, physaddr, size);
 }
 
-void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
+void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr,
+			       unsigned long size)
 {
 	free_bootmem_core(pgdat->bdata, physaddr, size);
 }
@@ -399,7 +404,8 @@ unsigned long __init free_all_bootmem (void)
 	return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
+				      unsigned long goal)
 {
 	bootmem_data_t *bdata;
 	void *ptr;
@@ -410,7 +416,8 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, u
 	return NULL;
 }
 
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem(unsigned long size, unsigned long align,
+			      unsigned long goal)
 {
 	void *mem = __alloc_bootmem_nopanic(size,align,goal);
 	if (mem)
@@ -424,8 +431,8 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
 }
 
 
-void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align,
-				   unsigned long goal)
+void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
+				   unsigned long align, unsigned long goal)
 {
 	void *ptr;
 
@@ -438,7 +445,8 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigne
 
 #define LOW32LIMIT 0xffffffff
 
-void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
+				  unsigned long goal)
 {
 	bootmem_data_t *bdata;
 	void *ptr;
-- 
cgit v1.2.3


From e786e86a542ccc1133f333402526ad00b9c088ae Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:06 -0700
Subject: [PATCH] bootmem: remove useless headers inclusions

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h |  5 +----
 mm/bootmem.c            | 10 +++-------
 2 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1e8dddfb3083..b2067e4a4486 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -4,11 +4,8 @@
 #ifndef _LINUX_BOOTMEM_H
 #define _LINUX_BOOTMEM_H
 
-#include <asm/pgtable.h>
-#include <asm/dma.h>
-#include <linux/cache.h>
-#include <linux/init.h>
 #include <linux/mmzone.h>
+#include <asm/dma.h>
 
 /*
  *  simple boot-time physical memory area allocator.
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9083f5029673..e136c086fd98 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -8,17 +8,13 @@
  *  free memory collector. It's used to deal with reserved
  *  system memory and memory holes as well.
  */
-
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/swap.h>
-#include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
-#include <linux/mmzone.h>
 #include <linux/module.h>
-#include <asm/dma.h>
+
+#include <asm/bug.h>
 #include <asm/io.h>
+
 #include "internal.h"
 
 /*
-- 
cgit v1.2.3


From f71bf0cac730ccb5ebcdf21747db75ae0445ccde Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:08 -0700
Subject: [PATCH] bootmem: miscellaneous coding style fixes

It fixes various coding style issues, specially when spaces are useless.  For
example '*' go next to the function name.

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 95 +++++++++++++++++++++++++------------------------
 mm/bootmem.c            | 81 ++++++++++++++++++++++-------------------
 2 files changed, 93 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b2067e4a4486..31e9abb6d977 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -38,29 +38,30 @@ typedef struct bootmem_data {
 	struct list_head list;
 } bootmem_data_t;
 
-extern unsigned long bootmem_bootmap_pages (unsigned long);
-extern unsigned long init_bootmem (unsigned long addr, unsigned long memend);
-extern void free_bootmem (unsigned long addr, unsigned long size);
-extern void * __alloc_bootmem (unsigned long size,
-			       unsigned long align,
-			       unsigned long goal);
-extern void * __alloc_bootmem_nopanic (unsigned long size,
-				       unsigned long align,
-				       unsigned long goal);
-extern void * __alloc_bootmem_low(unsigned long size,
+extern unsigned long bootmem_bootmap_pages(unsigned long);
+extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
+extern void free_bootmem(unsigned long addr, unsigned long size);
+extern void *__alloc_bootmem(unsigned long size,
+			     unsigned long align,
+			     unsigned long goal);
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+				     unsigned long align,
+				     unsigned long goal);
+extern void *__alloc_bootmem_low(unsigned long size,
+				 unsigned long align,
+				 unsigned long goal);
+extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
+				      unsigned long size,
+				      unsigned long align,
+				      unsigned long goal);
+extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
+				  unsigned long size,
 				  unsigned long align,
-				  unsigned long goal);
-extern void * __alloc_bootmem_low_node(pg_data_t *pgdat,
-				       unsigned long size,
-				       unsigned long align,
-				       unsigned long goal);
-extern void * __alloc_bootmem_core(struct bootmem_data *bdata,
-				   unsigned long size,
-				   unsigned long align,
-				   unsigned long goal,
-				   unsigned long limit);
+				  unsigned long goal,
+				  unsigned long limit);
+
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void reserve_bootmem (unsigned long addr, unsigned long size);
+extern void reserve_bootmem(unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
@@ -70,22 +71,24 @@ extern void reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-extern unsigned long free_all_bootmem (void);
-extern void * __alloc_bootmem_node (pg_data_t *pgdat,
-				    unsigned long size,
-				    unsigned long align,
-				    unsigned long goal);
-extern unsigned long init_bootmem_node (pg_data_t *pgdat,
-					unsigned long freepfn,
-					unsigned long startpfn,
-					unsigned long endpfn);
-extern void reserve_bootmem_node (pg_data_t *pgdat,
-				  unsigned long physaddr,
-				  unsigned long size);
-extern void free_bootmem_node (pg_data_t *pgdat,
-			       unsigned long addr,
-			       unsigned long size);
-extern unsigned long free_all_bootmem_node (pg_data_t *pgdat);
+
+extern unsigned long free_all_bootmem(void);
+extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
+extern void *__alloc_bootmem_node(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
+extern unsigned long init_bootmem_node(pg_data_t *pgdat,
+				       unsigned long freepfn,
+				       unsigned long startpfn,
+				       unsigned long endpfn);
+extern void reserve_bootmem_node(pg_data_t *pgdat,
+				 unsigned long physaddr,
+				 unsigned long size);
+extern void free_bootmem_node(pg_data_t *pgdat,
+			      unsigned long addr,
+			      unsigned long size);
+
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
@@ -102,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size)
 {
 	return NULL;
 }
-#endif
+#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
 
 extern unsigned long __meminitdata nr_kernel_pages;
 extern unsigned long nr_all_pages;
 
-extern void * alloc_large_system_hash(const char *tablename,
-				      unsigned long bucketsize,
-				      unsigned long numentries,
-				      int scale,
-				      int flags,
-				      unsigned int *_hash_shift,
-				      unsigned int *_hash_mask,
-				      unsigned long limit);
+extern void *alloc_large_system_hash(const char *tablename,
+				     unsigned long bucketsize,
+				     unsigned long numentries,
+				     int scale,
+				     int flags,
+				     unsigned int *_hash_shift,
+				     unsigned int *_hash_mask,
+				     unsigned long limit);
 
 #define HASH_HIGHMEM	0x00000001	/* Consider highmem? */
 #define HASH_EARLY	0x00000002	/* Allocating during early boot? */
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 23c3317c09ed..f0f85fa713ca 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -38,7 +38,7 @@ unsigned long saved_max_pfn;
 #endif
 
 /* return the number of _pages_ that will be allocated for the boot bitmap */
-unsigned long __init bootmem_bootmap_pages (unsigned long pages)
+unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
 	unsigned long mapsize;
 
@@ -48,12 +48,14 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages)
 
 	return mapsize;
 }
+
 /*
  * link bdata in order
  */
 static void __init link_bootmem(bootmem_data_t *bdata)
 {
 	bootmem_data_t *ent;
+
 	if (list_empty(&bdata_list)) {
 		list_add(&bdata->list, &bdata_list);
 		return;
@@ -66,7 +68,6 @@ static void __init link_bootmem(bootmem_data_t *bdata)
 		}
 	}
 	list_add_tail(&bdata->list, &bdata_list);
-	return;
 }
 
 /*
@@ -85,7 +86,7 @@ static unsigned long __init get_mapsize(bootmem_data_t *bdata)
 /*
  * Called once to set up the allocator itself.
  */
-static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
+static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
 	unsigned long mapstart, unsigned long start, unsigned long end)
 {
 	bootmem_data_t *bdata = pgdat->bdata;
@@ -185,7 +186,7 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 	unsigned long i, start = 0, incr, eidx, end_pfn;
 	void *ret;
 
-	if(!size) {
+	if (!size) {
 		printk("__alloc_bootmem_core(): zero-sized request\n");
 		BUG();
 	}
@@ -234,7 +235,7 @@ restart_scan:
 		for (j = i + 1; j < i + areasize; ++j) {
 			if (j >= eidx)
 				goto fail_block;
-			if (test_bit (j, bdata->node_bootmem_map))
+			if (test_bit(j, bdata->node_bootmem_map))
 				goto fail_block;
 		}
 		start = i;
@@ -262,19 +263,21 @@ found:
 	    bdata->last_offset && bdata->last_pos+1 == start) {
 		offset = ALIGN(bdata->last_offset, align);
 		BUG_ON(offset > PAGE_SIZE);
-		remaining_size = PAGE_SIZE-offset;
+		remaining_size = PAGE_SIZE - offset;
 		if (size < remaining_size) {
 			areasize = 0;
 			/* last_pos unchanged */
-			bdata->last_offset = offset+size;
-			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
-						bdata->node_boot_start);
+			bdata->last_offset = offset + size;
+			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
+					   offset +
+					   bdata->node_boot_start);
 		} else {
 			remaining_size = size - remaining_size;
-			areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
-			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
-						bdata->node_boot_start);
-			bdata->last_pos = start+areasize-1;
+			areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
+			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
+					   offset +
+					   bdata->node_boot_start);
+			bdata->last_pos = start + areasize - 1;
 			bdata->last_offset = remaining_size;
 		}
 		bdata->last_offset &= ~PAGE_MASK;
@@ -287,7 +290,7 @@ found:
 	/*
 	 * Reserve the area now:
 	 */
-	for (i = start; i < start+areasize; i++)
+	for (i = start; i < start + areasize; i++)
 		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
 			BUG();
 	memset(ret, 0, size);
@@ -338,7 +341,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 				}
 			}
 		} else {
-			i+=BITS_PER_LONG;
+			i += BITS_PER_LONG;
 		}
 		pfn += BITS_PER_LONG;
 	}
@@ -361,51 +364,51 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 	return total;
 }
 
-unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn,
+unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 				unsigned long startpfn, unsigned long endpfn)
 {
-	return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn));
+	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 }
 
-void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr,
-				  unsigned long size)
+void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+				 unsigned long size)
 {
 	reserve_bootmem_core(pgdat->bdata, physaddr, size);
 }
 
-void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr,
-			       unsigned long size)
+void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+			      unsigned long size)
 {
 	free_bootmem_core(pgdat->bdata, physaddr, size);
 }
 
-unsigned long __init free_all_bootmem_node (pg_data_t *pgdat)
+unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 {
-	return(free_all_bootmem_core(pgdat));
+	return free_all_bootmem_core(pgdat);
 }
 
-unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
+unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 {
 	max_low_pfn = pages;
 	min_low_pfn = start;
-	return(init_bootmem_core(NODE_DATA(0), start, 0, pages));
+	return init_bootmem_core(NODE_DATA(0), start, 0, pages);
 }
 
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-void __init reserve_bootmem (unsigned long addr, unsigned long size)
+void __init reserve_bootmem(unsigned long addr, unsigned long size)
 {
 	reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
-void __init free_bootmem (unsigned long addr, unsigned long size)
+void __init free_bootmem(unsigned long addr, unsigned long size)
 {
 	free_bootmem_core(NODE_DATA(0)->bdata, addr, size);
 }
 
-unsigned long __init free_all_bootmem (void)
+unsigned long __init free_all_bootmem(void)
 {
-	return(free_all_bootmem_core(NODE_DATA(0)));
+	return free_all_bootmem_core(NODE_DATA(0));
 }
 
 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
@@ -414,9 +417,11 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 	bootmem_data_t *bdata;
 	void *ptr;
 
-	list_for_each_entry(bdata, &bdata_list, list)
-		if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
-			return(ptr);
+	list_for_each_entry(bdata, &bdata_list, list) {
+		ptr = __alloc_bootmem_core(bdata, size, align, goal, 0);
+		if (ptr)
+			return ptr;
+	}
 	return NULL;
 }
 
@@ -424,6 +429,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 			      unsigned long goal)
 {
 	void *mem = __alloc_bootmem_nopanic(size,align,goal);
+
 	if (mem)
 		return mem;
 	/*
@@ -442,7 +448,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 
 	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
 	if (ptr)
-		return (ptr);
+		return ptr;
 
 	return __alloc_bootmem(size, align, goal);
 }
@@ -455,10 +461,11 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 	bootmem_data_t *bdata;
 	void *ptr;
 
-	list_for_each_entry(bdata, &bdata_list, list)
-		if ((ptr = __alloc_bootmem_core(bdata, size,
-						 align, goal, LOW32LIMIT)))
-			return(ptr);
+	list_for_each_entry(bdata, &bdata_list, list) {
+		ptr = __alloc_bootmem_core(bdata, size, align, goal, LOW32LIMIT);
+		if (ptr)
+			return ptr;
+	}
 
 	/*
 	 * Whoops, we cannot satisfy the allocation request.
-- 
cgit v1.2.3


From c1f60a5a419cc60aff27daffb150f5a3a3a79ef4 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:11 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: move HIGHMEM counters into highmem.c/.h

Move totalhigh_pages and nr_free_highpages() into highmem.c/.h

Move the totalhigh_pages definition into highmem.c/.h.  Move the
nr_free_highpages function into highmem.c

[yoichi_yuasa@tripeaks.co.jp: build fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/mips/sgi-ip27/ip27-memory.c |  1 +
 arch/um/kernel/mem.c             |  2 ++
 include/linux/highmem.h          |  3 +++
 include/linux/swap.h             |  1 -
 mm/highmem.c                     | 13 +++++++++++++
 mm/page_alloc.c                  | 15 ---------------
 mm/shmem.c                       |  1 +
 7 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 59bfc0fc3f45..16e5682b01f1 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -19,6 +19,7 @@
 #include <linux/swap.h>
 #include <linux/bootmem.h>
 #include <linux/pfn.h>
+#include <linux/highmem.h>
 #include <asm/page.h>
 #include <asm/sections.h>
 
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 61280167c560..b39e624c3291 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -79,8 +79,10 @@ void mem_init(void)
 
 	/* this will put all low memory onto the freelists */
 	totalram_pages = free_all_bootmem();
+#ifdef CONFIG_HIGHMEM
 	totalhigh_pages = highmem >> PAGE_SHIFT;
 	totalram_pages += totalhigh_pages;
+#endif
 	num_physpages = totalram_pages;
 	max_pfn = totalram_pages;
 	printk(KERN_INFO "Memory: %luk available\n", 
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 42620e723abb..fd7d12daa94f 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -24,11 +24,14 @@ static inline void flush_kernel_dcache_page(struct page *page)
 
 /* declarations for linux/mm/highmem.c */
 unsigned int nr_free_highpages(void);
+extern unsigned long totalhigh_pages;
 
 #else /* CONFIG_HIGHMEM */
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
+#define totalhigh_pages 0
+
 #ifndef ARCH_HAS_KMAP
 static inline void *kmap(struct page *page)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5e59184c9096..34a6bc3e6cf3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -162,7 +162,6 @@ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
-extern unsigned long totalhigh_pages;
 extern unsigned long totalreserve_pages;
 extern long nr_swap_pages;
 extern unsigned int nr_free_pages(void);
diff --git a/mm/highmem.c b/mm/highmem.c
index 9b2a5403c447..ee5519b176ee 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -46,6 +46,19 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
  */
 #ifdef CONFIG_HIGHMEM
 
+unsigned long totalhigh_pages __read_mostly;
+
+unsigned int nr_free_highpages (void)
+{
+	pg_data_t *pgdat;
+	unsigned int pages = 0;
+
+	for_each_online_pgdat(pgdat)
+		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+
+	return pages;
+}
+
 static int pkmap_count[LAST_PKMAP];
 static unsigned int last_pkmap_nr;
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e26491cb5a27..5cde54695cfb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
-unsigned long totalhigh_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
@@ -1185,20 +1184,6 @@ unsigned int nr_free_pagecache_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
 }
-
-#ifdef CONFIG_HIGHMEM
-unsigned int nr_free_highpages (void)
-{
-	pg_data_t *pgdat;
-	unsigned int pages = 0;
-
-	for_each_online_pgdat(pgdat)
-		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
-
-	return pages;
-}
-#endif
-
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
diff --git a/mm/shmem.c b/mm/shmem.c
index db21c51531ca..8631be45b40d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -45,6 +45,7 @@
 #include <linux/namei.h>
 #include <linux/ctype.h>
 #include <linux/migrate.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/div64.h>
-- 
cgit v1.2.3


From 2f1b6248682f8b39ca3c7e549dfc216d26c4109b Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:13 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: use enum to define zones, reformat and
 comment

Use enum for zones and reformat zones dependent information

Add comments explaning the use of zones and add a zones_t type for zone
numbers.

Line up information that will be #ifdefd by the following patches.

[akpm@osdl.org: comment cleanups]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     |  7 +++---
 include/linux/mmzone.h | 66 +++++++++++++++++++++++++++++++++++---------------
 mm/page_alloc.c        | 24 +++++++++++++-----
 3 files changed, 69 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 45678b036955..2db4229a0066 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -470,7 +470,7 @@ void split_page(struct page *page, unsigned int order);
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
 #define ZONETABLE_MASK		((1UL << ZONETABLE_SHIFT) - 1)
 
-static inline unsigned long page_zonenum(struct page *page)
+static inline enum zone_type page_zonenum(struct page *page)
 {
 	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
@@ -499,11 +499,12 @@ static inline unsigned long page_to_section(struct page *page)
 	return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
 }
 
-static inline void set_page_zone(struct page *page, unsigned long zone)
+static inline void set_page_zone(struct page *page, enum zone_type zone)
 {
 	page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
 	page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
 }
+
 static inline void set_page_node(struct page *page, unsigned long node)
 {
 	page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
@@ -515,7 +516,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
 	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
 }
 
-static inline void set_page_links(struct page *page, unsigned long zone,
+static inline void set_page_links(struct page *page, enum zone_type zone,
 	unsigned long node, unsigned long pfn)
 {
 	set_page_zone(page, zone);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f45163c528e8..03a5a6eb0ffa 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -88,14 +88,53 @@ struct per_cpu_pageset {
 #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
 #endif
 
-#define ZONE_DMA		0
-#define ZONE_DMA32		1
-#define ZONE_NORMAL		2
-#define ZONE_HIGHMEM		3
+enum zone_type {
+	/*
+	 * ZONE_DMA is used when there are devices that are not able
+	 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
+	 * carve out the portion of memory that is needed for these devices.
+	 * The range is arch specific.
+	 *
+	 * Some examples
+	 *
+	 * Architecture		Limit
+	 * ---------------------------
+	 * parisc, ia64, sparc	<4G
+	 * s390			<2G
+	 * arm26		<48M
+	 * arm			Various
+	 * alpha		Unlimited or 0-16MB.
+	 *
+	 * i386, x86_64 and multiple other arches
+	 * 			<16M.
+	 */
+	ZONE_DMA,
+	/*
+	 * x86_64 needs two ZONE_DMAs because it supports devices that are
+	 * only able to do DMA to the lower 16M but also 32 bit devices that
+	 * can only do DMA areas below 4G.
+	 */
+	ZONE_DMA32,
+	/*
+	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
+	 * performed on pages in ZONE_NORMAL if the DMA devices support
+	 * transfers to all addressable memory.
+	 */
+	ZONE_NORMAL,
+	/*
+	 * A memory area that is only addressable by the kernel through
+	 * mapping portions into its own address space. This is for example
+	 * used by i386 to allow the kernel to address the memory beyond
+	 * 900MB. The kernel will set up special mappings (page
+	 * table entries on i386) for each page that the kernel needs to
+	 * access.
+	 */
+	ZONE_HIGHMEM,
 
-#define MAX_NR_ZONES		4	/* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
+	MAX_NR_ZONES
+};
 
+#define	ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 
 /*
  * When a memory allocation must conform to specific limitations (such
@@ -126,16 +165,6 @@ struct per_cpu_pageset {
 /* #define GFP_ZONETYPES       (GFP_ZONEMASK + 1) */           /* Non-loner */
 #define GFP_ZONETYPES  ((GFP_ZONEMASK + 1) / 2 + 1)            /* Loner */
 
-/*
- * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a 32bit PC we have 4 zones:
- *
- * ZONE_DMA	  < 16 MB	ISA DMA capable memory
- * ZONE_DMA32	     0 MB 	Empty
- * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
- * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
- */
-
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 	unsigned long		free_pages;
@@ -266,7 +295,6 @@ struct zone {
 	char			*name;
 } ____cacheline_internodealigned_in_smp;
 
-
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -373,12 +401,12 @@ static inline int populated_zone(struct zone *zone)
 	return (!!zone->present_pages);
 }
 
-static inline int is_highmem_idx(int idx)
+static inline int is_highmem_idx(enum zone_type idx)
 {
 	return (idx == ZONE_HIGHMEM);
 }
 
-static inline int is_normal_idx(int idx)
+static inline int is_normal_idx(enum zone_type idx)
 {
 	return (idx == ZONE_NORMAL);
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 13c102b95c57..2410a3cb1c53 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -68,7 +68,11 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  * TBD: should special case ZONE_DMA32 machines here - in those we normally
  * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+	 256,
+	 256,
+	 32
+};
 
 EXPORT_SYMBOL(totalram_pages);
 
@@ -79,7 +83,13 @@ EXPORT_SYMBOL(totalram_pages);
 struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
 EXPORT_SYMBOL(zone_table);
 
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = {
+	 "DMA",
+	 "DMA32",
+	 "Normal",
+	 "HighMem"
+};
+
 int min_free_kbytes = 1024;
 
 unsigned long __meminitdata nr_kernel_pages;
@@ -1487,7 +1497,9 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
 
 static void __meminit build_zonelists(pg_data_t *pgdat)
 {
-	int i, j, k, node, local_node;
+	int i, node, local_node;
+	enum zone_type k;
+	enum zone_type j;
 
 	local_node = pgdat->node_id;
 	for (i = 0; i < GFP_ZONETYPES; i++) {
@@ -1675,8 +1687,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
 }
 
 #define ZONETABLE_INDEX(x, zone_nr)	((x << ZONES_SHIFT) | zone_nr)
-void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
-		unsigned long size)
+void zonetable_add(struct zone *zone, int nid, enum zone_type zid,
+		unsigned long pfn, unsigned long size)
 {
 	unsigned long snum = pfn_to_section_nr(pfn);
 	unsigned long end = pfn_to_section_nr(pfn + size);
@@ -1960,7 +1972,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
 {
-	unsigned long j;
+	enum zone_type j;
 	int nid = pgdat->node_id;
 	unsigned long zone_start_pfn = pgdat->node_start_pfn;
 	int ret;
-- 
cgit v1.2.3


From fb0e7942bdcbbd2f90e61cb4cfa4fa892a873f8a Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:13 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: make ZONE_DMA32 optional

Make ZONE_DMA32 optional

- Add #ifdefs around ZONE_DMA32 specific code and definitions.

- Add CONFIG_ZONE_DMA32 config option and use that for x86_64
  that alone needs this zone.

- Remove the use of CONFIG_DMA_IS_DMA32 and CONFIG_DMA_IS_NORMAL
  for ia64 and fix up the way per node ZVCs are calculated.

- Fall back to prior GFP_ZONEMASK of 0x03 if there is no
  DMA32 zone.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ia64/Kconfig      |  9 ---------
 arch/x86_64/Kconfig    |  4 ++++
 include/linux/gfp.h    |  2 +-
 include/linux/mmzone.h | 16 +++++++++++++---
 include/linux/vmstat.h |  4 +---
 mm/page_alloc.c        |  6 ++++++
 6 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index db274da7dba1..f521f2f60a78 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR
 	bool
 	select GENERIC_ALLOCATOR
 
-config DMA_IS_DMA32
-	bool
-	default y
-
-config DMA_IS_NORMAL
-	bool
-	depends on IA64_SGI_SN2
-	default y
-
 config AUDIT_ARCH
 	bool
 	default y
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 6cd4878625f1..581ce9af0ec8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,10 @@ config X86
 	bool
 	default y
 
+config ZONE_DMA32
+	bool
+	default y
+
 config LOCKDEP_SUPPORT
 	bool
 	default y
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cc9e60844484..14610b56c132 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -13,7 +13,7 @@ struct vm_area_struct;
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
 #define __GFP_DMA	((__force gfp_t)0x01u)
 #define __GFP_HIGHMEM	((__force gfp_t)0x02u)
-#ifdef CONFIG_DMA_IS_DMA32
+#ifndef CONFIG_ZONE_DMA32
 #define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
 #elif BITS_PER_LONG < 64
 #define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 03a5a6eb0ffa..adae3c915938 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -109,12 +109,14 @@ enum zone_type {
 	 * 			<16M.
 	 */
 	ZONE_DMA,
+#ifdef CONFIG_ZONE_DMA32
 	/*
 	 * x86_64 needs two ZONE_DMAs because it supports devices that are
 	 * only able to do DMA to the lower 16M but also 32 bit devices that
 	 * can only do DMA areas below 4G.
 	 */
 	ZONE_DMA32,
+#endif
 	/*
 	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
 	 * performed on pages in ZONE_NORMAL if the DMA devices support
@@ -161,9 +163,13 @@ enum zone_type {
  *
  * NOTE! Make sure this matches the zones in <linux/gfp.h>
  */
-#define GFP_ZONEMASK	0x07
-/* #define GFP_ZONETYPES       (GFP_ZONEMASK + 1) */           /* Non-loner */
-#define GFP_ZONETYPES  ((GFP_ZONEMASK + 1) / 2 + 1)            /* Loner */
+#define GFP_ZONETYPES		((GFP_ZONEMASK + 1) / 2 + 1)    /* Loner */
+
+#ifdef CONFIG_ZONE_DMA32
+#define GFP_ZONEMASK		0x07
+#else
+#define GFP_ZONEMASK		0x03
+#endif
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
@@ -429,7 +435,11 @@ static inline int is_normal(struct zone *zone)
 
 static inline int is_dma32(struct zone *zone)
 {
+#ifdef CONFIG_ZONE_DMA32
 	return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_dma(struct zone *zone)
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d9b1b60798a..9c6e62c56ec2 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -124,12 +124,10 @@ static inline unsigned long node_page_state(int node,
 	struct zone *zones = NODE_DATA(node)->node_zones;
 
 	return
-#ifndef CONFIG_DMA_IS_NORMAL
-#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64
+#ifdef CONFIG_ZONE_DMA32
 		zone_page_state(&zones[ZONE_DMA32], item) +
 #endif
 		zone_page_state(&zones[ZONE_NORMAL], item) +
-#endif
 #ifdef CONFIG_HIGHMEM
 		zone_page_state(&zones[ZONE_HIGHMEM], item) +
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2410a3cb1c53..5b5cbb5e1816 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -70,7 +70,9 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  */
 int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
 	 256,
+#ifdef CONFIG_ZONE_DMA32
 	 256,
+#endif
 	 32
 };
 
@@ -85,7 +87,9 @@ EXPORT_SYMBOL(zone_table);
 
 static char *zone_names[MAX_NR_ZONES] = {
 	 "DMA",
+#ifdef CONFIG_ZONE_DMA32
 	 "DMA32",
+#endif
 	 "Normal",
 	 "HighMem"
 };
@@ -1373,8 +1377,10 @@ static inline int highest_zone(int zone_bits)
 	int res = ZONE_NORMAL;
 	if (zone_bits & (__force int)__GFP_HIGHMEM)
 		res = ZONE_HIGHMEM;
+#ifdef CONFIG_ZONE_DMA32
 	if (zone_bits & (__force int)__GFP_DMA32)
 		res = ZONE_DMA32;
+#endif
 	if (zone_bits & (__force int)__GFP_DMA)
 		res = ZONE_DMA;
 	return res;
-- 
cgit v1.2.3


From e53ef38d05dd59ed281a35590e4a5b64d8ff4c52 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:14 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: make ZONE_HIGHMEM optional

Make ZONE_HIGHMEM optional

- ifdef out code and definitions related to CONFIG_HIGHMEM

- __GFP_HIGHMEM falls back to normal allocations if there is no
  ZONE_HIGHMEM

- GFP_ZONEMASK becomes 0x01 if there is no DMA32 and no HIGHMEM
  zone.

[jdike@addtoit.com: build fix]
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/um/kernel/mem.c   |  2 ++
 include/linux/gfp.h    | 18 ++++++++++--------
 include/linux/mmzone.h | 36 +++++++++++++++++++++++++++++++++---
 mm/page_alloc.c        |  6 ++++++
 4 files changed, 51 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index b39e624c3291..b1cd5c6e468b 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -226,7 +226,9 @@ void paging_init(void)
 	for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++) 
 		zones_size[i] = 0;
 	zones_size[ZONE_DMA] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT);
+#ifdef CONFIG_HIGHMEM
 	zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT;
+#endif
 	free_area_init(zones_size);
 
 	/*
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 14610b56c132..2a2153ebfe0b 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -9,17 +9,19 @@ struct vm_area_struct;
 
 /*
  * GFP bitmasks..
+ *
+ * Zone modifiers (see linux/mmzone.h - low three bits)
+ *
+ * These may be masked by GFP_ZONEMASK to make allocations with this bit
+ * set fall back to ZONE_NORMAL.
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use the consistently. The definitions here may
+ * be used in bit comparisons.
  */
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
 #define __GFP_DMA	((__force gfp_t)0x01u)
 #define __GFP_HIGHMEM	((__force gfp_t)0x02u)
-#ifndef CONFIG_ZONE_DMA32
-#define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
-#elif BITS_PER_LONG < 64
-#define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
-#else
-#define __GFP_DMA32	((__force gfp_t)0x04)	/* Has own ZONE_DMA32 */
-#endif
+#define __GFP_DMA32	((__force gfp_t)0x04u)
 
 /*
  * Action modifiers - doesn't change the zoning
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index adae3c915938..76d33e688593 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -123,6 +123,7 @@ enum zone_type {
 	 * transfers to all addressable memory.
 	 */
 	ZONE_NORMAL,
+#ifdef CONFIG_HIGHMEM
 	/*
 	 * A memory area that is only addressable by the kernel through
 	 * mapping portions into its own address space. This is for example
@@ -132,11 +133,10 @@ enum zone_type {
 	 * access.
 	 */
 	ZONE_HIGHMEM,
-
+#endif
 	MAX_NR_ZONES
 };
 
-#define	ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 
 /*
  * When a memory allocation must conform to specific limitations (such
@@ -163,12 +163,34 @@ enum zone_type {
  *
  * NOTE! Make sure this matches the zones in <linux/gfp.h>
  */
-#define GFP_ZONETYPES		((GFP_ZONEMASK + 1) / 2 + 1)    /* Loner */
 
 #ifdef CONFIG_ZONE_DMA32
+
+#ifdef CONFIG_HIGHMEM
+#define GFP_ZONETYPES		((GFP_ZONEMASK + 1) / 2 + 1)    /* Loner */
 #define GFP_ZONEMASK		0x07
+#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 #else
+#define GFP_ZONETYPES		((0x07 + 1) / 2 + 1)    /* Loner */
+/* Mask __GFP_HIGHMEM */
+#define GFP_ZONEMASK		0x05
+#define ZONES_SHIFT		2
+#endif
+
+#else
+#ifdef CONFIG_HIGHMEM
+
 #define GFP_ZONEMASK		0x03
+#define ZONES_SHIFT		2
+#define GFP_ZONETYPES		3
+
+#else
+
+#define GFP_ZONEMASK		0x01
+#define ZONES_SHIFT		1
+#define GFP_ZONETYPES		2
+
+#endif
 #endif
 
 struct zone {
@@ -409,7 +431,11 @@ static inline int populated_zone(struct zone *zone)
 
 static inline int is_highmem_idx(enum zone_type idx)
 {
+#ifdef CONFIG_HIGHMEM
 	return (idx == ZONE_HIGHMEM);
+#else
+	return 0;
+#endif
 }
 
 static inline int is_normal_idx(enum zone_type idx)
@@ -425,7 +451,11 @@ static inline int is_normal_idx(enum zone_type idx)
  */
 static inline int is_highmem(struct zone *zone)
 {
+#ifdef CONFIG_HIGHMEM
 	return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_normal(struct zone *zone)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b5cbb5e1816..6c7c2dd1b3ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -73,7 +73,9 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
 #ifdef CONFIG_ZONE_DMA32
 	 256,
 #endif
+#ifdef CONFIG_HIGHMEM
 	 32
+#endif
 };
 
 EXPORT_SYMBOL(totalram_pages);
@@ -91,7 +93,9 @@ static char *zone_names[MAX_NR_ZONES] = {
 	 "DMA32",
 #endif
 	 "Normal",
+#ifdef CONFIG_HIGHMEM
 	 "HighMem"
+#endif
 };
 
 int min_free_kbytes = 1024;
@@ -1375,8 +1379,10 @@ static int __meminit build_zonelists_node(pg_data_t *pgdat,
 static inline int highest_zone(int zone_bits)
 {
 	int res = ZONE_NORMAL;
+#ifdef CONFIG_HIGHMEM
 	if (zone_bits & (__force int)__GFP_HIGHMEM)
 		res = ZONE_HIGHMEM;
+#endif
 #ifdef CONFIG_ZONE_DMA32
 	if (zone_bits & (__force int)__GFP_DMA32)
 		res = ZONE_DMA32;
-- 
cgit v1.2.3


From 27bf71c2a7e596ed34e9bf2d4a5030321a09a1ad Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:15 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: remove display of counters for
 unconfigured zones

eventcounters: Do not display counters for zones that are not available on an
arch

Do not define or display counters for the DMA32 and the HIGHMEM zone if such
zones were not configured.

[akpm@osdl.org: s390 fix]
[heiko.carstens@de.ibm.com: s390 fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/appldata/appldata_mem.c |  3 +--
 include/linux/vmstat.h            | 14 ++++++++++++-
 mm/vmstat.c                       | 43 ++++++++++++++++++---------------------
 3 files changed, 34 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index ab3b0765a64e..8aea3698a77b 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -117,8 +117,7 @@ static void appldata_get_mem_data(void *data)
 	mem_data->pgpgout    = ev[PGPGOUT] >> 1;
 	mem_data->pswpin     = ev[PSWPIN];
 	mem_data->pswpout    = ev[PSWPOUT];
-	mem_data->pgalloc    = ev[PGALLOC_HIGH] + ev[PGALLOC_NORMAL] +
-			       ev[PGALLOC_DMA];
+	mem_data->pgalloc    = ev[PGALLOC_NORMAL] + ev[PGALLOC_DMA];
 	mem_data->pgfault    = ev[PGFAULT];
 	mem_data->pgmajfault = ev[PGMAJFAULT];
 
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 9c6e62c56ec2..176c7f797339 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -18,7 +18,19 @@
  * generated will simply be the increment of a global address.
  */
 
-#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
+#ifdef CONFIG_ZONE_DMA32
+#define DMA32_ZONE(xx) xx##_DMA32,
+#else
+#define DMA32_ZONE(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define HIGHMEM_ZONE(xx) , xx##_HIGH
+#else
+#define HIGHMEM_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
 
 enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		FOR_ALL_ZONES(PGALLOC),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c1b5f4106b38..04a9093f649e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -435,6 +435,21 @@ struct seq_operations fragmentation_op = {
 	.show	= frag_show,
 };
 
+#ifdef CONFIG_ZONE_DMA32
+#define TEXT_FOR_DMA32(xx) xx "_dma32",
+#else
+#define TEXT_FOR_DMA32(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define TEXT_FOR_HIGHMEM(xx) xx "_high",
+#else
+#define TEXT_FOR_HIGHMEM(xx)
+#endif
+
+#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \
+					TEXT_FOR_HIGHMEM(xx)
+
 static char *vmstat_text[] = {
 	/* Zoned VM counters */
 	"nr_anon_pages",
@@ -462,10 +477,7 @@ static char *vmstat_text[] = {
 	"pswpin",
 	"pswpout",
 
-	"pgalloc_dma",
-	"pgalloc_dma32",
-	"pgalloc_normal",
-	"pgalloc_high",
+	TEXTS_FOR_ZONES("pgalloc")
 
 	"pgfree",
 	"pgactivate",
@@ -474,25 +486,10 @@ static char *vmstat_text[] = {
 	"pgfault",
 	"pgmajfault",
 
-	"pgrefill_dma",
-	"pgrefill_dma32",
-	"pgrefill_normal",
-	"pgrefill_high",
-
-	"pgsteal_dma",
-	"pgsteal_dma32",
-	"pgsteal_normal",
-	"pgsteal_high",
-
-	"pgscan_kswapd_dma",
-	"pgscan_kswapd_dma32",
-	"pgscan_kswapd_normal",
-	"pgscan_kswapd_high",
-
-	"pgscan_direct_dma",
-	"pgscan_direct_dma32",
-	"pgscan_direct_normal",
-	"pgscan_direct_high",
+	TEXTS_FOR_ZONES("pgrefill")
+	TEXTS_FOR_ZONES("pgsteal")
+	TEXTS_FOR_ZONES("pgscan_kswapd")
+	TEXTS_FOR_ZONES("pgscan_direct")
 
 	"pginodesteal",
 	"slabs_scanned",
-- 
cgit v1.2.3


From 4e4785bcf0c8503224fa6c17d8e0228de781bff6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:17 -0700
Subject: [PATCH] mempolicies: fix policy_zone check

There is a check in zonelist_policy that compares pieces of the bitmap
obtained from a gfp mask via GFP_ZONETYPES with a zone number in function
zonelist_policy().

The bitmap is an ORed mask of __GFP_DMA, __GFP_DMA32 and __GFP_HIGHMEM.
The policy_zone is a zone number with the possible values of ZONE_DMA,
ZONE_DMA32, ZONE_HIGHMEM and ZONE_NORMAL. These are two different domains
of values.

For some reason seemed to work before the zone reduction patchset (It
definitely works on SGI boxes since we just have one zone and the check
cannot fail).

With the zone reduction patchset this check definitely fails on systems
with two zones if the system actually has memory in both zones.

This is because ZONE_NORMAL is selected using no __GFP flag at
all and thus gfp_zone(gfpmask) == 0. ZONE_DMA is selected when __GFP_DMA
is set. __GFP_DMA is 0x01.  So gfp_zone(gfpmask) == 1.

policy_zone is set to ZONE_NORMAL (==1) if ZONE_NORMAL and ZONE_DMA are
populated.

For ZONE_NORMAL gfp_zone(<no _GFP_DMA>) yields 0 which is <
policy_zone(ZONE_NORMAL) and so policy is not applied to regular memory
allocations!

Instead gfp_zone(__GFP_DMA) == 1 which results in policy being applied
to DMA allocations!

What we realy want in that place is to establish the highest allowable
zone for a given gfp_mask. If the highest zone is higher or equal to the
policy_zone then memory policies need to be applied. We have such
a highest_zone() function in page_alloc.c.

So move the highest_zone() function from mm/page_alloc.c into
include/linux/gfp.h.  On the way we simplify the function and use the new
zone_type that was also introduced with the zone reduction patchset plus we
also specify the right type for the gfp flags parameter.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 15 +++++++++++++++
 mm/mempolicy.c      |  2 +-
 mm/page_alloc.c     | 16 ----------------
 3 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2a2153ebfe0b..a0992d392d79 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -85,6 +85,21 @@ static inline int gfp_zone(gfp_t gfp)
 	return zone;
 }
 
+static inline enum zone_type highest_zone(gfp_t flags)
+{
+	if (flags & __GFP_DMA)
+		return ZONE_DMA;
+#ifdef CONFIG_ZONE_DMA32
+	if (flags & __GFP_DMA32)
+		return ZONE_DMA32;
+#endif
+#ifdef CONFIG_HIGHMEM
+	if (flags & __GFP_HIGHMEM)
+		return ZONE_HIGHMEM;
+#endif
+	return ZONE_NORMAL;
+}
+
 /*
  * There is only one page-allocator function, and two main namespaces to
  * it. The alloc_page*() variants return 'struct page *' and as such
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a9963ceddd65..9870624d72a6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1096,7 +1096,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
 	case MPOL_BIND:
 		/* Lower zones don't get a policy applied */
 		/* Careful: current->mems_allowed might have moved */
-		if (gfp_zone(gfp) >= policy_zone)
+		if (highest_zone(gfp) >= policy_zone)
 			if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
 				return policy->v.zonelist;
 		/*FALL THROUGH*/
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6c7c2dd1b3ed..25f39865e324 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1376,22 +1376,6 @@ static int __meminit build_zonelists_node(pg_data_t *pgdat,
 	return nr_zones;
 }
 
-static inline int highest_zone(int zone_bits)
-{
-	int res = ZONE_NORMAL;
-#ifdef CONFIG_HIGHMEM
-	if (zone_bits & (__force int)__GFP_HIGHMEM)
-		res = ZONE_HIGHMEM;
-#endif
-#ifdef CONFIG_ZONE_DMA32
-	if (zone_bits & (__force int)__GFP_DMA32)
-		res = ZONE_DMA32;
-#endif
-	if (zone_bits & (__force int)__GFP_DMA)
-		res = ZONE_DMA;
-	return res;
-}
-
 #ifdef CONFIG_NUMA
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __meminitdata node_load[MAX_NUMNODES];
-- 
cgit v1.2.3


From 2f6726e54a9410e2e4cee864947c05e954051916 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:18 -0700
Subject: [PATCH] Apply type enum zone_type

After we have done this we can now do some typing cleanup.

The memory policy layer keeps a policy_zone that specifies
the zone that gets memory policies applied. This variable
can now be of type enum zone_type.

The check_highest_zone function and the build_zonelists funnctionm must
then also take a enum zone_type parameter.

Plus there are a number of loops over zones that also should use
zone_type.

We run into some troubles at some points with functions that need a
zone_type variable to become -1. Fix that up.

[pj@sgi.com: fix set_mempolicy() crash]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mempolicy.h |  4 ++--
 mm/mempolicy.c            | 11 ++++++++---
 mm/page_alloc.c           | 27 +++++++++++++++++----------
 3 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 72440f0a443d..09f0f575ddff 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 		unsigned long addr);
 extern unsigned slab_node(struct mempolicy *policy);
 
-extern int policy_zone;
+extern enum zone_type policy_zone;
 
-static inline void check_highest_zone(int k)
+static inline void check_highest_zone(enum zone_type k)
 {
 	if (k > policy_zone)
 		policy_zone = k;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9870624d72a6..7da4142ce960 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
 
 /* Highest zone. An specific allocation for a zone below that is not
    policied. */
-int policy_zone = ZONE_DMA;
+enum zone_type policy_zone = ZONE_DMA;
 
 struct mempolicy default_policy = {
 	.refcnt = ATOMIC_INIT(1), /* never free it */
@@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
 static struct zonelist *bind_zonelist(nodemask_t *nodes)
 {
 	struct zonelist *zl;
-	int num, max, nd, k;
+	int num, max, nd;
+	enum zone_type k;
 
 	max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
 	zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
@@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	   lower zones etc. Avoid empty zones because the memory allocator
 	   doesn't like them. If you implement node hot removal you
 	   have to fix that. */
-	for (k = policy_zone; k >= 0; k--) { 
+	k = policy_zone;
+	while (1) {
 		for_each_node_mask(nd, *nodes) { 
 			struct zone *z = &NODE_DATA(nd)->node_zones[k];
 			if (z->present_pages > 0) 
 				zl->zones[num++] = z;
 		}
+		if (k == 0)
+			break;
+		k--;
 	}
 	zl->zones[num] = NULL;
 	return zl;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 25f39865e324..9c44b9a39d30 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -637,7 +637,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  */
 void drain_node_pages(int nodeid)
 {
-	int i, z;
+	int i;
+	enum zone_type z;
 	unsigned long flags;
 
 	for (z = 0; z < MAX_NR_ZONES; z++) {
@@ -1158,7 +1159,8 @@ EXPORT_SYMBOL(nr_free_pages);
 #ifdef CONFIG_NUMA
 unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
 {
-	unsigned int i, sum = 0;
+	unsigned int sum = 0;
+	enum zone_type i;
 
 	for (i = 0; i < MAX_NR_ZONES; i++)
 		sum += pgdat->node_zones[i].free_pages;
@@ -1358,21 +1360,22 @@ void show_free_areas(void)
  * Add all populated zones of a node to the zonelist.
  */
 static int __meminit build_zonelists_node(pg_data_t *pgdat,
-			struct zonelist *zonelist, int nr_zones, int zone_type)
+			struct zonelist *zonelist, int nr_zones, enum zone_type zone_type)
 {
 	struct zone *zone;
 
 	BUG_ON(zone_type >= MAX_NR_ZONES);
+	zone_type++;
 
 	do {
+		zone_type--;
 		zone = pgdat->node_zones + zone_type;
 		if (populated_zone(zone)) {
 			zonelist->zones[nr_zones++] = zone;
 			check_highest_zone(zone_type);
 		}
-		zone_type--;
 
-	} while (zone_type >= 0);
+	} while (zone_type);
 	return nr_zones;
 }
 
@@ -1441,10 +1444,11 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask)
 
 static void __meminit build_zonelists(pg_data_t *pgdat)
 {
-	int i, j, k, node, local_node;
+	int i, j, node, local_node;
 	int prev_node, load;
 	struct zonelist *zonelist;
 	nodemask_t used_mask;
+	enum zone_type k;
 
 	/* initialize zonelists */
 	for (i = 0; i < GFP_ZONETYPES; i++) {
@@ -1628,7 +1632,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
 {
 	unsigned long realtotalpages, totalpages = 0;
-	int i;
+	enum zone_type i;
 
 	for (i = 0; i < MAX_NR_ZONES; i++)
 		totalpages += zones_size[i];
@@ -2116,7 +2120,7 @@ static void calculate_totalreserve_pages(void)
 {
 	struct pglist_data *pgdat;
 	unsigned long reserve_pages = 0;
-	int i, j;
+	enum zone_type i, j;
 
 	for_each_online_pgdat(pgdat) {
 		for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -2149,7 +2153,7 @@ static void calculate_totalreserve_pages(void)
 static void setup_per_zone_lowmem_reserve(void)
 {
 	struct pglist_data *pgdat;
-	int j, idx;
+	enum zone_type j, idx;
 
 	for_each_online_pgdat(pgdat) {
 		for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -2158,9 +2162,12 @@ static void setup_per_zone_lowmem_reserve(void)
 
 			zone->lowmem_reserve[j] = 0;
 
-			for (idx = j-1; idx >= 0; idx--) {
+			idx = j;
+			while (idx) {
 				struct zone *lower_zone;
 
+				idx--;
+
 				if (sysctl_lowmem_reserve_ratio[idx] < 1)
 					sysctl_lowmem_reserve_ratio[idx] = 1;
 
-- 
cgit v1.2.3


From 19655d3487001d7df0e10e9cbfc27c758b77c2b5 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:19 -0700
Subject: [PATCH] linearly index zone->node_zonelists[]

I wonder why we need this bitmask indexing into zone->node_zonelists[]?

We always start with the highest zone and then include all lower zones
if we build zonelists.

Are there really cases where we need allocation from ZONE_DMA or
ZONE_HIGHMEM but not ZONE_NORMAL? It seems that the current implementation
of highest_zone() makes that already impossible.

If we go linear on the index then gfp_zone() == highest_zone() and a lot
of definitions fall by the wayside.

We can now revert back to the use of gfp_zone() in mempolicy.c ;-)

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h    | 12 +-----------
 include/linux/mmzone.h | 52 +++++---------------------------------------------
 mm/mempolicy.c         |  2 +-
 mm/page_alloc.c        | 27 +++++++++++---------------
 4 files changed, 18 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a0992d392d79..63ab88a36f39 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -12,9 +12,6 @@ struct vm_area_struct;
  *
  * Zone modifiers (see linux/mmzone.h - low three bits)
  *
- * These may be masked by GFP_ZONEMASK to make allocations with this bit
- * set fall back to ZONE_NORMAL.
- *
  * Do not put any conditional on these. If necessary modify the definitions
  * without the underscores and use the consistently. The definitions here may
  * be used in bit comparisons.
@@ -78,14 +75,7 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 
 
-static inline int gfp_zone(gfp_t gfp)
-{
-	int zone = GFP_ZONEMASK & (__force int) gfp;
-	BUG_ON(zone >= GFP_ZONETYPES);
-	return zone;
-}
-
-static inline enum zone_type highest_zone(gfp_t flags)
+static inline enum zone_type gfp_zone(gfp_t flags)
 {
 	if (flags & __GFP_DMA)
 		return ZONE_DMA;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 76d33e688593..7fe317164b73 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -137,60 +137,18 @@ enum zone_type {
 	MAX_NR_ZONES
 };
 
-
 /*
  * When a memory allocation must conform to specific limitations (such
  * as being suitable for DMA) the caller will pass in hints to the
  * allocator in the gfp_mask, in the zone modifier bits.  These bits
  * are used to select a priority ordered list of memory zones which
- * match the requested limits.  GFP_ZONEMASK defines which bits within
- * the gfp_mask should be considered as zone modifiers.  Each valid
- * combination of the zone modifier bits has a corresponding list
- * of zones (in node_zonelists).  Thus for two zone modifiers there
- * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
- * be 8 (2 ** 3) zonelists.  GFP_ZONETYPES defines the number of possible
- * combinations of zone modifiers in "zone modifier space".
- *
- * As an optimisation any zone modifier bits which are only valid when
- * no other zone modifier bits are set (loners) should be placed in
- * the highest order bits of this field.  This allows us to reduce the
- * extent of the zonelists thus saving space.  For example in the case
- * of three zone modifier bits, we could require up to eight zonelists.
- * If the left most zone modifier is a "loner" then the highest valid
- * zonelist would be four allowing us to allocate only five zonelists.
- * Use the first form for GFP_ZONETYPES when the left most bit is not
- * a "loner", otherwise use the second.
- *
- * NOTE! Make sure this matches the zones in <linux/gfp.h>
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
  */
 
-#ifdef CONFIG_ZONE_DMA32
-
-#ifdef CONFIG_HIGHMEM
-#define GFP_ZONETYPES		((GFP_ZONEMASK + 1) / 2 + 1)    /* Loner */
-#define GFP_ZONEMASK		0x07
-#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
-#else
-#define GFP_ZONETYPES		((0x07 + 1) / 2 + 1)    /* Loner */
-/* Mask __GFP_HIGHMEM */
-#define GFP_ZONEMASK		0x05
-#define ZONES_SHIFT		2
-#endif
-
-#else
-#ifdef CONFIG_HIGHMEM
-
-#define GFP_ZONEMASK		0x03
-#define ZONES_SHIFT		2
-#define GFP_ZONETYPES		3
-
+#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
+#define ZONES_SHIFT 1
 #else
-
-#define GFP_ZONEMASK		0x01
-#define ZONES_SHIFT		1
-#define GFP_ZONETYPES		2
-
-#endif
+#define ZONES_SHIFT 2
 #endif
 
 struct zone {
@@ -360,7 +318,7 @@ struct zonelist {
 struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
-	struct zonelist node_zonelists[GFP_ZONETYPES];
+	struct zonelist node_zonelists[MAX_NR_ZONES];
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
 	struct page *node_mem_map;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7da4142ce960..c3429a710ab1 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1101,7 +1101,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
 	case MPOL_BIND:
 		/* Lower zones don't get a policy applied */
 		/* Careful: current->mems_allowed might have moved */
-		if (highest_zone(gfp) >= policy_zone)
+		if (gfp_zone(gfp) >= policy_zone)
 			if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
 				return policy->v.zonelist;
 		/*FALL THROUGH*/
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9c44b9a39d30..208a6b03aa78 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1444,14 +1444,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask)
 
 static void __meminit build_zonelists(pg_data_t *pgdat)
 {
-	int i, j, node, local_node;
+	int j, node, local_node;
+	enum zone_type i;
 	int prev_node, load;
 	struct zonelist *zonelist;
 	nodemask_t used_mask;
-	enum zone_type k;
 
 	/* initialize zonelists */
-	for (i = 0; i < GFP_ZONETYPES; i++) {
+	for (i = 0; i < MAX_NR_ZONES; i++) {
 		zonelist = pgdat->node_zonelists + i;
 		zonelist->zones[0] = NULL;
 	}
@@ -1481,13 +1481,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
 			node_load[node] += load;
 		prev_node = node;
 		load--;
-		for (i = 0; i < GFP_ZONETYPES; i++) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
 			zonelist = pgdat->node_zonelists + i;
 			for (j = 0; zonelist->zones[j] != NULL; j++);
 
-			k = highest_zone(i);
-
-	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 			zonelist->zones[j] = NULL;
 		}
 	}
@@ -1497,19 +1495,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
 
 static void __meminit build_zonelists(pg_data_t *pgdat)
 {
-	int i, node, local_node;
-	enum zone_type k;
-	enum zone_type j;
+	int node, local_node;
+	enum zone_type i,j;
 
 	local_node = pgdat->node_id;
-	for (i = 0; i < GFP_ZONETYPES; i++) {
+	for (i = 0; i < MAX_NR_ZONES; i++) {
 		struct zonelist *zonelist;
 
 		zonelist = pgdat->node_zonelists + i;
 
-		j = 0;
-		k = highest_zone(i);
- 		j = build_zonelists_node(pgdat, zonelist, j, k);
+ 		j = build_zonelists_node(pgdat, zonelist, 0, i);
  		/*
  		 * Now we build the zonelist so that it contains the zones
  		 * of all the other nodes.
@@ -1521,12 +1516,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
 		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
 			if (!node_online(node))
 				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 		}
 		for (node = 0; node < local_node; node++) {
 			if (!node_online(node))
 				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
 		}
 
 		zonelist->zones[j] = NULL;
-- 
cgit v1.2.3


From 8bc719d3cab8414938f9ea6e33b58d8810d18068 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 25 Sep 2006 23:31:20 -0700
Subject: [PATCH] out of memory notifier

Add a notifer chain to the out of memory killer.  If one of the registered
callbacks could release some memory, do not kill the process but return and
retry the allocation that forced the oom killer to run.

The purpose of the notifier is to add a safety net in the presence of
memory ballooners.  If the resource manager inflated the balloon to a size
where memory allocations can not be satisfied anymore, it is better to
deflate the balloon a bit instead of killing processes.

The implementation for the s390 ballooner is included.

[akpm@osdl.org: cleanups]
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/mm/cmm.c   | 155 +++++++++++++++++++++++++++++++--------------------
 include/linux/swap.h |   4 ++
 mm/oom_kill.c        |  22 ++++++++
 3 files changed, 121 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 786a44dba5bf..f2e00df99bae 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/sysctl.h>
 #include <linux/ctype.h>
+#include <linux/swap.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -34,17 +35,18 @@ struct cmm_page_array {
 	unsigned long pages[CMM_NR_PAGES];
 };
 
-static long cmm_pages = 0;
-static long cmm_timed_pages = 0;
-static volatile long cmm_pages_target = 0;
-static volatile long cmm_timed_pages_target = 0;
-static long cmm_timeout_pages = 0;
-static long cmm_timeout_seconds = 0;
+static long cmm_pages;
+static long cmm_timed_pages;
+static volatile long cmm_pages_target;
+static volatile long cmm_timed_pages_target;
+static long cmm_timeout_pages;
+static long cmm_timeout_seconds;
 
-static struct cmm_page_array *cmm_page_list = NULL;
-static struct cmm_page_array *cmm_timed_page_list = NULL;
+static struct cmm_page_array *cmm_page_list;
+static struct cmm_page_array *cmm_timed_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
 
-static unsigned long cmm_thread_active = 0;
+static unsigned long cmm_thread_active;
 static struct work_struct cmm_thread_starter;
 static wait_queue_head_t cmm_thread_wait;
 static struct timer_list cmm_timer;
@@ -53,58 +55,89 @@ static void cmm_timer_fn(unsigned long);
 static void cmm_set_timer(void);
 
 static long
-cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list)
+cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list)
 {
-	struct cmm_page_array *pa;
-	unsigned long page;
+	struct cmm_page_array *pa, *npa;
+	unsigned long addr;
 
-	pa = *list;
-	while (pages) {
-		page = __get_free_page(GFP_NOIO);
-		if (!page)
+	while (nr) {
+		addr = __get_free_page(GFP_NOIO);
+		if (!addr)
 			break;
+		spin_lock(&cmm_lock);
+		pa = *list;
 		if (!pa || pa->index >= CMM_NR_PAGES) {
 			/* Need a new page for the page list. */
-			pa = (struct cmm_page_array *)
+			spin_unlock(&cmm_lock);
+			npa = (struct cmm_page_array *)
 				__get_free_page(GFP_NOIO);
-			if (!pa) {
-				free_page(page);
+			if (!npa) {
+				free_page(addr);
 				break;
 			}
-			pa->next = *list;
-			pa->index = 0;
-			*list = pa;
+			spin_lock(&cmm_lock);
+			pa = *list;
+			if (!pa || pa->index >= CMM_NR_PAGES) {
+				npa->next = pa;
+				npa->index = 0;
+				pa = npa;
+				*list = pa;
+			} else
+				free_page((unsigned long) npa);
 		}
-		diag10(page);
-		pa->pages[pa->index++] = page;
+		diag10(addr);
+		pa->pages[pa->index++] = addr;
 		(*counter)++;
-		pages--;
+		spin_unlock(&cmm_lock);
+		nr--;
 	}
-	return pages;
+	return nr;
 }
 
-static void
-cmm_free_pages(long pages, long *counter, struct cmm_page_array **list)
+static long
+cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
 {
 	struct cmm_page_array *pa;
-	unsigned long page;
+	unsigned long addr;
 
+	spin_lock(&cmm_lock);
 	pa = *list;
-	while (pages) {
+	while (nr) {
 		if (!pa || pa->index <= 0)
 			break;
-		page = pa->pages[--pa->index];
+		addr = pa->pages[--pa->index];
 		if (pa->index == 0) {
 			pa = pa->next;
 			free_page((unsigned long) *list);
 			*list = pa;
 		}
-		free_page(page);
+		free_page(addr);
 		(*counter)--;
-		pages--;
+		nr--;
 	}
+	spin_unlock(&cmm_lock);
+	return nr;
 }
 
+static int cmm_oom_notify(struct notifier_block *self,
+			  unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+	long nr = 256;
+
+	nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list);
+	if (nr > 0)
+		nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list);
+	cmm_pages_target = cmm_pages;
+	cmm_timed_pages_target = cmm_timed_pages;
+	*freed += 256 - nr;
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cmm_oom_nb = {
+	.notifier_call = cmm_oom_notify
+};
+
 static int
 cmm_thread(void *dummy)
 {
@@ -177,21 +210,21 @@ cmm_set_timer(void)
 static void
 cmm_timer_fn(unsigned long ignored)
 {
-	long pages;
+	long nr;
 
-	pages = cmm_timed_pages_target - cmm_timeout_pages;
-	if (pages < 0)
+	nr = cmm_timed_pages_target - cmm_timeout_pages;
+	if (nr < 0)
 		cmm_timed_pages_target = 0;
 	else
-		cmm_timed_pages_target = pages;
+		cmm_timed_pages_target = nr;
 	cmm_kick_thread();
 	cmm_set_timer();
 }
 
 void
-cmm_set_pages(long pages)
+cmm_set_pages(long nr)
 {
-	cmm_pages_target = pages;
+	cmm_pages_target = nr;
 	cmm_kick_thread();
 }
 
@@ -202,9 +235,9 @@ cmm_get_pages(void)
 }
 
 void
-cmm_add_timed_pages(long pages)
+cmm_add_timed_pages(long nr)
 {
-	cmm_timed_pages_target += pages;
+	cmm_timed_pages_target += nr;
 	cmm_kick_thread();
 }
 
@@ -215,9 +248,9 @@ cmm_get_timed_pages(void)
 }
 
 void
-cmm_set_timeout(long pages, long seconds)
+cmm_set_timeout(long nr, long seconds)
 {
-	cmm_timeout_pages = pages;
+	cmm_timeout_pages = nr;
 	cmm_timeout_seconds = seconds;
 	cmm_set_timer();
 }
@@ -245,7 +278,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	char buf[16], *p;
-	long pages;
+	long nr;
 	int len;
 
 	if (!*lenp || (*ppos && !write)) {
@@ -260,17 +293,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
 			return -EFAULT;
 		buf[sizeof(buf) - 1] = '\0';
 		cmm_skip_blanks(buf, &p);
-		pages = simple_strtoul(p, &p, 0);
+		nr = simple_strtoul(p, &p, 0);
 		if (ctl == &cmm_table[0])
-			cmm_set_pages(pages);
+			cmm_set_pages(nr);
 		else
-			cmm_add_timed_pages(pages);
+			cmm_add_timed_pages(nr);
 	} else {
 		if (ctl == &cmm_table[0])
-			pages = cmm_get_pages();
+			nr = cmm_get_pages();
 		else
-			pages = cmm_get_timed_pages();
-		len = sprintf(buf, "%ld\n", pages);
+			nr = cmm_get_timed_pages();
+		len = sprintf(buf, "%ld\n", nr);
 		if (len > *lenp)
 			len = *lenp;
 		if (copy_to_user(buffer, buf, len))
@@ -286,7 +319,7 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	char buf[64], *p;
-	long pages, seconds;
+	long nr, seconds;
 	int len;
 
 	if (!*lenp || (*ppos && !write)) {
@@ -301,10 +334,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
 			return -EFAULT;
 		buf[sizeof(buf) - 1] = '\0';
 		cmm_skip_blanks(buf, &p);
-		pages = simple_strtoul(p, &p, 0);
+		nr = simple_strtoul(p, &p, 0);
 		cmm_skip_blanks(p, &p);
 		seconds = simple_strtoul(p, &p, 0);
-		cmm_set_timeout(pages, seconds);
+		cmm_set_timeout(nr, seconds);
 	} else {
 		len = sprintf(buf, "%ld %ld\n",
 			      cmm_timeout_pages, cmm_timeout_seconds);
@@ -357,7 +390,7 @@ static struct ctl_table cmm_dir_table[] = {
 static void
 cmm_smsg_target(char *from, char *msg)
 {
-	long pages, seconds;
+	long nr, seconds;
 
 	if (strlen(sender) > 0 && strcmp(from, sender) != 0)
 		return;
@@ -366,27 +399,27 @@ cmm_smsg_target(char *from, char *msg)
 	if (strncmp(msg, "SHRINK", 6) == 0) {
 		if (!cmm_skip_blanks(msg + 6, &msg))
 			return;
-		pages = simple_strtoul(msg, &msg, 0);
+		nr = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
-			cmm_set_pages(pages);
+			cmm_set_pages(nr);
 	} else if (strncmp(msg, "RELEASE", 7) == 0) {
 		if (!cmm_skip_blanks(msg + 7, &msg))
 			return;
-		pages = simple_strtoul(msg, &msg, 0);
+		nr = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
-			cmm_add_timed_pages(pages);
+			cmm_add_timed_pages(nr);
 	} else if (strncmp(msg, "REUSE", 5) == 0) {
 		if (!cmm_skip_blanks(msg + 5, &msg))
 			return;
-		pages = simple_strtoul(msg, &msg, 0);
+		nr = simple_strtoul(msg, &msg, 0);
 		if (!cmm_skip_blanks(msg, &msg))
 			return;
 		seconds = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
-			cmm_set_timeout(pages, seconds);
+			cmm_set_timeout(nr, seconds);
 	}
 }
 #endif
@@ -402,6 +435,7 @@ cmm_init (void)
 #ifdef CONFIG_CMM_IUCV
 	smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
 #endif
+	register_oom_notifier(&cmm_oom_nb);
 	INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL);
 	init_waitqueue_head(&cmm_thread_wait);
 	init_timer(&cmm_timer);
@@ -411,6 +445,7 @@ cmm_init (void)
 static void
 cmm_exit(void)
 {
+	unregister_oom_notifier(&cmm_oom_nb);
 	cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
 	cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
 #ifdef CONFIG_CMM_PROC
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 34a6bc3e6cf3..32db06c8ffe0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -10,6 +10,8 @@
 #include <asm/atomic.h>
 #include <asm/page.h>
 
+struct notifier_block;
+
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
@@ -156,6 +158,8 @@ struct swap_list_t {
 
 /* linux/mm/oom_kill.c */
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
+extern int register_oom_notifier(struct notifier_block *nb);
+extern int unregister_oom_notifier(struct notifier_block *nb);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b9af136e5cfa..7d056843fa2d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -21,6 +21,8 @@
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/cpuset.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
 
 int sysctl_panic_on_oom;
 /* #define DEBUG */
@@ -306,6 +308,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
 	return oom_kill_task(p, message);
 }
 
+static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
+
+int register_oom_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&oom_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_oom_notifier);
+
+int unregister_oom_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_oom_notifier);
+
 /**
  * out_of_memory - kill the "best" process when we run out of memory
  *
@@ -318,6 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
 	struct task_struct *p;
 	unsigned long points = 0;
+	unsigned long freed = 0;
+
+	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
+	if (freed > 0)
+		/* Got some memory back in the last second. */
+		return;
 
 	if (printk_ratelimit()) {
 		printk("oom-killer: gfp_mask=0x%x, order=%d\n",
-- 
cgit v1.2.3


From 7ff6f08295d90ab20d25200ef485ebb45b1b8d71 Mon Sep 17 00:00:00 2001
From: Martin Peschke <mp3@de.ibm.com>
Date: Mon, 25 Sep 2006 23:31:21 -0700
Subject: [PATCH] CPU hotplug compatible alloc_percpu()

This patch splits alloc_percpu() up into two phases.  Likewise for
free_percpu().  This allows clients to limit initial allocations to online
cpu's, and to populate or depopulate per-cpu data at run time as needed:

  struct my_struct *obj;

  /* initial allocation for online cpu's */
  obj = percpu_alloc(sizeof(struct my_struct), GFP_KERNEL);

  ...

  /* populate per-cpu data for cpu coming online */
  ptr = percpu_populate(obj, sizeof(struct my_struct), GFP_KERNEL, cpu);

  ...

  /* access per-cpu object */
  ptr = percpu_ptr(obj, smp_processor_id());

  ...

  /* depopulate per-cpu data for cpu going offline */
  percpu_depopulate(obj, cpu);

  ...

  /* final removal */
  percpu_free(obj);

Signed-off-by: Martin Peschke <mp3@de.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/percpu.h |  79 +++++++++++++++++------
 mm/slab.c              | 166 ++++++++++++++++++++++++++++++++-----------------
 2 files changed, 169 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index f926490a7d8b..3835a9642f13 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
+
 #include <linux/spinlock.h> /* For preempt_disable() */
 #include <linux/slab.h> /* For kmalloc() */
 #include <linux/smp.h>
 #include <linux/string.h> /* For memset() */
+#include <linux/cpumask.h>
+
 #include <asm/percpu.h>
 
 /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -27,39 +30,77 @@ struct percpu_data {
 	void *ptrs[NR_CPUS];
 };
 
+#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
 /* 
- * Use this to get to a cpu's version of the per-cpu object allocated using
- * alloc_percpu.  Non-atomic access to the current CPU's version should
+ * Use this to get to a cpu's version of the per-cpu object dynamically
+ * allocated. Non-atomic access to the current CPU's version should
  * probably be combined with get_cpu()/put_cpu().
  */ 
-#define per_cpu_ptr(ptr, cpu)                   \
-({                                              \
-        struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
-        (__typeof__(ptr))__p->ptrs[(cpu)];	\
+#define percpu_ptr(ptr, cpu)                              \
+({                                                        \
+        struct percpu_data *__p = __percpu_disguise(ptr); \
+        (__typeof__(ptr))__p->ptrs[(cpu)];	          \
 })
 
-extern void *__alloc_percpu(size_t size);
-extern void free_percpu(const void *);
+extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
+extern void percpu_depopulate(void *__pdata, int cpu);
+extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+				  cpumask_t *mask);
+extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
+extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
+extern void percpu_free(void *__pdata);
 
 #else /* CONFIG_SMP */
 
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+
+static inline void percpu_depopulate(void *__pdata, int cpu)
+{
+}
+
+static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+}
 
-static inline void *__alloc_percpu(size_t size)
+static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
+				    int cpu)
 {
-	void *ret = kmalloc(size, GFP_KERNEL);
-	if (ret)
-		memset(ret, 0, size);
-	return ret;
+	return percpu_ptr(__pdata, cpu);
 }
-static inline void free_percpu(const void *ptr)
-{	
-	kfree(ptr);
+
+static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+					 cpumask_t *mask)
+{
+	return 0;
+}
+
+static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+	return kzalloc(size, gfp);
+}
+
+static inline void percpu_free(void *__pdata)
+{
+	kfree(__pdata);
 }
 
 #endif /* CONFIG_SMP */
 
-/* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type)	((type *)(__alloc_percpu(sizeof(type))))
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+	__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
+#define percpu_depopulate_mask(__pdata, mask) \
+	__percpu_depopulate_mask((__pdata), &(mask))
+#define percpu_alloc_mask(size, gfp, mask) \
+	__percpu_alloc_mask((size), (gfp), &(mask))
+
+#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
+
+/* (legacy) interface for use without CPU hotplug handling */
+
+#define __alloc_percpu(size)	percpu_alloc_mask((size), GFP_KERNEL, \
+						  cpu_possible_map)
+#define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type))
+#define free_percpu(ptr)	percpu_free((ptr))
+#define per_cpu_ptr(ptr, cpu)	percpu_ptr((ptr), (cpu))
 
 #endif /* __LINUX_PERCPU_H */
diff --git a/mm/slab.c b/mm/slab.c
index 5870bcbd33cf..00584dbbec03 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3371,52 +3371,127 @@ EXPORT_SYMBOL(__kmalloc_track_caller);
 
 #ifdef CONFIG_SMP
 /**
- * __alloc_percpu - allocate one copy of the object for every present
- * cpu in the system, zeroing them.
- * Objects should be dereferenced using the per_cpu_ptr macro only.
+ * percpu_depopulate - depopulate per-cpu data for given cpu
+ * @__pdata: per-cpu data to depopulate
+ * @cpu: depopulate per-cpu data for this cpu
  *
- * @size: how many bytes of memory are required.
+ * Depopulating per-cpu data for a cpu going offline would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
  */
-void *__alloc_percpu(size_t size)
+void percpu_depopulate(void *__pdata, int cpu)
 {
-	int i;
-	struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+	struct percpu_data *pdata = __percpu_disguise(__pdata);
+	if (pdata->ptrs[cpu]) {
+		kfree(pdata->ptrs[cpu]);
+		pdata->ptrs[cpu] = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(percpu_depopulate);
 
-	if (!pdata)
-		return NULL;
+/**
+ * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
+ * @__pdata: per-cpu data to depopulate
+ * @mask: depopulate per-cpu data for cpu's selected through mask bits
+ */
+void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+	int cpu;
+	for_each_cpu_mask(cpu, *mask)
+		percpu_depopulate(__pdata, cpu);
+}
+EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
 
-	/*
-	 * Cannot use for_each_online_cpu since a cpu may come online
-	 * and we have no way of figuring out how to fix the array
-	 * that we have allocated then....
-	 */
-	for_each_possible_cpu(i) {
-		int node = cpu_to_node(i);
+/**
+ * percpu_populate - populate per-cpu data for given cpu
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @cpu: populate per-data for this cpu
+ *
+ * Populating per-cpu data for a cpu coming online would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
+ * Per-cpu object is populated with zeroed buffer.
+ */
+void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+{
+	struct percpu_data *pdata = __percpu_disguise(__pdata);
+	int node = cpu_to_node(cpu);
 
-		if (node_online(node))
-			pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
-		else
-			pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
+	BUG_ON(pdata->ptrs[cpu]);
+	if (node_online(node)) {
+		/* FIXME: kzalloc_node(size, gfp, node) */
+		pdata->ptrs[cpu] = kmalloc_node(size, gfp, node);
+		if (pdata->ptrs[cpu])
+			memset(pdata->ptrs[cpu], 0, size);
+	} else
+		pdata->ptrs[cpu] = kzalloc(size, gfp);
+	return pdata->ptrs[cpu];
+}
+EXPORT_SYMBOL_GPL(percpu_populate);
 
-		if (!pdata->ptrs[i])
-			goto unwind_oom;
-		memset(pdata->ptrs[i], 0, size);
-	}
+/**
+ * percpu_populate_mask - populate per-cpu data for more cpu's
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+			   cpumask_t *mask)
+{
+	cpumask_t populated = CPU_MASK_NONE;
+	int cpu;
+
+	for_each_cpu_mask(cpu, *mask)
+		if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
+			__percpu_depopulate_mask(__pdata, &populated);
+			return -ENOMEM;
+		} else
+			cpu_set(cpu, populated);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__percpu_populate_mask);
 
-	/* Catch derefs w/o wrappers */
-	return (void *)(~(unsigned long)pdata);
+/**
+ * percpu_alloc_mask - initial setup of per-cpu data
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-data for cpu's selected through mask bits
+ *
+ * Populating per-cpu data for all online cpu's would be a typical use case,
+ * which is simplified by the percpu_alloc() wrapper.
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+	void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
+	void *__pdata = __percpu_disguise(pdata);
 
-unwind_oom:
-	while (--i >= 0) {
-		if (!cpu_possible(i))
-			continue;
-		kfree(pdata->ptrs[i]);
-	}
+	if (unlikely(!pdata))
+		return NULL;
+	if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
+		return __pdata;
 	kfree(pdata);
 	return NULL;
 }
-EXPORT_SYMBOL(__alloc_percpu);
-#endif
+EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
+
+/**
+ * percpu_free - final cleanup of per-cpu data
+ * @__pdata: object to clean up
+ *
+ * We simply clean up any per-cpu object left. No need for the client to
+ * track and specify through a bis mask which per-cpu objects are to free.
+ */
+void percpu_free(void *__pdata)
+{
+	__percpu_depopulate_mask(__pdata, &cpu_possible_map);
+	kfree(__percpu_disguise(__pdata));
+}
+EXPORT_SYMBOL_GPL(percpu_free);
+#endif	/* CONFIG_SMP */
 
 /**
  * kmem_cache_free - Deallocate an object
@@ -3463,29 +3538,6 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
-#ifdef CONFIG_SMP
-/**
- * free_percpu - free previously allocated percpu memory
- * @objp: pointer returned by alloc_percpu.
- *
- * Don't free memory not originally allocated by alloc_percpu()
- * The complemented objp is to check for that.
- */
-void free_percpu(const void *objp)
-{
-	int i;
-	struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
-
-	/*
-	 * We allocate for all cpus so we cannot use for online cpu here.
-	 */
-	for_each_possible_cpu(i)
-	    kfree(p->ptrs[i]);
-	kfree(p);
-}
-EXPORT_SYMBOL(free_percpu);
-#endif
-
 unsigned int kmem_cache_size(struct kmem_cache *cachep)
 {
 	return obj_size(cachep);
-- 
cgit v1.2.3


From db37648cd6ce9b828abd6d49aa3d269926ee7b7d Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 25 Sep 2006 23:31:24 -0700
Subject: [PATCH] mm: non syncing lock_page()

lock_page needs the caller to have a reference on the page->mapping inode
due to sync_page, ergo set_page_dirty_lock is obviously buggy according to
its comments.

Solve it by introducing a new lock_page_nosync which does not do a sync_page.

akpm: unpleasant solution to an unpleasant problem.  If it goes wrong it could
cause great slowdowns while the lock_page() caller waits for kblockd to
perform the unplug.  And if a filesystem has special sync_page() requirements
(none presently do), permanent hangs are possible.

otoh, set_page_dirty_lock() is usually (always?) called against userspace
pages.  They are always up-to-date, so there shouldn't be any pending read I/O
against these pages.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pagemap.h | 15 +++++++++++++++
 mm/filemap.c            | 17 +++++++++++++++++
 mm/page-writeback.c     |  2 +-
 3 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a2f5d27f60e..64f950925151 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 }
 
 extern void FASTCALL(__lock_page(struct page *page));
+extern void FASTCALL(__lock_page_nosync(struct page *page));
 extern void FASTCALL(unlock_page(struct page *page));
 
+/*
+ * lock_page may only be called if we have the page's inode pinned.
+ */
 static inline void lock_page(struct page *page)
 {
 	might_sleep();
 	if (TestSetPageLocked(page))
 		__lock_page(page);
 }
+
+/*
+ * lock_page_nosync should only be used if we can't pin the page's inode.
+ * Doesn't play quite so well with block device plugging.
+ */
+static inline void lock_page_nosync(struct page *page)
+{
+	might_sleep();
+	if (TestSetPageLocked(page))
+		__lock_page_nosync(page);
+}
 	
 /*
  * This is exported only for wait_on_page_locked/wait_on_page_writeback.
diff --git a/mm/filemap.c b/mm/filemap.c
index b9a60c43b61a..d5af1cab4268 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -488,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x)
 EXPORT_SYMBOL(page_cache_alloc_cold);
 #endif
 
+static int __sleep_on_page_lock(void *word)
+{
+	io_schedule();
+	return 0;
+}
+
 /*
  * In order to wait for pages to become available there must be
  * waitqueues associated with pages. By using a hash table of
@@ -577,6 +583,17 @@ void fastcall __lock_page(struct page *page)
 }
 EXPORT_SYMBOL(__lock_page);
 
+/*
+ * Variant of lock_page that does not require the caller to hold a reference
+ * on the page's mapping.
+ */
+void fastcall __lock_page_nosync(struct page *page)
+{
+	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+	__wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
+							TASK_UNINTERRUPTIBLE);
+}
+
 /**
  * find_get_page - find and get a page reference
  * @mapping: the address_space to search
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b9f4c6f1be86..555752907dc3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -701,7 +701,7 @@ int set_page_dirty_lock(struct page *page)
 {
 	int ret;
 
-	lock_page(page);
+	lock_page_nosync(page);
 	ret = set_page_dirty(page);
 	unlock_page(page);
 	return ret;
-- 
cgit v1.2.3


From da6052f7b33abe55fbfd7d2213815f58c00a88d4 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 25 Sep 2006 23:31:35 -0700
Subject: [PATCH] update some mm/ comments

Let's try to keep mm/ comments more useful and up to date. This is a start.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h         | 68 +++++++++++++++++++++++++++-------------------
 include/linux/page-flags.h | 35 ++++++++++++++----------
 mm/filemap.c               |  8 +++---
 3 files changed, 64 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2db4229a0066..f2018775b995 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -219,7 +219,8 @@ struct inode;
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
  * moment. Note that we have no way to track which tasks are using
- * a page.
+ * a page, though if it is a pagecache page, rmap structures can tell us
+ * who is mapping it.
  */
 struct page {
 	unsigned long flags;		/* Atomic flags, some possibly
@@ -299,8 +300,7 @@ struct page {
  */
 
 /*
- * Drop a ref, return true if the logical refcount fell to zero (the page has
- * no users)
+ * Drop a ref, return true if the refcount fell to zero (the page has no users)
  */
 static inline int put_page_testzero(struct page *page)
 {
@@ -356,43 +356,55 @@ void split_page(struct page *page, unsigned int order);
  * For the non-reserved pages, page_count(page) denotes a reference count.
  *   page_count() == 0 means the page is free. page->lru is then used for
  *   freelist management in the buddy allocator.
- *   page_count() == 1 means the page is used for exactly one purpose
- *   (e.g. a private data page of one process).
+ *   page_count() > 0  means the page has been allocated.
  *
- * A page may be used for kmalloc() or anyone else who does a
- * __get_free_page(). In this case the page_count() is at least 1, and
- * all other fields are unused but should be 0 or NULL. The
- * management of this page is the responsibility of the one who uses
- * it.
+ * Pages are allocated by the slab allocator in order to provide memory
+ * to kmalloc and kmem_cache_alloc. In this case, the management of the
+ * page, and the fields in 'struct page' are the responsibility of mm/slab.c
+ * unless a particular usage is carefully commented. (the responsibility of
+ * freeing the kmalloc memory is the caller's, of course).
  *
- * The other pages (we may call them "process pages") are completely
+ * A page may be used by anyone else who does a __get_free_page().
+ * In this case, page_count still tracks the references, and should only
+ * be used through the normal accessor functions. The top bits of page->flags
+ * and page->virtual store page management information, but all other fields
+ * are unused and could be used privately, carefully. The management of this
+ * page is the responsibility of the one who allocated it, and those who have
+ * subsequently been given references to it.
+ *
+ * The other pages (we may call them "pagecache pages") are completely
  * managed by the Linux memory manager: I/O, buffers, swapping etc.
  * The following discussion applies only to them.
  *
- * A page may belong to an inode's memory mapping. In this case,
- * page->mapping is the pointer to the inode, and page->index is the
- * file offset of the page, in units of PAGE_CACHE_SIZE.
+ * A pagecache page contains an opaque `private' member, which belongs to the
+ * page's address_space. Usually, this is the address of a circular list of
+ * the page's disk buffers. PG_private must be set to tell the VM to call
+ * into the filesystem to release these pages.
  *
- * A page contains an opaque `private' member, which belongs to the
- * page's address_space.  Usually, this is the address of a circular
- * list of the page's disk buffers.
+ * A page may belong to an inode's memory mapping. In this case, page->mapping
+ * is the pointer to the inode, and page->index is the file offset of the page,
+ * in units of PAGE_CACHE_SIZE.
  *
- * For pages belonging to inodes, the page_count() is the number of
- * attaches, plus 1 if `private' contains something, plus one for
- * the page cache itself.
+ * If pagecache pages are not associated with an inode, they are said to be
+ * anonymous pages. These may become associated with the swapcache, and in that
+ * case PG_swapcache is set, and page->private is an offset into the swapcache.
  *
- * Instead of keeping dirty/clean pages in per address-space lists, we instead
- * now tag pages as dirty/under writeback in the radix tree.
+ * In either case (swapcache or inode backed), the pagecache itself holds one
+ * reference to the page. Setting PG_private should also increment the
+ * refcount. The each user mapping also has a reference to the page.
  *
- * There is also a per-mapping radix tree mapping index to the page
- * in memory if present. The tree is rooted at mapping->root.  
+ * The pagecache pages are stored in a per-mapping radix tree, which is
+ * rooted at mapping->page_tree, and indexed by offset.
+ * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
+ * lists, we instead now tag pages as dirty/writeback in the radix tree.
  *
- * All process pages can do I/O:
+ * All pagecache pages may be subject to I/O:
  * - inode pages may need to be read from disk,
  * - inode pages which have been modified and are MAP_SHARED may need
- *   to be written to disk,
- * - private pages which have been modified may need to be swapped out
- *   to swap space and (later) to be read back into memory.
+ *   to be written back to the inode on disk,
+ * - anonymous pages (including MAP_PRIVATE file mappings) which have been
+ *   modified may need to be swapped out to swap space and (later) to be read
+ *   back into memory.
  */
 
 /*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5748642e9f36..9d7921dd50f0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -13,24 +13,25 @@
  * PG_reserved is set for special pages, which can never be swapped out. Some
  * of them might not even exist (eg empty_bad_page)...
  *
- * The PG_private bitflag is set if page->private contains a valid value.
+ * The PG_private bitflag is set on pagecache pages if they contain filesystem
+ * specific data (which is normally at page->private). It can be used by
+ * private allocations for its own usage.
  *
- * During disk I/O, PG_locked is used. This bit is set before I/O and
- * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
- * waiting for the I/O on this page to complete.
+ * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
+ * and cleared when writeback _starts_ or when read _completes_. PG_writeback
+ * is set before writeback starts and cleared when it finishes.
+ *
+ * PG_locked also pins a page in pagecache, and blocks truncation of the file
+ * while it is held.
+ *
+ * page_waitqueue(page) is a wait queue of all tasks waiting for the page
+ * to become unlocked.
  *
  * PG_uptodate tells whether the page's contents is valid.  When a read
  * completes, the page becomes uptodate, unless a disk I/O error happened.
  *
- * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
- * which is set any time the system accesses that page through the (mapping,
- * index) hash table.  This referenced bit, together with the referenced bit
- * in the page tables, is used to manipulate page->age and move the page across
- * the active, inactive_dirty and inactive_clean lists.
- *
- * Note that the referenced bit, the page->lru list_head and the active,
- * inactive_dirty and inactive_clean lists are protected by the
- * zone->lru_lock, and *NOT* by the usual PG_locked bit!
+ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
+ * file-backed pagecache (see mm/vmscan.c).
  *
  * PG_error is set to indicate that an I/O error occurred on this page.
  *
@@ -42,6 +43,10 @@
  * space, they need to be kmapped separately for doing IO on the pages.  The
  * struct page (these bits with information) are always mapped into kernel
  * address space...
+ *
+ * PG_buddy is set to indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
  */
 
 /*
@@ -74,7 +79,7 @@
 #define PG_checked		 8	/* kill me in 2.5.<early>. */
 #define PG_arch_1		 9
 #define PG_reserved		10
-#define PG_private		11	/* Has something at ->private */
+#define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
 #define PG_nosave		13	/* Used for system suspend/resume */
@@ -83,7 +88,7 @@
 
 #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
 #define PG_reclaim		17	/* To be reclaimed asap */
-#define PG_nosave_free		18	/* Free, should not be written */
+#define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
 
diff --git a/mm/filemap.c b/mm/filemap.c
index d5af1cab4268..afcdc72b5e90 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -599,8 +599,8 @@ void fastcall __lock_page_nosync(struct page *page)
  * @mapping: the address_space to search
  * @offset: the page index
  *
- * A rather lightweight function, finding and getting a reference to a
- * hashed page atomically.
+ * Is there a pagecache struct page at the given (mapping, offset) tuple?
+ * If yes, increment its refcount and return it; if no, return NULL.
  */
 struct page * find_get_page(struct address_space *mapping, unsigned long offset)
 {
@@ -987,7 +987,7 @@ page_not_up_to_date:
 		/* Get exclusive access to the page ... */
 		lock_page(page);
 
-		/* Did it get unhashed before we got the lock? */
+		/* Did it get truncated before we got the lock? */
 		if (!page->mapping) {
 			unlock_page(page);
 			page_cache_release(page);
@@ -1627,7 +1627,7 @@ no_cached_page:
 page_not_uptodate:
 	lock_page(page);
 
-	/* Did it get unhashed while we waited for it? */
+	/* Did it get truncated while we waited for it? */
 	if (!page->mapping) {
 		unlock_page(page);
 		goto err;
-- 
cgit v1.2.3


From dbe5e69d2d6e591996ea2b817b887d03b60bb143 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 25 Sep 2006 23:31:36 -0700
Subject: [PATCH] slab: optimize kmalloc_node the same way as kmalloc

[akpm@osdl.org: export fix]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/slab.h | 25 ++++++++++++++++++++++++-
 mm/slab.c            |  4 ++--
 2 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 193c03c547ec..2f6bef6a98c9 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -202,7 +202,30 @@ extern int slab_is_available(void);
 
 #ifdef CONFIG_NUMA
 extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
-extern void *kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
+
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	if (__builtin_constant_p(size)) {
+		int i = 0;
+#define CACHE(x) \
+		if (size <= x) \
+			goto found; \
+		else \
+			i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+		{
+			extern void __you_cannot_kmalloc_that_much(void);
+			__you_cannot_kmalloc_that_much();
+		}
+found:
+		return kmem_cache_alloc_node((flags & GFP_DMA) ?
+			malloc_sizes[i].cs_dmacachep :
+			malloc_sizes[i].cs_cachep, flags, node);
+	}
+	return __kmalloc_node(size, flags, node);
+}
 #else
 static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
 {
diff --git a/mm/slab.c b/mm/slab.c
index 3ad2f64998fd..5e59ce7a46c8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3348,7 +3348,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *kmalloc_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *cachep;
 
@@ -3357,7 +3357,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node)
 		return NULL;
 	return kmem_cache_alloc_node(cachep, flags, node);
 }
-EXPORT_SYMBOL(kmalloc_node);
+EXPORT_SYMBOL(__kmalloc_node);
 #endif
 
 /**
-- 
cgit v1.2.3


From 9b819d204cf602eab1a53a9ec4b8d2ca51e02a1d Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:40 -0700
Subject: [PATCH] Add __GFP_THISNODE to avoid fallback to other nodes and
 ignore cpuset/memory policy restrictions

Add a new gfp flag __GFP_THISNODE to avoid fallback to other nodes.  This
flag is essential if a kernel component requires memory to be located on a
certain node.  It will be needed for alloc_pages_node() to force allocation
on the indicated node and for alloc_pages() to force allocation on the
current node.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 3 ++-
 kernel/cpuset.c     | 2 +-
 mm/mempolicy.c      | 2 +-
 mm/page_alloc.c     | 3 +++
 4 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 63ab88a36f39..0eda5b6dedbd 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -45,6 +45,7 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -53,7 +54,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
 
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ea6f0dc2fc5..76940361273e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2316,7 +2316,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	const struct cpuset *cs;	/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
 
-	if (in_interrupt())
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
 	node = z->zone_pgdat->node_id;
 	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c3429a710ab1..8002e1faccda 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1290,7 +1290,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 
 	if ((gfp & __GFP_WAIT) && !in_interrupt())
 		cpuset_update_task_memory_state();
-	if (!pol || in_interrupt())
+	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 	if (pol->policy == MPOL_INTERLEAVE)
 		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 208a6b03aa78..ea498788af53 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -893,6 +893,9 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
 	do {
+		if (unlikely((gfp_mask & __GFP_THISNODE) &&
+			(*z)->zone_pgdat != zonelist->zones[0]->zone_pgdat))
+				break;
 		if ((alloc_flags & ALLOC_CPUSET) &&
 				!cpuset_zone_allowed(*z, gfp_mask))
 			continue;
-- 
cgit v1.2.3


From 980128f223fa3c75e3ebdde650c9f1bcabd4c0a2 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:46 -0700
Subject: [PATCH] Define easier to handle GFP_THISNODE

In many places we will need to use the same combination of flags.  Specify
a single GFP_THISNODE definition for ease of use in gfp.h.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ia64/kernel/uncached.c | 3 +--
 include/linux/gfp.h         | 2 ++
 mm/migrate.c                | 4 +---
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index f813caa80570..c58e933694d5 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -98,8 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
 
 	/* attempt to allocate a granule's worth of cached memory pages */
 
-	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO |
-				 __GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN,
+	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
 				IA64_GRANULE_SHIFT-PAGE_SHIFT);
 	if (!page) {
 		mutex_unlock(&uc_pool->add_chunk_mutex);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0eda5b6dedbd..8b34aabfe4c6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -67,6 +67,8 @@ struct vm_area_struct;
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
 
+#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 6196f45c5263..20a8c2687b1e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -741,9 +741,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
 
 	*result = &pm->status;
 
-	return alloc_pages_node(pm->node,
-		GFP_HIGHUSER | __GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY,
-		0);
+	return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
 }
 
 /*
-- 
cgit v1.2.3


From 8417bba4b151346ed475fcc923693c9e3be89063 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:51 -0700
Subject: [PATCH] Replace min_unmapped_ratio by min_unmapped_pages in struct
 zone

*_pages is a better description of the role of the variable.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 2 +-
 mm/page_alloc.c        | 4 ++--
 mm/vmscan.c            | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7fe317164b73..a703527e2b45 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -169,7 +169,7 @@ struct zone {
 	/*
 	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
-	unsigned long		min_unmapped_ratio;
+	unsigned long		min_unmapped_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f7ea020c23ea..5da6bc4e0a6b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2002,7 +2002,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 #ifdef CONFIG_NUMA
-		zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
+		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
 						/ 100;
 #endif
 		zone->name = zone_names[j];
@@ -2313,7 +2313,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 		return rc;
 
 	for_each_zone(zone)
-		zone->min_unmapped_ratio = (zone->present_pages *
+		zone->min_unmapped_pages = (zone->present_pages *
 				sysctl_min_unmapped_ratio) / 100;
 	return 0;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8f35d7d585cb..5154c25e8440 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1618,7 +1618,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * unmapped file backed pages.
 	 */
 	if (zone_page_state(zone, NR_FILE_PAGES) -
-	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
+	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages)
 		return 0;
 
 	/*
-- 
cgit v1.2.3


From 972d1a7b140569084439a81265a0f15b74e924e0 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:51 -0700
Subject: [PATCH] ZVC: Support NR_SLAB_RECLAIMABLE / NR_SLAB_UNRECLAIMABLE

Remove the atomic counter for slab_reclaim_pages and replace the counter
and NR_SLAB with two ZVC counter that account for unreclaimable and
reclaimable slab pages: NR_SLAB_RECLAIMABLE and NR_SLAB_UNRECLAIMABLE.

Change the check in vmscan.c to refer to to NR_SLAB_RECLAIMABLE.  The
intend seems to be to check for slab pages that could be freed.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/mm/pgtable.c |  4 +++-
 drivers/base/node.c    |  9 +++++++--
 fs/proc/proc_misc.c    |  7 ++++++-
 include/linux/mmzone.h |  3 ++-
 include/linux/slab.h   |  2 --
 mm/mmap.c              |  2 +-
 mm/nommu.c             |  2 +-
 mm/page_alloc.c        |  3 ++-
 mm/slab.c              | 24 +++++++++++-------------
 mm/slob.c              |  4 ----
 mm/vmscan.c            |  2 +-
 mm/vmstat.c            |  3 ++-
 12 files changed, 36 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index bd98768d8764..a9f4910a22f8 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -60,7 +60,9 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages writeback\n",
 					global_page_state(NR_WRITEBACK));
 	printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
-	printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
+	printk(KERN_INFO "%lu pages slab\n",
+		global_page_state(NR_SLAB_RECLAIMABLE) +
+		global_page_state(NR_SLAB_UNRECLAIMABLE));
 	printk(KERN_INFO "%lu pages pagetables\n",
 					global_page_state(NR_PAGETABLE));
 }
diff --git a/drivers/base/node.c b/drivers/base/node.c
index e09f5c2c11ee..001e6f6b9c1b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -68,7 +68,9 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
 		       "Node %d PageTables:   %8lu kB\n"
 		       "Node %d NFS_Unstable: %8lu kB\n"
 		       "Node %d Bounce:       %8lu kB\n"
-		       "Node %d Slab:         %8lu kB\n",
+		       "Node %d Slab:         %8lu kB\n"
+		       "Node %d SReclaimable: %8lu kB\n"
+		       "Node %d SUnreclaim:   %8lu kB\n",
 		       nid, K(i.totalram),
 		       nid, K(i.freeram),
 		       nid, K(i.totalram - i.freeram),
@@ -88,7 +90,10 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
 		       nid, K(node_page_state(nid, NR_PAGETABLE)),
 		       nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
 		       nid, K(node_page_state(nid, NR_BOUNCE)),
-		       nid, K(node_page_state(nid, NR_SLAB)));
+		       nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
+				node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
+		       nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
+		       nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
 	n += hugetlb_report_node_meminfo(nid, buf + n);
 	return n;
 }
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index caa0a51560a0..5bbd60896050 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -170,6 +170,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		"AnonPages:    %8lu kB\n"
 		"Mapped:       %8lu kB\n"
 		"Slab:         %8lu kB\n"
+		"SReclaimable: %8lu kB\n"
+		"SUnreclaim:   %8lu kB\n"
 		"PageTables:   %8lu kB\n"
 		"NFS_Unstable: %8lu kB\n"
 		"Bounce:       %8lu kB\n"
@@ -197,7 +199,10 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		K(global_page_state(NR_WRITEBACK)),
 		K(global_page_state(NR_ANON_PAGES)),
 		K(global_page_state(NR_FILE_MAPPED)),
-		K(global_page_state(NR_SLAB)),
+		K(global_page_state(NR_SLAB_RECLAIMABLE) +
+				global_page_state(NR_SLAB_UNRECLAIMABLE)),
+		K(global_page_state(NR_SLAB_RECLAIMABLE)),
+		K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
 		K(global_page_state(NR_PAGETABLE)),
 		K(global_page_state(NR_UNSTABLE_NFS)),
 		K(global_page_state(NR_BOUNCE)),
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a703527e2b45..08c41b9f92e0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -51,7 +51,8 @@ enum zone_stat_item {
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
 	NR_FILE_PAGES,
-	NR_SLAB,	/* Pages used by slab allocator */
+	NR_SLAB_RECLAIMABLE,
+	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,	/* used for pagetables */
 	NR_FILE_DIRTY,
 	NR_WRITEBACK,
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2f6bef6a98c9..66d6eb78d1c6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -284,8 +284,6 @@ extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*sighand_cachep;
 extern kmem_cache_t	*bio_cachep;
 
-extern atomic_t slab_reclaim_pages;
-
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_SLAB_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index 8507ee9cd573..eea8eefd51a8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -116,7 +116,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 		 * which are reclaimable, under pressure.  The dentry
 		 * cache and most inode caches should fall into this
 		 */
-		free += atomic_read(&slab_reclaim_pages);
+		free += global_page_state(NR_SLAB_RECLAIMABLE);
 
 		/*
 		 * Leave the last 3% for root
diff --git a/mm/nommu.c b/mm/nommu.c
index c576df71e3bb..d99dea31e443 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1133,7 +1133,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 		 * which are reclaimable, under pressure.  The dentry
 		 * cache and most inode caches should fall into this
 		 */
-		free += atomic_read(&slab_reclaim_pages);
+		free += global_page_state(NR_SLAB_RECLAIMABLE);
 
 		/*
 		 * Leave the last 3% for root
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5da6bc4e0a6b..47e98423b30d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1304,7 +1304,8 @@ void show_free_areas(void)
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
 		nr_free_pages(),
-		global_page_state(NR_SLAB),
+		global_page_state(NR_SLAB_RECLAIMABLE) +
+			global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
 		global_page_state(NR_PAGETABLE));
 
diff --git a/mm/slab.c b/mm/slab.c
index 13b5050f84cc..7a48eb1a60c8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -735,14 +735,6 @@ static inline void init_lock_keys(void)
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
-/*
- * vm_enough_memory() looks at this to determine how many slab-allocated pages
- * are possibly freeable under pressure
- *
- * SLAB_RECLAIM_ACCOUNT turns this on per-slab
- */
-atomic_t slab_reclaim_pages;
-
 /*
  * chicken and egg problem: delay the per-cpu array allocation
  * until the general caches are up.
@@ -1580,8 +1572,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_add(nr_pages, &slab_reclaim_pages);
-	add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
+		add_zone_page_state(page_zone(page),
+			NR_SLAB_RECLAIMABLE, nr_pages);
+	else
+		add_zone_page_state(page_zone(page),
+			NR_SLAB_UNRECLAIMABLE, nr_pages);
 	for (i = 0; i < nr_pages; i++)
 		__SetPageSlab(page + i);
 	return page_address(page);
@@ -1596,7 +1591,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	struct page *page = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
-	sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
+	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+		sub_zone_page_state(page_zone(page),
+				NR_SLAB_RECLAIMABLE, nr_freed);
+	else
+		sub_zone_page_state(page_zone(page),
+				NR_SLAB_UNRECLAIMABLE, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
 		__ClearPageSlab(page);
@@ -1605,8 +1605,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
-	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
diff --git a/mm/slob.c b/mm/slob.c
index 4c28a421b270..20188627347c 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -339,7 +339,3 @@ void kmem_cache_init(void)
 
 	mod_timer(&slob_timer, jiffies + HZ);
 }
-
-atomic_t slab_reclaim_pages = ATOMIC_INIT(0);
-EXPORT_SYMBOL(slab_reclaim_pages);
-
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5154c25e8440..349797ba4bac 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1378,7 +1378,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	for_each_zone(zone)
 		lru_pages += zone->nr_active + zone->nr_inactive;
 
-	nr_slab = global_page_state(NR_SLAB);
+	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
 		reclaim_state.reclaimed_slab = 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 968c0072e19a..490d8c1a0ded 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -458,7 +458,8 @@ static char *vmstat_text[] = {
 	"nr_anon_pages",
 	"nr_mapped",
 	"nr_file_pages",
-	"nr_slab",
+	"nr_slab_reclaimable",
+	"nr_slab_unreclaimable",
 	"nr_page_table_pages",
 	"nr_dirty",
 	"nr_writeback",
-- 
cgit v1.2.3


From 0ff38490c836dc379ff7ec45b10a15a662f4e5f6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:52 -0700
Subject: [PATCH] zone_reclaim: dynamic slab reclaim

Currently one can enable slab reclaim by setting an explicit option in
/proc/sys/vm/zone_reclaim_mode.  Slab reclaim is then used as a final
option if the freeing of unmapped file backed pages is not enough to free
enough pages to allow a local allocation.

However, that means that the slab can grow excessively and that most memory
of a node may be used by slabs.  We have had a case where a machine with
46GB of memory was using 40-42GB for slab.  Zone reclaim was effective in
dealing with pagecache pages.  However, slab reclaim was only done during
global reclaim (which is a bit rare on NUMA systems).

This patch implements slab reclaim during zone reclaim.  Zone reclaim
occurs if there is a danger of an off node allocation.  At that point we

1. Shrink the per node page cache if the number of pagecache
   pages is more than min_unmapped_ratio percent of pages in a zone.

2. Shrink the slab cache if the number of the nodes reclaimable slab pages
   (patch depends on earlier one that implements that counter)
   are more than min_slab_ratio (a new /proc/sys/vm tunable).

The shrinking of the slab cache is a bit problematic since it is not node
specific.  So we simply calculate what point in the slab we want to reach
(current per node slab use minus the number of pages that neeed to be
allocated) and then repeately run the global reclaim until that is
unsuccessful or we have reached the limit.  I hope we will have zone based
slab reclaim at some point which will make that easier.

The default for the min_slab_ratio is 5%

Also remove the slab option from /proc/sys/vm/zone_reclaim_mode.

[akpm@osdl.org: cleanups]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/sysctl/vm.txt | 27 +++++++++++++++------
 include/linux/mmzone.h      |  3 +++
 include/linux/swap.h        |  1 +
 include/linux/sysctl.h      |  1 +
 kernel/sysctl.c             | 11 +++++++++
 mm/page_alloc.c             | 17 +++++++++++++
 mm/vmscan.c                 | 58 +++++++++++++++++++++++++++++----------------
 7 files changed, 90 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 7cee90223d3a..20d0d797f539 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm:
 - drop-caches
 - zone_reclaim_mode
 - min_unmapped_ratio
+- min_slab_ratio
 - panic_on_oom
 
 ==============================================================
@@ -138,7 +139,6 @@ This is value ORed together of
 1	= Zone reclaim on
 2	= Zone reclaim writes dirty pages out
 4	= Zone reclaim swaps pages
-8	= Also do a global slab reclaim pass
 
 zone_reclaim_mode is set during bootup to 1 if it is determined that pages
 from remote zones will cause a measurable performance reduction. The
@@ -162,18 +162,13 @@ Allowing regular swap effectively restricts allocations to the local
 node unless explicitly overridden by memory policies or cpuset
 configurations.
 
-It may be advisable to allow slab reclaim if the system makes heavy
-use of files and builds up large slab caches. However, the slab
-shrink operation is global, may take a long time and free slabs
-in all nodes of the system.
-
 =============================================================
 
 min_unmapped_ratio:
 
 This is available only on NUMA kernels.
 
-A percentage of the file backed pages in each zone.  Zone reclaim will only
+A percentage of the total pages in each zone.  Zone reclaim will only
 occur if more than this percentage of pages are file backed and unmapped.
 This is to insure that a minimal amount of local pages is still available for
 file I/O even if the node is overallocated.
@@ -182,6 +177,24 @@ The default is 1 percent.
 
 =============================================================
 
+min_slab_ratio:
+
+This is available only on NUMA kernels.
+
+A percentage of the total pages in each zone.  On Zone reclaim
+(fallback from the local zone occurs) slabs will be reclaimed if more
+than this percentage of pages in a zone are reclaimable slab pages.
+This insures that the slab growth stays under control even in NUMA
+systems that rarely perform global reclaim.
+
+The default is 5 percent.
+
+Note that slab reclaim is triggered in a per zone / node fashion.
+The process of reclaiming slab memory is currently not node specific
+and may not be fast.
+
+=============================================================
+
 panic_on_oom
 
 This enables or disables panic on out-of-memory feature.  If this is set to 1,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 08c41b9f92e0..3693f1a52788 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -171,6 +171,7 @@ struct zone {
 	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
 	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
@@ -448,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file
 					void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 32db06c8ffe0..a2f5ad7c2d2e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -193,6 +193,7 @@ extern long vm_total_pages;
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
+extern int sysctl_min_slab_ratio;
 extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
 #else
 #define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed917a4f8..eca555781d05 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -191,6 +191,7 @@ enum
 	VM_MIN_UNMAPPED=32,	/* Set min percent of unmapped pages */
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
+	VM_MIN_SLAB=35,		 /* Percent pages ignored by zone reclaim */
 };
 
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 362a0cc37138..fd43c3e6786b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -943,6 +943,17 @@ static ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= VM_MIN_SLAB,
+		.procname	= "min_slab_ratio",
+		.data		= &sysctl_min_slab_ratio,
+		.maxlen		= sizeof(sysctl_min_slab_ratio),
+		.mode		= 0644,
+		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #endif
 #ifdef CONFIG_X86_32
 	{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 47e98423b30d..cf913bdd433e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2005,6 +2005,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 #ifdef CONFIG_NUMA
 		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
 						/ 100;
+		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -2318,6 +2319,22 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 				sysctl_min_unmapped_ratio) / 100;
 	return 0;
 }
+
+int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	int rc;
+
+	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (rc)
+		return rc;
+
+	for_each_zone(zone)
+		zone->min_slab_pages = (zone->present_pages *
+				sysctl_min_slab_ratio) / 100;
+	return 0;
+}
 #endif
 
 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 349797ba4bac..089e943c4d38 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1527,7 +1527,6 @@ int zone_reclaim_mode __read_mostly;
 #define RECLAIM_ZONE (1<<0)	/* Run shrink_cache on the zone */
 #define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
 #define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim */
-#define RECLAIM_SLAB (1<<3)	/* Do a global slab shrink if the zone is out of memory */
 
 /*
  * Priority for ZONE_RECLAIM. This determines the fraction of pages
@@ -1542,6 +1541,12 @@ int zone_reclaim_mode __read_mostly;
  */
 int sysctl_min_unmapped_ratio = 1;
 
+/*
+ * If the number of slab pages in a zone grows beyond this percentage then
+ * slab reclaim needs to occur.
+ */
+int sysctl_min_slab_ratio = 5;
+
 /*
  * Try to free up some pages from this zone through reclaim.
  */
@@ -1573,29 +1578,37 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	/*
-	 * Free memory by calling shrink zone with increasing priorities
-	 * until we have enough memory freed.
-	 */
-	priority = ZONE_RECLAIM_PRIORITY;
-	do {
-		nr_reclaimed += shrink_zone(priority, zone, &sc);
-		priority--;
-	} while (priority >= 0 && nr_reclaimed < nr_pages);
+	if (zone_page_state(zone, NR_FILE_PAGES) -
+		zone_page_state(zone, NR_FILE_MAPPED) >
+		zone->min_unmapped_pages) {
+		/*
+		 * Free memory by calling shrink zone with increasing
+		 * priorities until we have enough memory freed.
+		 */
+		priority = ZONE_RECLAIM_PRIORITY;
+		do {
+			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			priority--;
+		} while (priority >= 0 && nr_reclaimed < nr_pages);
+	}
 
-	if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+	if (zone_page_state(zone, NR_SLAB_RECLAIMABLE) > zone->min_slab_pages) {
 		/*
 		 * shrink_slab() does not currently allow us to determine how
-		 * many pages were freed in this zone. So we just shake the slab
-		 * a bit and then go off node for this particular allocation
-		 * despite possibly having freed enough memory to allocate in
-		 * this zone.  If we freed local memory then the next
-		 * allocations will be local again.
+		 * many pages were freed in this zone. So we take the current
+		 * number of slab pages and shake the slab until it is reduced
+		 * by the same nr_pages that we used for reclaiming unmapped
+		 * pages.
 		 *
-		 * shrink_slab will free memory on all zones and may take
-		 * a long time.
+		 * Note that shrink_slab will free memory on all zones and may
+		 * take a long time.
 		 */
-		shrink_slab(sc.nr_scanned, gfp_mask, order);
+		unsigned long limit = zone_page_state(zone,
+				NR_SLAB_RECLAIMABLE) - nr_pages;
+
+		while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
+			zone_page_state(zone, NR_SLAB_RECLAIMABLE) > limit)
+			;
 	}
 
 	p->reclaim_state = NULL;
@@ -1609,7 +1622,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	int node_id;
 
 	/*
-	 * Zone reclaim reclaims unmapped file backed pages.
+	 * Zone reclaim reclaims unmapped file backed pages and
+	 * slab pages if we are over the defined limits.
 	 *
 	 * A small portion of unmapped file backed pages is needed for
 	 * file I/O otherwise pages read by file I/O will be immediately
@@ -1618,7 +1632,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * unmapped file backed pages.
 	 */
 	if (zone_page_state(zone, NR_FILE_PAGES) -
-	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages)
+	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
+	    && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
+			<= zone->min_slab_pages)
 		return 0;
 
 	/*
-- 
cgit v1.2.3


From 89fa30242facca249aead2aac03c4c69764f911c Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:55 -0700
Subject: [PATCH] NUMA: Add zone_to_nid function

There are many places where we need to determine the node of a zone.
Currently we use a difficult to read sequence of pointer dereferencing.
Put that into an inline function and use throughout VM.  Maybe we can find
a way to optimize the lookup in the future.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/mm/discontig.c | 2 +-
 arch/parisc/mm/init.c    | 2 +-
 include/linux/mm.h       | 7 ++++++-
 kernel/cpuset.c          | 4 ++--
 mm/hugetlb.c             | 2 +-
 mm/mempolicy.c           | 6 +++---
 mm/oom_kill.c            | 3 +--
 mm/page_alloc.c          | 2 +-
 mm/vmscan.c              | 2 +-
 9 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 07c300f93764..fb5d8b747de4 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -422,7 +422,7 @@ void __init set_highmem_pages_init(int bad_ppro)
 		zone_end_pfn = zone_start_pfn + zone->spanned_pages;
 
 		printk("Initializing %s for node %d (%08lx:%08lx)\n",
-				zone->name, zone->zone_pgdat->node_id,
+				zone->name, zone_to_nid(zone),
 				zone_start_pfn, zone_end_pfn);
 
 		for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index c7329615ef94..25ad28d63e88 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -551,7 +551,7 @@ void show_mem(void)
 
 				printk("Zone list for zone %d on node %d: ", j, i);
 				for (k = 0; zl->zones[k] != NULL; k++) 
-					printk("[%d/%s] ", zl->zones[k]->zone_pgdat->node_id, zl->zones[k]->name);
+					printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
 				printk("\n");
 			}
 		}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f2018775b995..856f0ee7e84a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -499,12 +499,17 @@ static inline struct zone *page_zone(struct page *page)
 	return zone_table[page_zone_id(page)];
 }
 
+static inline unsigned long zone_to_nid(struct zone *zone)
+{
+	return zone->zone_pgdat->node_id;
+}
+
 static inline unsigned long page_to_nid(struct page *page)
 {
 	if (FLAGS_HAS_NODE)
 		return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
 	else
-		return page_zone(page)->zone_pgdat->node_id;
+		return zone_to_nid(page_zone(page));
 }
 static inline unsigned long page_to_section(struct page *page)
 {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 76940361273e..cff41511269f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2245,7 +2245,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	int i;
 
 	for (i = 0; zl->zones[i]; i++) {
-		int nid = zl->zones[i]->zone_pgdat->node_id;
+		int nid = zone_to_nid(zl->zones[i]);
 
 		if (node_isset(nid, current->mems_allowed))
 			return 1;
@@ -2318,7 +2318,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 
 	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
-	node = z->zone_pgdat->node_id;
+	node = zone_to_nid(z);
 	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3aceadce1a76..7c7d03dbf73d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -72,7 +72,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 	struct zone **z;
 
 	for (z = zonelist->zones; *z; z++) {
-		nid = (*z)->zone_pgdat->node_id;
+		nid = zone_to_nid(*z);
 		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
 		    !list_empty(&hugepage_freelists[nid]))
 			break;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8002e1faccda..38f89650bc84 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -487,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 	switch (p->policy) {
 	case MPOL_BIND:
 		for (i = 0; p->v.zonelist->zones[i]; i++)
-			node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id,
+			node_set(zone_to_nid(p->v.zonelist->zones[i]),
 				*nodes);
 		break;
 	case MPOL_DEFAULT:
@@ -1145,7 +1145,7 @@ unsigned slab_node(struct mempolicy *policy)
 		 * Follow bind policy behavior and start allocation at the
 		 * first node.
 		 */
-		return policy->v.zonelist->zones[0]->zone_pgdat->node_id;
+		return zone_to_nid(policy->v.zonelist->zones[0]);
 
 	case MPOL_PREFERRED:
 		if (policy->v.preferred_node >= 0)
@@ -1649,7 +1649,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
 
 		nodes_clear(nodes);
 		for (z = pol->v.zonelist->zones; *z; z++)
-			node_set((*z)->zone_pgdat->node_id, nodes);
+			node_set(zone_to_nid(*z), nodes);
 		nodes_remap(tmp, nodes, *mpolmask, *newmask);
 		nodes = tmp;
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f1c0ef1fd21f..bada3d03119f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -177,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 
 	for (z = zonelist->zones; *z; z++)
 		if (cpuset_zone_allowed(*z, gfp_mask))
-			node_clear((*z)->zone_pgdat->node_id,
-					nodes);
+			node_clear(zone_to_nid(*z), nodes);
 		else
 			return CONSTRAINT_CPUSET;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cf913bdd433e..51070b6d593f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1217,7 +1217,7 @@ unsigned int nr_free_pagecache_pages(void)
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
-	printk("Node %d ", zone->zone_pgdat->node_id);
+	printk("Node %ld ", zone_to_nid(zone));
 }
 #else
 #define show_node(zone)	do { } while (0)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b950f193816e..87779dda4ec6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1661,7 +1661,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * over remote processors and spread off node memory allocations
 	 * as wide as possible.
 	 */
-	node_id = zone->zone_pgdat->node_id;
+	node_id = zone_to_nid(zone);
 	mask = node_to_cpumask(node_id);
 	if (!cpus_empty(mask) && node_id != numa_node_id())
 		return 0;
-- 
cgit v1.2.3


From 62bac0185ad3dfef11d9602980445c54d45199c6 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 25 Sep 2006 23:31:56 -0700
Subject: [PATCH] selinux: eliminate selinux_task_ctxid

Eliminate selinux_task_ctxid since it duplicates selinux_task_get_sid.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/selinux.h    | 15 ---------------
 kernel/auditsc.c           |  2 +-
 security/selinux/exports.c |  9 ---------
 3 files changed, 1 insertion(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index aad4e390d6a5..79e4707ca772 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -69,16 +69,6 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
  */
 void selinux_audit_set_callback(int (*callback)(void));
 
-/**
- *	selinux_task_ctxid - determine a context ID for a process.
- *	@tsk: the task object
- *	@ctxid: ID value returned via this
- *
- *	On return, ctxid will contain an ID for the context.  This value
- *	should only be used opaquely.
- */
-void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
-
 /**
  *     selinux_ctxid_to_string - map a security context ID to a string
  *     @ctxid: security context ID to be converted.
@@ -166,11 +156,6 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
 	return;
 }
 
-static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
-{
-	*ctxid = 0;
-}
-
 static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
 {
        *ctx = NULL;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1bd8827a0102..331e17010393 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 			   logged upon error */
 			if (f->se_rule) {
 				if (need_sid) {
-					selinux_task_ctxid(tsk, &sid);
+					selinux_get_task_sid(tsk, &sid);
 					need_sid = 0;
 				}
 				result = selinux_audit_rule_match(sid, f->type,
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index 9d7737db5e51..ee0fb47f81ae 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -21,15 +21,6 @@
 #include "security.h"
 #include "objsec.h"
 
-void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
-{
-	struct task_security_struct *tsec = tsk->security;
-	if (selinux_enabled)
-		*ctxid = tsec->sid;
-	else
-		*ctxid = 0;
-}
-
 int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
 {
 	if (selinux_enabled)
-- 
cgit v1.2.3


From 1a70cd40cb291c25b67ec0da715a49d76719329d Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 25 Sep 2006 23:31:57 -0700
Subject: [PATCH] selinux: rename selinux_ctxid_to_string

Rename selinux_ctxid_to_string to selinux_sid_to_string to be
consistent with other interfaces.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/selinux.h    |  8 ++++----
 kernel/audit.c             | 14 +++++++-------
 kernel/auditfilter.c       |  2 +-
 kernel/auditsc.c           |  4 ++--
 security/selinux/exports.c |  4 ++--
 5 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index 79e4707ca772..df9098de4c99 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -70,8 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
 void selinux_audit_set_callback(int (*callback)(void));
 
 /**
- *     selinux_ctxid_to_string - map a security context ID to a string
- *     @ctxid: security context ID to be converted.
+ *     selinux_sid_to_string - map a security context ID to a string
+ *     @sid: security context ID to be converted.
  *     @ctx: address of context string to be returned
  *     @ctxlen: length of returned context string.
  *
@@ -79,7 +79,7 @@ void selinux_audit_set_callback(int (*callback)(void));
  *     string will be allocated internally, and the caller must call
  *     kfree() on it after use.
  */
-int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
+int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen);
 
 /**
  *     selinux_get_inode_sid - get the inode's security context ID
@@ -156,7 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
 	return;
 }
 
-static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
 {
        *ctx = NULL;
        *ctxlen = 0;
diff --git a/kernel/audit.c b/kernel/audit.c
index 963fd15c9621..f9889ee77825 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 		char *ctx = NULL;
 		u32 len;
 		int rc;
-		if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
 			return rc;
 		else
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
 		char *ctx = NULL;
 		u32 len;
 		int rc;
-		if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
 			return rc;
 		else
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 		char *ctx = NULL;
 		u32 len;
 		int rc;
-		if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
 			return rc;
 		else
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 		char *ctx = NULL;
 		u32 len;
 		int rc;
-		if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
 			return rc;
 		else
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
@@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (status_get->mask & AUDIT_STATUS_PID) {
 			int old   = audit_pid;
 			if (sid) {
-				if ((err = selinux_ctxid_to_string(
+				if ((err = selinux_sid_to_string(
 						sid, &ctx, &len)))
 					return err;
 				else
@@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 						 "user pid=%d uid=%u auid=%u",
 						 pid, uid, loginuid);
 				if (sid) {
-					if (selinux_ctxid_to_string(
+					if (selinux_sid_to_string(
 							sid, &ctx, &len)) {
 						audit_log_format(ab, 
 							" ssid=%u", sid);
@@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 					   loginuid, sid);
 		break;
 	case AUDIT_SIGNAL_INFO:
-		err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
+		err = selinux_sid_to_string(audit_sig_sid, &ctx, &len);
 		if (err)
 			return err;
 		sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a44879b0c72f..1a58a81fb09d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		if (selinux_ctxid_to_string(sid, &ctx, &len))
+		if (selinux_sid_to_string(sid, &ctx, &len))
 			audit_log_format(ab, " ssid=%u", sid);
 		else
 			audit_log_format(ab, " subj=%s", ctx);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 331e17010393..fb83c5cb8c32 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			if (axi->osid != 0) {
 				char *ctx = NULL;
 				u32 len;
-				if (selinux_ctxid_to_string(
+				if (selinux_sid_to_string(
 						axi->osid, &ctx, &len)) {
 					audit_log_format(ab, " osid=%u",
 							axi->osid);
@@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		if (n->osid != 0) {
 			char *ctx = NULL;
 			u32 len;
-			if (selinux_ctxid_to_string(
+			if (selinux_sid_to_string(
 				n->osid, &ctx, &len)) {
 				audit_log_format(ab, " osid=%u", n->osid);
 				call_panic = 2;
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index ee0fb47f81ae..b6f96943be1f 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -21,10 +21,10 @@
 #include "security.h"
 #include "objsec.h"
 
-int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
 {
 	if (selinux_enabled)
-		return security_sid_to_context(ctxid, ctx, ctxlen);
+		return security_sid_to_context(sid, ctx, ctxlen);
 	else {
 		*ctx = NULL;
 		*ctxlen = 0;
-- 
cgit v1.2.3


From 9a2f44f01a67a6ecca71515af999895b45a2aeb0 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 25 Sep 2006 23:31:58 -0700
Subject: [PATCH] selinux: replace ctxid with sid in selinux_audit_rule_match
 interface

Replace ctxid with sid in selinux_audit_rule_match interface for
consistency with other interfaces.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/selinux.h        | 6 +++---
 security/selinux/ss/services.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index df9098de4c99..d1b7ca6c1c57 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
 
 /**
  *	selinux_audit_rule_match - determine if a context ID matches a rule.
- *	@ctxid: the context ID to check
+ *	@sid: the context ID to check
  *	@field: the field this rule refers to
  *	@op: the operater the rule uses
  *	@rule: pointer to the audit rule to check against
@@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
  *	Returns 1 if the context id matches the rule, 0 if it does not, and
  *	-errno on failure.
  */
-int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
                              struct selinux_audit_rule *rule,
                              struct audit_context *actx);
 
@@ -144,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule)
 	return;
 }
 
-static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
                                            struct selinux_audit_rule *rule,
                                            struct audit_context *actx)
 {
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 22ed17c17718..988079f45294 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2003,7 +2003,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr,
 	return rc;
 }
 
-int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
                              struct selinux_audit_rule *rule,
                              struct audit_context *actx)
 {
@@ -2026,11 +2026,11 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
 		goto out;
 	}
 
-	ctxt = sidtab_search(&sidtab, ctxid);
+	ctxt = sidtab_search(&sidtab, sid);
 	if (!ctxt) {
 		audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR,
 		          "selinux_audit_rule_match: unrecognized SID %d\n",
-		          ctxid);
+		          sid);
 		match = -ENOENT;
 		goto out;
 	}
-- 
cgit v1.2.3


From af8c65b57aaa4ae321af34dbfc5ca7f5625263fe Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Sep 2006 23:32:07 -0700
Subject: [PATCH] FRV: permit __do_IRQ() to be dispensed with

Permit __do_IRQ() to be dispensed with based on a configuration option.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/frv/Kconfig    | 4 ++++
 include/linux/irq.h | 6 ++++++
 kernel/irq/handle.c | 2 ++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 5833808fb823..f7b171b92ea2 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -29,6 +29,10 @@ config GENERIC_HARDIRQS
 	bool
 	default y
 
+config GENERIC_HARDIRQS_NO__DO_IRQ
+	bool
+	default y
+
 config GENERIC_TIME
 	bool
 	default y
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fbf6d901e9c2..48d3cb3b6a47 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
  * Monolithic do_IRQ implementation.
  * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
  */
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
+#endif
 
 /*
  * Architectures call this to let the generic IRQ layer
@@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs)
 {
 	struct irq_desc *desc = irq_desc + irq;
 
+#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+	desc->handle_irq(irq, desc, regs);
+#else
 	if (likely(desc->handle_irq))
 		desc->handle_irq(irq, desc, regs);
 	else
 		__do_IRQ(irq, regs);
+#endif
 }
 
 /* Handling of unhandled and spurious interrupts: */
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 48a53f68af96..4c6cdbaed661 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
 	return retval;
 }
 
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 /**
  * __do_IRQ - original all in one highlevel IRQ handler
  * @irq:	the interrupt number
@@ -253,6 +254,7 @@ out:
 
 	return 1;
 }
+#endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 
-- 
cgit v1.2.3


From 5f97f7f9400de47ae837170bb274e90ad3934386 Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <hskinnemoen@atmel.com>
Date: Mon, 25 Sep 2006 23:32:13 -0700
Subject: [PATCH] avr32 architecture

This adds support for the Atmel AVR32 architecture as well as the AT32AP7000
CPU and the AT32STK1000 development board.

AVR32 is a new high-performance 32-bit RISC microprocessor core, designed for
cost-sensitive embedded applications, with particular emphasis on low power
consumption and high code density.  The AVR32 architecture is not binary
compatible with earlier 8-bit AVR architectures.

The AVR32 architecture, including the instruction set, is described by the
AVR32 Architecture Manual, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32000.pdf

The Atmel AT32AP7000 is the first CPU implementing the AVR32 architecture.  It
features a 7-stage pipeline, 16KB instruction and data caches and a full
Memory Management Unit.  It also comes with a large set of integrated
peripherals, many of which are shared with the AT91 ARM-based controllers from
Atmel.

Full data sheet is available from

http://www.atmel.com/dyn/resources/prod_documents/doc32003.pdf

while the CPU core implementation including caches and MMU is documented by
the AVR32 AP Technical Reference, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32001.pdf

Information about the AT32STK1000 development board can be found at

http://www.atmel.com/dyn/products/tools_card.asp?tool_id=3918

including a BSP CD image with an earlier version of this patch, development
tools (binaries and source/patches) and a root filesystem image suitable for
booting from SD card.

Alternatively, there's a preliminary "getting started" guide available at
http://avr32linux.org/twiki/bin/view/Main/GettingStarted which provides links
to the sources and patches you will need in order to set up a cross-compiling
environment for avr32-linux.

This patch, as well as the other patches included with the BSP and the
toolchain patches, is actively supported by Atmel Corporation.

[dmccr@us.ibm.com: Fix more pxx_page macro locations]
[bunk@stusta.de: fix `make defconfig']
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Dave McCracken <dmccr@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 MAINTAINERS                                      |  17 +
 arch/avr32/Kconfig                               | 196 +++++
 arch/avr32/Kconfig.debug                         |  19 +
 arch/avr32/Makefile                              |  84 +++
 arch/avr32/boards/atstk1000/Makefile             |   2 +
 arch/avr32/boards/atstk1000/atstk1002.c          |  37 +
 arch/avr32/boards/atstk1000/setup.c              |  59 ++
 arch/avr32/boards/atstk1000/spi.c                |  27 +
 arch/avr32/boot/images/Makefile                  |  62 ++
 arch/avr32/boot/u-boot/Makefile                  |   3 +
 arch/avr32/boot/u-boot/empty.S                   |   1 +
 arch/avr32/boot/u-boot/head.S                    |  60 ++
 arch/avr32/configs/atstk1002_defconfig           | 754 ++++++++++++++++++++
 arch/avr32/kernel/Makefile                       |  18 +
 arch/avr32/kernel/asm-offsets.c                  |  25 +
 arch/avr32/kernel/avr32_ksyms.c                  |  55 ++
 arch/avr32/kernel/cpu.c                          | 327 +++++++++
 arch/avr32/kernel/entry-avr32b.S                 | 678 ++++++++++++++++++
 arch/avr32/kernel/head.S                         |  45 ++
 arch/avr32/kernel/init_task.c                    |  38 +
 arch/avr32/kernel/irq.c                          |  71 ++
 arch/avr32/kernel/kprobes.c                      | 270 +++++++
 arch/avr32/kernel/module.c                       | 324 +++++++++
 arch/avr32/kernel/process.c                      | 276 ++++++++
 arch/avr32/kernel/ptrace.c                       | 371 ++++++++++
 arch/avr32/kernel/semaphore.c                    | 148 ++++
 arch/avr32/kernel/setup.c                        | 335 +++++++++
 arch/avr32/kernel/signal.c                       | 328 +++++++++
 arch/avr32/kernel/switch_to.S                    |  35 +
 arch/avr32/kernel/sys_avr32.c                    |  51 ++
 arch/avr32/kernel/syscall-stubs.S                | 102 +++
 arch/avr32/kernel/syscall_table.S                | 289 ++++++++
 arch/avr32/kernel/time.c                         | 238 +++++++
 arch/avr32/kernel/traps.c                        | 425 +++++++++++
 arch/avr32/kernel/vmlinux.lds.c                  | 139 ++++
 arch/avr32/lib/Makefile                          |  10 +
 arch/avr32/lib/__avr32_asr64.S                   |  31 +
 arch/avr32/lib/__avr32_lsl64.S                   |  31 +
 arch/avr32/lib/__avr32_lsr64.S                   |  31 +
 arch/avr32/lib/clear_user.S                      |  76 ++
 arch/avr32/lib/copy_user.S                       | 119 ++++
 arch/avr32/lib/csum_partial.S                    |  47 ++
 arch/avr32/lib/csum_partial_copy_generic.S       |  99 +++
 arch/avr32/lib/delay.c                           |  55 ++
 arch/avr32/lib/findbit.S                         | 154 ++++
 arch/avr32/lib/io-readsl.S                       |  24 +
 arch/avr32/lib/io-readsw.S                       |  43 ++
 arch/avr32/lib/io-writesl.S                      |  20 +
 arch/avr32/lib/io-writesw.S                      |  38 +
 arch/avr32/lib/libgcc.h                          |  33 +
 arch/avr32/lib/longlong.h                        |  98 +++
 arch/avr32/lib/memcpy.S                          |  62 ++
 arch/avr32/lib/memset.S                          |  72 ++
 arch/avr32/lib/strncpy_from_user.S               |  60 ++
 arch/avr32/lib/strnlen_user.S                    |  67 ++
 arch/avr32/mach-at32ap/Makefile                  |   2 +
 arch/avr32/mach-at32ap/at32ap.c                  |  90 +++
 arch/avr32/mach-at32ap/at32ap7000.c              | 866 +++++++++++++++++++++++
 arch/avr32/mach-at32ap/clock.c                   | 148 ++++
 arch/avr32/mach-at32ap/clock.h                   |  30 +
 arch/avr32/mach-at32ap/extint.c                  | 171 +++++
 arch/avr32/mach-at32ap/intc.c                    | 133 ++++
 arch/avr32/mach-at32ap/intc.h                    | 327 +++++++++
 arch/avr32/mach-at32ap/pio.c                     | 118 +++
 arch/avr32/mach-at32ap/pio.h                     | 178 +++++
 arch/avr32/mach-at32ap/sm.c                      | 289 ++++++++
 arch/avr32/mach-at32ap/sm.h                      | 240 +++++++
 arch/avr32/mm/Makefile                           |   6 +
 arch/avr32/mm/cache.c                            | 150 ++++
 arch/avr32/mm/clear_page.S                       |  25 +
 arch/avr32/mm/copy_page.S                        |  28 +
 arch/avr32/mm/dma-coherent.c                     | 139 ++++
 arch/avr32/mm/fault.c                            | 315 +++++++++
 arch/avr32/mm/init.c                             | 480 +++++++++++++
 arch/avr32/mm/ioremap.c                          | 197 ++++++
 arch/avr32/mm/tlb.c                              | 378 ++++++++++
 include/asm-avr32/Kbuild                         |   3 +
 include/asm-avr32/a.out.h                        |  26 +
 include/asm-avr32/addrspace.h                    |  43 ++
 include/asm-avr32/arch-at32ap/at91rm9200_pdc.h   |  36 +
 include/asm-avr32/arch-at32ap/at91rm9200_usart.h | 123 ++++
 include/asm-avr32/arch-at32ap/board.h            |  35 +
 include/asm-avr32/arch-at32ap/init.h             |  21 +
 include/asm-avr32/arch-at32ap/portmux.h          |  16 +
 include/asm-avr32/arch-at32ap/sm.h               |  27 +
 include/asm-avr32/asm.h                          | 102 +++
 include/asm-avr32/atomic.h                       | 201 ++++++
 include/asm-avr32/auxvec.h                       |   4 +
 include/asm-avr32/bitops.h                       | 296 ++++++++
 include/asm-avr32/bug.h                          |  47 ++
 include/asm-avr32/bugs.h                         |  15 +
 include/asm-avr32/byteorder.h                    |  25 +
 include/asm-avr32/cache.h                        |  29 +
 include/asm-avr32/cachectl.h                     |  11 +
 include/asm-avr32/cacheflush.h                   | 129 ++++
 include/asm-avr32/checksum.h                     | 156 ++++
 include/asm-avr32/cputime.h                      |   6 +
 include/asm-avr32/current.h                      |  15 +
 include/asm-avr32/delay.h                        |  26 +
 include/asm-avr32/div64.h                        |   6 +
 include/asm-avr32/dma-mapping.h                  | 320 +++++++++
 include/asm-avr32/dma.h                          |   8 +
 include/asm-avr32/elf.h                          | 110 +++
 include/asm-avr32/emergency-restart.h            |   6 +
 include/asm-avr32/errno.h                        |   6 +
 include/asm-avr32/fcntl.h                        |   6 +
 include/asm-avr32/futex.h                        |   6 +
 include/asm-avr32/hardirq.h                      |  34 +
 include/asm-avr32/hw_irq.h                       |   9 +
 include/asm-avr32/intc.h                         | 128 ++++
 include/asm-avr32/io.h                           | 253 +++++++
 include/asm-avr32/ioctl.h                        |   6 +
 include/asm-avr32/ioctls.h                       |  83 +++
 include/asm-avr32/ipcbuf.h                       |  29 +
 include/asm-avr32/irq.h                          |  10 +
 include/asm-avr32/irqflags.h                     |  68 ++
 include/asm-avr32/kdebug.h                       |  38 +
 include/asm-avr32/kmap_types.h                   |  30 +
 include/asm-avr32/kprobes.h                      |  34 +
 include/asm-avr32/linkage.h                      |   7 +
 include/asm-avr32/local.h                        |   6 +
 include/asm-avr32/mach/serial_at91.h             |  33 +
 include/asm-avr32/mman.h                         |  17 +
 include/asm-avr32/mmu.h                          |  10 +
 include/asm-avr32/mmu_context.h                  | 148 ++++
 include/asm-avr32/module.h                       |  28 +
 include/asm-avr32/msgbuf.h                       |  31 +
 include/asm-avr32/mutex.h                        |   9 +
 include/asm-avr32/namei.h                        |   7 +
 include/asm-avr32/numnodes.h                     |   7 +
 include/asm-avr32/ocd.h                          |  78 ++
 include/asm-avr32/page.h                         | 112 +++
 include/asm-avr32/param.h                        |  23 +
 include/asm-avr32/pci.h                          |   8 +
 include/asm-avr32/percpu.h                       |   6 +
 include/asm-avr32/pgalloc.h                      |  96 +++
 include/asm-avr32/pgtable-2level.h               |  47 ++
 include/asm-avr32/pgtable.h                      | 408 +++++++++++
 include/asm-avr32/poll.h                         |  27 +
 include/asm-avr32/posix_types.h                  | 129 ++++
 include/asm-avr32/processor.h                    | 147 ++++
 include/asm-avr32/ptrace.h                       | 154 ++++
 include/asm-avr32/resource.h                     |   6 +
 include/asm-avr32/scatterlist.h                  |  21 +
 include/asm-avr32/sections.h                     |   6 +
 include/asm-avr32/semaphore.h                    | 109 +++
 include/asm-avr32/sembuf.h                       |  25 +
 include/asm-avr32/setup.h                        | 141 ++++
 include/asm-avr32/shmbuf.h                       |  42 ++
 include/asm-avr32/shmparam.h                     |   6 +
 include/asm-avr32/sigcontext.h                   |  34 +
 include/asm-avr32/siginfo.h                      |   6 +
 include/asm-avr32/signal.h                       | 168 +++++
 include/asm-avr32/socket.h                       |  53 ++
 include/asm-avr32/sockios.h                      |  12 +
 include/asm-avr32/stat.h                         |  79 +++
 include/asm-avr32/statfs.h                       |   6 +
 include/asm-avr32/string.h                       |  17 +
 include/asm-avr32/sysreg.h                       | 332 +++++++++
 include/asm-avr32/system.h                       | 155 ++++
 include/asm-avr32/termbits.h                     | 173 +++++
 include/asm-avr32/termios.h                      |  80 +++
 include/asm-avr32/thread_info.h                  | 106 +++
 include/asm-avr32/timex.h                        |  40 ++
 include/asm-avr32/tlb.h                          |  32 +
 include/asm-avr32/tlbflush.h                     |  40 ++
 include/asm-avr32/topology.h                     |   6 +
 include/asm-avr32/traps.h                        |  23 +
 include/asm-avr32/types.h                        |  70 ++
 include/asm-avr32/uaccess.h                      | 335 +++++++++
 include/asm-avr32/ucontext.h                     |  12 +
 include/asm-avr32/unaligned.h                    |  25 +
 include/asm-avr32/unistd.h                       | 387 ++++++++++
 include/asm-avr32/user.h                         |  65 ++
 include/linux/elf-em.h                           |   1 +
 lib/Kconfig.debug                                |   4 +-
 176 files changed, 18124 insertions(+), 2 deletions(-)
 create mode 100644 arch/avr32/Kconfig
 create mode 100644 arch/avr32/Kconfig.debug
 create mode 100644 arch/avr32/Makefile
 create mode 100644 arch/avr32/boards/atstk1000/Makefile
 create mode 100644 arch/avr32/boards/atstk1000/atstk1002.c
 create mode 100644 arch/avr32/boards/atstk1000/setup.c
 create mode 100644 arch/avr32/boards/atstk1000/spi.c
 create mode 100644 arch/avr32/boot/images/Makefile
 create mode 100644 arch/avr32/boot/u-boot/Makefile
 create mode 100644 arch/avr32/boot/u-boot/empty.S
 create mode 100644 arch/avr32/boot/u-boot/head.S
 create mode 100644 arch/avr32/configs/atstk1002_defconfig
 create mode 100644 arch/avr32/kernel/Makefile
 create mode 100644 arch/avr32/kernel/asm-offsets.c
 create mode 100644 arch/avr32/kernel/avr32_ksyms.c
 create mode 100644 arch/avr32/kernel/cpu.c
 create mode 100644 arch/avr32/kernel/entry-avr32b.S
 create mode 100644 arch/avr32/kernel/head.S
 create mode 100644 arch/avr32/kernel/init_task.c
 create mode 100644 arch/avr32/kernel/irq.c
 create mode 100644 arch/avr32/kernel/kprobes.c
 create mode 100644 arch/avr32/kernel/module.c
 create mode 100644 arch/avr32/kernel/process.c
 create mode 100644 arch/avr32/kernel/ptrace.c
 create mode 100644 arch/avr32/kernel/semaphore.c
 create mode 100644 arch/avr32/kernel/setup.c
 create mode 100644 arch/avr32/kernel/signal.c
 create mode 100644 arch/avr32/kernel/switch_to.S
 create mode 100644 arch/avr32/kernel/sys_avr32.c
 create mode 100644 arch/avr32/kernel/syscall-stubs.S
 create mode 100644 arch/avr32/kernel/syscall_table.S
 create mode 100644 arch/avr32/kernel/time.c
 create mode 100644 arch/avr32/kernel/traps.c
 create mode 100644 arch/avr32/kernel/vmlinux.lds.c
 create mode 100644 arch/avr32/lib/Makefile
 create mode 100644 arch/avr32/lib/__avr32_asr64.S
 create mode 100644 arch/avr32/lib/__avr32_lsl64.S
 create mode 100644 arch/avr32/lib/__avr32_lsr64.S
 create mode 100644 arch/avr32/lib/clear_user.S
 create mode 100644 arch/avr32/lib/copy_user.S
 create mode 100644 arch/avr32/lib/csum_partial.S
 create mode 100644 arch/avr32/lib/csum_partial_copy_generic.S
 create mode 100644 arch/avr32/lib/delay.c
 create mode 100644 arch/avr32/lib/findbit.S
 create mode 100644 arch/avr32/lib/io-readsl.S
 create mode 100644 arch/avr32/lib/io-readsw.S
 create mode 100644 arch/avr32/lib/io-writesl.S
 create mode 100644 arch/avr32/lib/io-writesw.S
 create mode 100644 arch/avr32/lib/libgcc.h
 create mode 100644 arch/avr32/lib/longlong.h
 create mode 100644 arch/avr32/lib/memcpy.S
 create mode 100644 arch/avr32/lib/memset.S
 create mode 100644 arch/avr32/lib/strncpy_from_user.S
 create mode 100644 arch/avr32/lib/strnlen_user.S
 create mode 100644 arch/avr32/mach-at32ap/Makefile
 create mode 100644 arch/avr32/mach-at32ap/at32ap.c
 create mode 100644 arch/avr32/mach-at32ap/at32ap7000.c
 create mode 100644 arch/avr32/mach-at32ap/clock.c
 create mode 100644 arch/avr32/mach-at32ap/clock.h
 create mode 100644 arch/avr32/mach-at32ap/extint.c
 create mode 100644 arch/avr32/mach-at32ap/intc.c
 create mode 100644 arch/avr32/mach-at32ap/intc.h
 create mode 100644 arch/avr32/mach-at32ap/pio.c
 create mode 100644 arch/avr32/mach-at32ap/pio.h
 create mode 100644 arch/avr32/mach-at32ap/sm.c
 create mode 100644 arch/avr32/mach-at32ap/sm.h
 create mode 100644 arch/avr32/mm/Makefile
 create mode 100644 arch/avr32/mm/cache.c
 create mode 100644 arch/avr32/mm/clear_page.S
 create mode 100644 arch/avr32/mm/copy_page.S
 create mode 100644 arch/avr32/mm/dma-coherent.c
 create mode 100644 arch/avr32/mm/fault.c
 create mode 100644 arch/avr32/mm/init.c
 create mode 100644 arch/avr32/mm/ioremap.c
 create mode 100644 arch/avr32/mm/tlb.c
 create mode 100644 include/asm-avr32/Kbuild
 create mode 100644 include/asm-avr32/a.out.h
 create mode 100644 include/asm-avr32/addrspace.h
 create mode 100644 include/asm-avr32/arch-at32ap/at91rm9200_pdc.h
 create mode 100644 include/asm-avr32/arch-at32ap/at91rm9200_usart.h
 create mode 100644 include/asm-avr32/arch-at32ap/board.h
 create mode 100644 include/asm-avr32/arch-at32ap/init.h
 create mode 100644 include/asm-avr32/arch-at32ap/portmux.h
 create mode 100644 include/asm-avr32/arch-at32ap/sm.h
 create mode 100644 include/asm-avr32/asm.h
 create mode 100644 include/asm-avr32/atomic.h
 create mode 100644 include/asm-avr32/auxvec.h
 create mode 100644 include/asm-avr32/bitops.h
 create mode 100644 include/asm-avr32/bug.h
 create mode 100644 include/asm-avr32/bugs.h
 create mode 100644 include/asm-avr32/byteorder.h
 create mode 100644 include/asm-avr32/cache.h
 create mode 100644 include/asm-avr32/cachectl.h
 create mode 100644 include/asm-avr32/cacheflush.h
 create mode 100644 include/asm-avr32/checksum.h
 create mode 100644 include/asm-avr32/cputime.h
 create mode 100644 include/asm-avr32/current.h
 create mode 100644 include/asm-avr32/delay.h
 create mode 100644 include/asm-avr32/div64.h
 create mode 100644 include/asm-avr32/dma-mapping.h
 create mode 100644 include/asm-avr32/dma.h
 create mode 100644 include/asm-avr32/elf.h
 create mode 100644 include/asm-avr32/emergency-restart.h
 create mode 100644 include/asm-avr32/errno.h
 create mode 100644 include/asm-avr32/fcntl.h
 create mode 100644 include/asm-avr32/futex.h
 create mode 100644 include/asm-avr32/hardirq.h
 create mode 100644 include/asm-avr32/hw_irq.h
 create mode 100644 include/asm-avr32/intc.h
 create mode 100644 include/asm-avr32/io.h
 create mode 100644 include/asm-avr32/ioctl.h
 create mode 100644 include/asm-avr32/ioctls.h
 create mode 100644 include/asm-avr32/ipcbuf.h
 create mode 100644 include/asm-avr32/irq.h
 create mode 100644 include/asm-avr32/irqflags.h
 create mode 100644 include/asm-avr32/kdebug.h
 create mode 100644 include/asm-avr32/kmap_types.h
 create mode 100644 include/asm-avr32/kprobes.h
 create mode 100644 include/asm-avr32/linkage.h
 create mode 100644 include/asm-avr32/local.h
 create mode 100644 include/asm-avr32/mach/serial_at91.h
 create mode 100644 include/asm-avr32/mman.h
 create mode 100644 include/asm-avr32/mmu.h
 create mode 100644 include/asm-avr32/mmu_context.h
 create mode 100644 include/asm-avr32/module.h
 create mode 100644 include/asm-avr32/msgbuf.h
 create mode 100644 include/asm-avr32/mutex.h
 create mode 100644 include/asm-avr32/namei.h
 create mode 100644 include/asm-avr32/numnodes.h
 create mode 100644 include/asm-avr32/ocd.h
 create mode 100644 include/asm-avr32/page.h
 create mode 100644 include/asm-avr32/param.h
 create mode 100644 include/asm-avr32/pci.h
 create mode 100644 include/asm-avr32/percpu.h
 create mode 100644 include/asm-avr32/pgalloc.h
 create mode 100644 include/asm-avr32/pgtable-2level.h
 create mode 100644 include/asm-avr32/pgtable.h
 create mode 100644 include/asm-avr32/poll.h
 create mode 100644 include/asm-avr32/posix_types.h
 create mode 100644 include/asm-avr32/processor.h
 create mode 100644 include/asm-avr32/ptrace.h
 create mode 100644 include/asm-avr32/resource.h
 create mode 100644 include/asm-avr32/scatterlist.h
 create mode 100644 include/asm-avr32/sections.h
 create mode 100644 include/asm-avr32/semaphore.h
 create mode 100644 include/asm-avr32/sembuf.h
 create mode 100644 include/asm-avr32/setup.h
 create mode 100644 include/asm-avr32/shmbuf.h
 create mode 100644 include/asm-avr32/shmparam.h
 create mode 100644 include/asm-avr32/sigcontext.h
 create mode 100644 include/asm-avr32/siginfo.h
 create mode 100644 include/asm-avr32/signal.h
 create mode 100644 include/asm-avr32/socket.h
 create mode 100644 include/asm-avr32/sockios.h
 create mode 100644 include/asm-avr32/stat.h
 create mode 100644 include/asm-avr32/statfs.h
 create mode 100644 include/asm-avr32/string.h
 create mode 100644 include/asm-avr32/sysreg.h
 create mode 100644 include/asm-avr32/system.h
 create mode 100644 include/asm-avr32/termbits.h
 create mode 100644 include/asm-avr32/termios.h
 create mode 100644 include/asm-avr32/thread_info.h
 create mode 100644 include/asm-avr32/timex.h
 create mode 100644 include/asm-avr32/tlb.h
 create mode 100644 include/asm-avr32/tlbflush.h
 create mode 100644 include/asm-avr32/topology.h
 create mode 100644 include/asm-avr32/traps.h
 create mode 100644 include/asm-avr32/types.h
 create mode 100644 include/asm-avr32/uaccess.h
 create mode 100644 include/asm-avr32/ucontext.h
 create mode 100644 include/asm-avr32/unaligned.h
 create mode 100644 include/asm-avr32/unistd.h
 create mode 100644 include/asm-avr32/user.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 23348c0d37bc..767a43434ae8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -443,6 +443,23 @@ W:	http://people.redhat.com/sgrubb/audit/
 T:	git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git
 S:	Maintained
 
+AVR32 ARCHITECTURE
+P:	Atmel AVR32 Support Team
+M:	avr32@atmel.com
+P:	Haavard Skinnemoen
+M:	hskinnemoen@atmel.com
+W:	http://www.atmel.com/products/AVR32/
+W:	http://avr32linux.org/
+W:	http://avrfreaks.net/
+S:	Supported
+
+AVR32/AT32AP MACHINE SUPPORT
+P:	Atmel AVR32 Support Team
+M:	avr32@atmel.com
+P:	Haavard Skinnemoen
+M:	hskinnemoen@atmel.com
+S:	Supported
+
 AX.25 NETWORK LAYER
 P:	Ralf Baechle
 M:	ralf@linux-mips.org
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
new file mode 100644
index 000000000000..5f1694eea842
--- /dev/null
+++ b/arch/avr32/Kconfig
@@ -0,0 +1,196 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "Linux Kernel Configuration"
+
+config AVR32
+	bool
+	default y
+	# With EMBEDDED=n, we get lots of stuff automatically selected
+	# that we usually don't need on AVR32.
+	select EMBEDDED
+	help
+	  AVR32 is a high-performance 32-bit RISC microprocessor core,
+	  designed for cost-sensitive embedded applications, with particular
+	  emphasis on low power consumption and high code density.
+
+	  There is an AVR32 Linux project with a web page at
+	  http://avr32linux.org/.
+
+config UID16
+	bool
+
+config GENERIC_HARDIRQS
+	bool
+	default y
+
+config HARDIRQS_SW_RESEND
+	bool
+	default y
+
+config GENERIC_IRQ_PROBE
+	bool
+	default y
+
+config RWSEM_GENERIC_SPINLOCK
+	bool
+	default y
+
+config GENERIC_TIME
+	bool
+	default y
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+
+config GENERIC_BUST_SPINLOCK
+	bool
+
+config GENERIC_HWEIGHT
+	bool
+	default y
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+source "init/Kconfig"
+
+menu "System Type and features"
+
+config SUBARCH_AVR32B
+	bool
+config MMU
+	bool
+config PERFORMANCE_COUNTERS
+	bool
+
+config PLATFORM_AT32AP
+	bool
+	select SUBARCH_AVR32B
+	select MMU
+	select PERFORMANCE_COUNTERS
+
+choice
+	prompt "AVR32 CPU type"
+	default CPU_AT32AP7000
+
+config CPU_AT32AP7000
+	bool "AT32AP7000"
+	select PLATFORM_AT32AP
+endchoice
+
+#
+# CPU Daughterboards for ATSTK1000
+config BOARD_ATSTK1002
+	bool
+
+choice
+	prompt "AVR32 board type"
+	default BOARD_ATSTK1000
+
+config BOARD_ATSTK1000
+	bool "ATSTK1000 evaluation board"
+	select BOARD_ATSTK1002 if CPU_AT32AP7000
+endchoice
+
+choice
+	prompt "Boot loader type"
+	default LOADER_U_BOOT
+
+config	LOADER_U_BOOT
+	bool "U-Boot (or similar) bootloader"
+endchoice
+
+config LOAD_ADDRESS
+	hex
+	default 0x10000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y
+
+config ENTRY_ADDRESS
+	hex
+	default 0x90000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y
+
+config PHYS_OFFSET
+	hex
+	default 0x10000000 if CPU_AT32AP7000=y
+
+source "kernel/Kconfig.preempt"
+
+config HAVE_ARCH_BOOTMEM_NODE
+	bool
+	default n
+
+config ARCH_HAVE_MEMORY_PRESENT
+	bool
+	default n
+
+config NEED_NODE_MEMMAP_SIZE
+	bool
+	default n
+
+config ARCH_FLATMEM_ENABLE
+	bool
+	default y
+
+config ARCH_DISCONTIGMEM_ENABLE
+	bool
+	default n
+
+config ARCH_SPARSEMEM_ENABLE
+	bool
+	default n
+
+source "mm/Kconfig"
+
+config OWNERSHIP_TRACE
+	bool "Ownership trace support"
+	default y
+	help
+	  Say Y to generate an Ownership Trace message on every context switch,
+	  enabling Nexus-compliant debuggers to keep track of the PID of the
+	  currently executing task.
+
+# FPU emulation goes here
+
+source "kernel/Kconfig.hz"
+
+config CMDLINE
+	string "Default kernel command line"
+	default ""
+	help
+	  If you don't have a boot loader capable of passing a command line string
+	  to the kernel, you may specify one here. As a minimum, you should specify
+	  the memory size and the root device (e.g., mem=8M, root=/dev/nfs).
+
+endmenu
+
+menu "Bus options"
+
+config PCI
+	bool
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+endmenu
+
+menu "Executable file formats"
+source "fs/Kconfig.binfmt"
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/avr32/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/avr32/Kconfig.debug b/arch/avr32/Kconfig.debug
new file mode 100644
index 000000000000..64ace00fe6cb
--- /dev/null
+++ b/arch/avr32/Kconfig.debug
@@ -0,0 +1,19 @@
+menu "Kernel hacking"
+
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
+source "lib/Kconfig.debug"
+
+config KPROBES
+	bool "Kprobes"
+	depends on DEBUG_KERNEL
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+          execute a callback function.  register_kprobe() establishes
+          a probepoint and specifies the callback.  Kprobes is useful
+          for kernel debugging, non-intrusive instrumentation and testing.
+          If in doubt, say "N".
+
+endmenu
diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile
new file mode 100644
index 000000000000..cefc95a73980
--- /dev/null
+++ b/arch/avr32/Makefile
@@ -0,0 +1,84 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2004-2006 Atmel Corporation.
+
+# Default target when executing plain make
+.PHONY: all
+all: uImage vmlinux.elf linux.lst
+
+KBUILD_DEFCONFIG	:= atstk1002_defconfig
+
+CFLAGS		+= -pipe -fno-builtin -mno-pic
+AFLAGS		+= -mrelax -mno-pic
+CFLAGS_MODULE	+= -mno-relax
+LDFLAGS_vmlinux	+= --relax
+
+cpuflags-$(CONFIG_CPU_AP7000)	+= -mcpu=ap7000
+
+CFLAGS		+= $(cpuflags-y)
+AFLAGS		+= $(cpuflags-y)
+
+CHECKFLAGS	+= -D__avr32__
+
+LIBGCC		:= $(shell $(CC) $(CFLAGS) -print-libgcc-file-name)
+
+head-$(CONFIG_LOADER_U_BOOT)		+= arch/avr32/boot/u-boot/head.o
+head-y					+= arch/avr32/kernel/head.o
+core-$(CONFIG_PLATFORM_AT32AP)		+= arch/avr32/mach-at32ap/
+core-$(CONFIG_BOARD_ATSTK1000)		+= arch/avr32/boards/atstk1000/
+core-$(CONFIG_LOADER_U_BOOT)		+= arch/avr32/boot/u-boot/
+core-y					+= arch/avr32/kernel/
+core-y					+= arch/avr32/mm/
+libs-y					+= arch/avr32/lib/ #$(LIBGCC)
+
+archincdir-$(CONFIG_PLATFORM_AT32AP)	:= arch-at32ap
+
+include/asm-avr32/.arch: $(wildcard include/config/platform/*.h) include/config/auto.conf
+	@echo '  SYMLINK include/asm-avr32/arch -> include/asm-avr32/$(archincdir-y)'
+ifneq ($(KBUILD_SRC),)
+	$(Q)mkdir -p include/asm-avr32
+	$(Q)ln -fsn $(srctree)/include/asm-avr32/$(archincdir-y) include/asm-avr32/arch
+else
+	$(Q)ln -fsn $(archincdir-y) include/asm-avr32/arch
+endif
+	@touch $@
+
+archprepare: include/asm-avr32/.arch
+
+BOOT_TARGETS := vmlinux.elf vmlinux.bin uImage uImage.srec
+
+.PHONY: $(BOOT_TARGETS) install
+
+boot := arch/$(ARCH)/boot/images
+
+             KBUILD_IMAGE := $(boot)/uImage
+vmlinux.elf: KBUILD_IMAGE := $(boot)/vmlinux.elf
+vmlinux.cso: KBUILD_IMAGE := $(boot)/vmlinux.cso
+uImage.srec: KBUILD_IMAGE := $(boot)/uImage.srec
+uImage:      KBUILD_IMAGE := $(boot)/uImage
+
+quiet_cmd_listing = LST     $@
+      cmd_listing = avr32-linux-objdump $(OBJDUMPFLAGS) -lS $< > $@
+quiet_cmd_disasm  = DIS     $@
+      cmd_disasm  = avr32-linux-objdump $(OBJDUMPFLAGS) -d $< > $@
+
+vmlinux.elf vmlinux.bin uImage.srec uImage vmlinux.cso: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+install: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
+
+linux.s: vmlinux
+	$(call if_changed,disasm)
+
+linux.lst: vmlinux
+	$(call if_changed,listing)
+
+define archhelp
+  @echo '* vmlinux.elf		- ELF image with load address 0'
+  @echo '  vmlinux.cso		- PathFinder CSO image'
+  @echo '  uImage		- Create a bootable image for U-Boot'
+endef
diff --git a/arch/avr32/boards/atstk1000/Makefile b/arch/avr32/boards/atstk1000/Makefile
new file mode 100644
index 000000000000..c3e36ece8651
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/Makefile
@@ -0,0 +1,2 @@
+obj-y				+= setup.o spi.o
+obj-$(CONFIG_BOARD_ATSTK1002)	+= atstk1002.o
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
new file mode 100644
index 000000000000..49164e9aadd6
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -0,0 +1,37 @@
+/*
+ * ATSTK1002 daughterboard-specific init code
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+
+#include <asm/arch/board.h>
+
+struct eth_platform_data __initdata eth0_data = {
+	.valid		= 1,
+	.mii_phy_addr	= 0x10,
+	.is_rmii	= 0,
+	.hw_addr	= { 0x6a, 0x87, 0x71, 0x14, 0xcd, 0xcb },
+};
+
+extern struct lcdc_platform_data atstk1000_fb0_data;
+
+static int __init atstk1002_init(void)
+{
+	at32_add_system_devices();
+
+	at32_add_device_usart(1);	/* /dev/ttyS0 */
+	at32_add_device_usart(2);	/* /dev/ttyS1 */
+	at32_add_device_usart(3);	/* /dev/ttyS2 */
+
+	at32_add_device_eth(0, &eth0_data);
+	at32_add_device_spi(0);
+	at32_add_device_lcdc(0, &atstk1000_fb0_data);
+
+	return 0;
+}
+postcore_initcall(atstk1002_init);
diff --git a/arch/avr32/boards/atstk1000/setup.c b/arch/avr32/boards/atstk1000/setup.c
new file mode 100644
index 000000000000..191ab85de9a3
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/setup.c
@@ -0,0 +1,59 @@
+/*
+ * ATSTK1000 board-specific setup code.
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/linkage.h>
+
+#include <asm/setup.h>
+
+#include <asm/arch/board.h>
+
+/* Initialized by bootloader-specific startup code. */
+struct tag *bootloader_tags __initdata;
+
+struct lcdc_platform_data __initdata atstk1000_fb0_data;
+
+asmlinkage void __init board_early_init(void)
+{
+	extern void sdram_init(void);
+
+#ifdef CONFIG_LOADER_STANDALONE
+	sdram_init();
+#endif
+}
+
+void __init board_setup_fbmem(unsigned long fbmem_start,
+			      unsigned long fbmem_size)
+{
+	if (!fbmem_size)
+		return;
+
+	if (!fbmem_start) {
+		void *fbmem;
+
+		fbmem = alloc_bootmem_low_pages(fbmem_size);
+		fbmem_start = __pa(fbmem);
+	} else {
+		pg_data_t *pgdat;
+
+		for_each_online_pgdat(pgdat) {
+			if (fbmem_start >= pgdat->bdata->node_boot_start
+			    && fbmem_start <= pgdat->bdata->node_low_pfn)
+				reserve_bootmem_node(pgdat, fbmem_start,
+						     fbmem_size);
+		}
+	}
+
+	printk("%luKiB framebuffer memory at address 0x%08lx\n",
+	       fbmem_size >> 10, fbmem_start);
+	atstk1000_fb0_data.fbmem_start = fbmem_start;
+	atstk1000_fb0_data.fbmem_size = fbmem_size;
+}
diff --git a/arch/avr32/boards/atstk1000/spi.c b/arch/avr32/boards/atstk1000/spi.c
new file mode 100644
index 000000000000..567726c82c6e
--- /dev/null
+++ b/arch/avr32/boards/atstk1000/spi.c
@@ -0,0 +1,27 @@
+/*
+ * ATSTK1000 SPI devices
+ *
+ * Copyright (C) 2005 Atmel Norway
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/device.h>
+#include <linux/spi/spi.h>
+
+static struct spi_board_info spi_board_info[] __initdata = {
+	{
+		.modalias	= "ltv350qv",
+		.max_speed_hz	= 16000000,
+		.bus_num	= 0,
+		.chip_select	= 1,
+	},
+};
+
+static int board_init_spi(void)
+{
+	spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
+	return 0;
+}
+arch_initcall(board_init_spi);
diff --git a/arch/avr32/boot/images/Makefile b/arch/avr32/boot/images/Makefile
new file mode 100644
index 000000000000..ccd74eeecec3
--- /dev/null
+++ b/arch/avr32/boot/images/Makefile
@@ -0,0 +1,62 @@
+#
+# Copyright (C) 2004-2006 Atmel Corporation
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+
+MKIMAGE		:= $(srctree)/scripts/mkuboot.sh
+
+extra-y		:= vmlinux.bin vmlinux.gz
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+quiet_cmd_uimage = UIMAGE $@
+      cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A avr32 -O linux -T kernel	\
+		-C gzip -a $(CONFIG_LOAD_ADDRESS) -e $(CONFIG_ENTRY_ADDRESS)	\
+		-n 'Linux-$(KERNELRELEASE)' -d $< $@
+
+targets += uImage uImage.srec
+$(obj)/uImage: $(obj)/vmlinux.gz
+	$(call if_changed,uimage)
+	@echo '  Image $@ is ready'
+
+OBJCOPYFLAGS_uImage.srec := -I binary -O srec
+$(obj)/uImage.srec: $(obj)/uImage
+	$(call if_changed,objcopy)
+
+OBJCOPYFLAGS_vmlinux.elf := --change-section-lma .text-0x80000000 \
+			    --change-section-lma __ex_table-0x80000000 \
+			    --change-section-lma .rodata-0x80000000 \
+			    --change-section-lma .data-0x80000000 \
+			    --change-section-lma .init-0x80000000 \
+			    --change-section-lma .bss-0x80000000 \
+			    --change-section-lma .initrd-0x80000000 \
+			    --change-section-lma __param-0x80000000 \
+			    --change-section-lma __ksymtab-0x80000000 \
+			    --change-section-lma __ksymtab_gpl-0x80000000 \
+			    --change-section-lma __kcrctab-0x80000000 \
+			    --change-section-lma __kcrctab_gpl-0x80000000 \
+			    --change-section-lma __ksymtab_strings-0x80000000 \
+			    --change-section-lma .got-0x80000000 \
+			    --set-start 0xa0000000
+$(obj)/vmlinux.elf: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+quiet_cmd_sfdwarf = SFDWARF $@
+      cmd_sfdwarf = sfdwarf $< TO $@ GNUAVR IW $(SFDWARF_FLAGS) > $(obj)/sfdwarf.log
+
+$(obj)/vmlinux.cso: $(obj)/vmlinux.elf FORCE
+	$(call if_changed,sfdwarf)
+
+install: $(BOOTIMAGE)
+	sh $(srctree)/install-kernel.sh $<
+
+# Generated files to be removed upon make clean
+clean-files	:= vmlinux* uImage uImage.srec
diff --git a/arch/avr32/boot/u-boot/Makefile b/arch/avr32/boot/u-boot/Makefile
new file mode 100644
index 000000000000..125ddc96c275
--- /dev/null
+++ b/arch/avr32/boot/u-boot/Makefile
@@ -0,0 +1,3 @@
+extra-y		:= head.o
+
+obj-y		:= empty.o
diff --git a/arch/avr32/boot/u-boot/empty.S b/arch/avr32/boot/u-boot/empty.S
new file mode 100644
index 000000000000..8ac91a5f12f0
--- /dev/null
+++ b/arch/avr32/boot/u-boot/empty.S
@@ -0,0 +1 @@
+/* Empty file */
diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S
new file mode 100644
index 000000000000..4488fa27fe94
--- /dev/null
+++ b/arch/avr32/boot/u-boot/head.S
@@ -0,0 +1,60 @@
+/*
+ * Startup code for use with the u-boot bootloader.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/setup.h>
+
+	/*
+	 * The kernel is loaded where we want it to be and all caches
+	 * have just been flushed. We get two parameters from u-boot:
+	 *
+	 * r12 contains a magic number (ATAG_MAGIC)
+	 * r11 points to a tag table providing information about
+	 *     the system.
+	 */
+	.section .init.text,"ax"
+	.global _start
+_start:
+	/* Check if the boot loader actually provided a tag table */
+	lddpc	r0, magic_number
+	cp.w	r12, r0
+	brne	no_tag_table
+
+	/* Initialize .bss */
+	lddpc	r2, bss_start_addr
+	lddpc   r3, end_addr
+	mov	r0, 0
+	mov	r1, 0
+1:      st.d    r2++, r0
+	cp      r2, r3
+	brlo    1b
+
+	/*
+	 * Save the tag table address for later use. This must be done
+	 * _after_ .bss has been initialized...
+	 */
+	lddpc	r0, tag_table_addr
+	st.w	r0[0], r11
+
+	/* Jump to loader-independent setup code */
+	rjmp	kernel_entry
+
+	.align	2
+magic_number:
+	.long	ATAG_MAGIC
+tag_table_addr:
+	.long	bootloader_tags
+bss_start_addr:
+	.long   __bss_start
+end_addr:
+	.long   _end
+
+no_tag_table:
+	sub	r12, pc, (. - 2f)
+	bral	panic
+2:	.asciz	"Boot loader didn't provide correct magic number\n"
diff --git a/arch/avr32/configs/atstk1002_defconfig b/arch/avr32/configs/atstk1002_defconfig
new file mode 100644
index 000000000000..1d22255009fd
--- /dev/null
+++ b/arch/avr32/configs/atstk1002_defconfig
@@ -0,0 +1,754 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.18-rc1
+# Tue Jul 11 12:41:36 2006
+#
+CONFIG_AVR32=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+# CONFIG_SYSVIPC is not set
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+# CONFIG_RELAY is not set
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+CONFIG_SHMEM=y
+# CONFIG_SLAB is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=1
+CONFIG_SLOB=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+
+#
+# Block layer
+#
+# CONFIG_BLK_DEV_IO_TRACE is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+
+#
+# System Type and features
+#
+CONFIG_SUBARCH_AVR32B=y
+CONFIG_MMU=y
+CONFIG_PERFORMANCE_COUNTERS=y
+CONFIG_PLATFORM_AT32AP=y
+CONFIG_CPU_AT32AP7000=y
+CONFIG_BOARD_ATSTK1002=y
+CONFIG_BOARD_ATSTK1000=y
+CONFIG_LOADER_U_BOOT=y
+CONFIG_LOAD_ADDRESS=0x10000000
+CONFIG_ENTRY_ADDRESS=0x90000000
+CONFIG_PHYS_OFFSET=0x10000000
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_HAVE_ARCH_BOOTMEM_NODE is not set
+# CONFIG_ARCH_HAVE_MEMORY_PRESENT is not set
+# CONFIG_NEED_NODE_MEMMAP_SIZE is not set
+CONFIG_ARCH_FLATMEM_ENABLE=y
+# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
+# CONFIG_ARCH_SPARSEMEM_ENABLE is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+# CONFIG_OWNERSHIP_TRACE is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_CMDLINE=""
+
+#
+# Bus options
+#
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_NETDEBUG is not set
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=m
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+
+#
+# I2O device support
+#
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=m
+
+#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+CONFIG_MACB=y
+
+#
+# Ethernet (1000 Mbit)
+#
+
+#
+# Ethernet (10000 Mbit)
+#
+
+#
+# Token Ring devices
+#
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+CONFIG_PPP=m
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+CONFIG_PPP_ASYNC=m
+# CONFIG_PPP_SYNC_TTY is not set
+CONFIG_PPP_DEFLATE=m
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
+# CONFIG_PPPOE is not set
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_AT91=y
+CONFIG_SERIAL_AT91_CONSOLE=y
+# CONFIG_SERIAL_AT91_TTYAT is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_RTC is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_RAW_DRIVER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# SPI support
+#
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_ATMEL=m
+# CONFIG_SPI_BITBANG is not set
+
+#
+# SPI Protocol Masters
+#
+
+#
+# Dallas's 1-wire bus
+#
+
+#
+# Hardware Monitoring support
+#
+# CONFIG_HWMON is not set
+# CONFIG_HWMON_VID is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FIRMWARE_EDID is not set
+CONFIG_FB=m
+CONFIG_FB_CFB_FILLRECT=m
+CONFIG_FB_CFB_COPYAREA=m
+CONFIG_FB_CFB_IMAGEBLIT=m
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+CONFIG_FB_SIDSA=m
+CONFIG_FB_SIDSA_DEFAULT_BPP=24
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+
+#
+# Logo configuration
+#
+# CONFIG_LOGO is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+CONFIG_LCD_CLASS_DEVICE=m
+CONFIG_LCD_DEVICE=y
+CONFIG_LCD_LTV350QV=m
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB_ARCH_HAS_HCD is not set
+# CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
+#
+# InfiniBand support
+#
+
+#
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
+#
+
+#
+# Real Time Clock
+#
+# CONFIG_RTC_CLASS is not set
+
+#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
+#
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+CONFIG_CONFIGFS_FS=m
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+# CONFIG_CIFS_WEAK_PW_HASH is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_DEBUG2 is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=m
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=m
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+CONFIG_NLS_CODEPAGE_850=m
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=m
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+CONFIG_NLS_UTF8=m
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_RWSEMS is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
+CONFIG_FRAME_POINTER=y
+# CONFIG_UNWIND_INFO is not set
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
+CONFIG_KPROBES=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Hardware crypto devices
+#
+
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=m
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=m
+CONFIG_ZLIB_DEFLATE=m
diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile
new file mode 100644
index 000000000000..90e5afff54a2
--- /dev/null
+++ b/arch/avr32/kernel/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for the Linux/AVR32 kernel.
+#
+
+extra-y				:= head.o vmlinux.lds
+
+obj-$(CONFIG_SUBARCH_AVR32B)	+= entry-avr32b.o
+obj-y				+= syscall_table.o syscall-stubs.o irq.o
+obj-y				+= setup.o traps.o semaphore.o ptrace.o
+obj-y				+= signal.o sys_avr32.o process.o time.o
+obj-y				+= init_task.o switch_to.o cpu.o
+obj-$(CONFIG_MODULES)		+= module.o avr32_ksyms.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+
+USE_STANDARD_AS_RULE		:= true
+
+%.lds: %.lds.c FORCE
+	$(call if_changed_dep,cpp_lds_S)
diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c
new file mode 100644
index 000000000000..97d865865667
--- /dev/null
+++ b/arch/avr32/kernel/asm-offsets.c
@@ -0,0 +1,25 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <linux/thread_info.h>
+
+#define DEFINE(sym, val) \
+        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+#define OFFSET(sym, str, mem) \
+        DEFINE(sym, offsetof(struct str, mem));
+
+void foo(void)
+{
+	OFFSET(TI_task, thread_info, task);
+	OFFSET(TI_exec_domain, thread_info, exec_domain);
+	OFFSET(TI_flags, thread_info, flags);
+	OFFSET(TI_cpu, thread_info, cpu);
+	OFFSET(TI_preempt_count, thread_info, preempt_count);
+	OFFSET(TI_restart_block, thread_info, restart_block);
+}
diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c
new file mode 100644
index 000000000000..04f767a272b7
--- /dev/null
+++ b/arch/avr32/kernel/avr32_ksyms.c
@@ -0,0 +1,55 @@
+/*
+ * Export AVR32-specific functions for loadable modules.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+#include <asm/delay.h>
+
+/*
+ * GCC functions
+ */
+extern unsigned long long __avr32_lsl64(unsigned long long u, unsigned long b);
+extern unsigned long long __avr32_lsr64(unsigned long long u, unsigned long b);
+extern unsigned long long __avr32_asr64(unsigned long long u, unsigned long b);
+EXPORT_SYMBOL(__avr32_lsl64);
+EXPORT_SYMBOL(__avr32_lsr64);
+EXPORT_SYMBOL(__avr32_asr64);
+
+/*
+ * String functions
+ */
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+
+/*
+ * Userspace access stuff.
+ */
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(__copy_user);
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_generic);
+
+/* Delay loops (lib/delay.S) */
+EXPORT_SYMBOL(__ndelay);
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__const_udelay);
+
+/* Bit operations (lib/findbit.S) */
+EXPORT_SYMBOL(find_first_zero_bit);
+EXPORT_SYMBOL(find_next_zero_bit);
+EXPORT_SYMBOL(find_first_bit);
+EXPORT_SYMBOL(find_next_bit);
+EXPORT_SYMBOL(generic_find_next_zero_le_bit);
diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c
new file mode 100644
index 000000000000..342452ba2049
--- /dev/null
+++ b/arch/avr32/kernel/cpu.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/sysdev.h>
+#include <linux/seq_file.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/param.h>
+#include <linux/errno.h>
+
+#include <asm/setup.h>
+#include <asm/sysreg.h>
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+#ifdef CONFIG_PERFORMANCE_COUNTERS
+
+/*
+ * XXX: If/when a SMP-capable implementation of AVR32 will ever be
+ * made, we must make sure that the code executes on the correct CPU.
+ */
+static ssize_t show_pc0event(struct sys_device *dev, char *buf)
+{
+	unsigned long pccr;
+
+	pccr = sysreg_read(PCCR);
+	return sprintf(buf, "0x%lx\n", (pccr >> 12) & 0x3f);
+}
+static ssize_t store_pc0event(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long val;
+	char *endp;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp == buf || val > 0x3f)
+		return -EINVAL;
+	val = (val << 12) | (sysreg_read(PCCR) & 0xfffc0fff);
+	sysreg_write(PCCR, val);
+	return count;
+}
+static ssize_t show_pc0count(struct sys_device *dev, char *buf)
+{
+	unsigned long pcnt0;
+
+	pcnt0 = sysreg_read(PCNT0);
+	return sprintf(buf, "%lu\n", pcnt0);
+}
+static ssize_t store_pc0count(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long val;
+	char *endp;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		return -EINVAL;
+	sysreg_write(PCNT0, val);
+
+	return count;
+}
+
+static ssize_t show_pc1event(struct sys_device *dev, char *buf)
+{
+	unsigned long pccr;
+
+	pccr = sysreg_read(PCCR);
+	return sprintf(buf, "0x%lx\n", (pccr >> 18) & 0x3f);
+}
+static ssize_t store_pc1event(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long val;
+	char *endp;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp == buf || val > 0x3f)
+		return -EINVAL;
+	val = (val << 18) | (sysreg_read(PCCR) & 0xff03ffff);
+	sysreg_write(PCCR, val);
+	return count;
+}
+static ssize_t show_pc1count(struct sys_device *dev, char *buf)
+{
+	unsigned long pcnt1;
+
+	pcnt1 = sysreg_read(PCNT1);
+	return sprintf(buf, "%lu\n", pcnt1);
+}
+static ssize_t store_pc1count(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long val;
+	char *endp;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		return -EINVAL;
+	sysreg_write(PCNT1, val);
+
+	return count;
+}
+
+static ssize_t show_pccycles(struct sys_device *dev, char *buf)
+{
+	unsigned long pccnt;
+
+	pccnt = sysreg_read(PCCNT);
+	return sprintf(buf, "%lu\n", pccnt);
+}
+static ssize_t store_pccycles(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long val;
+	char *endp;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		return -EINVAL;
+	sysreg_write(PCCNT, val);
+
+	return count;
+}
+
+static ssize_t show_pcenable(struct sys_device *dev, char *buf)
+{
+	unsigned long pccr;
+
+	pccr = sysreg_read(PCCR);
+	return sprintf(buf, "%c\n", (pccr & 1)?'1':'0');
+}
+static ssize_t store_pcenable(struct sys_device *dev, const char *buf,
+			      size_t count)
+{
+	unsigned long pccr, val;
+	char *endp;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		return -EINVAL;
+	if (val)
+		val = 1;
+
+	pccr = sysreg_read(PCCR);
+	pccr = (pccr & ~1UL) | val;
+	sysreg_write(PCCR, pccr);
+
+	return count;
+}
+
+static SYSDEV_ATTR(pc0event, 0600, show_pc0event, store_pc0event);
+static SYSDEV_ATTR(pc0count, 0600, show_pc0count, store_pc0count);
+static SYSDEV_ATTR(pc1event, 0600, show_pc1event, store_pc1event);
+static SYSDEV_ATTR(pc1count, 0600, show_pc1count, store_pc1count);
+static SYSDEV_ATTR(pccycles, 0600, show_pccycles, store_pccycles);
+static SYSDEV_ATTR(pcenable, 0600, show_pcenable, store_pcenable);
+
+#endif /* CONFIG_PERFORMANCE_COUNTERS */
+
+static int __init topology_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+		register_cpu(c, cpu);
+
+#ifdef CONFIG_PERFORMANCE_COUNTERS
+		sysdev_create_file(&c->sysdev, &attr_pc0event);
+		sysdev_create_file(&c->sysdev, &attr_pc0count);
+		sysdev_create_file(&c->sysdev, &attr_pc1event);
+		sysdev_create_file(&c->sysdev, &attr_pc1count);
+		sysdev_create_file(&c->sysdev, &attr_pccycles);
+		sysdev_create_file(&c->sysdev, &attr_pcenable);
+#endif
+	}
+
+	return 0;
+}
+
+subsys_initcall(topology_init);
+
+static const char *cpu_names[] = {
+	"Morgan",
+	"AP7000",
+};
+#define NR_CPU_NAMES ARRAY_SIZE(cpu_names)
+
+static const char *arch_names[] = {
+	"AVR32A",
+	"AVR32B",
+};
+#define NR_ARCH_NAMES ARRAY_SIZE(arch_names)
+
+static const char *mmu_types[] = {
+	"No MMU",
+	"ITLB and DTLB",
+	"Shared TLB",
+	"MPU"
+};
+
+void __init setup_processor(void)
+{
+	unsigned long config0, config1;
+	unsigned cpu_id, cpu_rev, arch_id, arch_rev, mmu_type;
+	unsigned tmp;
+
+	config0 = sysreg_read(CONFIG0); /* 0x0000013e; */
+	config1 = sysreg_read(CONFIG1); /* 0x01f689a2; */
+	cpu_id = config0 >> 24;
+	cpu_rev = (config0 >> 16) & 0xff;
+	arch_id = (config0 >> 13) & 0x07;
+	arch_rev = (config0 >> 10) & 0x07;
+	mmu_type = (config0 >> 7) & 0x03;
+
+	boot_cpu_data.arch_type = arch_id;
+	boot_cpu_data.cpu_type = cpu_id;
+	boot_cpu_data.arch_revision = arch_rev;
+	boot_cpu_data.cpu_revision = cpu_rev;
+	boot_cpu_data.tlb_config = mmu_type;
+
+	tmp = (config1 >> 13) & 0x07;
+	if (tmp) {
+		boot_cpu_data.icache.ways = 1 << ((config1 >> 10) & 0x07);
+		boot_cpu_data.icache.sets = 1 << ((config1 >> 16) & 0x0f);
+		boot_cpu_data.icache.linesz = 1 << (tmp + 1);
+	}
+	tmp = (config1 >> 3) & 0x07;
+	if (tmp) {
+		boot_cpu_data.dcache.ways = 1 << (config1 & 0x07);
+		boot_cpu_data.dcache.sets = 1 << ((config1 >> 6) & 0x0f);
+		boot_cpu_data.dcache.linesz = 1 << (tmp + 1);
+	}
+
+	if ((cpu_id >= NR_CPU_NAMES) || (arch_id >= NR_ARCH_NAMES)) {
+		printk ("Unknown CPU configuration (ID %02x, arch %02x), "
+			"continuing anyway...\n",
+			cpu_id, arch_id);
+		return;
+	}
+
+	printk ("CPU: %s [%02x] revision %d (%s revision %d)\n",
+		cpu_names[cpu_id], cpu_id, cpu_rev,
+		arch_names[arch_id], arch_rev);
+	printk ("CPU: MMU configuration: %s\n", mmu_types[mmu_type]);
+	printk ("CPU: features:");
+	if (config0 & (1 << 6))
+		printk(" fpu");
+	if (config0 & (1 << 5))
+		printk(" java");
+	if (config0 & (1 << 4))
+		printk(" perfctr");
+	if (config0 & (1 << 3))
+		printk(" ocd");
+	printk("\n");
+}
+
+#ifdef CONFIG_PROC_FS
+static int c_show(struct seq_file *m, void *v)
+{
+	unsigned int icache_size, dcache_size;
+	unsigned int cpu = smp_processor_id();
+
+	icache_size = boot_cpu_data.icache.ways *
+		boot_cpu_data.icache.sets *
+		boot_cpu_data.icache.linesz;
+	dcache_size = boot_cpu_data.dcache.ways *
+		boot_cpu_data.dcache.sets *
+		boot_cpu_data.dcache.linesz;
+
+	seq_printf(m, "processor\t: %d\n", cpu);
+
+	if (boot_cpu_data.arch_type < NR_ARCH_NAMES)
+		seq_printf(m, "cpu family\t: %s revision %d\n",
+			   arch_names[boot_cpu_data.arch_type],
+			   boot_cpu_data.arch_revision);
+	if (boot_cpu_data.cpu_type < NR_CPU_NAMES)
+		seq_printf(m, "cpu type\t: %s revision %d\n",
+			   cpu_names[boot_cpu_data.cpu_type],
+			   boot_cpu_data.cpu_revision);
+
+	seq_printf(m, "i-cache\t\t: %dK (%u ways x %u sets x %u)\n",
+		   icache_size >> 10,
+		   boot_cpu_data.icache.ways,
+		   boot_cpu_data.icache.sets,
+		   boot_cpu_data.icache.linesz);
+	seq_printf(m, "d-cache\t\t: %dK (%u ways x %u sets x %u)\n",
+		   dcache_size >> 10,
+		   boot_cpu_data.dcache.ways,
+		   boot_cpu_data.dcache.sets,
+		   boot_cpu_data.dcache.linesz);
+	seq_printf(m, "bogomips\t: %lu.%02lu\n",
+		   boot_cpu_data.loops_per_jiffy / (500000/HZ),
+		   (boot_cpu_data.loops_per_jiffy / (5000/HZ)) % 100);
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < 1 ? (void *)1 : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+
+}
+
+struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= c_show
+};
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
new file mode 100644
index 000000000000..eeb66792bc37
--- /dev/null
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -0,0 +1,678 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * This file contains the low-level entry-points into the kernel, that is,
+ * exception handlers, debug trap handlers, interrupt handlers and the
+ * system call handler.
+ */
+#include <linux/errno.h>
+
+#include <asm/asm.h>
+#include <asm/hardirq.h>
+#include <asm/irq.h>
+#include <asm/ocd.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+#ifdef CONFIG_PREEMPT
+# define preempt_stop		mask_interrupts
+#else
+# define preempt_stop
+# define fault_resume_kernel	fault_restore_all
+#endif
+
+#define __MASK(x)	((1 << (x)) - 1)
+#define IRQ_MASK	((__MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | \
+			 (__MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT))
+
+	.section .ex.text,"ax",@progbits
+	.align	2
+exception_vectors:
+	bral	handle_critical
+	.align	2
+	bral	handle_critical
+	.align	2
+	bral	do_bus_error_write
+	.align	2
+	bral	do_bus_error_read
+	.align	2
+	bral	do_nmi_ll
+	.align	2
+	bral	handle_address_fault
+	.align	2
+	bral	handle_protection_fault
+	.align	2
+	bral	handle_debug
+	.align	2
+	bral	do_illegal_opcode_ll
+	.align	2
+	bral	do_illegal_opcode_ll
+	.align	2
+	bral	do_illegal_opcode_ll
+	.align	2
+	bral	do_fpe_ll
+	.align	2
+	bral	do_illegal_opcode_ll
+	.align	2
+	bral	handle_address_fault
+	.align	2
+	bral	handle_address_fault
+	.align	2
+	bral	handle_protection_fault
+	.align	2
+	bral	handle_protection_fault
+	.align	2
+	bral	do_dtlb_modified
+
+	/*
+	 * r0 :	PGD/PT/PTE
+	 * r1 : Offending address
+	 * r2 : Scratch register
+	 * r3 : Cause (5, 12 or 13)
+	 */
+#define	tlbmiss_save	pushm	r0-r3
+#define tlbmiss_restore	popm	r0-r3
+
+	.section .tlbx.ex.text,"ax",@progbits
+	.global	itlb_miss
+itlb_miss:
+	tlbmiss_save
+	rjmp	tlb_miss_common
+
+	.section .tlbr.ex.text,"ax",@progbits
+dtlb_miss_read:
+	tlbmiss_save
+	rjmp	tlb_miss_common
+
+	.section .tlbw.ex.text,"ax",@progbits
+dtlb_miss_write:
+	tlbmiss_save
+
+	.global	tlb_miss_common
+tlb_miss_common:
+	mfsr	r0, SYSREG_PTBR
+	mfsr	r1, SYSREG_TLBEAR
+
+	/* Is it the vmalloc space? */
+	bld	r1, 31
+	brcs	handle_vmalloc_miss
+
+	/* First level lookup */
+pgtbl_lookup:
+	lsr	r2, r1, PGDIR_SHIFT
+	ld.w	r0, r0[r2 << 2]
+	bld	r0, _PAGE_BIT_PRESENT
+	brcc	page_table_not_present
+
+	/* TODO: Check access rights on page table if necessary */
+
+	/* Translate to virtual address in P1. */
+	andl	r0, 0xf000
+	sbr	r0, 31
+
+	/* Second level lookup */
+	lsl	r1, (32 - PGDIR_SHIFT)
+	lsr	r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT
+	add	r2, r0, r1 << 2
+	ld.w	r1, r2[0]
+	bld	r1, _PAGE_BIT_PRESENT
+	brcc	page_not_present
+
+	/* Mark the page as accessed */
+	sbr	r1, _PAGE_BIT_ACCESSED
+	st.w	r2[0], r1
+
+	/* Drop software flags */
+	andl	r1, _PAGE_FLAGS_HARDWARE_MASK & 0xffff
+	mtsr	SYSREG_TLBELO, r1
+
+	/* Figure out which entry we want to replace */
+	mfsr	r0, SYSREG_TLBARLO
+	clz	r2, r0
+	brcc	1f
+	mov	r1, -1			/* All entries have been accessed, */
+	mtsr	SYSREG_TLBARLO, r1	/* so reset TLBAR */
+	mov	r2, 0			/* and start at 0 */
+1:	mfsr	r1, SYSREG_MMUCR
+	lsl	r2, 14
+	andl	r1, 0x3fff, COH
+	or	r1, r2
+	mtsr	SYSREG_MMUCR, r1
+
+	tlbw
+
+	tlbmiss_restore
+	rete
+
+handle_vmalloc_miss:
+	/* Simply do the lookup in init's page table */
+	mov	r0, lo(swapper_pg_dir)
+	orh	r0, hi(swapper_pg_dir)
+	rjmp	pgtbl_lookup
+
+
+	/* ---                    System Call                    --- */
+
+	.section .scall.text,"ax",@progbits
+system_call:
+	pushm	r12		/* r12_orig */
+	stmts	--sp, r0-lr
+	zero_fp
+	mfsr	r0, SYSREG_RAR_SUP
+	mfsr	r1, SYSREG_RSR_SUP
+	stm	--sp, r0-r1
+
+	/* check for syscall tracing */
+	get_thread_info r0
+	ld.w	r1, r0[TI_flags]
+	bld	r1, TIF_SYSCALL_TRACE
+	brcs	syscall_trace_enter
+
+syscall_trace_cont:
+	cp.w	r8, NR_syscalls
+	brhs	syscall_badsys
+
+	lddpc   lr, syscall_table_addr
+	ld.w    lr, lr[r8 << 2]
+	mov	r8, r5		/* 5th argument (6th is pushed by stub) */
+	icall   lr
+
+	.global	syscall_return
+syscall_return:
+	get_thread_info r0
+	mask_interrupts		/* make sure we don't miss an interrupt
+				   setting need_resched or sigpending
+				   between sampling and the rets */
+
+	/* Store the return value so that the correct value is loaded below */
+	stdsp   sp[REG_R12], r12
+
+	ld.w	r1, r0[TI_flags]
+	andl	r1, _TIF_ALLWORK_MASK, COH
+	brne	syscall_exit_work
+
+syscall_exit_cont:
+	popm	r8-r9
+	mtsr	SYSREG_RAR_SUP, r8
+	mtsr	SYSREG_RSR_SUP, r9
+	ldmts	sp++, r0-lr
+	sub	sp, -4		/* r12_orig */
+	rets
+
+	.align	2
+syscall_table_addr:
+	.long   sys_call_table
+
+syscall_badsys:
+	mov	r12, -ENOSYS
+	rjmp	syscall_return
+
+	.global ret_from_fork
+ret_from_fork:
+	rcall   schedule_tail
+
+	/* check for syscall tracing */
+	get_thread_info r0
+	ld.w	r1, r0[TI_flags]
+	andl	r1, _TIF_ALLWORK_MASK, COH
+	brne	syscall_exit_work
+	rjmp    syscall_exit_cont
+
+syscall_trace_enter:
+	pushm	r8-r12
+	rcall	syscall_trace
+	popm	r8-r12
+	rjmp	syscall_trace_cont
+
+syscall_exit_work:
+	bld	r1, TIF_SYSCALL_TRACE
+	brcc	1f
+	unmask_interrupts
+	rcall	syscall_trace
+	mask_interrupts
+	ld.w	r1, r0[TI_flags]
+
+1:	bld	r1, TIF_NEED_RESCHED
+	brcc	2f
+	unmask_interrupts
+	rcall	schedule
+	mask_interrupts
+	ld.w	r1, r0[TI_flags]
+	rjmp	1b
+
+2:	mov	r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+	tst	r1, r2
+	breq	3f
+	unmask_interrupts
+	mov	r12, sp
+	mov	r11, r0
+	rcall	do_notify_resume
+	mask_interrupts
+	ld.w	r1, r0[TI_flags]
+	rjmp	1b
+
+3:	bld	r1, TIF_BREAKPOINT
+	brcc	syscall_exit_cont
+	mfsr	r3, SYSREG_TLBEHI
+	lddsp	r2, sp[REG_PC]
+	andl	r3, 0xff, COH
+	lsl	r3, 1
+	sbr	r3, 30
+	sbr	r3, 0
+	mtdr	DBGREG_BWA2A, r2
+	mtdr	DBGREG_BWC2A, r3
+	rjmp	syscall_exit_cont
+
+
+	/* The slow path of the TLB miss handler */
+page_table_not_present:
+page_not_present:
+	tlbmiss_restore
+	sub	sp, 4
+	stmts	--sp, r0-lr
+	rcall	save_full_context_ex
+	mfsr	r12, SYSREG_ECR
+	mov	r11, sp
+	rcall	do_page_fault
+	rjmp	ret_from_exception
+
+	/* This function expects to find offending PC in SYSREG_RAR_EX */
+save_full_context_ex:
+	mfsr	r8, SYSREG_RSR_EX
+	mov	r12, r8
+	andh	r8, (MODE_MASK >> 16), COH
+	mfsr	r11, SYSREG_RAR_EX
+	brne	2f
+
+1:	pushm	r11, r12	/* PC and SR */
+	unmask_exceptions
+	ret	r12
+
+2:	sub	r10, sp, -(FRAME_SIZE_FULL - REG_LR)
+	stdsp	sp[4], r10	/* replace saved SP */
+	rjmp	1b
+
+	/* Low-level exception handlers */
+handle_critical:
+	pushm	r12
+	pushm	r0-r12
+	rcall	save_full_context_ex
+	mfsr	r12, SYSREG_ECR
+	mov	r11, sp
+	rcall	do_critical_exception
+
+	/* We should never get here... */
+bad_return:
+	sub	r12, pc, (. - 1f)
+	bral	panic
+	.align	2
+1:	.asciz	"Return from critical exception!"
+
+	.align	1
+do_bus_error_write:
+	sub	sp, 4
+	stmts	--sp, r0-lr
+	rcall	save_full_context_ex
+	mov	r11, 1
+	rjmp	1f
+
+do_bus_error_read:
+	sub	sp, 4
+	stmts	--sp, r0-lr
+	rcall	save_full_context_ex
+	mov	r11, 0
+1:	mfsr	r12, SYSREG_BEAR
+	mov	r10, sp
+	rcall	do_bus_error
+	rjmp	ret_from_exception
+
+	.align	1
+do_nmi_ll:
+	sub	sp, 4
+	stmts	--sp, r0-lr
+	/* FIXME: Make sure RAR_NMI and RSR_NMI are pushed instead of *_EX */
+	rcall	save_full_context_ex
+	mfsr	r12, SYSREG_ECR
+	mov	r11, sp
+	rcall	do_nmi
+	rjmp	bad_return
+
+handle_address_fault:
+	sub	sp, 4
+	stmts	--sp, r0-lr
+	rcall	save_full_context_ex
+	mfsr	r12, SYSREG_ECR
+	mov	r11, sp
+	rcall	do_address_exception
+	rjmp	ret_from_exception
+
+handle_protection_fault:
+	sub	sp, 4
+	stmts	--sp, r0-lr
+	rcall	save_full_context_ex
+	mfsr	r12, SYSREG_ECR
+	mov	r11, sp
+	rcall	do_page_fault
+	rjmp	ret_from_exception
+
+	.align	1
+do_illegal_opcode_ll:
+	sub	sp, 4
+	stmts	--sp, r0-lr
+	rcall	save_full_context_ex
+	mfsr	r12, SYSREG_ECR
+	mov	r11, sp
+	rcall	do_illegal_opcode
+	rjmp	ret_from_exception
+
+do_dtlb_modified:
+	pushm	r0-r3
+	mfsr	r1, SYSREG_TLBEAR
+	mfsr	r0, SYSREG_PTBR
+	lsr	r2, r1, PGDIR_SHIFT
+	ld.w	r0, r0[r2 << 2]
+	lsl	r1, (32 - PGDIR_SHIFT)
+	lsr	r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT
+
+	/* Translate to virtual address in P1 */
+	andl	r0, 0xf000
+	sbr	r0, 31
+	add	r2, r0, r1 << 2
+	ld.w	r3, r2[0]
+	sbr	r3, _PAGE_BIT_DIRTY
+	mov	r0, r3
+	st.w	r2[0], r3
+
+	/* The page table is up-to-date. Update the TLB entry as well */
+	andl	r0, lo(_PAGE_FLAGS_HARDWARE_MASK)
+	mtsr	SYSREG_TLBELO, r0
+
+	/* MMUCR[DRP] is updated automatically, so let's go... */
+	tlbw
+
+	popm	r0-r3
+	rete
+
+do_fpe_ll:
+	sub	sp, 4
+	stmts	--sp, r0-lr
+	rcall	save_full_context_ex
+	unmask_interrupts
+	mov	r12, 26
+	mov	r11, sp
+	rcall	do_fpe
+	rjmp	ret_from_exception
+
+ret_from_exception:
+	mask_interrupts
+	lddsp	r4, sp[REG_SR]
+	andh	r4, (MODE_MASK >> 16), COH
+	brne	fault_resume_kernel
+
+	get_thread_info r0
+	ld.w	r1, r0[TI_flags]
+	andl	r1, _TIF_WORK_MASK, COH
+	brne	fault_exit_work
+
+fault_resume_user:
+	popm	r8-r9
+	mask_exceptions
+	mtsr	SYSREG_RAR_EX, r8
+	mtsr	SYSREG_RSR_EX, r9
+	ldmts	sp++, r0-lr
+	sub	sp, -4
+	rete
+
+fault_resume_kernel:
+#ifdef CONFIG_PREEMPT
+	get_thread_info	r0
+	ld.w	r2, r0[TI_preempt_count]
+	cp.w	r2, 0
+	brne	1f
+	ld.w	r1, r0[TI_flags]
+	bld	r1, TIF_NEED_RESCHED
+	brcc	1f
+	lddsp	r4, sp[REG_SR]
+	bld	r4, SYSREG_GM_OFFSET
+	brcs	1f
+	rcall	preempt_schedule_irq
+1:
+#endif
+
+	popm	r8-r9
+	mask_exceptions
+	mfsr	r1, SYSREG_SR
+	mtsr	SYSREG_RAR_EX, r8
+	mtsr	SYSREG_RSR_EX, r9
+	popm	lr
+	sub	sp, -4		/* ignore SP */
+	popm	r0-r12
+	sub	sp, -4		/* ignore r12_orig */
+	rete
+
+irq_exit_work:
+	/* Switch to exception mode so that we can share the same code. */
+	mfsr	r8, SYSREG_SR
+	cbr	r8, SYSREG_M0_OFFSET
+	orh	r8, hi(SYSREG_BIT(M1) | SYSREG_BIT(M2))
+	mtsr	SYSREG_SR, r8
+	sub	pc, -2
+	get_thread_info r0
+	ld.w	r1, r0[TI_flags]
+
+fault_exit_work:
+	bld	r1, TIF_NEED_RESCHED
+	brcc	1f
+	unmask_interrupts
+	rcall	schedule
+	mask_interrupts
+	ld.w	r1, r0[TI_flags]
+	rjmp	fault_exit_work
+
+1:	mov	r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+	tst	r1, r2
+	breq	2f
+	unmask_interrupts
+	mov	r12, sp
+	mov	r11, r0
+	rcall	do_notify_resume
+	mask_interrupts
+	ld.w	r1, r0[TI_flags]
+	rjmp	fault_exit_work
+
+2:	bld	r1, TIF_BREAKPOINT
+	brcc	fault_resume_user
+	mfsr	r3, SYSREG_TLBEHI
+	lddsp	r2, sp[REG_PC]
+	andl	r3, 0xff, COH
+	lsl	r3, 1
+	sbr	r3, 30
+	sbr	r3, 0
+	mtdr	DBGREG_BWA2A, r2
+	mtdr	DBGREG_BWC2A, r3
+	rjmp	fault_resume_user
+
+	/* If we get a debug trap from privileged context we end up here */
+handle_debug_priv:
+	/* Fix up LR and SP in regs. r11 contains the mode we came from */
+	mfsr	r8, SYSREG_SR
+	mov	r9, r8
+	andh	r8, hi(~MODE_MASK)
+	or	r8, r11
+	mtsr	SYSREG_SR, r8
+	sub	pc, -2
+	stdsp	sp[REG_LR], lr
+	mtsr	SYSREG_SR, r9
+	sub	pc, -2
+	sub	r10, sp, -FRAME_SIZE_FULL
+	stdsp	sp[REG_SP], r10
+	mov	r12, sp
+	rcall	do_debug_priv
+
+	/* Now, put everything back */
+	ssrf	SR_EM_BIT
+	popm	r10, r11
+	mtsr	SYSREG_RAR_DBG, r10
+	mtsr	SYSREG_RSR_DBG, r11
+	mfsr	r8, SYSREG_SR
+	mov	r9, r8
+	andh	r8, hi(~MODE_MASK)
+	andh	r11, hi(MODE_MASK)
+	or	r8, r11
+	mtsr	SYSREG_SR, r8
+	sub	pc, -2
+	popm	lr
+	mtsr	SYSREG_SR, r9
+	sub	pc, -2
+	sub	sp, -4		/* skip SP */
+	popm	r0-r12
+	sub	sp, -4
+	retd
+
+	/*
+	 * At this point, everything is masked, that is, interrupts,
+	 * exceptions and debugging traps. We might get called from
+	 * interrupt or exception context in some rare cases, but this
+	 * will be taken care of by do_debug(), so we're not going to
+	 * do a 100% correct context save here.
+	 */
+handle_debug:
+	sub	sp, 4		/* r12_orig */
+	stmts	--sp, r0-lr
+	mfsr	r10, SYSREG_RAR_DBG
+	mfsr	r11, SYSREG_RSR_DBG
+	unmask_exceptions
+	pushm	r10,r11
+	andh	r11, (MODE_MASK >> 16), COH
+	brne	handle_debug_priv
+
+	mov	r12, sp
+	rcall	do_debug
+
+	lddsp	r10, sp[REG_SR]
+	andh	r10, (MODE_MASK >> 16), COH
+	breq	debug_resume_user
+
+debug_restore_all:
+	popm	r10,r11
+	mask_exceptions
+	mtsr	SYSREG_RSR_DBG, r11
+	mtsr	SYSREG_RAR_DBG, r10
+	ldmts	sp++, r0-lr
+	sub	sp, -4
+	retd
+
+debug_resume_user:
+	get_thread_info r0
+	mask_interrupts
+
+	ld.w	r1, r0[TI_flags]
+	andl	r1, _TIF_DBGWORK_MASK, COH
+	breq	debug_restore_all
+
+1:	bld	r1, TIF_NEED_RESCHED
+	brcc	2f
+	unmask_interrupts
+	rcall	schedule
+	mask_interrupts
+	ld.w	r1, r0[TI_flags]
+	rjmp	1b
+
+2:	mov	r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+	tst	r1, r2
+	breq	3f
+	unmask_interrupts
+	mov	r12, sp
+	mov	r11, r0
+	rcall	do_notify_resume
+	mask_interrupts
+	ld.w	r1, r0[TI_flags]
+	rjmp	1b
+
+3:	bld	r1, TIF_SINGLE_STEP
+	brcc	debug_restore_all
+	mfdr	r2, DBGREG_DC
+	sbr	r2, DC_SS_BIT
+	mtdr	DBGREG_DC, r2
+	rjmp	debug_restore_all
+
+	.set	rsr_int0,	SYSREG_RSR_INT0
+	.set	rsr_int1,	SYSREG_RSR_INT1
+	.set	rsr_int2,	SYSREG_RSR_INT2
+	.set	rsr_int3,	SYSREG_RSR_INT3
+	.set	rar_int0,	SYSREG_RAR_INT0
+	.set	rar_int1,	SYSREG_RAR_INT1
+	.set	rar_int2,	SYSREG_RAR_INT2
+	.set	rar_int3,	SYSREG_RAR_INT3
+
+	.macro	IRQ_LEVEL level
+	.type	irq_level\level, @function
+irq_level\level:
+	sub	sp, 4		/* r12_orig */
+	stmts	--sp,r0-lr
+	mfsr	r8, rar_int\level
+	mfsr	r9, rsr_int\level
+	pushm	r8-r9
+
+	mov	r11, sp
+	mov	r12, \level
+
+	rcall	do_IRQ
+
+	lddsp	r4, sp[REG_SR]
+	andh	r4, (MODE_MASK >> 16), COH
+#ifdef CONFIG_PREEMPT
+	brne	2f
+#else
+	brne	1f
+#endif
+
+	get_thread_info	r0
+	ld.w	r1, r0[TI_flags]
+	andl	r1, _TIF_WORK_MASK, COH
+	brne	irq_exit_work
+
+1:	popm	r8-r9
+	mtsr	rar_int\level, r8
+	mtsr	rsr_int\level, r9
+	ldmts	sp++,r0-lr
+	sub	sp, -4		/* ignore r12_orig */
+	rete
+
+#ifdef CONFIG_PREEMPT
+2:
+	get_thread_info	r0
+	ld.w	r2, r0[TI_preempt_count]
+	cp.w	r2, 0
+	brne	1b
+	ld.w	r1, r0[TI_flags]
+	bld	r1, TIF_NEED_RESCHED
+	brcc	1b
+	lddsp	r4, sp[REG_SR]
+	bld	r4, SYSREG_GM_OFFSET
+	brcs	1b
+	rcall	preempt_schedule_irq
+	rjmp	1b
+#endif
+	.endm
+
+	.section .irq.text,"ax",@progbits
+
+	.global	irq_level0
+	.global	irq_level1
+	.global	irq_level2
+	.global	irq_level3
+	IRQ_LEVEL 0
+	IRQ_LEVEL 1
+	IRQ_LEVEL 2
+	IRQ_LEVEL 3
diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S
new file mode 100644
index 000000000000..773b7ad87be9
--- /dev/null
+++ b/arch/avr32/kernel/head.S
@@ -0,0 +1,45 @@
+/*
+ * Non-board-specific low-level startup code
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/sysreg.h>
+
+	.section .init.text,"ax"
+	.global kernel_entry
+kernel_entry:
+	/* Initialize status register */
+	lddpc   r0, init_sr
+	mtsr	SYSREG_SR, r0
+
+	/* Set initial stack pointer */
+	lddpc   sp, stack_addr
+	sub	sp, -THREAD_SIZE
+
+#ifdef CONFIG_FRAME_POINTER
+	/* Mark last stack frame */
+	mov	lr, 0
+	mov	r7, 0
+#endif
+
+	/* Set up the PIO, SDRAM controller, early printk, etc. */
+	rcall	board_early_init
+
+	/* Start the show */
+	lddpc   pc, kernel_start_addr
+
+	.align  2
+init_sr:
+	.long   0x007f0000	/* Supervisor mode, everything masked */
+stack_addr:
+	.long   init_thread_union
+kernel_start_addr:
+	.long   start_kernel
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
new file mode 100644
index 000000000000..effcacf9d1a2
--- /dev/null
+++ b/arch/avr32/kernel/init_task.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/pgtable.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure. Must be aligned on an 8192-byte boundary.
+ */
+union thread_union init_thread_union
+	__attribute__((__section__(".data.init_task"))) =
+		{ INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c
new file mode 100644
index 000000000000..856f3548e664
--- /dev/null
+++ b/arch/avr32/kernel/irq.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on arch/i386/kernel/irq.c
+ *   Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * IRQ's are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sysdev.h>
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk("unexpected IRQ %u\n", irq);
+}
+
+#ifdef CONFIG_PROC_FS
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int i = *(loff_t *)v, cpu;
+	struct irqaction *action;
+	unsigned long flags;
+
+	if (i == 0) {
+		seq_puts(p, "           ");
+		for_each_online_cpu(cpu)
+			seq_printf(p, "CPU%d       ", cpu);
+		seq_putc(p, '\n');
+	}
+
+	if (i < NR_IRQS) {
+		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		action = irq_desc[i].action;
+		if (!action)
+			goto unlock;
+
+		seq_printf(p, "%3d: ", i);
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]);
+		seq_printf(p, "  %s", action->name);
+		for (action = action->next; action; action = action->next)
+			seq_printf(p, ", %s", action->name);
+
+		seq_putc(p, '\n');
+	unlock:
+		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+	}
+
+	return 0;
+}
+#endif
diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c
new file mode 100644
index 000000000000..6caf9e8d8080
--- /dev/null
+++ b/arch/avr32/kernel/kprobes.c
@@ -0,0 +1,270 @@
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * Based on arch/ppc64/kernel/kprobes.c
+ *  Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/ocd.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+static unsigned long kprobe_status;
+static struct pt_regs jprobe_saved_regs;
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	int ret = 0;
+
+	if ((unsigned long)p->addr & 0x01) {
+		printk("Attempt to register kprobe at an unaligned address\n");
+		ret = -EINVAL;
+	}
+
+	/* XXX: Might be a good idea to check if p->addr is a valid
+	 * kernel address as well... */
+
+	if (!ret) {
+		pr_debug("copy kprobe at %p\n", p->addr);
+		memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+		p->opcode = *p->addr;
+	}
+
+	return ret;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	pr_debug("arming kprobe at %p\n", p->addr);
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	pr_debug("disarming kprobe at %p\n", p->addr);
+	*p->addr = p->opcode;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long dc;
+
+	pr_debug("preparing to singlestep over %p (PC=%08lx)\n",
+		 p->addr, regs->pc);
+
+	BUG_ON(!(sysreg_read(SR) & SYSREG_BIT(SR_D)));
+
+	dc = __mfdr(DBGREG_DC);
+	dc |= DC_SS;
+	__mtdr(DBGREG_DC, dc);
+
+	/*
+	 * We must run the instruction from its original location
+	 * since it may actually reference PC.
+	 *
+	 * TODO: Do the instruction replacement directly in icache.
+	 */
+	*p->addr = p->opcode;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long dc;
+
+	pr_debug("resuming execution at PC=%08lx\n", regs->pc);
+
+	dc = __mfdr(DBGREG_DC);
+	dc &= ~DC_SS;
+	__mtdr(DBGREG_DC, dc);
+
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+	__get_cpu_var(current_kprobe) = p;
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	void *addr = (void *)regs->pc;
+	int ret = 0;
+
+	pr_debug("kprobe_handler: kprobe_running=%d\n",
+		 kprobe_running());
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+
+	/* Check that we're not recursing */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kprobe_status == KPROBE_HIT_SS) {
+				printk("FIXME: kprobe hit while single-stepping!\n");
+				goto no_kprobe;
+			}
+
+			printk("FIXME: kprobe hit while handling another kprobe\n");
+			goto no_kprobe;
+		} else {
+			p = kprobe_running();
+			if (p->break_handler && p->break_handler(p, regs))
+				goto ss_probe;
+		}
+		/* If it's not ours, can't be delete race, (we hold lock). */
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p)
+		goto no_kprobe;
+
+	kprobe_status = KPROBE_HIT_ACTIVE;
+	set_current_kprobe(p);
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	return ret;
+}
+
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+
+	pr_debug("post_kprobe_handler, cur=%p\n", cur);
+
+	if (!cur)
+		return 0;
+
+	if (cur->post_handler) {
+		kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	resume_execution(cur, regs);
+	reset_current_kprobe();
+	preempt_enable_no_resched();
+
+	return 1;
+}
+
+static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+
+	pr_debug("kprobe_fault_handler: trapnr=%d\n", trapnr);
+
+	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+		return 1;
+
+	if (kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(cur, regs);
+		preempt_enable_no_resched();
+	}
+	return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
+	pr_debug("kprobe_exceptions_notify: val=%lu, data=%p\n",
+		 val, data);
+
+	switch (val) {
+	case DIE_BREAKPOINT:
+		if (kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_SSTEP:
+		if (post_kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_FAULT:
+		if (kprobe_running()
+		    && kprobe_fault_handler(args->regs, args->trapnr))
+			ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+
+	memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+	/*
+	 * TODO: We should probably save some of the stack here as
+	 * well, since gcc may pass arguments on the stack for certain
+	 * functions (lots of arguments, large aggregates, varargs)
+	 */
+
+	/* setup return addr to the jprobe handler routine */
+	regs->pc = (unsigned long)jp->entry;
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+	asm volatile("breakpoint" ::: "memory");
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	/*
+	 * FIXME - we should ideally be validating that we got here 'cos
+	 * of the "trap" in jprobe_return() above, before restoring the
+	 * saved regs...
+	 */
+	memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs));
+	return 1;
+}
+
+int __init arch_init_kprobes(void)
+{
+	printk("KPROBES: Enabling monitor mode (MM|DBE)...\n");
+	__mtdr(DBGREG_DC, DC_MM | DC_DBE);
+
+	/* TODO: Register kretprobe trampoline */
+	return 0;
+}
diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c
new file mode 100644
index 000000000000..dfc32f2817b6
--- /dev/null
+++ b/arch/avr32/kernel/module.c
@@ -0,0 +1,324 @@
+/*
+ * AVR32-specific kernel module loader
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * GOT initialization parts are based on the s390 version
+ *   Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH,
+ *                            IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc(size);
+}
+
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(mod->arch.syminfo);
+	mod->arch.syminfo = NULL;
+
+	vfree(module_region);
+	/* FIXME: if module_region == mod->init_region, trim exception
+	 * table entries. */
+}
+
+static inline int check_rela(Elf32_Rela *rela, struct module *module,
+			     char *strings, Elf32_Sym *symbols)
+{
+	struct mod_arch_syminfo *info;
+
+	info = module->arch.syminfo + ELF32_R_SYM(rela->r_info);
+	switch (ELF32_R_TYPE(rela->r_info)) {
+	case R_AVR32_GOT32:
+	case R_AVR32_GOT16:
+	case R_AVR32_GOT8:
+	case R_AVR32_GOT21S:
+	case R_AVR32_GOT18SW:	/* mcall */
+	case R_AVR32_GOT16S:	/* ld.w */
+		if (rela->r_addend != 0) {
+			printk(KERN_ERR
+			       "GOT relocation against %s at offset %u with addend\n",
+			       strings + symbols[ELF32_R_SYM(rela->r_info)].st_name,
+			       rela->r_offset);
+			return -ENOEXEC;
+		}
+		if (info->got_offset == -1UL) {
+			info->got_offset = module->arch.got_size;
+			module->arch.got_size += sizeof(void *);
+		}
+		pr_debug("GOT[%3lu] %s\n", info->got_offset,
+			 strings + symbols[ELF32_R_SYM(rela->r_info)].st_name);
+		break;
+	}
+
+	return 0;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *module)
+{
+	Elf32_Shdr *symtab;
+	Elf32_Sym *symbols;
+	Elf32_Rela *rela;
+	char *strings;
+	int nrela, i, j;
+	int ret;
+
+	/* Find the symbol table */
+	symtab = NULL;
+	for (i = 0; i < hdr->e_shnum; i++)
+		switch (sechdrs[i].sh_type) {
+		case SHT_SYMTAB:
+			symtab = &sechdrs[i];
+			break;
+		}
+	if (!symtab) {
+		printk(KERN_ERR "module %s: no symbol table\n", module->name);
+		return -ENOEXEC;
+	}
+
+	/* Allocate room for one syminfo structure per symbol. */
+	module->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
+	module->arch.syminfo = vmalloc(module->arch.nsyms
+				   * sizeof(struct mod_arch_syminfo));
+	if (!module->arch.syminfo)
+		return -ENOMEM;
+
+	symbols = (void *)hdr + symtab->sh_offset;
+	strings = (void *)hdr + sechdrs[symtab->sh_link].sh_offset;
+	for (i = 0; i < module->arch.nsyms; i++) {
+		if (symbols[i].st_shndx == SHN_UNDEF &&
+		    strcmp(strings + symbols[i].st_name,
+			   "_GLOBAL_OFFSET_TABLE_") == 0)
+			/* "Define" it as absolute. */
+			symbols[i].st_shndx = SHN_ABS;
+		module->arch.syminfo[i].got_offset = -1UL;
+		module->arch.syminfo[i].got_initialized = 0;
+	}
+
+	/* Allocate GOT entries for symbols that need it. */
+	module->arch.got_size = 0;
+	for (i = 0; i < hdr->e_shnum; i++) {
+		if (sechdrs[i].sh_type != SHT_RELA)
+			continue;
+		nrela = sechdrs[i].sh_size / sizeof(Elf32_Rela);
+		rela = (void *)hdr + sechdrs[i].sh_offset;
+		for (j = 0; j < nrela; j++) {
+			ret = check_rela(rela + j, module,
+					 strings, symbols);
+			if (ret)
+				goto out_free_syminfo;
+		}
+	}
+
+	/*
+	 * Increase core size to make room for GOT and set start
+	 * offset for GOT.
+	 */
+	module->core_size = ALIGN(module->core_size, 4);
+	module->arch.got_offset = module->core_size;
+	module->core_size += module->arch.got_size;
+
+	return 0;
+
+out_free_syminfo:
+	vfree(module->arch.syminfo);
+	module->arch.syminfo = NULL;
+
+	return ret;
+}
+
+static inline int reloc_overflow(struct module *module, const char *reloc_name,
+				 Elf32_Addr relocation)
+{
+	printk(KERN_ERR "module %s: Value %lx does not fit relocation %s\n",
+	       module->name, (unsigned long)relocation, reloc_name);
+	return -ENOEXEC;
+}
+
+#define get_u16(loc)		(*((uint16_t *)loc))
+#define put_u16(loc, val)	(*((uint16_t *)loc) = (val))
+
+int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
+		       unsigned int symindex, unsigned int relindex,
+		       struct module *module)
+{
+	Elf32_Shdr *symsec = sechdrs + symindex;
+	Elf32_Shdr *relsec = sechdrs + relindex;
+	Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
+	Elf32_Rela *rel = (void *)relsec->sh_addr;
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rela); i++, rel++) {
+		struct mod_arch_syminfo *info;
+		Elf32_Sym *sym;
+		Elf32_Addr relocation;
+		uint32_t *location;
+		uint32_t value;
+
+		location = (void *)dstsec->sh_addr + rel->r_offset;
+		sym = (Elf32_Sym *)symsec->sh_addr + ELF32_R_SYM(rel->r_info);
+		relocation = sym->st_value + rel->r_addend;
+
+		info = module->arch.syminfo + ELF32_R_SYM(rel->r_info);
+
+		/* Initialize GOT entry if necessary */
+		switch (ELF32_R_TYPE(rel->r_info)) {
+		case R_AVR32_GOT32:
+		case R_AVR32_GOT16:
+		case R_AVR32_GOT8:
+		case R_AVR32_GOT21S:
+		case R_AVR32_GOT18SW:
+		case R_AVR32_GOT16S:
+			if (!info->got_initialized) {
+				Elf32_Addr *gotent;
+
+				gotent = (module->module_core
+					  + module->arch.got_offset
+					  + info->got_offset);
+				*gotent = relocation;
+				info->got_initialized = 1;
+			}
+
+			relocation = info->got_offset;
+			break;
+		}
+
+		switch (ELF32_R_TYPE(rel->r_info)) {
+		case R_AVR32_32:
+		case R_AVR32_32_CPENT:
+			*location = relocation;
+			break;
+		case R_AVR32_22H_PCREL:
+			relocation -= (Elf32_Addr)location;
+			if ((relocation & 0xffe00001) != 0
+			    && (relocation & 0xffc00001) != 0xffc00000)
+				return reloc_overflow(module,
+						      "R_AVR32_22H_PCREL",
+						      relocation);
+			relocation >>= 1;
+
+			value = *location;
+			value = ((value & 0xe1ef0000)
+				 | (relocation & 0xffff)
+				 | ((relocation & 0x10000) << 4)
+				 | ((relocation & 0x1e0000) << 8));
+			*location = value;
+			break;
+		case R_AVR32_11H_PCREL:
+			relocation -= (Elf32_Addr)location;
+			if ((relocation & 0xfffffc01) != 0
+			    && (relocation & 0xfffff801) != 0xfffff800)
+				return reloc_overflow(module,
+						      "R_AVR32_11H_PCREL",
+						      relocation);
+			value = get_u16(location);
+			value = ((value & 0xf00c)
+				 | ((relocation & 0x1fe) << 3)
+				 | ((relocation & 0x600) >> 9));
+			put_u16(location, value);
+			break;
+		case R_AVR32_9H_PCREL:
+			relocation -= (Elf32_Addr)location;
+			if ((relocation & 0xffffff01) != 0
+			    && (relocation & 0xfffffe01) != 0xfffffe00)
+				return reloc_overflow(module,
+						      "R_AVR32_9H_PCREL",
+						      relocation);
+			value = get_u16(location);
+			value = ((value & 0xf00f)
+				 | ((relocation & 0x1fe) << 3));
+			put_u16(location, value);
+			break;
+		case R_AVR32_9UW_PCREL:
+			relocation -= ((Elf32_Addr)location) & 0xfffffffc;
+			if ((relocation & 0xfffffc03) != 0)
+				return reloc_overflow(module,
+						      "R_AVR32_9UW_PCREL",
+						      relocation);
+			value = get_u16(location);
+			value = ((value & 0xf80f)
+				 | ((relocation & 0x1fc) << 2));
+			put_u16(location, value);
+			break;
+		case R_AVR32_GOTPC:
+			/*
+			 * R6 = PC - (PC - GOT)
+			 *
+			 * At this point, relocation contains the
+			 * value of PC.  Just subtract the value of
+			 * GOT, and we're done.
+			 */
+			pr_debug("GOTPC: PC=0x%lx, got_offset=0x%lx, core=0x%p\n",
+				 relocation, module->arch.got_offset,
+				 module->module_core);
+			relocation -= ((unsigned long)module->module_core
+				       + module->arch.got_offset);
+			*location = relocation;
+			break;
+		case R_AVR32_GOT18SW:
+			if ((relocation & 0xfffe0003) != 0
+			    && (relocation & 0xfffc0003) != 0xffff0000)
+				return reloc_overflow(module, "R_AVR32_GOT18SW",
+						     relocation);
+			relocation >>= 2;
+			/* fall through */
+		case R_AVR32_GOT16S:
+			if ((relocation & 0xffff8000) != 0
+			    && (relocation & 0xffff0000) != 0xffff0000)
+				return reloc_overflow(module, "R_AVR32_GOT16S",
+						      relocation);
+			pr_debug("GOT reloc @ 0x%lx -> %lu\n",
+				 rel->r_offset, relocation);
+			value = *location;
+			value = ((value & 0xffff0000)
+				 | (relocation & 0xffff));
+			*location = value;
+			break;
+
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       module->name, ELF32_R_TYPE(rel->r_info));
+			return -ENOEXEC;
+		}
+	}
+
+	return ret;
+}
+
+int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab,
+		   unsigned int symindex, unsigned int relindex,
+		   struct module *module)
+{
+	printk(KERN_ERR "module %s: REL relocations are not supported\n",
+		module->name);
+	return -ENOEXEC;
+}
+
+int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+		    struct module *module)
+{
+	vfree(module->arch.syminfo);
+	module->arch.syminfo = NULL;
+
+	return 0;
+}
+
+void module_arch_cleanup(struct module *module)
+{
+
+}
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
new file mode 100644
index 000000000000..317dc50945f2
--- /dev/null
+++ b/arch/avr32/kernel/process.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/fs.h>
+#include <linux/ptrace.h>
+#include <linux/reboot.h>
+#include <linux/unistd.h>
+
+#include <asm/sysreg.h>
+#include <asm/ocd.h>
+
+void (*pm_power_off)(void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+void cpu_idle(void)
+{
+	/* endless idle loop with no priority at all */
+	while (1) {
+		/* TODO: Enter sleep mode */
+		while (!need_resched())
+			cpu_relax();
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
+	}
+}
+
+void machine_halt(void)
+{
+}
+
+void machine_power_off(void)
+{
+}
+
+void machine_restart(char *cmd)
+{
+	__mtdr(DBGREG_DC, DC_DBE);
+	__mtdr(DBGREG_DC, DC_RES);
+	while (1) ;
+}
+
+/*
+ * PC is actually discarded when returning from a system call -- the
+ * return address must be stored in LR. This function will make sure
+ * LR points to do_exit before starting the thread.
+ *
+ * Also, when returning from fork(), r12 is 0, so we must copy the
+ * argument as well.
+ *
+ *  r0 : The argument to the main thread function
+ *  r1 : The address of do_exit
+ *  r2 : The address of the main thread function
+ */
+asmlinkage extern void kernel_thread_helper(void);
+__asm__("	.type	kernel_thread_helper, @function\n"
+	"kernel_thread_helper:\n"
+	"	mov	r12, r0\n"
+	"	mov	lr, r2\n"
+	"	mov	pc, r1\n"
+	"	.size	kernel_thread_helper, . - kernel_thread_helper");
+
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+
+	regs.r0 = (unsigned long)arg;
+	regs.r1 = (unsigned long)fn;
+	regs.r2 = (unsigned long)do_exit;
+	regs.lr = (unsigned long)kernel_thread_helper;
+	regs.pc = (unsigned long)kernel_thread_helper;
+	regs.sr = MODE_SUPERVISOR;
+
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
+		       0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/*
+ * Free current thread data structures etc
+ */
+void exit_thread(void)
+{
+	/* nothing to do */
+}
+
+void flush_thread(void)
+{
+	/* nothing to do */
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+	/* do nothing */
+}
+
+static const char *cpu_modes[] = {
+	"Application", "Supervisor", "Interrupt level 0", "Interrupt level 1",
+	"Interrupt level 2", "Interrupt level 3", "Exception", "NMI"
+};
+
+void show_regs(struct pt_regs *regs)
+{
+	unsigned long sp = regs->sp;
+	unsigned long lr = regs->lr;
+	unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT;
+
+	if (!user_mode(regs))
+		sp = (unsigned long)regs + FRAME_SIZE_FULL;
+
+	print_symbol("PC is at %s\n", instruction_pointer(regs));
+	print_symbol("LR is at %s\n", lr);
+	printk("pc : [<%08lx>]    lr : [<%08lx>]    %s\n"
+	       "sp : %08lx  r12: %08lx  r11: %08lx\n",
+	       instruction_pointer(regs),
+	       lr, print_tainted(), sp, regs->r12, regs->r11);
+	printk("r10: %08lx  r9 : %08lx  r8 : %08lx\n",
+	       regs->r10, regs->r9, regs->r8);
+	printk("r7 : %08lx  r6 : %08lx  r5 : %08lx  r4 : %08lx\n",
+	       regs->r7, regs->r6, regs->r5, regs->r4);
+	printk("r3 : %08lx  r2 : %08lx  r1 : %08lx  r0 : %08lx\n",
+	       regs->r3, regs->r2, regs->r1, regs->r0);
+	printk("Flags: %c%c%c%c%c\n",
+	       regs->sr & SR_Q ? 'Q' : 'q',
+	       regs->sr & SR_V ? 'V' : 'v',
+	       regs->sr & SR_N ? 'N' : 'n',
+	       regs->sr & SR_Z ? 'Z' : 'z',
+	       regs->sr & SR_C ? 'C' : 'c');
+	printk("Mode bits: %c%c%c%c%c%c%c%c%c\n",
+	       regs->sr & SR_H ? 'H' : 'h',
+	       regs->sr & SR_R ? 'R' : 'r',
+	       regs->sr & SR_J ? 'J' : 'j',
+	       regs->sr & SR_EM ? 'E' : 'e',
+	       regs->sr & SR_I3M ? '3' : '.',
+	       regs->sr & SR_I2M ? '2' : '.',
+	       regs->sr & SR_I1M ? '1' : '.',
+	       regs->sr & SR_I0M ? '0' : '.',
+	       regs->sr & SR_GM ? 'G' : 'g');
+	printk("CPU Mode: %s\n", cpu_modes[mode]);
+
+	show_trace(NULL, (unsigned long *)sp, regs);
+}
+EXPORT_SYMBOL(show_regs);
+
+/* Fill in the fpu structure for a core dump. This is easy -- we don't have any */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+	/* Not valid */
+	return 0;
+}
+
+asmlinkage void ret_from_fork(void);
+
+int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+		unsigned long unused,
+		struct task_struct *p, struct pt_regs *regs)
+{
+	struct pt_regs *childregs;
+
+	childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1;
+	*childregs = *regs;
+
+	if (user_mode(regs))
+		childregs->sp = usp;
+	else
+		childregs->sp = (unsigned long)p->thread_info + THREAD_SIZE;
+
+	childregs->r12 = 0; /* Set return value for child */
+
+	p->thread.cpu_context.sr = MODE_SUPERVISOR | SR_GM;
+	p->thread.cpu_context.ksp = (unsigned long)childregs;
+	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
+
+	return 0;
+}
+
+/* r12-r8 are dummy parameters to force the compiler to use the stack */
+asmlinkage int sys_fork(struct pt_regs *regs)
+{
+	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
+}
+
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long parent_tidptr,
+			 unsigned long child_tidptr, struct pt_regs *regs)
+{
+	if (!newsp)
+		newsp = regs->sp;
+	return do_fork(clone_flags, newsp, regs, 0,
+		       (int __user *)parent_tidptr,
+		       (int __user *)child_tidptr);
+}
+
+asmlinkage int sys_vfork(struct pt_regs *regs)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs,
+		       0, NULL, NULL);
+}
+
+asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
+			  char __user *__user *uenvp, struct pt_regs *regs)
+{
+	int error;
+	char *filename;
+
+	filename = getname(ufilename);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+
+	error = do_execve(filename, uargv, uenvp, regs);
+	if (error == 0)
+		current->ptrace &= ~PT_DTRACE;
+	putname(filename);
+
+out:
+	return error;
+}
+
+
+/*
+ * This function is supposed to answer the question "who called
+ * schedule()?"
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long pc;
+	unsigned long stack_page;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	stack_page = (unsigned long)p->thread_info;
+	BUG_ON(!stack_page);
+
+	/*
+	 * The stored value of PC is either the address right after
+	 * the call to __switch_to() or ret_from_fork.
+	 */
+	pc = thread_saved_pc(p);
+	if (in_sched_functions(pc)) {
+#ifdef CONFIG_FRAME_POINTER
+		unsigned long fp = p->thread.cpu_context.r7;
+		BUG_ON(fp < stack_page || fp > (THREAD_SIZE + stack_page));
+		pc = *(unsigned long *)fp;
+#else
+		/*
+		 * We depend on the frame size of schedule here, which
+		 * is actually quite ugly. It might be possible to
+		 * determine the frame size automatically at build
+		 * time by doing this:
+		 *   - compile sched.c
+		 *   - disassemble the resulting sched.o
+		 *   - look for 'sub sp,??' shortly after '<schedule>:'
+		 */
+		unsigned long sp = p->thread.cpu_context.ksp + 16;
+		BUG_ON(sp < stack_page || sp > (THREAD_SIZE + stack_page));
+		pc = *(unsigned long *)sp;
+#endif
+	}
+
+	return pc;
+}
diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c
new file mode 100644
index 000000000000..3c89e59029ab
--- /dev/null
+++ b/arch/avr32/kernel/ptrace.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#undef DEBUG
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/unistd.h>
+#include <linux/notifier.h>
+
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/ocd.h>
+#include <asm/mmu_context.h>
+#include <asm/kdebug.h>
+
+static struct pt_regs *get_user_regs(struct task_struct *tsk)
+{
+	return (struct pt_regs *)((unsigned long) tsk->thread_info +
+				  THREAD_SIZE - sizeof(struct pt_regs));
+}
+
+static void ptrace_single_step(struct task_struct *tsk)
+{
+	pr_debug("ptrace_single_step: pid=%u, SR=0x%08lx\n",
+		 tsk->pid, tsk->thread.cpu_context.sr);
+	if (!(tsk->thread.cpu_context.sr & SR_D)) {
+		/*
+		 * Set a breakpoint at the current pc to force the
+		 * process into debug mode.  The syscall/exception
+		 * exit code will set a breakpoint at the return
+		 * address when this flag is set.
+		 */
+		pr_debug("ptrace_single_step: Setting TIF_BREAKPOINT\n");
+		set_tsk_thread_flag(tsk, TIF_BREAKPOINT);
+	}
+
+	/* The monitor code will do the actual step for us */
+	set_tsk_thread_flag(tsk, TIF_SINGLE_STEP);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching
+ *
+ * Make sure any single step bits, etc. are not set
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	clear_tsk_thread_flag(child, TIF_SINGLE_STEP);
+}
+
+/*
+ * Handle hitting a breakpoint
+ */
+static void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code  = TRAP_BRKPT;
+	info.si_addr  = (void __user *)instruction_pointer(regs);
+
+	pr_debug("ptrace_break: Sending SIGTRAP to PID %u (pc = 0x%p)\n",
+		 tsk->pid, info.si_addr);
+	force_sig_info(SIGTRAP, &info, tsk);
+}
+
+/*
+ * Read the word at offset "offset" into the task's "struct user". We
+ * actually access the pt_regs struct stored on the kernel stack.
+ */
+static int ptrace_read_user(struct task_struct *tsk, unsigned long offset,
+			    unsigned long __user *data)
+{
+	unsigned long *regs;
+	unsigned long value;
+
+	pr_debug("ptrace_read_user(%p, %#lx, %p)\n",
+		 tsk, offset, data);
+
+	if (offset & 3 || offset >= sizeof(struct user)) {
+		printk("ptrace_read_user: invalid offset 0x%08lx\n", offset);
+		return -EIO;
+	}
+
+	regs = (unsigned long *)get_user_regs(tsk);
+
+	value = 0;
+	if (offset < sizeof(struct pt_regs))
+		value = regs[offset / sizeof(regs[0])];
+
+	return put_user(value, data);
+}
+
+/*
+ * Write the word "value" to offset "offset" into the task's "struct
+ * user". We actually access the pt_regs struct stored on the kernel
+ * stack.
+ */
+static int ptrace_write_user(struct task_struct *tsk, unsigned long offset,
+			     unsigned long value)
+{
+	unsigned long *regs;
+
+	if (offset & 3 || offset >= sizeof(struct user)) {
+		printk("ptrace_write_user: invalid offset 0x%08lx\n", offset);
+		return -EIO;
+	}
+
+	if (offset >= sizeof(struct pt_regs))
+		return 0;
+
+	regs = (unsigned long *)get_user_regs(tsk);
+	regs[offset / sizeof(regs[0])] = value;
+
+	return 0;
+}
+
+static int ptrace_getregs(struct task_struct *tsk, void __user *uregs)
+{
+	struct pt_regs *regs = get_user_regs(tsk);
+
+	return copy_to_user(uregs, regs, sizeof(*regs)) ? -EFAULT : 0;
+}
+
+static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs)
+{
+	struct pt_regs newregs;
+	int ret;
+
+	ret = -EFAULT;
+	if (copy_from_user(&newregs, uregs, sizeof(newregs)) == 0) {
+		struct pt_regs *regs = get_user_regs(tsk);
+
+		ret = -EINVAL;
+		if (valid_user_regs(&newregs)) {
+			*regs = newregs;
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+	unsigned long tmp;
+	int ret;
+
+	pr_debug("arch_ptrace(%ld, %ld, %#lx, %#lx)\n",
+		 request, child->pid, addr, data);
+
+	pr_debug("ptrace: Enabling monitor mode...\n");
+	__mtdr(DBGREG_DC, __mfdr(DBGREG_DC) | DC_MM | DC_DBE);
+
+	switch (request) {
+	/* Read the word at location addr in the child process */
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
+		ret = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		if (ret == sizeof(tmp))
+			ret = put_user(tmp, (unsigned long __user *)data);
+		else
+			ret = -EIO;
+		break;
+
+	case PTRACE_PEEKUSR:
+		ret = ptrace_read_user(child, addr,
+				       (unsigned long __user *)data);
+		break;
+
+	/* Write the word in data at location addr */
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEDATA:
+		ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+		if (ret == sizeof(data))
+			ret = 0;
+		else
+			ret = -EIO;
+		break;
+
+	case PTRACE_POKEUSR:
+		ret = ptrace_write_user(child, addr, data);
+		break;
+
+	/* continue and stop at next (return from) syscall */
+	case PTRACE_SYSCALL:
+	/* restart after signal */
+	case PTRACE_CONT:
+		ret = -EIO;
+		if (!valid_signal(data))
+			break;
+		if (request == PTRACE_SYSCALL)
+			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		else
+			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		child->exit_code = data;
+		/* XXX: Are we sure no breakpoints are active here? */
+		wake_up_process(child);
+		ret = 0;
+		break;
+
+	/*
+	 * Make the child exit. Best I can do is send it a
+	 * SIGKILL. Perhaps it should be put in the status that it
+	 * wants to exit.
+	 */
+	case PTRACE_KILL:
+		ret = 0;
+		if (child->exit_state == EXIT_ZOMBIE)
+			break;
+		child->exit_code = SIGKILL;
+		wake_up_process(child);
+		break;
+
+	/*
+	 * execute single instruction.
+	 */
+	case PTRACE_SINGLESTEP:
+		ret = -EIO;
+		if (!valid_signal(data))
+			break;
+		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		ptrace_single_step(child);
+		child->exit_code = data;
+		wake_up_process(child);
+		ret = 0;
+		break;
+
+	/* Detach a process that was attached */
+	case PTRACE_DETACH:
+		ret = ptrace_detach(child, data);
+		break;
+
+	case PTRACE_GETREGS:
+		ret = ptrace_getregs(child, (void __user *)data);
+		break;
+
+	case PTRACE_SETREGS:
+		ret = ptrace_setregs(child, (const void __user *)data);
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	pr_debug("sys_ptrace returning %d (DC = 0x%08lx)\n", ret, __mfdr(DBGREG_DC));
+	return ret;
+}
+
+asmlinkage void syscall_trace(void)
+{
+	pr_debug("syscall_trace called\n");
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	pr_debug("syscall_trace: notifying parent\n");
+	/* The 0x80 provides a way for the tracing parent to
+	 * distinguish between a syscall stop and SIGTRAP delivery */
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+				 ? 0x80 : 0));
+
+	/*
+	 * this isn't the same as continuing with a signal, but it
+	 * will do for normal use.  strace only continues with a
+	 * signal if the stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		pr_debug("syscall_trace: sending signal %d to PID %u\n",
+			 current->exit_code, current->pid);
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
+
+asmlinkage void do_debug_priv(struct pt_regs *regs)
+{
+	unsigned long dc, ds;
+	unsigned long die_val;
+
+	ds = __mfdr(DBGREG_DS);
+
+	pr_debug("do_debug_priv: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+
+	if (ds & DS_SSS)
+		die_val = DIE_SSTEP;
+	else
+		die_val = DIE_BREAKPOINT;
+
+	if (notify_die(die_val, regs, 0, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	if (likely(ds & DS_SSS)) {
+		extern void itlb_miss(void);
+		extern void tlb_miss_common(void);
+		struct thread_info *ti;
+
+		dc = __mfdr(DBGREG_DC);
+		dc &= ~DC_SS;
+		__mtdr(DBGREG_DC, dc);
+
+		ti = current_thread_info();
+		ti->flags |= _TIF_BREAKPOINT;
+
+		/* The TLB miss handlers don't check thread flags */
+		if ((regs->pc >= (unsigned long)&itlb_miss)
+		    && (regs->pc <= (unsigned long)&tlb_miss_common)) {
+			__mtdr(DBGREG_BWA2A, sysreg_read(RAR_EX));
+			__mtdr(DBGREG_BWC2A, 0x40000001 | (get_asid() << 1));
+		}
+
+		/*
+		 * If we're running in supervisor mode, the breakpoint
+		 * will take us where we want directly, no need to
+		 * single step.
+		 */
+		if ((regs->sr & MODE_MASK) != MODE_SUPERVISOR)
+			ti->flags |= TIF_SINGLE_STEP;
+	} else {
+		panic("Unable to handle debug trap at pc = %08lx\n",
+		      regs->pc);
+	}
+}
+
+/*
+ * Handle breakpoints, single steps and other debuggy things. To keep
+ * things simple initially, we run with interrupts and exceptions
+ * disabled all the time.
+ */
+asmlinkage void do_debug(struct pt_regs *regs)
+{
+	unsigned long dc, ds;
+
+	ds = __mfdr(DBGREG_DS);
+	pr_debug("do_debug: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+
+	if (test_thread_flag(TIF_BREAKPOINT)) {
+		pr_debug("TIF_BREAKPOINT set\n");
+		/* We're taking care of it */
+		clear_thread_flag(TIF_BREAKPOINT);
+		__mtdr(DBGREG_BWC2A, 0);
+	}
+
+	if (test_thread_flag(TIF_SINGLE_STEP)) {
+		pr_debug("TIF_SINGLE_STEP set, ds = 0x%08lx\n", ds);
+		if (ds & DS_SSS) {
+			dc = __mfdr(DBGREG_DC);
+			dc &= ~DC_SS;
+			__mtdr(DBGREG_DC, dc);
+
+			clear_thread_flag(TIF_SINGLE_STEP);
+			ptrace_break(current, regs);
+		}
+	} else {
+		/* regular breakpoint */
+		ptrace_break(current, regs);
+	}
+}
diff --git a/arch/avr32/kernel/semaphore.c b/arch/avr32/kernel/semaphore.c
new file mode 100644
index 000000000000..1e2705a05016
--- /dev/null
+++ b/arch/avr32/kernel/semaphore.c
@@ -0,0 +1,148 @@
+/*
+ * AVR32 sempahore implementation.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/i386/kernel/semaphore.c
+ *  Copyright (C) 1999 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <asm/semaphore.h>
+#include <asm/atomic.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation.
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative do we
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+	wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__up);
+
+void __sched __down(struct semaphore *sem)
+{
+	struct task_struct *tsk = current;
+        DECLARE_WAITQUEUE(wait, tsk);
+        unsigned long flags;
+
+        tsk->state = TASK_UNINTERRUPTIBLE;
+        spin_lock_irqsave(&sem->wait.lock, flags);
+        add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+        sem->sleepers++;
+        for (;;) {
+                int sleepers = sem->sleepers;
+
+                /*
+                 * Add "everybody else" into it. They aren't
+                 * playing, because we own the spinlock in
+                 * the wait_queue_head.
+                 */
+                if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+                        sem->sleepers = 0;
+                        break;
+                }
+                sem->sleepers = 1;      /* us - see -1 above */
+                spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+                schedule();
+
+                spin_lock_irqsave(&sem->wait.lock, flags);
+                tsk->state = TASK_UNINTERRUPTIBLE;
+        }
+        remove_wait_queue_locked(&sem->wait, &wait);
+        wake_up_locked(&sem->wait);
+        spin_unlock_irqrestore(&sem->wait.lock, flags);
+        tsk->state = TASK_RUNNING;
+}
+EXPORT_SYMBOL(__down);
+
+int __sched __down_interruptible(struct semaphore *sem)
+{
+	int retval = 0;
+	struct task_struct *tsk = current;
+        DECLARE_WAITQUEUE(wait, tsk);
+        unsigned long flags;
+
+        tsk->state = TASK_INTERRUPTIBLE;
+        spin_lock_irqsave(&sem->wait.lock, flags);
+        add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+        sem->sleepers++;
+        for (;;) {
+                int sleepers = sem->sleepers;
+
+		/*
+		 * With signals pending, this turns into the trylock
+		 * failure case - we won't be sleeping, and we can't
+		 * get the lock as it has contention. Just correct the
+		 * count and exit.
+		 */
+		if (signal_pending(current)) {
+			retval = -EINTR;
+			sem->sleepers = 0;
+			atomic_add(sleepers, &sem->count);
+			break;
+		}
+
+                /*
+                 * Add "everybody else" into it. They aren't
+                 * playing, because we own the spinlock in
+                 * the wait_queue_head.
+                 */
+                if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
+                        sem->sleepers = 0;
+                        break;
+                }
+                sem->sleepers = 1;      /* us - see -1 above */
+                spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+                schedule();
+
+                spin_lock_irqsave(&sem->wait.lock, flags);
+                tsk->state = TASK_INTERRUPTIBLE;
+        }
+        remove_wait_queue_locked(&sem->wait, &wait);
+        wake_up_locked(&sem->wait);
+        spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+        tsk->state = TASK_RUNNING;
+	return retval;
+}
+EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
new file mode 100644
index 000000000000..5d68f3c6990b
--- /dev/null
+++ b/arch/avr32/kernel/setup.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <linux/ioport.h>
+#include <linux/bootmem.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+
+#include <asm/sections.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/sysreg.h>
+
+#include <asm/arch/board.h>
+#include <asm/arch/init.h>
+
+extern int root_mountflags;
+
+/*
+ * Bootloader-provided information about physical memory
+ */
+struct tag_mem_range *mem_phys;
+struct tag_mem_range *mem_reserved;
+struct tag_mem_range *mem_ramdisk;
+
+/*
+ * Initialize loops_per_jiffy as 5000000 (500MIPS).
+ * Better make it too large than too small...
+ */
+struct avr32_cpuinfo boot_cpu_data = {
+	.loops_per_jiffy = 5000000
+};
+EXPORT_SYMBOL(boot_cpu_data);
+
+static char command_line[COMMAND_LINE_SIZE];
+
+/*
+ * Should be more than enough, but if you have a _really_ complex
+ * setup, you might need to increase the size of this...
+ */
+static struct tag_mem_range __initdata mem_range_cache[32];
+static unsigned mem_range_next_free;
+
+/*
+ * Standard memory resources
+ */
+static struct resource mem_res[] = {
+	{
+		.name	= "Kernel code",
+		.start	= 0,
+		.end	= 0,
+		.flags	= IORESOURCE_MEM
+	},
+	{
+		.name	= "Kernel data",
+		.start	= 0,
+		.end	= 0,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+#define kernel_code	mem_res[0]
+#define kernel_data	mem_res[1]
+
+/*
+ * Early framebuffer allocation. Works as follows:
+ *   - If fbmem_size is zero, nothing will be allocated or reserved.
+ *   - If fbmem_start is zero when setup_bootmem() is called,
+ *     fbmem_size bytes will be allocated from the bootmem allocator.
+ *   - If fbmem_start is nonzero, an area of size fbmem_size will be
+ *     reserved at the physical address fbmem_start if necessary. If
+ *     the area isn't in a memory region known to the kernel, it will
+ *     be left alone.
+ *
+ * Board-specific code may use these variables to set up platform data
+ * for the framebuffer driver if fbmem_size is nonzero.
+ */
+static unsigned long __initdata fbmem_start;
+static unsigned long __initdata fbmem_size;
+
+/*
+ * "fbmem=xxx[kKmM]" allocates the specified amount of boot memory for
+ * use as framebuffer.
+ *
+ * "fbmem=xxx[kKmM]@yyy[kKmM]" defines a memory region of size xxx and
+ * starting at yyy to be reserved for use as framebuffer.
+ *
+ * The kernel won't verify that the memory region starting at yyy
+ * actually contains usable RAM.
+ */
+static int __init early_parse_fbmem(char *p)
+{
+	fbmem_size = memparse(p, &p);
+	if (*p == '@')
+		fbmem_start = memparse(p, &p);
+	return 0;
+}
+early_param("fbmem", early_parse_fbmem);
+
+static inline void __init resource_init(void)
+{
+	struct tag_mem_range *region;
+
+	kernel_code.start = __pa(init_mm.start_code);
+	kernel_code.end = __pa(init_mm.end_code - 1);
+	kernel_data.start = __pa(init_mm.end_code);
+	kernel_data.end = __pa(init_mm.brk - 1);
+
+	for (region = mem_phys; region; region = region->next) {
+		struct resource *res;
+		unsigned long phys_start, phys_end;
+
+		if (region->size == 0)
+			continue;
+
+		phys_start = region->addr;
+		phys_end = phys_start + region->size - 1;
+
+		res = alloc_bootmem_low(sizeof(*res));
+		res->name = "System RAM";
+		res->start = phys_start;
+		res->end = phys_end;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+		request_resource (&iomem_resource, res);
+
+		if (kernel_code.start >= res->start &&
+		    kernel_code.end <= res->end)
+			request_resource (res, &kernel_code);
+		if (kernel_data.start >= res->start &&
+		    kernel_data.end <= res->end)
+			request_resource (res, &kernel_data);
+	}
+}
+
+static int __init parse_tag_core(struct tag *tag)
+{
+	if (tag->hdr.size > 2) {
+		if ((tag->u.core.flags & 1) == 0)
+			root_mountflags &= ~MS_RDONLY;
+		ROOT_DEV = new_decode_dev(tag->u.core.rootdev);
+	}
+	return 0;
+}
+__tagtable(ATAG_CORE, parse_tag_core);
+
+static int __init parse_tag_mem_range(struct tag *tag,
+				      struct tag_mem_range **root)
+{
+	struct tag_mem_range *cur, **pprev;
+	struct tag_mem_range *new;
+
+	/*
+	 * Ignore zero-sized entries. If we're running standalone, the
+	 * SDRAM code may emit such entries if something goes
+	 * wrong...
+	 */
+	if (tag->u.mem_range.size == 0)
+		return 0;
+
+	/*
+	 * Copy the data so the bootmem init code doesn't need to care
+	 * about it.
+	 */
+	if (mem_range_next_free >=
+	    (sizeof(mem_range_cache) / sizeof(mem_range_cache[0])))
+		panic("Physical memory map too complex!\n");
+
+	new = &mem_range_cache[mem_range_next_free++];
+	*new = tag->u.mem_range;
+
+	pprev = root;
+	cur = *root;
+	while (cur) {
+		pprev = &cur->next;
+		cur = cur->next;
+	}
+
+	*pprev = new;
+	new->next = NULL;
+
+	return 0;
+}
+
+static int __init parse_tag_mem(struct tag *tag)
+{
+	return parse_tag_mem_range(tag, &mem_phys);
+}
+__tagtable(ATAG_MEM, parse_tag_mem);
+
+static int __init parse_tag_cmdline(struct tag *tag)
+{
+	strlcpy(saved_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE);
+	return 0;
+}
+__tagtable(ATAG_CMDLINE, parse_tag_cmdline);
+
+static int __init parse_tag_rdimg(struct tag *tag)
+{
+	return parse_tag_mem_range(tag, &mem_ramdisk);
+}
+__tagtable(ATAG_RDIMG, parse_tag_rdimg);
+
+static int __init parse_tag_clock(struct tag *tag)
+{
+	/*
+	 * We'll figure out the clocks by peeking at the system
+	 * manager regs directly.
+	 */
+	return 0;
+}
+__tagtable(ATAG_CLOCK, parse_tag_clock);
+
+static int __init parse_tag_rsvd_mem(struct tag *tag)
+{
+	return parse_tag_mem_range(tag, &mem_reserved);
+}
+__tagtable(ATAG_RSVD_MEM, parse_tag_rsvd_mem);
+
+static int __init parse_tag_ethernet(struct tag *tag)
+{
+#if 0
+	const struct platform_device *pdev;
+
+	/*
+	 * We really need a bus type that supports "classes"...this
+	 * will do for now (until we must handle other kinds of
+	 * ethernet controllers)
+	 */
+	pdev = platform_get_device("macb", tag->u.ethernet.mac_index);
+	if (pdev && pdev->dev.platform_data) {
+		struct eth_platform_data *data = pdev->dev.platform_data;
+
+		data->valid = 1;
+		data->mii_phy_addr = tag->u.ethernet.mii_phy_addr;
+		memcpy(data->hw_addr, tag->u.ethernet.hw_address,
+		       sizeof(data->hw_addr));
+	}
+#endif
+	return 0;
+}
+__tagtable(ATAG_ETHERNET, parse_tag_ethernet);
+
+/*
+ * Scan the tag table for this tag, and call its parse function. The
+ * tag table is built by the linker from all the __tagtable
+ * declarations.
+ */
+static int __init parse_tag(struct tag *tag)
+{
+	extern struct tagtable __tagtable_begin, __tagtable_end;
+	struct tagtable *t;
+
+	for (t = &__tagtable_begin; t < &__tagtable_end; t++)
+		if (tag->hdr.tag == t->tag) {
+			t->parse(tag);
+			break;
+		}
+
+	return t < &__tagtable_end;
+}
+
+/*
+ * Parse all tags in the list we got from the boot loader
+ */
+static void __init parse_tags(struct tag *t)
+{
+	for (; t->hdr.tag != ATAG_NONE; t = tag_next(t))
+		if (!parse_tag(t))
+			printk(KERN_WARNING
+			       "Ignoring unrecognised tag 0x%08x\n",
+			       t->hdr.tag);
+}
+
+void __init setup_arch (char **cmdline_p)
+{
+	struct clk *cpu_clk;
+
+	parse_tags(bootloader_tags);
+
+	setup_processor();
+	setup_platform();
+
+	cpu_clk = clk_get(NULL, "cpu");
+	if (IS_ERR(cpu_clk)) {
+		printk(KERN_WARNING "Warning: Unable to get CPU clock\n");
+	} else {
+		unsigned long cpu_hz = clk_get_rate(cpu_clk);
+
+		/*
+		 * Well, duh, but it's probably a good idea to
+		 * increment the use count.
+		 */
+		clk_enable(cpu_clk);
+
+		boot_cpu_data.clk = cpu_clk;
+		boot_cpu_data.loops_per_jiffy = cpu_hz * 4;
+		printk("CPU: Running at %lu.%03lu MHz\n",
+		       ((cpu_hz + 500) / 1000) / 1000,
+		       ((cpu_hz + 500) / 1000) % 1000);
+	}
+
+	init_mm.start_code = (unsigned long) &_text;
+	init_mm.end_code = (unsigned long) &_etext;
+	init_mm.end_data = (unsigned long) &_edata;
+	init_mm.brk = (unsigned long) &_end;
+
+	strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
+	*cmdline_p = command_line;
+	parse_early_param();
+
+	setup_bootmem();
+
+	board_setup_fbmem(fbmem_start, fbmem_size);
+
+#ifdef CONFIG_VT
+	conswitchp = &dummy_con;
+#endif
+
+	paging_init();
+
+	resource_init();
+}
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
new file mode 100644
index 000000000000..33096651c24f
--- /dev/null
+++ b/arch/avr32/kernel/signal.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/sh/kernel/signal.c
+ *  Copyright (C) 1999, 2000  Niibe Yutaka & Kaz Kojima
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/suspend.h>
+
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+			       struct pt_regs *regs)
+{
+	return do_sigaltstack(uss, uoss, regs->sp);
+}
+
+struct rt_sigframe
+{
+	struct siginfo info;
+	struct ucontext uc;
+	unsigned long retcode;
+};
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+{
+	int err = 0;
+
+#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+	COPY(sr);
+	COPY(pc);
+	COPY(lr);
+	COPY(sp);
+	COPY(r12);
+	COPY(r11);
+	COPY(r10);
+	COPY(r9);
+	COPY(r8);
+	COPY(r7);
+	COPY(r6);
+	COPY(r5);
+	COPY(r4);
+	COPY(r3);
+	COPY(r2);
+	COPY(r1);
+	COPY(r0);
+#undef	COPY
+
+	/*
+	 * Don't allow anyone to pretend they're running in supervisor
+	 * mode or something...
+	 */
+	err |= !valid_user_regs(regs);
+
+	return err;
+}
+
+
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	sigset_t set;
+
+	frame = (struct rt_sigframe __user *)regs->sp;
+	pr_debug("SIG return: frame = %p\n", frame);
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+
+	pr_debug("Context restored: pc = %08lx, lr = %08lx, sp = %08lx\n",
+		 regs->pc, regs->lr, regs->sp);
+
+	return regs->r12;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+static int
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
+{
+	int err = 0;
+
+#define COPY(x)		err |= __put_user(regs->x, &sc->x)
+	COPY(sr);
+	COPY(pc);
+	COPY(lr);
+	COPY(sp);
+	COPY(r12);
+	COPY(r11);
+	COPY(r10);
+	COPY(r9);
+	COPY(r8);
+	COPY(r7);
+	COPY(r6);
+	COPY(r5);
+	COPY(r4);
+	COPY(r3);
+	COPY(r2);
+	COPY(r1);
+	COPY(r0);
+#undef	COPY
+
+	return err;
+}
+
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize)
+{
+	unsigned long sp = regs->sp;
+
+	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp))
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	return (void __user *)((sp - framesize) & ~3);
+}
+
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+	       sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	int err = 0;
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+	err = -EFAULT;
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+		goto out;
+
+	/*
+	 * Set up the return code:
+	 *
+	 *	mov	r8, __NR_rt_sigreturn
+	 *	scall
+	 *
+	 * Note: This will blow up since we're using a non-executable
+	 * stack. Better use SA_RESTORER.
+	 */
+#if __NR_rt_sigreturn > 127
+# error __NR_rt_sigreturn must be < 127 to fit in a short mov
+#endif
+	err = __put_user(0x3008d733 | (__NR_rt_sigreturn << 20),
+			 &frame->retcode);
+
+	err |= copy_siginfo_to_user(&frame->info, info);
+
+	/* Set up the ucontext */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(NULL, &frame->uc.uc_link);
+	err |= __put_user((void __user *)current->sas_ss_sp,
+			  &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->sp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size,
+			  &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	if (err)
+		goto out;
+
+	regs->r12 = sig;
+	regs->r11 = (unsigned long) &frame->info;
+	regs->r10 = (unsigned long) &frame->uc;
+	regs->sp = (unsigned long) frame;
+	if (ka->sa.sa_flags & SA_RESTORER)
+		regs->lr = (unsigned long)ka->sa.sa_restorer;
+	else {
+		printk(KERN_NOTICE "[%s:%d] did not set SA_RESTORER\n",
+		       current->comm, current->pid);
+		regs->lr = (unsigned long) &frame->retcode;
+	}
+
+	pr_debug("SIG deliver [%s:%d]: sig=%d sp=0x%lx pc=0x%lx->0x%p lr=0x%lx\n",
+		 current->comm, current->pid, sig, regs->sp,
+		 regs->pc, ka->sa.sa_handler, regs->lr);
+
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+
+out:
+	return err;
+}
+
+static inline void restart_syscall(struct pt_regs *regs)
+{
+	if (regs->r12 == -ERESTART_RESTARTBLOCK)
+		regs->r8 = __NR_restart_syscall;
+	else
+		regs->r12 = regs->r12_orig;
+	regs->pc -= 2;
+}
+
+static inline void
+handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
+	      sigset_t *oldset, struct pt_regs *regs, int syscall)
+{
+	int ret;
+
+	/*
+	 * Set up the stack frame
+	 */
+	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+
+	/*
+	 * Check that the resulting registers are sane
+	 */
+	ret |= !valid_user_regs(regs);
+
+	/*
+	 * Block the signal if we were unsuccessful.
+	 */
+	if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) {
+		spin_lock_irq(&current->sighand->siglock);
+		sigorsets(&current->blocked, &current->blocked,
+			  &ka->sa.sa_mask);
+		sigaddset(&current->blocked, sig);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+
+	if (ret == 0)
+		return;
+
+	force_sigsegv(sig, current);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it
+ * doesn't want to handle. Thus you cannot kill init even with a
+ * SIGKILL even by mistake.
+ */
+int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall)
+{
+	siginfo_t info;
+	int signr;
+	struct k_sigaction ka;
+
+	/*
+	 * We want the common case to go fast, which is why we may in
+	 * certain cases get here from kernel mode. Just return
+	 * without doing anything if so.
+	 */
+	if (!user_mode(regs))
+		return 0;
+
+	if (try_to_freeze()) {
+		signr = 0;
+		if (!signal_pending(current))
+			goto no_signal;
+	}
+
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else if (!oldset)
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+no_signal:
+	if (syscall) {
+		switch (regs->r12) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			if (signr > 0) {
+				regs->r12 = -EINTR;
+				break;
+			}
+			/* fall through */
+		case -ERESTARTSYS:
+			if (signr > 0 && !(ka.sa.sa_flags & SA_RESTART)) {
+				regs->r12 = -EINTR;
+				break;
+			}
+			/* fall through */
+		case -ERESTARTNOINTR:
+			restart_syscall(regs);
+		}
+	}
+
+	if (signr == 0) {
+		/* No signal to deliver -- put the saved sigmask back */
+		if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+			clear_thread_flag(TIF_RESTORE_SIGMASK);
+			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+		}
+		return 0;
+	}
+
+	handle_signal(signr, &ka, &info, oldset, regs, syscall);
+	return 1;
+}
+
+asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
+{
+	int syscall = 0;
+
+	if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR)
+		syscall = 1;
+
+	if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
+		do_signal(regs, &current->blocked, syscall);
+}
diff --git a/arch/avr32/kernel/switch_to.S b/arch/avr32/kernel/switch_to.S
new file mode 100644
index 000000000000..a48d046723c5
--- /dev/null
+++ b/arch/avr32/kernel/switch_to.S
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/sysreg.h>
+
+	.text
+	.global	__switch_to
+	.type	__switch_to, @function
+
+	/* Switch thread context from "prev" to "next", returning "last"
+	 *   r12 :	prev
+	 *   r11 :	&prev->thread + 1
+	 *   r10 :	&next->thread
+	 */
+__switch_to:
+	stm	--r11, r0,r1,r2,r3,r4,r5,r6,r7,sp,lr
+	mfsr	r9, SYSREG_SR
+	st.w	--r11, r9
+	ld.w	r8, r10++
+	/*
+	 * schedule() may have been called from a mode with a different
+	 * set of registers. Make sure we don't lose anything here.
+	 */
+	pushm	r10,r12
+	mtsr	SYSREG_SR, r8
+	frs			/* flush the return stack */
+	sub	pc, -2		/* flush the pipeline */
+	popm	r10,r12
+	ldm	r10++, r0,r1,r2,r3,r4,r5,r6,r7,sp,pc
+	.size	__switch_to, . - __switch_to
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
new file mode 100644
index 000000000000..6ec5693da448
--- /dev/null
+++ b/arch/avr32/kernel/sys_avr32.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+
+#include <asm/mman.h>
+#include <asm/uaccess.h>
+
+asmlinkage int sys_pipe(unsigned long __user *filedes)
+{
+	int fd[2];
+	int error;
+
+	error = do_pipe(fd);
+	if (!error) {
+		if (copy_to_user(filedes, fd, sizeof(fd)))
+			error = -EFAULT;
+	}
+	return error;
+}
+
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, off_t offset)
+{
+	int error = -EBADF;
+	struct file *file = NULL;
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			return error;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	error = do_mmap_pgoff(file, addr, len, prot, flags, offset);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+	return error;
+}
diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S
new file mode 100644
index 000000000000..7589a9b426cb
--- /dev/null
+++ b/arch/avr32/kernel/syscall-stubs.S
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Stubs for syscalls that require access to pt_regs or that take more
+ * than five parameters.
+ */
+
+#define ARG6	r3
+
+	.text
+	.global __sys_rt_sigsuspend
+	.type	__sys_rt_sigsuspend,@function
+__sys_rt_sigsuspend:
+	mov	r10, sp
+	rjmp	sys_rt_sigsuspend
+
+	.global	__sys_sigaltstack
+	.type	__sys_sigaltstack,@function
+__sys_sigaltstack:
+	mov	r10, sp
+	rjmp	sys_sigaltstack
+
+	.global	__sys_rt_sigreturn
+	.type	__sys_rt_sigreturn,@function
+__sys_rt_sigreturn:
+	mov	r12, sp
+	rjmp	sys_rt_sigreturn
+
+	.global	__sys_fork
+	.type	__sys_fork,@function
+__sys_fork:
+	mov	r12, sp
+	rjmp	sys_fork
+
+	.global	__sys_clone
+	.type	__sys_clone,@function
+__sys_clone:
+	mov	r8, sp
+	rjmp	sys_clone
+
+	.global	__sys_vfork
+	.type	__sys_vfork,@function
+__sys_vfork:
+	mov	r12, sp
+	rjmp	sys_vfork
+
+	.global	__sys_execve
+	.type	__sys_execve,@function
+__sys_execve:
+	mov	r9, sp
+	rjmp	sys_execve
+
+	.global	__sys_mmap2
+	.type	__sys_mmap2,@function
+__sys_mmap2:
+	pushm	lr
+	st.w	--sp, ARG6
+	rcall	sys_mmap2
+	sub	sp, -4
+	popm	pc
+
+	.global	__sys_sendto
+	.type	__sys_sendto,@function
+__sys_sendto:
+	pushm	lr
+	st.w	--sp, ARG6
+	rcall	sys_sendto
+	sub	sp, -4
+	popm	pc
+
+	.global	__sys_recvfrom
+	.type	__sys_recvfrom,@function
+__sys_recvfrom:
+	pushm	lr
+	st.w	--sp, ARG6
+	rcall	sys_recvfrom
+	sub	sp, -4
+	popm	pc
+
+	.global	__sys_pselect6
+	.type	__sys_pselect6,@function
+__sys_pselect6:
+	pushm	lr
+	st.w	--sp, ARG6
+	rcall	sys_pselect6
+	sub	sp, -4
+	popm	pc
+
+	.global	__sys_splice
+	.type	__sys_splice,@function
+__sys_splice:
+	pushm	lr
+	st.w	--sp, ARG6
+	rcall	sys_splice
+	sub	sp, -4
+	popm	pc
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
new file mode 100644
index 000000000000..63b206965d05
--- /dev/null
+++ b/arch/avr32/kernel/syscall_table.S
@@ -0,0 +1,289 @@
+/*
+ * AVR32 system call table
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(CONFIG_NFSD) && !defined(CONFIG_NFSD_MODULE)
+#define sys_nfsservctl sys_ni_syscall
+#endif
+
+#if !defined(CONFIG_SYSV_IPC)
+# define sys_ipc	sys_ni_syscall
+#endif
+
+	.section .rodata,"a",@progbits
+	.type	sys_call_table,@object
+	.global	sys_call_table
+	.align	2
+sys_call_table:
+	.long	sys_restart_syscall
+	.long	sys_exit
+	.long	__sys_fork
+	.long	sys_read
+	.long	sys_write
+	.long	sys_open		/* 5 */
+	.long	sys_close
+	.long	sys_umask
+	.long	sys_creat
+	.long	sys_link
+	.long	sys_unlink		/* 10 */
+	.long	__sys_execve
+	.long	sys_chdir
+	.long	sys_time
+	.long	sys_mknod
+	.long	sys_chmod		/* 15 */
+	.long	sys_chown
+	.long	sys_lchown
+	.long	sys_lseek
+	.long	sys_llseek
+	.long	sys_getpid		/* 20 */
+	.long	sys_mount
+	.long	sys_umount
+	.long	sys_setuid
+	.long	sys_getuid
+	.long	sys_stime		/* 25 */
+	.long	sys_ptrace
+	.long	sys_alarm
+	.long	sys_pause
+	.long	sys_utime
+	.long	sys_newstat		/* 30 */
+	.long	sys_newfstat
+	.long	sys_newlstat
+	.long	sys_access
+	.long	sys_chroot
+	.long	sys_sync		/* 35 */
+	.long	sys_fsync
+	.long	sys_kill
+	.long	sys_rename
+	.long	sys_mkdir
+	.long	sys_rmdir		/* 40 */
+	.long	sys_dup
+	.long	sys_pipe
+	.long	sys_times
+	.long	__sys_clone
+	.long	sys_brk			/* 45 */
+	.long	sys_setgid
+	.long	sys_getgid
+	.long	sys_getcwd
+	.long	sys_geteuid
+	.long	sys_getegid		/* 50 */
+	.long	sys_acct
+	.long	sys_setfsuid
+	.long	sys_setfsgid
+	.long	sys_ioctl
+	.long	sys_fcntl		/* 55 */
+	.long	sys_setpgid
+	.long	sys_mremap
+	.long	sys_setresuid
+	.long	sys_getresuid
+	.long	sys_setreuid		/* 60 */
+	.long	sys_setregid
+	.long	sys_ustat
+	.long	sys_dup2
+	.long	sys_getppid
+	.long	sys_getpgrp		/* 65 */
+	.long	sys_setsid
+	.long	sys_rt_sigaction
+	.long	__sys_rt_sigreturn
+	.long	sys_rt_sigprocmask
+	.long	sys_rt_sigpending	/* 70 */
+	.long	sys_rt_sigtimedwait
+	.long	sys_rt_sigqueueinfo
+	.long	__sys_rt_sigsuspend
+	.long	sys_sethostname
+	.long	sys_setrlimit		/* 75 */
+	.long	sys_getrlimit
+	.long	sys_getrusage
+	.long	sys_gettimeofday
+	.long	sys_settimeofday
+	.long	sys_getgroups		/* 80 */
+	.long	sys_setgroups
+	.long	sys_select
+	.long	sys_symlink
+	.long	sys_fchdir
+	.long	sys_readlink		/* 85 */
+	.long	sys_pread64
+	.long	sys_pwrite64
+	.long	sys_swapon
+	.long	sys_reboot
+	.long	__sys_mmap2		/* 90 */
+	.long	sys_munmap
+	.long	sys_truncate
+	.long	sys_ftruncate
+	.long	sys_fchmod
+	.long	sys_fchown		/* 95 */
+	.long	sys_getpriority
+	.long	sys_setpriority
+	.long	sys_wait4
+	.long	sys_statfs
+	.long	sys_fstatfs		/* 100 */
+	.long	sys_vhangup
+	.long	__sys_sigaltstack
+	.long	sys_syslog
+	.long	sys_setitimer
+	.long	sys_getitimer		/* 105 */
+	.long	sys_swapoff
+	.long	sys_sysinfo
+	.long	sys_ipc
+	.long	sys_sendfile
+	.long	sys_setdomainname	/* 110 */
+	.long	sys_newuname
+	.long	sys_adjtimex
+	.long	sys_mprotect
+	.long	__sys_vfork
+	.long	sys_init_module		/* 115 */
+	.long	sys_delete_module
+	.long	sys_quotactl
+	.long	sys_getpgid
+	.long	sys_bdflush
+	.long	sys_sysfs		/* 120 */
+	.long	sys_personality
+	.long	sys_ni_syscall		/* reserved for afs_syscall */
+	.long	sys_getdents
+	.long	sys_flock
+	.long	sys_msync		/* 125 */
+	.long	sys_readv
+	.long	sys_writev
+	.long	sys_getsid
+	.long	sys_fdatasync
+	.long	sys_sysctl		/* 130 */
+	.long	sys_mlock
+	.long	sys_munlock
+	.long	sys_mlockall
+	.long	sys_munlockall
+	.long	sys_sched_setparam		/* 135 */
+	.long	sys_sched_getparam
+	.long	sys_sched_setscheduler
+	.long	sys_sched_getscheduler
+	.long	sys_sched_yield
+	.long	sys_sched_get_priority_max	/* 140 */
+	.long	sys_sched_get_priority_min
+	.long	sys_sched_rr_get_interval
+	.long	sys_nanosleep
+	.long	sys_poll
+	.long	sys_nfsservctl		/* 145 */
+	.long	sys_setresgid
+	.long	sys_getresgid
+	.long	sys_prctl
+	.long	sys_socket
+	.long	sys_bind		/* 150 */
+	.long	sys_connect
+	.long	sys_listen
+	.long	sys_accept
+	.long	sys_getsockname
+	.long	sys_getpeername		/* 155 */
+	.long	sys_socketpair
+	.long	sys_send
+	.long	sys_recv
+	.long	__sys_sendto
+	.long	__sys_recvfrom		/* 160 */
+	.long	sys_shutdown
+	.long	sys_setsockopt
+	.long	sys_getsockopt
+	.long	sys_sendmsg
+	.long	sys_recvmsg		/* 165 */
+	.long	sys_truncate64
+	.long	sys_ftruncate64
+	.long	sys_stat64
+	.long	sys_lstat64
+	.long	sys_fstat64		/* 170 */
+	.long	sys_pivot_root
+	.long	sys_mincore
+	.long	sys_madvise
+	.long	sys_getdents64
+	.long	sys_fcntl64		/* 175 */
+	.long	sys_gettid
+	.long	sys_readahead
+	.long	sys_setxattr
+	.long	sys_lsetxattr
+	.long	sys_fsetxattr		/* 180 */
+	.long	sys_getxattr
+	.long	sys_lgetxattr
+	.long	sys_fgetxattr
+	.long	sys_listxattr
+	.long	sys_llistxattr		/* 185 */
+	.long	sys_flistxattr
+	.long	sys_removexattr
+	.long	sys_lremovexattr
+	.long	sys_fremovexattr
+	.long	sys_tkill		/* 190 */
+	.long	sys_sendfile64
+	.long	sys_futex
+	.long	sys_sched_setaffinity
+	.long	sys_sched_getaffinity
+	.long	sys_capget		/* 195 */
+	.long	sys_capset
+	.long	sys_io_setup
+	.long	sys_io_destroy
+	.long	sys_io_getevents
+	.long	sys_io_submit		/* 200 */
+	.long	sys_io_cancel
+	.long	sys_fadvise64
+	.long	sys_exit_group
+	.long	sys_lookup_dcookie
+	.long	sys_epoll_create	/* 205 */
+	.long	sys_epoll_ctl
+	.long	sys_epoll_wait
+	.long	sys_remap_file_pages
+	.long	sys_set_tid_address
+	.long	sys_timer_create	/* 210 */
+	.long	sys_timer_settime
+	.long	sys_timer_gettime
+	.long	sys_timer_getoverrun
+	.long	sys_timer_delete
+	.long	sys_clock_settime	/* 215 */
+	.long	sys_clock_gettime
+	.long	sys_clock_getres
+	.long	sys_clock_nanosleep
+	.long	sys_statfs64
+	.long	sys_fstatfs64		/* 220 */
+	.long	sys_tgkill
+	.long	sys_ni_syscall		/* reserved for TUX */
+	.long	sys_utimes
+	.long	sys_fadvise64_64
+	.long	sys_cacheflush		/* 225 */
+	.long	sys_ni_syscall		/* sys_vserver */
+	.long	sys_mq_open
+	.long	sys_mq_unlink
+	.long	sys_mq_timedsend
+	.long	sys_mq_timedreceive	/* 230 */
+	.long	sys_mq_notify
+	.long	sys_mq_getsetattr
+	.long	sys_kexec_load
+	.long	sys_waitid
+	.long	sys_add_key		/* 235 */
+	.long	sys_request_key
+	.long	sys_keyctl
+	.long	sys_ioprio_set
+	.long	sys_ioprio_get
+	.long	sys_inotify_init	/* 240 */
+	.long	sys_inotify_add_watch
+	.long	sys_inotify_rm_watch
+	.long	sys_openat
+	.long	sys_mkdirat
+	.long	sys_mknodat		/* 245 */
+	.long	sys_fchownat
+	.long	sys_futimesat
+	.long	sys_fstatat64
+	.long	sys_unlinkat
+	.long	sys_renameat		/* 250 */
+	.long	sys_linkat
+	.long	sys_symlinkat
+	.long	sys_readlinkat
+	.long	sys_fchmodat
+	.long	sys_faccessat		/* 255 */
+	.long	__sys_pselect6
+	.long	sys_ppoll
+	.long	sys_unshare
+	.long	sys_set_robust_list
+	.long	sys_get_robust_list	/* 260 */
+	.long	__sys_splice
+	.long	sys_sync_file_range
+	.long	sys_tee
+	.long	sys_vmsplice
+	.long	sys_ni_syscall		/* r8 is saturated at nr_syscalls */
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
new file mode 100644
index 000000000000..b0e6b5855a38
--- /dev/null
+++ b/arch/avr32/kernel/time.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on MIPS implementation arch/mips/kernel/time.c
+ *   Copyright 2001 MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/clocksource.h>
+#include <linux/time.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/profile.h>
+#include <linux/sysdev.h>
+
+#include <asm/div64.h>
+#include <asm/sysreg.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+
+static cycle_t read_cycle_count(void)
+{
+	return (cycle_t)sysreg_read(COUNT);
+}
+
+static struct clocksource clocksource_avr32 = {
+	.name		= "avr32",
+	.rating		= 350,
+	.read		= read_cycle_count,
+	.mask		= CLOCKSOURCE_MASK(32),
+	.shift		= 16,
+	.is_continuous	= 1,
+};
+
+/*
+ * By default we provide the null RTC ops
+ */
+static unsigned long null_rtc_get_time(void)
+{
+	return mktime(2004, 1, 1, 0, 0, 0);
+}
+
+static int null_rtc_set_time(unsigned long sec)
+{
+	return 0;
+}
+
+static unsigned long (*rtc_get_time)(void) = null_rtc_get_time;
+static int (*rtc_set_time)(unsigned long) = null_rtc_set_time;
+
+/* how many counter cycles in a jiffy? */
+static unsigned long cycles_per_jiffy;
+
+/* cycle counter value at the previous timer interrupt */
+static unsigned int timerhi, timerlo;
+
+/* the count value for the next timer interrupt */
+static unsigned int expirelo;
+
+static void avr32_timer_ack(void)
+{
+	unsigned int count;
+
+	/* Ack this timer interrupt and set the next one */
+	expirelo += cycles_per_jiffy;
+	if (expirelo == 0) {
+		printk(KERN_DEBUG "expirelo == 0\n");
+		sysreg_write(COMPARE, expirelo + 1);
+	} else {
+		sysreg_write(COMPARE, expirelo);
+	}
+
+	/* Check to see if we have missed any timer interrupts */
+	count = sysreg_read(COUNT);
+	if ((count - expirelo) < 0x7fffffff) {
+		expirelo = count + cycles_per_jiffy;
+		sysreg_write(COMPARE, expirelo);
+	}
+}
+
+static unsigned int avr32_hpt_read(void)
+{
+	return sysreg_read(COUNT);
+}
+
+/*
+ * Taken from MIPS c0_hpt_timer_init().
+ *
+ * Why is it so complicated, and what is "count"?  My assumption is
+ * that `count' specifies the "reference cycle", i.e. the cycle since
+ * reset that should mean "zero". The reason COUNT is written twice is
+ * probably to make sure we don't get any timer interrupts while we
+ * are messing with the counter.
+ */
+static void avr32_hpt_init(unsigned int count)
+{
+	count = sysreg_read(COUNT) - count;
+	expirelo = (count / cycles_per_jiffy + 1) * cycles_per_jiffy;
+	sysreg_write(COUNT, expirelo - cycles_per_jiffy);
+	sysreg_write(COMPARE, expirelo);
+	sysreg_write(COUNT, count);
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+	/* There must be better ways...? */
+	return (unsigned long long)jiffies * (1000000000 / HZ);
+}
+
+/*
+ * local_timer_interrupt() does profiling and process accounting on a
+ * per-CPU basis.
+ *
+ * In UP mode, it is invoked from the (global) timer_interrupt.
+ */
+static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	if (current->pid)
+		profile_tick(CPU_PROFILING, regs);
+	update_process_times(user_mode(regs));
+}
+
+static irqreturn_t
+timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned int count;
+
+	/* ack timer interrupt and try to set next interrupt */
+	count = avr32_hpt_read();
+	avr32_timer_ack();
+
+	/* Update timerhi/timerlo for intra-jiffy calibration */
+	timerhi += count < timerlo;	/* Wrap around */
+	timerlo = count;
+
+	/*
+	 * Call the generic timer interrupt handler
+	 */
+	write_seqlock(&xtime_lock);
+	do_timer(regs);
+	write_sequnlock(&xtime_lock);
+
+	/*
+	 * In UP mode, we call local_timer_interrupt() to do profiling
+	 * and process accounting.
+	 *
+	 * SMP is not supported yet.
+	 */
+	local_timer_interrupt(irq, dev_id, regs);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction timer_irqaction = {
+	.handler	= timer_interrupt,
+	.flags		= IRQF_DISABLED,
+	.name		= "timer",
+};
+
+void __init time_init(void)
+{
+	unsigned long mult, shift, count_hz;
+	int ret;
+
+	xtime.tv_sec = rtc_get_time();
+	xtime.tv_nsec = 0;
+
+	set_normalized_timespec(&wall_to_monotonic,
+				-xtime.tv_sec, -xtime.tv_nsec);
+
+	printk("Before time_init: count=%08lx, compare=%08lx\n",
+	       (unsigned long)sysreg_read(COUNT),
+	       (unsigned long)sysreg_read(COMPARE));
+
+	count_hz = clk_get_rate(boot_cpu_data.clk);
+	shift = clocksource_avr32.shift;
+	mult = clocksource_hz2mult(count_hz, shift);
+	clocksource_avr32.mult = mult;
+
+	printk("Cycle counter: mult=%lu, shift=%lu\n", mult, shift);
+
+	{
+		u64 tmp;
+
+		tmp = TICK_NSEC;
+		tmp <<= shift;
+		tmp += mult / 2;
+		do_div(tmp, mult);
+
+		cycles_per_jiffy = tmp;
+	}
+
+	/* This sets up the high precision timer for the first interrupt. */
+	avr32_hpt_init(avr32_hpt_read());
+
+	printk("After time_init: count=%08lx, compare=%08lx\n",
+	       (unsigned long)sysreg_read(COUNT),
+	       (unsigned long)sysreg_read(COMPARE));
+
+	ret = clocksource_register(&clocksource_avr32);
+	if (ret)
+		printk(KERN_ERR
+		       "timer: could not register clocksource: %d\n", ret);
+
+	ret = setup_irq(0, &timer_irqaction);
+	if (ret)
+		printk("timer: could not request IRQ 0: %d\n", ret);
+}
+
+static struct sysdev_class timer_class = {
+	set_kset_name("timer"),
+};
+
+static struct sys_device timer_device = {
+	.id	= 0,
+	.cls	= &timer_class,
+};
+
+static int __init init_timer_sysfs(void)
+{
+	int err = sysdev_class_register(&timer_class);
+	if (!err)
+		err = sysdev_register(&timer_device);
+	return err;
+}
+
+device_initcall(init_timer_sysfs);
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
new file mode 100644
index 000000000000..7e803f4d7a12
--- /dev/null
+++ b/arch/avr32/kernel/traps.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#undef DEBUG
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+
+#include <asm/traps.h>
+#include <asm/sysreg.h>
+#include <asm/addrspace.h>
+#include <asm/ocd.h>
+#include <asm/mmu_context.h>
+#include <asm/uaccess.h>
+
+static void dump_mem(const char *str, unsigned long bottom, unsigned long top)
+{
+	unsigned long p;
+	int i;
+
+	printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top);
+
+	for (p = bottom & ~31; p < top; ) {
+		printk("%04lx: ", p & 0xffff);
+
+		for (i = 0; i < 8; i++, p += 4) {
+			unsigned int val;
+
+			if (p < bottom || p >= top)
+				printk("         ");
+			else {
+				if (__get_user(val, (unsigned int __user *)p)) {
+					printk("\n");
+					goto out;
+				}
+				printk("%08x ", val);
+			}
+		}
+		printk("\n");
+	}
+
+out:
+	return;
+}
+
+#ifdef CONFIG_FRAME_POINTER
+static inline void __show_trace(struct task_struct *tsk, unsigned long *sp,
+				struct pt_regs *regs)
+{
+	unsigned long __user *fp;
+	unsigned long __user *last_fp = NULL;
+
+	if (regs) {
+		fp = (unsigned long __user *)regs->r7;
+	} else if (tsk == current) {
+		register unsigned long __user *real_fp __asm__("r7");
+		fp = real_fp;
+	} else {
+		fp = (unsigned long __user *)tsk->thread.cpu_context.r7;
+	}
+
+	/*
+	 * Walk the stack until (a) we get an exception, (b) the frame
+	 * pointer becomes zero, or (c) the frame pointer gets stuck
+	 * at the same value.
+	 */
+	while (fp && fp != last_fp) {
+		unsigned long lr, new_fp = 0;
+
+		last_fp = fp;
+		if (__get_user(lr, fp))
+			break;
+		if (fp && __get_user(new_fp, fp + 1))
+			break;
+		fp = (unsigned long __user *)new_fp;
+
+		printk(" [<%08lx>] ", lr);
+		print_symbol("%s\n", lr);
+	}
+	printk("\n");
+}
+#else
+static inline void __show_trace(struct task_struct *tsk, unsigned long *sp,
+				struct pt_regs *regs)
+{
+	unsigned long addr;
+
+	while (!kstack_end(sp)) {
+		addr = *sp++;
+		if (kernel_text_address(addr)) {
+			printk(" [<%08lx>] ", addr);
+			print_symbol("%s\n", addr);
+		}
+	}
+}
+#endif
+
+void show_trace(struct task_struct *tsk, unsigned long *sp,
+		       struct pt_regs *regs)
+{
+	if (regs &&
+	    (((regs->sr & MODE_MASK) == MODE_EXCEPTION) ||
+	     ((regs->sr & MODE_MASK) == MODE_USER)))
+		return;
+
+	printk ("Call trace:");
+#ifdef CONFIG_KALLSYMS
+	printk("\n");
+#endif
+
+	__show_trace(tsk, sp, regs);
+	printk("\n");
+}
+
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+	unsigned long stack;
+
+	if (!tsk)
+		tsk = current;
+	if (sp == 0) {
+		if (tsk == current) {
+			register unsigned long *real_sp __asm__("sp");
+			sp = real_sp;
+		} else {
+			sp = (unsigned long *)tsk->thread.cpu_context.ksp;
+		}
+	}
+
+	stack = (unsigned long)sp;
+	dump_mem("Stack: ", stack,
+		 THREAD_SIZE + (unsigned long)tsk->thread_info);
+	show_trace(tsk, sp, NULL);
+}
+
+void dump_stack(void)
+{
+	show_stack(NULL, NULL);
+}
+EXPORT_SYMBOL(dump_stack);
+
+ATOMIC_NOTIFIER_HEAD(avr32_die_chain);
+
+int register_die_notifier(struct notifier_block *nb)
+{
+	pr_debug("register_die_notifier: %p\n", nb);
+
+	return atomic_notifier_chain_register(&avr32_die_chain, nb);
+}
+EXPORT_SYMBOL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&avr32_die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
+static DEFINE_SPINLOCK(die_lock);
+
+void __die(const char *str, struct pt_regs *regs, unsigned long err,
+	   const char *file, const char *func, unsigned long line)
+{
+	struct task_struct *tsk = current;
+	static int die_counter;
+
+	console_verbose();
+	spin_lock_irq(&die_lock);
+	bust_spinlocks(1);
+
+	printk(KERN_ALERT "%s", str);
+	if (file && func)
+		printk(" in %s:%s, line %ld", file, func, line);
+	printk("[#%d]:\n", ++die_counter);
+	print_modules();
+	show_regs(regs);
+	printk("Process %s (pid: %d, stack limit = 0x%p)\n",
+	       tsk->comm, tsk->pid, tsk->thread_info + 1);
+
+	if (!user_mode(regs) || in_interrupt()) {
+		dump_mem("Stack: ", regs->sp,
+			 THREAD_SIZE + (unsigned long)tsk->thread_info);
+	}
+
+	bust_spinlocks(0);
+	spin_unlock_irq(&die_lock);
+	do_exit(SIGSEGV);
+}
+
+void __die_if_kernel(const char *str, struct pt_regs *regs, unsigned long err,
+		     const char *file, const char *func, unsigned long line)
+{
+	if (!user_mode(regs))
+		__die(str, regs, err, file, func, line);
+}
+
+asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs)
+{
+#ifdef CONFIG_SUBARCH_AVR32B
+	/*
+	 * The exception entry always saves RSR_EX. For NMI, this is
+	 * wrong; it should be RSR_NMI
+	 */
+	regs->sr = sysreg_read(RSR_NMI);
+#endif
+
+	printk("NMI taken!!!!\n");
+	die("NMI", regs, ecr);
+	BUG();
+}
+
+asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs)
+{
+	printk("Unable to handle critical exception %lu at pc = %08lx!\n",
+	       ecr, regs->pc);
+	die("Oops", regs, ecr);
+	BUG();
+}
+
+asmlinkage void do_address_exception(unsigned long ecr, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	die_if_kernel("Oops: Address exception in kernel mode", regs, ecr);
+
+#ifdef DEBUG
+	if (ecr == ECR_ADDR_ALIGN_X)
+		pr_debug("Instruction Address Exception at pc = %08lx\n",
+			 regs->pc);
+	else if (ecr == ECR_ADDR_ALIGN_R)
+		pr_debug("Data Address Exception (Read) at pc = %08lx\n",
+			 regs->pc);
+	else if (ecr == ECR_ADDR_ALIGN_W)
+		pr_debug("Data Address Exception (Write) at pc = %08lx\n",
+			 regs->pc);
+	else
+		BUG();
+
+	show_regs(regs);
+#endif
+
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRALN;
+	info.si_addr = (void __user *)regs->pc;
+
+	force_sig_info(SIGBUS, &info, current);
+}
+
+/* This way of handling undefined instructions is stolen from ARM */
+static LIST_HEAD(undef_hook);
+static spinlock_t undef_lock = SPIN_LOCK_UNLOCKED;
+
+void register_undef_hook(struct undef_hook *hook)
+{
+	spin_lock_irq(&undef_lock);
+	list_add(&hook->node, &undef_hook);
+	spin_unlock_irq(&undef_lock);
+}
+
+void unregister_undef_hook(struct undef_hook *hook)
+{
+	spin_lock_irq(&undef_lock);
+	list_del(&hook->node);
+	spin_unlock_irq(&undef_lock);
+}
+
+static int do_cop_absent(u32 insn)
+{
+	int cop_nr;
+	u32 cpucr;
+	if ( (insn & 0xfdf00000) == 0xf1900000 )
+		/* LDC0 */
+		cop_nr = 0;
+	else
+		cop_nr = (insn >> 13) & 0x7;
+
+	/* Try enabling the coprocessor */
+	cpucr = sysreg_read(CPUCR);
+	cpucr |= (1 << (24 + cop_nr));
+	sysreg_write(CPUCR, cpucr);
+
+	cpucr = sysreg_read(CPUCR);
+	if ( !(cpucr & (1 << (24 + cop_nr))) ){
+		printk("Coprocessor #%i not found!\n", cop_nr);
+		return -1;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_BUG
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+static inline void do_bug_verbose(struct pt_regs *regs, u32 insn)
+{
+	char *file;
+	u16 line;
+	char c;
+
+	if (__get_user(line, (u16 __user *)(regs->pc + 2)))
+		return;
+	if (__get_user(file, (char * __user *)(regs->pc + 4))
+	    || (unsigned long)file < PAGE_OFFSET
+	    || __get_user(c, file))
+		file = "<bad filename>";
+
+	printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line);
+}
+#else
+static inline void do_bug_verbose(struct pt_regs *regs, u32 insn)
+{
+
+}
+#endif
+#endif
+
+asmlinkage void do_illegal_opcode(unsigned long ecr, struct pt_regs *regs)
+{
+	u32 insn;
+	struct undef_hook *hook;
+	siginfo_t info;
+	void __user *pc;
+
+	if (!user_mode(regs))
+		goto kernel_trap;
+
+	local_irq_enable();
+
+	pc = (void __user *)instruction_pointer(regs);
+	if (__get_user(insn, (u32 __user *)pc))
+		goto invalid_area;
+
+        if (ecr == ECR_COPROC_ABSENT) {
+		if (do_cop_absent(insn) == 0)
+			return;
+        }
+
+	spin_lock_irq(&undef_lock);
+	list_for_each_entry(hook, &undef_hook, node) {
+		if ((insn & hook->insn_mask) == hook->insn_val) {
+			if (hook->fn(regs, insn) == 0) {
+				spin_unlock_irq(&undef_lock);
+				return;
+			}
+		}
+	}
+	spin_unlock_irq(&undef_lock);
+
+invalid_area:
+
+#ifdef DEBUG
+	printk("Illegal instruction at pc = %08lx\n", regs->pc);
+	if (regs->pc < TASK_SIZE) {
+		unsigned long ptbr, pgd, pte, *p;
+
+		ptbr = sysreg_read(PTBR);
+		p = (unsigned long *)ptbr;
+		pgd = p[regs->pc >> 22];
+		p = (unsigned long *)((pgd & 0x1ffff000) | 0x80000000);
+		pte = p[(regs->pc >> 12) & 0x3ff];
+		printk("page table: 0x%08lx -> 0x%08lx -> 0x%08lx\n", ptbr, pgd, pte);
+	}
+#endif
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_addr = (void __user *)regs->pc;
+	switch (ecr) {
+	case ECR_ILLEGAL_OPCODE:
+	case ECR_UNIMPL_INSTRUCTION:
+		info.si_code = ILL_ILLOPC;
+		break;
+	case ECR_PRIVILEGE_VIOLATION:
+		info.si_code = ILL_PRVOPC;
+		break;
+	case ECR_COPROC_ABSENT:
+		info.si_code = ILL_COPROC;
+		break;
+	default:
+		BUG();
+	}
+
+	force_sig_info(SIGILL, &info, current);
+	return;
+
+kernel_trap:
+#ifdef CONFIG_BUG
+	if (__kernel_text_address(instruction_pointer(regs))) {
+		insn = *(u16 *)instruction_pointer(regs);
+		if (insn == AVR32_BUG_OPCODE) {
+			do_bug_verbose(regs, insn);
+			die("Kernel BUG", regs, 0);
+			return;
+		}
+	}
+#endif
+
+	die("Oops: Illegal instruction in kernel code", regs, ecr);
+}
+
+asmlinkage void do_fpe(unsigned long ecr, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	printk("Floating-point exception at pc = %08lx\n", regs->pc);
+
+	/* We have no FPU... */
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_addr = (void __user *)regs->pc;
+	info.si_code = ILL_COPROC;
+
+	force_sig_info(SIGILL, &info, current);
+}
+
+
+void __init trap_init(void)
+{
+
+}
diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c
new file mode 100644
index 000000000000..cdd627c6b7dc
--- /dev/null
+++ b/arch/avr32/kernel/vmlinux.lds.c
@@ -0,0 +1,139 @@
+/*
+ * AVR32 linker script for the Linux kernel
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define LOAD_OFFSET 0x00000000
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf32-avr32", "elf32-avr32", "elf32-avr32")
+OUTPUT_ARCH(avr32)
+ENTRY(_start)
+
+/* Big endian */
+jiffies = jiffies_64 + 4;
+
+SECTIONS
+{
+	. = CONFIG_ENTRY_ADDRESS;
+	.init		: AT(ADDR(.init) - LOAD_OFFSET) {
+		_stext = .;
+		__init_begin = .;
+			_sinittext = .;
+			*(.text.reset)
+			*(.init.text)
+			_einittext = .;
+		. = ALIGN(4);
+		__tagtable_begin = .;
+			*(.taglist)
+		__tagtable_end = .;
+			*(.init.data)
+		. = ALIGN(16);
+		__setup_start = .;
+			*(.init.setup)
+		__setup_end = .;
+		. = ALIGN(4);
+		__initcall_start = .;
+			*(.initcall1.init)
+			*(.initcall2.init)
+			*(.initcall3.init)
+			*(.initcall4.init)
+			*(.initcall5.init)
+			*(.initcall6.init)
+			*(.initcall7.init)
+		__initcall_end = .;
+		__con_initcall_start = .;
+			*(.con_initcall.init)
+		__con_initcall_end = .;
+		__security_initcall_start = .;
+			*(.security_initcall.init)
+		__security_initcall_end = .;
+		. = ALIGN(32);
+		__initramfs_start = .;
+			*(.init.ramfs)
+		__initramfs_end = .;
+		. = ALIGN(4096);
+		__init_end = .;
+	}
+
+	. = ALIGN(8192);
+	.text		: AT(ADDR(.text) - LOAD_OFFSET) {
+		_evba = .;
+		_text = .;
+		*(.ex.text)
+		. = 0x50;
+		*(.tlbx.ex.text)
+		. = 0x60;
+		*(.tlbr.ex.text)
+		. = 0x70;
+		*(.tlbw.ex.text)
+		. = 0x100;
+		*(.scall.text)
+		*(.irq.text)
+		*(.text)
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+		_etext = .;
+	} = 0xd703d703
+
+	. = ALIGN(4);
+	__ex_table	: AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+	}
+
+	RODATA
+
+	. = ALIGN(8192);
+
+	.data		: AT(ADDR(.data) - LOAD_OFFSET) {
+		_data = .;
+		_sdata = .;
+		/*
+		 * First, the init task union, aligned to an 8K boundary.
+		 */
+		*(.data.init_task)
+
+		/* Then, the cacheline aligned data */
+		. = ALIGN(32);
+		*(.data.cacheline_aligned)
+
+		/* And the rest... */
+		*(.data.rel*)
+		*(.data)
+		CONSTRUCTORS
+
+		_edata = .;
+	}
+
+
+	. = ALIGN(8);
+	.bss    	: AT(ADDR(.bss) - LOAD_OFFSET) {
+		__bss_start = .;
+		*(.bss)
+		*(COMMON)
+		. = ALIGN(8);
+		__bss_stop = .;
+		_end = .;
+	}
+
+	/* When something in the kernel is NOT compiled as a module, the module
+	 * cleanup code and data are put into these segments. Both can then be
+	 * thrown away, as cleanup code is never called unless it's a module.
+	 */
+	/DISCARD/       	: {
+		*(.exit.text)
+		*(.exit.data)
+		*(.exitcall.exit)
+	}
+
+	DWARF_DEBUG
+}
diff --git a/arch/avr32/lib/Makefile b/arch/avr32/lib/Makefile
new file mode 100644
index 000000000000..09ac43e40522
--- /dev/null
+++ b/arch/avr32/lib/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for AVR32-specific library files
+#
+
+lib-y	:= copy_user.o clear_user.o
+lib-y	+= strncpy_from_user.o strnlen_user.o
+lib-y	+= delay.o memset.o memcpy.o findbit.o
+lib-y	+= csum_partial.o csum_partial_copy_generic.o
+lib-y	+= io-readsw.o io-readsl.o io-writesw.o io-writesl.o
+lib-y	+= __avr32_lsl64.o __avr32_lsr64.o __avr32_asr64.o
diff --git a/arch/avr32/lib/__avr32_asr64.S b/arch/avr32/lib/__avr32_asr64.S
new file mode 100644
index 000000000000..368b6bca4c76
--- /dev/null
+++ b/arch/avr32/lib/__avr32_asr64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * DWtype __avr32_asr64(DWtype u, word_type b)
+	 */
+	.text
+	.global	__avr32_asr64
+	.type	__avr32_asr64,@function
+__avr32_asr64:
+	cp.w	r12, 0
+	reteq	r12
+
+	rsub	r9, r12, 32
+	brle	1f
+
+	lsl	r8, r11, r9
+	lsr	r10, r10, r12
+	asr	r11, r11, r12
+	or	r10, r8
+	retal	r12
+
+1:	neg	r9
+	asr	r10, r11, r9
+	asr	r11, 31
+	retal	r12
diff --git a/arch/avr32/lib/__avr32_lsl64.S b/arch/avr32/lib/__avr32_lsl64.S
new file mode 100644
index 000000000000..f1dbc2b36257
--- /dev/null
+++ b/arch/avr32/lib/__avr32_lsl64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * DWtype __avr32_lsl64(DWtype u, word_type b)
+	 */
+	.text
+	.global	__avr32_lsl64
+	.type	__avr32_lsl64,@function
+__avr32_lsl64:
+	cp.w	r12, 0
+	reteq	r12
+
+	rsub	r9, r12, 32
+	brle	1f
+
+	lsr	r8, r10, r9
+	lsl	r10, r10, r12
+	lsl	r11, r11, r12
+	or	r11, r8
+	retal	r12
+
+1:	neg	r9
+	lsl	r11, r10, r9
+	mov	r10, 0
+	retal	r12
diff --git a/arch/avr32/lib/__avr32_lsr64.S b/arch/avr32/lib/__avr32_lsr64.S
new file mode 100644
index 000000000000..e65bb7f0d24c
--- /dev/null
+++ b/arch/avr32/lib/__avr32_lsr64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * DWtype __avr32_lsr64(DWtype u, word_type b)
+	 */
+	.text
+	.global	__avr32_lsr64
+	.type	__avr32_lsr64,@function
+__avr32_lsr64:
+	cp.w	r12, 0
+	reteq	r12
+
+	rsub	r9, r12, 32
+	brle	1f
+
+	lsl	r8, r11, r9
+	lsr	r11, r11, r12
+	lsr	r10, r10, r12
+	or	r10, r8
+	retal	r12
+
+1:	neg	r9
+	lsr	r10, r11, r9
+	mov	r11, 0
+	retal	r12
diff --git a/arch/avr32/lib/clear_user.S b/arch/avr32/lib/clear_user.S
new file mode 100644
index 000000000000..d8991b6f8eb7
--- /dev/null
+++ b/arch/avr32/lib/clear_user.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+	.text
+	.align	1
+	.global	clear_user
+	.type	clear_user, "function"
+clear_user:
+	branch_if_kernel r8, __clear_user
+	ret_if_privileged r8, r12, r11, r11
+
+	.global	__clear_user
+	.type	__clear_user, "function"
+__clear_user:
+	mov	r9, r12
+	mov	r8, 0
+	andl	r9, 3, COH
+	brne	5f
+
+1:	sub	r11, 4
+	brlt	2f
+
+10:	st.w	r12++, r8
+	sub	r11, 4
+	brge	10b
+
+2:	sub	r11, -4
+	reteq	0
+
+	/* Unaligned count or address */
+	bld	r11, 1
+	brcc	12f
+11:	st.h	r12++, r8
+	sub	r11, 2
+	reteq	0
+12:	st.b	r12++, r8
+	retal	0
+
+	/* Unaligned address */
+5:	cp.w	r11, 4
+	brlt	2b
+
+	lsl	r9, 2
+	add	pc, pc, r9
+13:	st.b	r12++, r8
+	sub	r11, 1
+14:	st.b	r12++, r8
+	sub	r11, 1
+15:	st.b	r12++, r8
+	sub	r11, 1
+	rjmp	1b
+
+	.size	clear_user, . - clear_user
+	.size	__clear_user, . - __clear_user
+
+	.section .fixup, "ax"
+	.align	1
+18:	sub	r11, -4
+19:	retal	r11
+
+	.section __ex_table, "a"
+	.align	2
+	.long	10b, 18b
+	.long	11b, 19b
+	.long	12b, 19b
+	.long	13b, 19b
+	.long	14b, 19b
+	.long	15b, 19b
diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S
new file mode 100644
index 000000000000..ea59c04b07de
--- /dev/null
+++ b/arch/avr32/lib/copy_user.S
@@ -0,0 +1,119 @@
+/*
+ * Copy to/from userspace with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+	/*
+	 * __kernel_size_t
+	 * __copy_user(void *to, const void *from, __kernel_size_t n)
+	 *
+	 * Returns the number of bytes not copied. Might be off by
+	 * max 3 bytes if we get a fault in the main loop.
+	 *
+	 * The address-space checking functions simply fall through to
+	 * the non-checking version.
+	 */
+	.text
+	.align	1
+	.global	copy_from_user
+	.type	copy_from_user, @function
+copy_from_user:
+	branch_if_kernel r8, __copy_user
+	ret_if_privileged r8, r11, r10, r10
+	rjmp	__copy_user
+	.size	copy_from_user, . - copy_from_user
+
+	.global	copy_to_user
+	.type	copy_to_user, @function
+copy_to_user:
+	branch_if_kernel r8, __copy_user
+	ret_if_privileged r8, r12, r10, r10
+	.size	copy_to_user, . - copy_to_user
+
+	.global	__copy_user
+	.type	__copy_user, @function
+__copy_user:
+	mov	r9, r11
+	andl	r9, 3, COH
+	brne	6f
+
+	/* At this point, from is word-aligned */
+1:	sub	r10, 4
+	brlt	3f
+
+2:
+10:	ld.w	r8, r11++
+11:	st.w	r12++, r8
+	sub	r10, 4
+	brge	2b
+
+3:	sub	r10, -4
+	reteq	0
+
+	/*
+	 * Handle unaligned count. Need to be careful with r10 here so
+	 * that we return the correct value even if we get a fault
+	 */
+4:
+20:	ld.ub	r8, r11++
+21:	st.b	r12++, r8
+	sub	r10, 1
+	reteq	0
+22:	ld.ub	r8, r11++
+23:	st.b	r12++, r8
+	sub	r10, 1
+	reteq	0
+24:	ld.ub	r8, r11++
+25:	st.b	r12++, r8
+	retal	0
+
+	/* Handle unaligned from-pointer */
+6:	cp.w	r10, 4
+	brlt	4b
+	rsub	r9, r9, 4
+
+30:	ld.ub	r8, r11++
+31:	st.b	r12++, r8
+	sub	r10, 1
+	sub	r9, 1
+	breq	1b
+32:	ld.ub	r8, r11++
+33:	st.b	r12++, r8
+	sub	r10, 1
+	sub	r9, 1
+	breq	1b
+34:	ld.ub	r8, r11++
+35:	st.b	r12++, r8
+	sub	r10, 1
+	rjmp	1b
+	.size	__copy_user, . - __copy_user
+
+	.section .fixup,"ax"
+	.align	1
+19:	sub	r10, -4
+29:	retal	r10
+
+	.section __ex_table,"a"
+	.align	2
+	.long	10b, 19b
+	.long	11b, 19b
+	.long	20b, 29b
+	.long	21b, 29b
+	.long	22b, 29b
+	.long	23b, 29b
+	.long	24b, 29b
+	.long	25b, 29b
+	.long	30b, 29b
+	.long	31b, 29b
+	.long	32b, 29b
+	.long	33b, 29b
+	.long	34b, 29b
+	.long	35b, 29b
diff --git a/arch/avr32/lib/csum_partial.S b/arch/avr32/lib/csum_partial.S
new file mode 100644
index 000000000000..6a262b528eb7
--- /dev/null
+++ b/arch/avr32/lib/csum_partial.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * unsigned int csum_partial(const unsigned char *buff,
+	 *			     int len, unsigned int sum)
+	 */
+	.text
+	.global	csum_partial
+	.type	csum_partial,"function"
+	.align	1
+csum_partial:
+	/* checksum complete words, aligned or not */
+3:	sub	r11, 4
+	brlt	5f
+4:	ld.w	r9, r12++
+	add	r10, r9
+	acr	r10
+	sub	r11, 4
+	brge	4b
+
+	/* return if we had a whole number of words */
+5:	sub	r11, -4
+	reteq	r10
+
+	/* checksum any remaining bytes at the end */
+	mov	r9, 0
+	mov	r8, 0
+	cp	r11, 2
+	brlt	6f
+	ld.uh	r9, r12++
+	sub	r11, 2
+	breq	7f
+	lsl	r9, 16
+6:	ld.ub	r8, r12++
+	lsl	r8, 8
+7:	or	r9, r8
+	add	r10, r9
+	acr	r10
+
+	retal	r10
+	.size	csum_partial, . - csum_partial
diff --git a/arch/avr32/lib/csum_partial_copy_generic.S b/arch/avr32/lib/csum_partial_copy_generic.S
new file mode 100644
index 000000000000..a3a0f9b8929c
--- /dev/null
+++ b/arch/avr32/lib/csum_partial_copy_generic.S
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/errno.h>
+#include <asm/asm.h>
+
+	/*
+	 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len
+	 *					  int sum, int *src_err_ptr,
+	 *					  int *dst_err_ptr)
+	 *
+	 * Copy src to dst while checksumming, otherwise like csum_partial.
+	 */
+
+	.macro ld_src size, reg, ptr
+9999:	ld.\size \reg, \ptr
+	.section __ex_table, "a"
+	.long	9999b, fixup_ld_src
+	.previous
+	.endm
+
+	.macro st_dst size, ptr, reg
+9999:	st.\size \ptr, \reg
+	.section __ex_table, "a"
+	.long	9999b, fixup_st_dst
+	.previous
+	.endm
+
+	.text
+	.global	csum_partial_copy_generic
+	.type	csum_partial_copy_generic,"function"
+	.align	1
+csum_partial_copy_generic:
+	pushm	r4-r7,lr
+
+	/* The inner loop */
+1:	sub	r10, 4
+	brlt	5f
+2:	ld_src	w, r5, r12++
+	st_dst	w, r11++, r5
+	add	r9, r5
+	acr	r9
+	sub	r10, 4
+	brge	2b
+
+	/* return if we had a whole number of words */
+5:	sub	r10, -4
+	brne	7f
+
+6:	mov	r12, r9
+	popm	r4-r7,pc
+
+	/* handle additional bytes at the tail */
+7:	mov	r5, 0
+	mov	r4, 32
+8:	ld_src	ub, r6, r12++
+	st_dst	b, r11++, r6
+	lsl	r5, 8
+	sub	r4, 8
+	bfins	r5, r6, 0, 8
+	sub	r10, 1
+	brne	8b
+
+	lsl	r5, r5, r4
+	add	r9, r5
+	acr	r9
+	rjmp	6b
+
+	/* Exception handler */
+	.section .fixup,"ax"
+	.align	1
+fixup_ld_src:
+	mov	r9, -EFAULT
+	cp.w	r8, 0
+	breq	1f
+	st.w	r8[0], r9
+
+1:	/*
+	 * TODO: zero the complete destination - computing the rest
+	 * is too much work
+	 */
+
+	mov	r9, 0
+	rjmp	6b
+
+fixup_st_dst:
+	mov	r9, -EFAULT
+	lddsp	r8, sp[20]
+	cp.w	r8, 0
+	breq	1f
+	st.w	r8[0], r9
+1:	mov	r9, 0
+	rjmp	6b
+
+	.previous
diff --git a/arch/avr32/lib/delay.c b/arch/avr32/lib/delay.c
new file mode 100644
index 000000000000..462c8307b680
--- /dev/null
+++ b/arch/avr32/lib/delay.c
@@ -0,0 +1,55 @@
+/*
+ *      Precise Delay Loops for avr32
+ *
+ *      Copyright (C) 1993 Linus Torvalds
+ *      Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *	Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/delay.h>
+#include <asm/processor.h>
+#include <asm/sysreg.h>
+
+int read_current_timer(unsigned long *timer_value)
+{
+	*timer_value = sysreg_read(COUNT);
+	return 0;
+}
+
+void __delay(unsigned long loops)
+{
+	unsigned bclock, now;
+
+	bclock = sysreg_read(COUNT);
+	do {
+		now = sysreg_read(COUNT);
+	} while ((now - bclock) < loops);
+}
+
+inline void __const_udelay(unsigned long xloops)
+{
+	unsigned long long loops;
+
+	asm("mulu.d %0, %1, %2"
+	    : "=r"(loops)
+	    : "r"(current_cpu_data.loops_per_jiffy * HZ), "r"(xloops));
+	__delay(loops >> 32);
+}
+
+void __udelay(unsigned long usecs)
+{
+	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+}
+
+void __ndelay(unsigned long nsecs)
+{
+	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+}
diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S
new file mode 100644
index 000000000000..2b4856f4bf7c
--- /dev/null
+++ b/arch/avr32/lib/findbit.S
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+
+	.text
+	/*
+	 * unsigned long find_first_zero_bit(const unsigned long *addr,
+	 *				     unsigned long size)
+	 */
+ENTRY(find_first_zero_bit)
+	cp.w	r11, 0
+	reteq	r11
+	mov	r9, r11
+1:	ld.w	r8, r12[0]
+	com	r8
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/*
+	 * unsigned long find_next_zero_bit(const unsigned long *addr,
+	 *				    unsigned long size,
+	 *				    unsigned long offset)
+	 */
+ENTRY(find_next_zero_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ld.w	r8, r12[0]
+	com	r8
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ld.w	r8, r12[0]
+	com	r8
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/* Common return path for when a bit is actually found. */
+.L_found:
+	brev	r8
+	clz	r10, r8
+	rsub	r9, r11
+	add	r10, r9
+
+	/* XXX: If we don't have to return exactly "size" when the bit
+	   is not found, we may drop this "min" thing */
+	min	r12, r11, r10
+	retal	r12
+
+	/*
+	 * unsigned long find_first_bit(const unsigned long *addr,
+	 *				unsigned long size)
+	 */
+ENTRY(find_first_bit)
+	cp.w	r11, 0
+	reteq	r11
+	mov	r9, r11
+1:	ld.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/*
+	 * unsigned long find_next_bit(const unsigned long *addr,
+	 *			       unsigned long size,
+	 *			       unsigned long offset)
+	 */
+ENTRY(find_next_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ld.w	r8, r12[0]
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ld.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+ENTRY(generic_find_next_zero_le_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ldswp.w	r8, r12[0]
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ldswp.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
diff --git a/arch/avr32/lib/io-readsl.S b/arch/avr32/lib/io-readsl.S
new file mode 100644
index 000000000000..b103511ed6c4
--- /dev/null
+++ b/arch/avr32/lib/io-readsl.S
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	.global	__raw_readsl
+	.type	__raw_readsl,@function
+__raw_readsl:
+	cp.w	r10, 0
+	reteq	r12
+
+	/*
+	 * If r11 isn't properly aligned, we might get an exception on
+	 * some implementations. But there's not much we can do about it.
+	 */
+1:	ld.w	r8, r12[0]
+	sub	r10, 1
+	st.w	r11++, r8
+	brne	1b
+
+	retal	r12
diff --git a/arch/avr32/lib/io-readsw.S b/arch/avr32/lib/io-readsw.S
new file mode 100644
index 000000000000..456be9909027
--- /dev/null
+++ b/arch/avr32/lib/io-readsw.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+.Lnot_word_aligned:
+	/*
+	 * Bad alignment will cause a hardware exception, which is as
+	 * good as anything. No need for us to check for proper alignment.
+	 */
+	ld.uh	r8, r12[0]
+	sub	r10, 1
+	st.h	r11++, r8
+
+	/* fall through */
+
+	.global	__raw_readsw
+	.type	__raw_readsw,@function
+__raw_readsw:
+	cp.w	r10, 0
+	reteq	r12
+	mov	r9, 3
+	tst	r11, r9
+	brne	.Lnot_word_aligned
+
+	sub	r10, 2
+	brlt	2f
+
+1:	ldins.h	r8:t, r12[0]
+	ldins.h	r8:b, r12[0]
+	st.w	r11++, r8
+	sub	r10, 2
+	brge	1b
+
+2:	sub	r10, -2
+	reteq	r12
+
+	ld.uh	r8, r12[0]
+	st.h	r11++, r8
+	retal	r12
diff --git a/arch/avr32/lib/io-writesl.S b/arch/avr32/lib/io-writesl.S
new file mode 100644
index 000000000000..22138b3a16e5
--- /dev/null
+++ b/arch/avr32/lib/io-writesl.S
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	.global	__raw_writesl
+	.type	__raw_writesl,@function
+__raw_writesl:
+	cp.w	r10, 0
+	reteq	r12
+
+1:	ld.w	r8, r11++
+	sub	r10, 1
+	st.w	r12[0], r8
+	brne	1b
+
+	retal	r12
diff --git a/arch/avr32/lib/io-writesw.S b/arch/avr32/lib/io-writesw.S
new file mode 100644
index 000000000000..8c4a53f1c52a
--- /dev/null
+++ b/arch/avr32/lib/io-writesw.S
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+.Lnot_word_aligned:
+	ld.uh	r8, r11++
+	sub	r10, 1
+	st.h	r12[0], r8
+
+	.global	__raw_writesw
+	.type	__raw_writesw,@function
+__raw_writesw:
+	cp.w	r10, 0
+	mov	r9, 3
+	reteq	r12
+	tst	r11, r9
+	brne	.Lnot_word_aligned
+
+	sub	r10, 2
+	brlt	2f
+
+1:	ld.w	r8, r11++
+	bfextu	r9, r8, 16, 16
+	st.h	r12[0], r9
+	st.h	r12[0], r8
+	sub	r10, 2
+	brge	1b
+
+2:	sub	r10, -2
+	reteq	r12
+
+	ld.uh	r8, r11++
+	st.h	r12[0], r8
+	retal	r12
diff --git a/arch/avr32/lib/libgcc.h b/arch/avr32/lib/libgcc.h
new file mode 100644
index 000000000000..5a091b5e3618
--- /dev/null
+++ b/arch/avr32/lib/libgcc.h
@@ -0,0 +1,33 @@
+/* Definitions for various functions 'borrowed' from gcc-3.4.3 */
+
+#define BITS_PER_UNIT	8
+
+typedef		 int QItype	__attribute__ ((mode (QI)));
+typedef unsigned int UQItype	__attribute__ ((mode (QI)));
+typedef		 int HItype	__attribute__ ((mode (HI)));
+typedef unsigned int UHItype	__attribute__ ((mode (HI)));
+typedef 	 int SItype	__attribute__ ((mode (SI)));
+typedef unsigned int USItype	__attribute__ ((mode (SI)));
+typedef		 int DItype	__attribute__ ((mode (DI)));
+typedef unsigned int UDItype	__attribute__ ((mode (DI)));
+typedef 	float SFtype	__attribute__ ((mode (SF)));
+typedef		float DFtype	__attribute__ ((mode (DF)));
+typedef int word_type __attribute__ ((mode (__word__)));
+
+#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
+#define Wtype	SItype
+#define UWtype	USItype
+#define HWtype	SItype
+#define UHWtype	USItype
+#define DWtype	DItype
+#define UDWtype	UDItype
+#define __NW(a,b)	__ ## a ## si ## b
+#define __NDW(a,b)	__ ## a ## di ## b
+
+struct DWstruct {Wtype high, low;};
+
+typedef union
+{
+  struct DWstruct s;
+  DWtype ll;
+} DWunion;
diff --git a/arch/avr32/lib/longlong.h b/arch/avr32/lib/longlong.h
new file mode 100644
index 000000000000..cd5e369ac437
--- /dev/null
+++ b/arch/avr32/lib/longlong.h
@@ -0,0 +1,98 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+   Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+   Free Software Foundation, Inc.
+
+   This definition file is free software; you can redistribute it
+   and/or modify it under the terms of the GNU General Public
+   License as published by the Free Software Foundation; either
+   version 2, or (at your option) any later version.
+
+   This definition file is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied
+   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* Borrowed from gcc-3.4.3 */
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+#define count_leading_zeros(count, x) ((count) = __builtin_clz(x))
+
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do {									\
+    UWtype __d1, __d0, __q1, __q0;					\
+    UWtype __r1, __r0, __m;						\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __r1 = (n1) % __d1;							\
+    __q1 = (n1) / __d1;							\
+    __m = (UWtype) __q1 * __d0;						\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)							\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __r0 = __r1 % __d1;							\
+    __q0 = __r1 / __d1;							\
+    __m = (UWtype) __q0 * __d0;						\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))						\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = (UWtype) __q1 * __ll_B | __q0;				\
+    (r) = __r0;								\
+  } while (0)
+
+#define udiv_qrnnd __udiv_qrnnd_c
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    UWtype __x;								\
+    __x = (al) - (bl);							\
+    (sh) = (ah) - (bh) - (__x > (al));					\
+    (sl) = __x;								\
+  } while (0)
+
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __x0, __x1, __x2, __x3;					\
+    UHWtype __ul, __vl, __uh, __vh;					\
+									\
+    __ul = __ll_lowpart (u);						\
+    __uh = __ll_highpart (u);						\
+    __vl = __ll_lowpart (v);						\
+    __vh = __ll_highpart (v);						\
+									\
+    __x0 = (UWtype) __ul * __vl;					\
+    __x1 = (UWtype) __ul * __vh;					\
+    __x2 = (UWtype) __uh * __vl;					\
+    __x3 = (UWtype) __uh * __vh;					\
+									\
+    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
+    __x1 += __x2;		/* but this indeed can */		\
+    if (__x1 < __x2)		/* did we get it? */			\
+      __x3 += __ll_B;		/* yes, add it in the proper pos.  */	\
+									\
+    (w1) = __x3 + __ll_highpart (__x1);					\
+    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
+  } while (0)
diff --git a/arch/avr32/lib/memcpy.S b/arch/avr32/lib/memcpy.S
new file mode 100644
index 000000000000..0abb26142b64
--- /dev/null
+++ b/arch/avr32/lib/memcpy.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * void *memcpy(void *to, const void *from, unsigned long n)
+	 *
+	 * This implementation does word-aligned loads in the main loop,
+	 * possibly sacrificing alignment of stores.
+	 *
+	 * Hopefully, in most cases, both "to" and "from" will be
+	 * word-aligned to begin with.
+	 */
+	.text
+	.global	memcpy
+	.type	memcpy, @function
+memcpy:
+	mov	r9, r11
+	andl	r9, 3, COH
+	brne	1f
+
+	/* At this point, "from" is word-aligned */
+2:	sub	r10, 4
+	mov	r9, r12
+	brlt	4f
+
+3:	ld.w	r8, r11++
+	sub	r10, 4
+	st.w	r12++, r8
+	brge	3b
+
+4:	neg	r10
+	reteq	r9
+
+	/* Handle unaligned count */
+	lsl	r10, 2
+	add	pc, pc, r10
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	retal	r9
+
+	/* Handle unaligned "from" pointer */
+1:	sub	r10, 4
+	brlt	4b
+	add	r10, r9
+	lsl	r9, 2
+	add	pc, pc, r9
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	rjmp	2b
diff --git a/arch/avr32/lib/memset.S b/arch/avr32/lib/memset.S
new file mode 100644
index 000000000000..40da32c0480c
--- /dev/null
+++ b/arch/avr32/lib/memset.S
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/arm/lib/memset.S
+ *   Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ASM optimised string functions
+ */
+#include <asm/asm.h>
+
+	/*
+	 * r12:	void *b
+	 * r11:	int c
+	 * r10:	size_t len
+	 *
+	 * Returns b in r12
+	 */
+	.text
+	.global	memset
+	.type	memset, @function
+	.align	5
+memset:
+	mov	r9, r12
+	mov	r8, r12
+	or	r11, r11, r11 << 8
+	andl	r9, 3, COH
+	brne	1f
+
+2:	or	r11, r11, r11 << 16
+	sub	r10, 4
+	brlt	5f
+
+	/* Let's do some real work */
+4:	st.w	r8++, r11
+	sub	r10, 4
+	brge	4b
+
+	/*
+	 * When we get here, we've got less than 4 bytes to set. r10
+	 * might be negative.
+	 */
+5:	sub	r10, -4
+	reteq	r12
+
+	/* Fastpath ends here, exactly 32 bytes from memset */
+
+	/* Handle unaligned count or pointer */
+	bld	r10, 1
+	brcc	6f
+	st.b	r8++, r11
+	st.b	r8++, r11
+	bld	r10, 0
+	retcc	r12
+6:	st.b	r8++, r11
+	retal	r12
+
+	/* Handle unaligned pointer */
+1:	sub	r10, 4
+	brlt	5b
+	add	r10, r9
+	lsl	r9, 1
+	add	pc, r9
+	st.b	r8++, r11
+	st.b	r8++, r11
+	st.b	r8++, r11
+	rjmp	2b
+
+	.size	memset, . - memset
diff --git a/arch/avr32/lib/strncpy_from_user.S b/arch/avr32/lib/strncpy_from_user.S
new file mode 100644
index 000000000000..72bd50599ec6
--- /dev/null
+++ b/arch/avr32/lib/strncpy_from_user.S
@@ -0,0 +1,60 @@
+/*
+ * Copy to/from userspace with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+	/*
+	 * long strncpy_from_user(char *dst, const char *src, long count)
+	 *
+	 * On success, returns the length of the string, not including
+	 * the terminating NUL.
+	 *
+	 * If the string is longer than count, returns count
+	 *
+	 * If userspace access fails, returns -EFAULT
+	 */
+	.text
+	.align	1
+	.global	strncpy_from_user
+	.type	strncpy_from_user, "function"
+strncpy_from_user:
+	mov	r9, -EFAULT
+	branch_if_kernel r8, __strncpy_from_user
+	ret_if_privileged r8, r11, r10, r9
+
+	.global	__strncpy_from_user
+	.type	__strncpy_from_user, "function"
+__strncpy_from_user:
+	cp.w	r10, 0
+	reteq	0
+
+	mov	r9, r10
+
+1:	ld.ub	r8, r11++
+	st.b	r12++, r8
+	cp.w	r8, 0
+	breq	2f
+	sub	r9, 1
+	brne	1b
+
+2:	sub	r10, r9
+	retal	r10
+
+	.section .fixup, "ax"
+	.align	1
+3:	mov	r12, -EFAULT
+	retal	r12
+
+	.section __ex_table, "a"
+	.align	2
+	.long	1b, 3b
diff --git a/arch/avr32/lib/strnlen_user.S b/arch/avr32/lib/strnlen_user.S
new file mode 100644
index 000000000000..65ce11afa66a
--- /dev/null
+++ b/arch/avr32/lib/strnlen_user.S
@@ -0,0 +1,67 @@
+/*
+ * Copy to/from userspace with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/asm.h>
+
+	.text
+	.align	1
+	.global	strnlen_user
+	.type	strnlen_user, "function"
+strnlen_user:
+	branch_if_kernel r8, __strnlen_user
+	sub	r8, r11, 1
+	add	r8, r12
+	retcs	0
+	brmi	adjust_length	/* do a closer inspection */
+
+	.global	__strnlen_user
+	.type	__strnlen_user, "function"
+__strnlen_user:
+	mov	r10, r12
+
+10:	ld.ub	r8, r12++
+	cp.w	r8, 0
+	breq	2f
+	sub	r11, 1
+	brne	10b
+
+	sub	r12, -1
+2:	sub	r12, r10
+	retal	r12
+
+
+	.type	adjust_length, "function"
+adjust_length:
+	cp.w	r12, 0		/* addr must always be < TASK_SIZE */
+	retmi	0
+
+	pushm	lr
+	lddpc	lr, _task_size
+	sub	r11, lr, r12
+	mov	r9, r11
+	rcall	__strnlen_user
+	cp.w	r12, r9
+	brgt	1f
+	popm	pc
+1:	popm	pc, r12=0
+
+	.align	2
+_task_size:
+	.long	TASK_SIZE
+
+	.section .fixup, "ax"
+	.align	1
+19:	retal	0
+
+	.section __ex_table, "a"
+	.align	2
+	.long	10b, 19b
diff --git a/arch/avr32/mach-at32ap/Makefile b/arch/avr32/mach-at32ap/Makefile
new file mode 100644
index 000000000000..4b10853eb614
--- /dev/null
+++ b/arch/avr32/mach-at32ap/Makefile
@@ -0,0 +1,2 @@
+obj-y				+= at32ap.o clock.o pio.o intc.o extint.o
+obj-$(CONFIG_CPU_AT32AP7000)	+= at32ap7000.o
diff --git a/arch/avr32/mach-at32ap/at32ap.c b/arch/avr32/mach-at32ap/at32ap.c
new file mode 100644
index 000000000000..f7cedf5aabea
--- /dev/null
+++ b/arch/avr32/mach-at32ap/at32ap.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+
+#include <asm/arch/init.h>
+#include <asm/arch/sm.h>
+
+struct at32_sm system_manager;
+
+static int __init at32_sm_init(void)
+{
+	struct resource *regs;
+	struct at32_sm *sm = &system_manager;
+	int ret = -ENXIO;
+
+	regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0);
+	if (!regs)
+		goto fail;
+
+	spin_lock_init(&sm->lock);
+	sm->pdev = &at32_sm_device;
+
+	ret = -ENOMEM;
+	sm->regs = ioremap(regs->start, regs->end - regs->start + 1);
+	if (!sm->regs)
+		goto fail;
+
+	return 0;
+
+fail:
+	printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret);
+	return ret;
+}
+
+void __init setup_platform(void)
+{
+	at32_sm_init();
+	at32_clock_init();
+	at32_portmux_init();
+
+	/* FIXME: This doesn't belong here */
+	at32_setup_serial_console(1);
+}
+
+static int __init pdc_probe(struct platform_device *pdev)
+{
+	struct clk *pclk, *hclk;
+
+	pclk = clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(pclk)) {
+		dev_err(&pdev->dev, "no pclk defined\n");
+		return PTR_ERR(pclk);
+	}
+	hclk = clk_get(&pdev->dev, "hclk");
+	if (IS_ERR(hclk)) {
+		dev_err(&pdev->dev, "no hclk defined\n");
+		clk_put(pclk);
+		return PTR_ERR(hclk);
+	}
+
+	clk_enable(pclk);
+	clk_enable(hclk);
+
+	dev_info(&pdev->dev, "Atmel Peripheral DMA Controller enabled\n");
+	return 0;
+}
+
+static struct platform_driver pdc_driver = {
+	.probe		= pdc_probe,
+	.driver		= {
+		.name	= "pdc",
+	},
+};
+
+static int __init pdc_init(void)
+{
+	return platform_driver_register(&pdc_driver);
+}
+arch_initcall(pdc_init);
diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c
new file mode 100644
index 000000000000..e8c6893a1c23
--- /dev/null
+++ b/arch/avr32/mach-at32ap/at32ap7000.c
@@ -0,0 +1,866 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+
+#include <asm/arch/board.h>
+#include <asm/arch/portmux.h>
+#include <asm/arch/sm.h>
+
+#include "clock.h"
+#include "pio.h"
+#include "sm.h"
+
+#define PBMEM(base)					\
+	{						\
+		.start		= base,			\
+		.end		= base + 0x3ff,		\
+		.flags		= IORESOURCE_MEM,	\
+	}
+#define IRQ(num)					\
+	{						\
+		.start		= num,			\
+		.end		= num,			\
+		.flags		= IORESOURCE_IRQ,	\
+	}
+#define NAMED_IRQ(num, _name)				\
+	{						\
+		.start		= num,			\
+		.end		= num,			\
+		.name		= _name,		\
+		.flags		= IORESOURCE_IRQ,	\
+	}
+
+#define DEFINE_DEV(_name, _id)					\
+static struct platform_device _name##_id##_device = {		\
+	.name		= #_name,				\
+	.id		= _id,					\
+	.resource	= _name##_id##_resource,		\
+	.num_resources	= ARRAY_SIZE(_name##_id##_resource),	\
+}
+#define DEFINE_DEV_DATA(_name, _id)				\
+static struct platform_device _name##_id##_device = {		\
+	.name		= #_name,				\
+	.id		= _id,					\
+	.dev		= {					\
+		.platform_data	= &_name##_id##_data,		\
+	},							\
+	.resource	= _name##_id##_resource,		\
+	.num_resources	= ARRAY_SIZE(_name##_id##_resource),	\
+}
+
+#define DEV_CLK(_name, devname, bus, _index)			\
+static struct clk devname##_##_name = {				\
+	.name		= #_name,				\
+	.dev		= &devname##_device.dev,		\
+	.parent		= &bus##_clk,				\
+	.mode		= bus##_clk_mode,			\
+	.get_rate	= bus##_clk_get_rate,			\
+	.index		= _index,				\
+}
+
+enum {
+	PIOA,
+	PIOB,
+	PIOC,
+	PIOD,
+};
+
+enum {
+	FUNC_A,
+	FUNC_B,
+};
+
+unsigned long at32ap7000_osc_rates[3] = {
+	[0] = 32768,
+	/* FIXME: these are ATSTK1002-specific */
+	[1] = 20000000,
+	[2] = 12000000,
+};
+
+static unsigned long osc_get_rate(struct clk *clk)
+{
+	return at32ap7000_osc_rates[clk->index];
+}
+
+static unsigned long pll_get_rate(struct clk *clk, unsigned long control)
+{
+	unsigned long div, mul, rate;
+
+	if (!(control & SM_BIT(PLLEN)))
+		return 0;
+
+	div = SM_BFEXT(PLLDIV, control) + 1;
+	mul = SM_BFEXT(PLLMUL, control) + 1;
+
+	rate = clk->parent->get_rate(clk->parent);
+	rate = (rate + div / 2) / div;
+	rate *= mul;
+
+	return rate;
+}
+
+static unsigned long pll0_get_rate(struct clk *clk)
+{
+	u32 control;
+
+	control = sm_readl(&system_manager, PM_PLL0);
+
+	return pll_get_rate(clk, control);
+}
+
+static unsigned long pll1_get_rate(struct clk *clk)
+{
+	u32 control;
+
+	control = sm_readl(&system_manager, PM_PLL1);
+
+	return pll_get_rate(clk, control);
+}
+
+/*
+ * The AT32AP7000 has five primary clock sources: One 32kHz
+ * oscillator, two crystal oscillators and two PLLs.
+ */
+static struct clk osc32k = {
+	.name		= "osc32k",
+	.get_rate	= osc_get_rate,
+	.users		= 1,
+	.index		= 0,
+};
+static struct clk osc0 = {
+	.name		= "osc0",
+	.get_rate	= osc_get_rate,
+	.users		= 1,
+	.index		= 1,
+};
+static struct clk osc1 = {
+	.name		= "osc1",
+	.get_rate	= osc_get_rate,
+	.index		= 2,
+};
+static struct clk pll0 = {
+	.name		= "pll0",
+	.get_rate	= pll0_get_rate,
+	.parent		= &osc0,
+};
+static struct clk pll1 = {
+	.name		= "pll1",
+	.get_rate	= pll1_get_rate,
+	.parent		= &osc0,
+};
+
+/*
+ * The main clock can be either osc0 or pll0.  The boot loader may
+ * have chosen one for us, so we don't really know which one until we
+ * have a look at the SM.
+ */
+static struct clk *main_clock;
+
+/*
+ * Synchronous clocks are generated from the main clock. The clocks
+ * must satisfy the constraint
+ *   fCPU >= fHSB >= fPB
+ * i.e. each clock must not be faster than its parent.
+ */
+static unsigned long bus_clk_get_rate(struct clk *clk, unsigned int shift)
+{
+	return main_clock->get_rate(main_clock) >> shift;
+};
+
+static void cpu_clk_mode(struct clk *clk, int enabled)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned long flags;
+	u32 mask;
+
+	spin_lock_irqsave(&sm->lock, flags);
+	mask = sm_readl(sm, PM_CPU_MASK);
+	if (enabled)
+		mask |= 1 << clk->index;
+	else
+		mask &= ~(1 << clk->index);
+	sm_writel(sm, PM_CPU_MASK, mask);
+	spin_unlock_irqrestore(&sm->lock, flags);
+}
+
+static unsigned long cpu_clk_get_rate(struct clk *clk)
+{
+	unsigned long cksel, shift = 0;
+
+	cksel = sm_readl(&system_manager, PM_CKSEL);
+	if (cksel & SM_BIT(CPUDIV))
+		shift = SM_BFEXT(CPUSEL, cksel) + 1;
+
+	return bus_clk_get_rate(clk, shift);
+}
+
+static void hsb_clk_mode(struct clk *clk, int enabled)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned long flags;
+	u32 mask;
+
+	spin_lock_irqsave(&sm->lock, flags);
+	mask = sm_readl(sm, PM_HSB_MASK);
+	if (enabled)
+		mask |= 1 << clk->index;
+	else
+		mask &= ~(1 << clk->index);
+	sm_writel(sm, PM_HSB_MASK, mask);
+	spin_unlock_irqrestore(&sm->lock, flags);
+}
+
+static unsigned long hsb_clk_get_rate(struct clk *clk)
+{
+	unsigned long cksel, shift = 0;
+
+	cksel = sm_readl(&system_manager, PM_CKSEL);
+	if (cksel & SM_BIT(HSBDIV))
+		shift = SM_BFEXT(HSBSEL, cksel) + 1;
+
+	return bus_clk_get_rate(clk, shift);
+}
+
+static void pba_clk_mode(struct clk *clk, int enabled)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned long flags;
+	u32 mask;
+
+	spin_lock_irqsave(&sm->lock, flags);
+	mask = sm_readl(sm, PM_PBA_MASK);
+	if (enabled)
+		mask |= 1 << clk->index;
+	else
+		mask &= ~(1 << clk->index);
+	sm_writel(sm, PM_PBA_MASK, mask);
+	spin_unlock_irqrestore(&sm->lock, flags);
+}
+
+static unsigned long pba_clk_get_rate(struct clk *clk)
+{
+	unsigned long cksel, shift = 0;
+
+	cksel = sm_readl(&system_manager, PM_CKSEL);
+	if (cksel & SM_BIT(PBADIV))
+		shift = SM_BFEXT(PBASEL, cksel) + 1;
+
+	return bus_clk_get_rate(clk, shift);
+}
+
+static void pbb_clk_mode(struct clk *clk, int enabled)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned long flags;
+	u32 mask;
+
+	spin_lock_irqsave(&sm->lock, flags);
+	mask = sm_readl(sm, PM_PBB_MASK);
+	if (enabled)
+		mask |= 1 << clk->index;
+	else
+		mask &= ~(1 << clk->index);
+	sm_writel(sm, PM_PBB_MASK, mask);
+	spin_unlock_irqrestore(&sm->lock, flags);
+}
+
+static unsigned long pbb_clk_get_rate(struct clk *clk)
+{
+	unsigned long cksel, shift = 0;
+
+	cksel = sm_readl(&system_manager, PM_CKSEL);
+	if (cksel & SM_BIT(PBBDIV))
+		shift = SM_BFEXT(PBBSEL, cksel) + 1;
+
+	return bus_clk_get_rate(clk, shift);
+}
+
+static struct clk cpu_clk = {
+	.name		= "cpu",
+	.get_rate	= cpu_clk_get_rate,
+	.users		= 1,
+};
+static struct clk hsb_clk = {
+	.name		= "hsb",
+	.parent		= &cpu_clk,
+	.get_rate	= hsb_clk_get_rate,
+};
+static struct clk pba_clk = {
+	.name		= "pba",
+	.parent		= &hsb_clk,
+	.mode		= hsb_clk_mode,
+	.get_rate	= pba_clk_get_rate,
+	.index		= 1,
+};
+static struct clk pbb_clk = {
+	.name		= "pbb",
+	.parent		= &hsb_clk,
+	.mode		= hsb_clk_mode,
+	.get_rate	= pbb_clk_get_rate,
+	.users		= 1,
+	.index		= 2,
+};
+
+/* --------------------------------------------------------------------
+ *  Generic Clock operations
+ * -------------------------------------------------------------------- */
+
+static void genclk_mode(struct clk *clk, int enabled)
+{
+	u32 control;
+
+	BUG_ON(clk->index > 7);
+
+	control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
+	if (enabled)
+		control |= SM_BIT(CEN);
+	else
+		control &= ~SM_BIT(CEN);
+	sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control);
+}
+
+static unsigned long genclk_get_rate(struct clk *clk)
+{
+	u32 control;
+	unsigned long div = 1;
+
+	BUG_ON(clk->index > 7);
+
+	if (!clk->parent)
+		return 0;
+
+	control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
+	if (control & SM_BIT(DIVEN))
+		div = 2 * (SM_BFEXT(DIV, control) + 1);
+
+	return clk->parent->get_rate(clk->parent) / div;
+}
+
+static long genclk_set_rate(struct clk *clk, unsigned long rate, int apply)
+{
+	u32 control;
+	unsigned long parent_rate, actual_rate, div;
+
+	BUG_ON(clk->index > 7);
+
+	if (!clk->parent)
+		return 0;
+
+	parent_rate = clk->parent->get_rate(clk->parent);
+	control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
+
+	if (rate > 3 * parent_rate / 4) {
+		actual_rate = parent_rate;
+		control &= ~SM_BIT(DIVEN);
+	} else {
+		div = (parent_rate + rate) / (2 * rate) - 1;
+		control = SM_BFINS(DIV, div, control) | SM_BIT(DIVEN);
+		actual_rate = parent_rate / (2 * (div + 1));
+	}
+
+	printk("clk %s: new rate %lu (actual rate %lu)\n",
+	       clk->name, rate, actual_rate);
+
+	if (apply)
+		sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index,
+			  control);
+
+	return actual_rate;
+}
+
+int genclk_set_parent(struct clk *clk, struct clk *parent)
+{
+	u32 control;
+
+	BUG_ON(clk->index > 7);
+
+	printk("clk %s: new parent %s (was %s)\n",
+	       clk->name, parent->name,
+	       clk->parent ? clk->parent->name : "(null)");
+
+	control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index);
+
+	if (parent == &osc1 || parent == &pll1)
+		control |= SM_BIT(OSCSEL);
+	else if (parent == &osc0 || parent == &pll0)
+		control &= ~SM_BIT(OSCSEL);
+	else
+		return -EINVAL;
+
+	if (parent == &pll0 || parent == &pll1)
+		control |= SM_BIT(PLLSEL);
+	else
+		control &= ~SM_BIT(PLLSEL);
+
+	sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control);
+	clk->parent = parent;
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------
+ *  System peripherals
+ * -------------------------------------------------------------------- */
+static struct resource sm_resource[] = {
+	PBMEM(0xfff00000),
+	NAMED_IRQ(19, "eim"),
+	NAMED_IRQ(20, "pm"),
+	NAMED_IRQ(21, "rtc"),
+};
+struct platform_device at32_sm_device = {
+	.name		= "sm",
+	.id		= 0,
+	.resource	= sm_resource,
+	.num_resources	= ARRAY_SIZE(sm_resource),
+};
+DEV_CLK(pclk, at32_sm, pbb, 0);
+
+static struct resource intc0_resource[] = {
+	PBMEM(0xfff00400),
+};
+struct platform_device at32_intc0_device = {
+	.name		= "intc",
+	.id		= 0,
+	.resource	= intc0_resource,
+	.num_resources	= ARRAY_SIZE(intc0_resource),
+};
+DEV_CLK(pclk, at32_intc0, pbb, 1);
+
+static struct clk ebi_clk = {
+	.name		= "ebi",
+	.parent		= &hsb_clk,
+	.mode		= hsb_clk_mode,
+	.get_rate	= hsb_clk_get_rate,
+	.users		= 1,
+};
+static struct clk hramc_clk = {
+	.name		= "hramc",
+	.parent		= &hsb_clk,
+	.mode		= hsb_clk_mode,
+	.get_rate	= hsb_clk_get_rate,
+	.users		= 1,
+};
+
+static struct platform_device pdc_device = {
+	.name		= "pdc",
+	.id		= 0,
+};
+DEV_CLK(hclk, pdc, hsb, 4);
+DEV_CLK(pclk, pdc, pba, 16);
+
+static struct clk pico_clk = {
+	.name		= "pico",
+	.parent		= &cpu_clk,
+	.mode		= cpu_clk_mode,
+	.get_rate	= cpu_clk_get_rate,
+	.users		= 1,
+};
+
+/* --------------------------------------------------------------------
+ *  PIO
+ * -------------------------------------------------------------------- */
+
+static struct resource pio0_resource[] = {
+	PBMEM(0xffe02800),
+	IRQ(13),
+};
+DEFINE_DEV(pio, 0);
+DEV_CLK(mck, pio0, pba, 10);
+
+static struct resource pio1_resource[] = {
+	PBMEM(0xffe02c00),
+	IRQ(14),
+};
+DEFINE_DEV(pio, 1);
+DEV_CLK(mck, pio1, pba, 11);
+
+static struct resource pio2_resource[] = {
+	PBMEM(0xffe03000),
+	IRQ(15),
+};
+DEFINE_DEV(pio, 2);
+DEV_CLK(mck, pio2, pba, 12);
+
+static struct resource pio3_resource[] = {
+	PBMEM(0xffe03400),
+	IRQ(16),
+};
+DEFINE_DEV(pio, 3);
+DEV_CLK(mck, pio3, pba, 13);
+
+void __init at32_add_system_devices(void)
+{
+	system_manager.eim_first_irq = NR_INTERNAL_IRQS;
+
+	platform_device_register(&at32_sm_device);
+	platform_device_register(&at32_intc0_device);
+	platform_device_register(&pdc_device);
+
+	platform_device_register(&pio0_device);
+	platform_device_register(&pio1_device);
+	platform_device_register(&pio2_device);
+	platform_device_register(&pio3_device);
+}
+
+/* --------------------------------------------------------------------
+ *  USART
+ * -------------------------------------------------------------------- */
+
+static struct resource usart0_resource[] = {
+	PBMEM(0xffe00c00),
+	IRQ(7),
+};
+DEFINE_DEV(usart, 0);
+DEV_CLK(usart, usart0, pba, 4);
+
+static struct resource usart1_resource[] = {
+	PBMEM(0xffe01000),
+	IRQ(7),
+};
+DEFINE_DEV(usart, 1);
+DEV_CLK(usart, usart1, pba, 4);
+
+static struct resource usart2_resource[] = {
+	PBMEM(0xffe01400),
+	IRQ(8),
+};
+DEFINE_DEV(usart, 2);
+DEV_CLK(usart, usart2, pba, 5);
+
+static struct resource usart3_resource[] = {
+	PBMEM(0xffe01800),
+	IRQ(9),
+};
+DEFINE_DEV(usart, 3);
+DEV_CLK(usart, usart3, pba, 6);
+
+static inline void configure_usart0_pins(void)
+{
+	portmux_set_func(PIOA,  8, FUNC_B);	/* RXD	*/
+	portmux_set_func(PIOA,  9, FUNC_B);	/* TXD	*/
+}
+
+static inline void configure_usart1_pins(void)
+{
+	portmux_set_func(PIOA, 17, FUNC_A);	/* RXD	*/
+	portmux_set_func(PIOA, 18, FUNC_A);	/* TXD	*/
+}
+
+static inline void configure_usart2_pins(void)
+{
+	portmux_set_func(PIOB, 26, FUNC_B);	/* RXD	*/
+	portmux_set_func(PIOB, 27, FUNC_B);	/* TXD	*/
+}
+
+static inline void configure_usart3_pins(void)
+{
+	portmux_set_func(PIOB, 18, FUNC_B);	/* RXD	*/
+	portmux_set_func(PIOB, 17, FUNC_B);	/* TXD	*/
+}
+
+static struct platform_device *setup_usart(unsigned int id)
+{
+	struct platform_device *pdev;
+
+	switch (id) {
+	case 0:
+		pdev = &usart0_device;
+		configure_usart0_pins();
+		break;
+	case 1:
+		pdev = &usart1_device;
+		configure_usart1_pins();
+		break;
+	case 2:
+		pdev = &usart2_device;
+		configure_usart2_pins();
+		break;
+	case 3:
+		pdev = &usart3_device;
+		configure_usart3_pins();
+		break;
+	default:
+		pdev = NULL;
+		break;
+	}
+
+	return pdev;
+}
+
+struct platform_device *__init at32_add_device_usart(unsigned int id)
+{
+	struct platform_device *pdev;
+
+	pdev = setup_usart(id);
+	if (pdev)
+		platform_device_register(pdev);
+
+	return pdev;
+}
+
+struct platform_device *at91_default_console_device;
+
+void __init at32_setup_serial_console(unsigned int usart_id)
+{
+	at91_default_console_device = setup_usart(usart_id);
+}
+
+/* --------------------------------------------------------------------
+ *  Ethernet
+ * -------------------------------------------------------------------- */
+
+static struct eth_platform_data macb0_data;
+static struct resource macb0_resource[] = {
+	PBMEM(0xfff01800),
+	IRQ(25),
+};
+DEFINE_DEV_DATA(macb, 0);
+DEV_CLK(hclk, macb0, hsb, 8);
+DEV_CLK(pclk, macb0, pbb, 6);
+
+struct platform_device *__init
+at32_add_device_eth(unsigned int id, struct eth_platform_data *data)
+{
+	struct platform_device *pdev;
+
+	switch (id) {
+	case 0:
+		pdev = &macb0_device;
+
+		portmux_set_func(PIOC,  3, FUNC_A);	/* TXD0	*/
+		portmux_set_func(PIOC,  4, FUNC_A);	/* TXD1	*/
+		portmux_set_func(PIOC,  7, FUNC_A);	/* TXEN	*/
+		portmux_set_func(PIOC,  8, FUNC_A);	/* TXCK */
+		portmux_set_func(PIOC,  9, FUNC_A);	/* RXD0	*/
+		portmux_set_func(PIOC, 10, FUNC_A);	/* RXD1	*/
+		portmux_set_func(PIOC, 13, FUNC_A);	/* RXER	*/
+		portmux_set_func(PIOC, 15, FUNC_A);	/* RXDV	*/
+		portmux_set_func(PIOC, 16, FUNC_A);	/* MDC	*/
+		portmux_set_func(PIOC, 17, FUNC_A);	/* MDIO	*/
+
+		if (!data->is_rmii) {
+			portmux_set_func(PIOC,  0, FUNC_A);	/* COL	*/
+			portmux_set_func(PIOC,  1, FUNC_A);	/* CRS	*/
+			portmux_set_func(PIOC,  2, FUNC_A);	/* TXER	*/
+			portmux_set_func(PIOC,  5, FUNC_A);	/* TXD2	*/
+			portmux_set_func(PIOC,  6, FUNC_A);	/* TXD3 */
+			portmux_set_func(PIOC, 11, FUNC_A);	/* RXD2	*/
+			portmux_set_func(PIOC, 12, FUNC_A);	/* RXD3	*/
+			portmux_set_func(PIOC, 14, FUNC_A);	/* RXCK	*/
+			portmux_set_func(PIOC, 18, FUNC_A);	/* SPD	*/
+		}
+		break;
+
+	default:
+		return NULL;
+	}
+
+	memcpy(pdev->dev.platform_data, data, sizeof(struct eth_platform_data));
+	platform_device_register(pdev);
+
+	return pdev;
+}
+
+/* --------------------------------------------------------------------
+ *  SPI
+ * -------------------------------------------------------------------- */
+static struct resource spi0_resource[] = {
+	PBMEM(0xffe00000),
+	IRQ(3),
+};
+DEFINE_DEV(spi, 0);
+DEV_CLK(mck, spi0, pba, 0);
+
+struct platform_device *__init at32_add_device_spi(unsigned int id)
+{
+	struct platform_device *pdev;
+
+	switch (id) {
+	case 0:
+		pdev = &spi0_device;
+		portmux_set_func(PIOA,  0, FUNC_A);	/* MISO	 */
+		portmux_set_func(PIOA,  1, FUNC_A);	/* MOSI	 */
+		portmux_set_func(PIOA,  2, FUNC_A);	/* SCK	 */
+		portmux_set_func(PIOA,  3, FUNC_A);	/* NPCS0 */
+		portmux_set_func(PIOA,  4, FUNC_A);	/* NPCS1 */
+		portmux_set_func(PIOA,  5, FUNC_A);	/* NPCS2 */
+		break;
+
+	default:
+		return NULL;
+	}
+
+	platform_device_register(pdev);
+	return pdev;
+}
+
+/* --------------------------------------------------------------------
+ *  LCDC
+ * -------------------------------------------------------------------- */
+static struct lcdc_platform_data lcdc0_data;
+static struct resource lcdc0_resource[] = {
+	{
+		.start		= 0xff000000,
+		.end		= 0xff000fff,
+		.flags		= IORESOURCE_MEM,
+	},
+	IRQ(1),
+};
+DEFINE_DEV_DATA(lcdc, 0);
+DEV_CLK(hclk, lcdc0, hsb, 7);
+static struct clk lcdc0_pixclk = {
+	.name		= "pixclk",
+	.dev		= &lcdc0_device.dev,
+	.mode		= genclk_mode,
+	.get_rate	= genclk_get_rate,
+	.set_rate	= genclk_set_rate,
+	.set_parent	= genclk_set_parent,
+	.index		= 7,
+};
+
+struct platform_device *__init
+at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data)
+{
+	struct platform_device *pdev;
+
+	switch (id) {
+	case 0:
+		pdev = &lcdc0_device;
+		portmux_set_func(PIOC, 19, FUNC_A);	/* CC	  */
+		portmux_set_func(PIOC, 20, FUNC_A);	/* HSYNC  */
+		portmux_set_func(PIOC, 21, FUNC_A);	/* PCLK	  */
+		portmux_set_func(PIOC, 22, FUNC_A);	/* VSYNC  */
+		portmux_set_func(PIOC, 23, FUNC_A);	/* DVAL	  */
+		portmux_set_func(PIOC, 24, FUNC_A);	/* MODE	  */
+		portmux_set_func(PIOC, 25, FUNC_A);	/* PWR	  */
+		portmux_set_func(PIOC, 26, FUNC_A);	/* DATA0  */
+		portmux_set_func(PIOC, 27, FUNC_A);	/* DATA1  */
+		portmux_set_func(PIOC, 28, FUNC_A);	/* DATA2  */
+		portmux_set_func(PIOC, 29, FUNC_A);	/* DATA3  */
+		portmux_set_func(PIOC, 30, FUNC_A);	/* DATA4  */
+		portmux_set_func(PIOC, 31, FUNC_A);	/* DATA5  */
+		portmux_set_func(PIOD,  0, FUNC_A);	/* DATA6  */
+		portmux_set_func(PIOD,  1, FUNC_A);	/* DATA7  */
+		portmux_set_func(PIOD,  2, FUNC_A);	/* DATA8  */
+		portmux_set_func(PIOD,  3, FUNC_A);	/* DATA9  */
+		portmux_set_func(PIOD,  4, FUNC_A);	/* DATA10 */
+		portmux_set_func(PIOD,  5, FUNC_A);	/* DATA11 */
+		portmux_set_func(PIOD,  6, FUNC_A);	/* DATA12 */
+		portmux_set_func(PIOD,  7, FUNC_A);	/* DATA13 */
+		portmux_set_func(PIOD,  8, FUNC_A);	/* DATA14 */
+		portmux_set_func(PIOD,  9, FUNC_A);	/* DATA15 */
+		portmux_set_func(PIOD, 10, FUNC_A);	/* DATA16 */
+		portmux_set_func(PIOD, 11, FUNC_A);	/* DATA17 */
+		portmux_set_func(PIOD, 12, FUNC_A);	/* DATA18 */
+		portmux_set_func(PIOD, 13, FUNC_A);	/* DATA19 */
+		portmux_set_func(PIOD, 14, FUNC_A);	/* DATA20 */
+		portmux_set_func(PIOD, 15, FUNC_A);	/* DATA21 */
+		portmux_set_func(PIOD, 16, FUNC_A);	/* DATA22 */
+		portmux_set_func(PIOD, 17, FUNC_A);	/* DATA23 */
+
+		clk_set_parent(&lcdc0_pixclk, &pll0);
+		clk_set_rate(&lcdc0_pixclk, clk_get_rate(&pll0));
+		break;
+
+	default:
+		return NULL;
+	}
+
+	memcpy(pdev->dev.platform_data, data,
+	       sizeof(struct lcdc_platform_data));
+
+	platform_device_register(pdev);
+	return pdev;
+}
+
+struct clk *at32_clock_list[] = {
+	&osc32k,
+	&osc0,
+	&osc1,
+	&pll0,
+	&pll1,
+	&cpu_clk,
+	&hsb_clk,
+	&pba_clk,
+	&pbb_clk,
+	&at32_sm_pclk,
+	&at32_intc0_pclk,
+	&ebi_clk,
+	&hramc_clk,
+	&pdc_hclk,
+	&pdc_pclk,
+	&pico_clk,
+	&pio0_mck,
+	&pio1_mck,
+	&pio2_mck,
+	&pio3_mck,
+	&usart0_usart,
+	&usart1_usart,
+	&usart2_usart,
+	&usart3_usart,
+	&macb0_hclk,
+	&macb0_pclk,
+	&spi0_mck,
+	&lcdc0_hclk,
+	&lcdc0_pixclk,
+};
+unsigned int at32_nr_clocks = ARRAY_SIZE(at32_clock_list);
+
+void __init at32_portmux_init(void)
+{
+	at32_init_pio(&pio0_device);
+	at32_init_pio(&pio1_device);
+	at32_init_pio(&pio2_device);
+	at32_init_pio(&pio3_device);
+}
+
+void __init at32_clock_init(void)
+{
+	struct at32_sm *sm = &system_manager;
+	u32 cpu_mask = 0, hsb_mask = 0, pba_mask = 0, pbb_mask = 0;
+	int i;
+
+	if (sm_readl(sm, PM_MCCTRL) & SM_BIT(PLLSEL))
+		main_clock = &pll0;
+	else
+		main_clock = &osc0;
+
+	if (sm_readl(sm, PM_PLL0) & SM_BIT(PLLOSC))
+		pll0.parent = &osc1;
+	if (sm_readl(sm, PM_PLL1) & SM_BIT(PLLOSC))
+		pll1.parent = &osc1;
+
+	/*
+	 * Turn on all clocks that have at least one user already, and
+	 * turn off everything else. We only do this for module
+	 * clocks, and even though it isn't particularly pretty to
+	 * check the address of the mode function, it should do the
+	 * trick...
+	 */
+	for (i = 0; i < ARRAY_SIZE(at32_clock_list); i++) {
+		struct clk *clk = at32_clock_list[i];
+
+		if (clk->mode == &cpu_clk_mode)
+			cpu_mask |= 1 << clk->index;
+		else if (clk->mode == &hsb_clk_mode)
+			hsb_mask |= 1 << clk->index;
+		else if (clk->mode == &pba_clk_mode)
+			pba_mask |= 1 << clk->index;
+		else if (clk->mode == &pbb_clk_mode)
+			pbb_mask |= 1 << clk->index;
+	}
+
+	sm_writel(sm, PM_CPU_MASK, cpu_mask);
+	sm_writel(sm, PM_HSB_MASK, hsb_mask);
+	sm_writel(sm, PM_PBA_MASK, pba_mask);
+	sm_writel(sm, PM_PBB_MASK, pbb_mask);
+}
diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c
new file mode 100644
index 000000000000..3d0d1097389f
--- /dev/null
+++ b/arch/avr32/mach-at32ap/clock.c
@@ -0,0 +1,148 @@
+/*
+ * Clock management for AT32AP CPUs
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * Based on arch/arm/mach-at91rm9200/clock.c
+ *   Copyright (C) 2005 David Brownell
+ *   Copyright (C) 2005 Ivan Kokshaysky
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/string.h>
+
+#include "clock.h"
+
+static spinlock_t clk_lock = SPIN_LOCK_UNLOCKED;
+
+struct clk *clk_get(struct device *dev, const char *id)
+{
+	int i;
+
+	for (i = 0; i < at32_nr_clocks; i++) {
+		struct clk *clk = at32_clock_list[i];
+
+		if (clk->dev == dev && strcmp(id, clk->name) == 0)
+			return clk;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL(clk_get);
+
+void clk_put(struct clk *clk)
+{
+	/* clocks are static for now, we can't free them */
+}
+EXPORT_SYMBOL(clk_put);
+
+static void __clk_enable(struct clk *clk)
+{
+	if (clk->parent)
+		__clk_enable(clk->parent);
+	if (clk->users++ == 0 && clk->mode)
+		clk->mode(clk, 1);
+}
+
+int clk_enable(struct clk *clk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clk_lock, flags);
+	__clk_enable(clk);
+	spin_unlock_irqrestore(&clk_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_enable);
+
+static void __clk_disable(struct clk *clk)
+{
+	BUG_ON(clk->users == 0);
+
+	if (--clk->users == 0 && clk->mode)
+		clk->mode(clk, 0);
+	if (clk->parent)
+		__clk_disable(clk->parent);
+}
+
+void clk_disable(struct clk *clk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clk_lock, flags);
+	__clk_disable(clk);
+	spin_unlock_irqrestore(&clk_lock, flags);
+}
+EXPORT_SYMBOL(clk_disable);
+
+unsigned long clk_get_rate(struct clk *clk)
+{
+	unsigned long flags;
+	unsigned long rate;
+
+	spin_lock_irqsave(&clk_lock, flags);
+	rate = clk->get_rate(clk);
+	spin_unlock_irqrestore(&clk_lock, flags);
+
+	return rate;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+	unsigned long flags, actual_rate;
+
+	if (!clk->set_rate)
+		return -ENOSYS;
+
+	spin_lock_irqsave(&clk_lock, flags);
+	actual_rate = clk->set_rate(clk, rate, 0);
+	spin_unlock_irqrestore(&clk_lock, flags);
+
+	return actual_rate;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+	unsigned long flags;
+	long ret;
+
+	if (!clk->set_rate)
+		return -ENOSYS;
+
+	spin_lock_irqsave(&clk_lock, flags);
+	ret = clk->set_rate(clk, rate, 1);
+	spin_unlock_irqrestore(&clk_lock, flags);
+
+	return (ret < 0) ? ret : 0;
+}
+EXPORT_SYMBOL(clk_set_rate);
+
+int clk_set_parent(struct clk *clk, struct clk *parent)
+{
+	unsigned long flags;
+	int ret;
+
+	if (!clk->set_parent)
+		return -ENOSYS;
+
+	spin_lock_irqsave(&clk_lock, flags);
+	ret = clk->set_parent(clk, parent);
+	spin_unlock_irqrestore(&clk_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(clk_set_parent);
+
+struct clk *clk_get_parent(struct clk *clk)
+{
+	return clk->parent;
+}
+EXPORT_SYMBOL(clk_get_parent);
diff --git a/arch/avr32/mach-at32ap/clock.h b/arch/avr32/mach-at32ap/clock.h
new file mode 100644
index 000000000000..f953f044ba4d
--- /dev/null
+++ b/arch/avr32/mach-at32ap/clock.h
@@ -0,0 +1,30 @@
+/*
+ * Clock management for AT32AP CPUs
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * Based on arch/arm/mach-at91rm9200/clock.c
+ *   Copyright (C) 2005 David Brownell
+ *   Copyright (C) 2005 Ivan Kokshaysky
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+
+struct clk {
+	const char	*name;		/* Clock name/function */
+	struct device	*dev;		/* Device the clock is used by */
+	struct clk	*parent;	/* Parent clock, if any */
+	void		(*mode)(struct clk *clk, int enabled);
+	unsigned long	(*get_rate)(struct clk *clk);
+	long		(*set_rate)(struct clk *clk, unsigned long rate,
+				    int apply);
+	int		(*set_parent)(struct clk *clk, struct clk *parent);
+	u16		users;		/* Enabled if non-zero */
+	u16		index;		/* Sibling index */
+};
+
+extern struct clk *at32_clock_list[];
+extern unsigned int at32_nr_clocks;
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
new file mode 100644
index 000000000000..7da9c5f7a0eb
--- /dev/null
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -0,0 +1,171 @@
+/*
+ * External interrupt handling for AT32AP CPUs
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+#include <linux/random.h>
+
+#include <asm/io.h>
+
+#include <asm/arch/sm.h>
+
+#include "sm.h"
+
+static void eim_ack_irq(unsigned int irq)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq));
+}
+
+static void eim_mask_irq(unsigned int irq)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq));
+}
+
+static void eim_mask_ack_irq(unsigned int irq)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq));
+	sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq));
+}
+
+static void eim_unmask_irq(unsigned int irq)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	sm_writel(sm, EIM_IER, 1 << (irq - sm->eim_first_irq));
+}
+
+static int eim_set_irq_type(unsigned int irq, unsigned int flow_type)
+{
+	struct at32_sm *sm = get_irq_chip_data(irq);
+	unsigned int i = irq - sm->eim_first_irq;
+	u32 mode, edge, level;
+	unsigned long flags;
+	int ret = 0;
+
+	flow_type &= IRQ_TYPE_SENSE_MASK;
+
+	spin_lock_irqsave(&sm->lock, flags);
+
+	mode = sm_readl(sm, EIM_MODE);
+	edge = sm_readl(sm, EIM_EDGE);
+	level = sm_readl(sm, EIM_LEVEL);
+
+	switch (flow_type) {
+	case IRQ_TYPE_LEVEL_LOW:
+		mode |= 1 << i;
+		level &= ~(1 << i);
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		mode |= 1 << i;
+		level |= 1 << i;
+		break;
+	case IRQ_TYPE_EDGE_RISING:
+		mode &= ~(1 << i);
+		edge |= 1 << i;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		mode &= ~(1 << i);
+		edge &= ~(1 << i);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	sm_writel(sm, EIM_MODE, mode);
+	sm_writel(sm, EIM_EDGE, edge);
+	sm_writel(sm, EIM_LEVEL, level);
+
+	spin_unlock_irqrestore(&sm->lock, flags);
+
+	return ret;
+}
+
+struct irq_chip eim_chip = {
+	.name		= "eim",
+	.ack		= eim_ack_irq,
+	.mask		= eim_mask_irq,
+	.mask_ack	= eim_mask_ack_irq,
+	.unmask		= eim_unmask_irq,
+	.set_type	= eim_set_irq_type,
+};
+
+static void demux_eim_irq(unsigned int irq, struct irq_desc *desc,
+			  struct pt_regs *regs)
+{
+	struct at32_sm *sm = desc->handler_data;
+	struct irq_desc *ext_desc;
+	unsigned long status, pending;
+	unsigned int i, ext_irq;
+
+	spin_lock(&sm->lock);
+
+	status = sm_readl(sm, EIM_ISR);
+	pending = status & sm_readl(sm, EIM_IMR);
+
+	while (pending) {
+		i = fls(pending) - 1;
+		pending &= ~(1 << i);
+
+		ext_irq = i + sm->eim_first_irq;
+		ext_desc = irq_desc + ext_irq;
+		ext_desc->handle_irq(ext_irq, ext_desc, regs);
+	}
+
+	spin_unlock(&sm->lock);
+}
+
+static int __init eim_init(void)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned int i;
+	unsigned int nr_irqs;
+	unsigned int int_irq;
+	u32 pattern;
+
+	/*
+	 * The EIM is really the same module as SM, so register
+	 * mapping, etc. has been taken care of already.
+	 */
+
+	/*
+	 * Find out how many interrupt lines that are actually
+	 * implemented in hardware.
+	 */
+	sm_writel(sm, EIM_IDR, ~0UL);
+	sm_writel(sm, EIM_MODE, ~0UL);
+	pattern = sm_readl(sm, EIM_MODE);
+	nr_irqs = fls(pattern);
+
+	sm->eim_chip = &eim_chip;
+
+	for (i = 0; i < nr_irqs; i++) {
+		set_irq_chip(sm->eim_first_irq + i, &eim_chip);
+		set_irq_chip_data(sm->eim_first_irq + i, sm);
+	}
+
+	int_irq = platform_get_irq_byname(sm->pdev, "eim");
+
+	set_irq_chained_handler(int_irq, demux_eim_irq);
+	set_irq_data(int_irq, sm);
+
+	printk("EIM: External Interrupt Module at 0x%p, IRQ %u\n",
+	       sm->regs, int_irq);
+	printk("EIM: Handling %u external IRQs, starting with IRQ %u\n",
+	       nr_irqs, sm->eim_first_irq);
+
+	return 0;
+}
+arch_initcall(eim_init);
diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c
new file mode 100644
index 000000000000..74f8c9f2f03d
--- /dev/null
+++ b/arch/avr32/mach-at32ap/intc.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+
+#include "intc.h"
+
+struct intc {
+	void __iomem	*regs;
+	struct irq_chip	chip;
+};
+
+extern struct platform_device at32_intc0_device;
+
+/*
+ * TODO: We may be able to implement mask/unmask by setting IxM flags
+ * in the status register.
+ */
+static void intc_mask_irq(unsigned int irq)
+{
+
+}
+
+static void intc_unmask_irq(unsigned int irq)
+{
+
+}
+
+static struct intc intc0 = {
+	.chip = {
+		.name		= "intc",
+		.mask		= intc_mask_irq,
+		.unmask		= intc_unmask_irq,
+	},
+};
+
+/*
+ * All interrupts go via intc at some point.
+ */
+asmlinkage void do_IRQ(int level, struct pt_regs *regs)
+{
+	struct irq_desc *desc;
+	unsigned int irq;
+	unsigned long status_reg;
+
+	local_irq_disable();
+
+	irq_enter();
+
+	irq = intc_readl(&intc0, INTCAUSE0 - 4 * level);
+	desc = irq_desc + irq;
+	desc->handle_irq(irq, desc, regs);
+
+	/*
+	 * Clear all interrupt level masks so that we may handle
+	 * interrupts during softirq processing.  If this is a nested
+	 * interrupt, interrupts must stay globally disabled until we
+	 * return.
+	 */
+	status_reg = sysreg_read(SR);
+	status_reg &= ~(SYSREG_BIT(I0M) | SYSREG_BIT(I1M)
+			| SYSREG_BIT(I2M) | SYSREG_BIT(I3M));
+	sysreg_write(SR, status_reg);
+
+	irq_exit();
+}
+
+void __init init_IRQ(void)
+{
+	extern void _evba(void);
+	extern void irq_level0(void);
+	struct resource *regs;
+	struct clk *pclk;
+	unsigned int i;
+	u32 offset, readback;
+
+	regs = platform_get_resource(&at32_intc0_device, IORESOURCE_MEM, 0);
+	if (!regs) {
+		printk(KERN_EMERG "intc: no mmio resource defined\n");
+		goto fail;
+	}
+	pclk = clk_get(&at32_intc0_device.dev, "pclk");
+	if (IS_ERR(pclk)) {
+		printk(KERN_EMERG "intc: no clock defined\n");
+		goto fail;
+	}
+
+	clk_enable(pclk);
+
+	intc0.regs = ioremap(regs->start, regs->end - regs->start + 1);
+	if (!intc0.regs) {
+		printk(KERN_EMERG "intc: failed to map registers (0x%08lx)\n",
+		       (unsigned long)regs->start);
+		goto fail;
+	}
+
+	/*
+	 * Initialize all interrupts to level 0 (lowest priority). The
+	 * priority level may be changed by calling
+	 * irq_set_priority().
+	 *
+	 */
+	offset = (unsigned long)&irq_level0 - (unsigned long)&_evba;
+	for (i = 0; i < NR_INTERNAL_IRQS; i++) {
+		intc_writel(&intc0, INTPR0 + 4 * i, offset);
+		readback = intc_readl(&intc0, INTPR0 + 4 * i);
+		if (readback == offset)
+			set_irq_chip_and_handler(i, &intc0.chip,
+						 handle_simple_irq);
+	}
+
+	/* Unmask all interrupt levels */
+	sysreg_write(SR, (sysreg_read(SR)
+			  & ~(SR_I3M | SR_I2M | SR_I1M | SR_I0M)));
+
+	return;
+
+fail:
+	panic("Interrupt controller initialization failed!\n");
+}
+
diff --git a/arch/avr32/mach-at32ap/intc.h b/arch/avr32/mach-at32ap/intc.h
new file mode 100644
index 000000000000..d289ca2fff13
--- /dev/null
+++ b/arch/avr32/mach-at32ap/intc.h
@@ -0,0 +1,327 @@
+/*
+ * Automatically generated by gen-header.xsl
+ */
+#ifndef __ASM_AVR32_PERIHP_INTC_H__
+#define __ASM_AVR32_PERIHP_INTC_H__
+
+#define INTC_NUM_INT_GRPS            33
+
+#define INTC_INTPR0                  0x0
+# define INTC_INTPR0_INTLEV_OFFSET   30
+# define INTC_INTPR0_INTLEV_SIZE     2
+# define INTC_INTPR0_OFFSET_OFFSET   0
+# define INTC_INTPR0_OFFSET_SIZE     24
+#define INTC_INTREQ0                 0x100
+# define INTC_INTREQ0_IREQUEST0_OFFSET 0
+# define INTC_INTREQ0_IREQUEST0_SIZE 1
+# define INTC_INTREQ0_IREQUEST1_OFFSET 1
+# define INTC_INTREQ0_IREQUEST1_SIZE 1
+#define INTC_INTPR1                  0x4
+# define INTC_INTPR1_INTLEV_OFFSET   30
+# define INTC_INTPR1_INTLEV_SIZE     2
+# define INTC_INTPR1_OFFSET_OFFSET   0
+# define INTC_INTPR1_OFFSET_SIZE     24
+#define INTC_INTREQ1                 0x104
+# define INTC_INTREQ1_IREQUEST32_OFFSET 0
+# define INTC_INTREQ1_IREQUEST32_SIZE 1
+# define INTC_INTREQ1_IREQUEST33_OFFSET 1
+# define INTC_INTREQ1_IREQUEST33_SIZE 1
+# define INTC_INTREQ1_IREQUEST34_OFFSET 2
+# define INTC_INTREQ1_IREQUEST34_SIZE 1
+# define INTC_INTREQ1_IREQUEST35_OFFSET 3
+# define INTC_INTREQ1_IREQUEST35_SIZE 1
+# define INTC_INTREQ1_IREQUEST36_OFFSET 4
+# define INTC_INTREQ1_IREQUEST36_SIZE 1
+# define INTC_INTREQ1_IREQUEST37_OFFSET 5
+# define INTC_INTREQ1_IREQUEST37_SIZE 1
+#define INTC_INTPR2                  0x8
+# define INTC_INTPR2_INTLEV_OFFSET   30
+# define INTC_INTPR2_INTLEV_SIZE     2
+# define INTC_INTPR2_OFFSET_OFFSET   0
+# define INTC_INTPR2_OFFSET_SIZE     24
+#define INTC_INTREQ2                 0x108
+# define INTC_INTREQ2_IREQUEST64_OFFSET 0
+# define INTC_INTREQ2_IREQUEST64_SIZE 1
+# define INTC_INTREQ2_IREQUEST65_OFFSET 1
+# define INTC_INTREQ2_IREQUEST65_SIZE 1
+# define INTC_INTREQ2_IREQUEST66_OFFSET 2
+# define INTC_INTREQ2_IREQUEST66_SIZE 1
+# define INTC_INTREQ2_IREQUEST67_OFFSET 3
+# define INTC_INTREQ2_IREQUEST67_SIZE 1
+# define INTC_INTREQ2_IREQUEST68_OFFSET 4
+# define INTC_INTREQ2_IREQUEST68_SIZE 1
+#define INTC_INTPR3                  0xc
+# define INTC_INTPR3_INTLEV_OFFSET   30
+# define INTC_INTPR3_INTLEV_SIZE     2
+# define INTC_INTPR3_OFFSET_OFFSET   0
+# define INTC_INTPR3_OFFSET_SIZE     24
+#define INTC_INTREQ3                 0x10c
+# define INTC_INTREQ3_IREQUEST96_OFFSET 0
+# define INTC_INTREQ3_IREQUEST96_SIZE 1
+#define INTC_INTPR4                  0x10
+# define INTC_INTPR4_INTLEV_OFFSET   30
+# define INTC_INTPR4_INTLEV_SIZE     2
+# define INTC_INTPR4_OFFSET_OFFSET   0
+# define INTC_INTPR4_OFFSET_SIZE     24
+#define INTC_INTREQ4                 0x110
+# define INTC_INTREQ4_IREQUEST128_OFFSET 0
+# define INTC_INTREQ4_IREQUEST128_SIZE 1
+#define INTC_INTPR5                  0x14
+# define INTC_INTPR5_INTLEV_OFFSET   30
+# define INTC_INTPR5_INTLEV_SIZE     2
+# define INTC_INTPR5_OFFSET_OFFSET   0
+# define INTC_INTPR5_OFFSET_SIZE     24
+#define INTC_INTREQ5                 0x114
+# define INTC_INTREQ5_IREQUEST160_OFFSET 0
+# define INTC_INTREQ5_IREQUEST160_SIZE 1
+#define INTC_INTPR6                  0x18
+# define INTC_INTPR6_INTLEV_OFFSET   30
+# define INTC_INTPR6_INTLEV_SIZE     2
+# define INTC_INTPR6_OFFSET_OFFSET   0
+# define INTC_INTPR6_OFFSET_SIZE     24
+#define INTC_INTREQ6                 0x118
+# define INTC_INTREQ6_IREQUEST192_OFFSET 0
+# define INTC_INTREQ6_IREQUEST192_SIZE 1
+#define INTC_INTPR7                  0x1c
+# define INTC_INTPR7_INTLEV_OFFSET   30
+# define INTC_INTPR7_INTLEV_SIZE     2
+# define INTC_INTPR7_OFFSET_OFFSET   0
+# define INTC_INTPR7_OFFSET_SIZE     24
+#define INTC_INTREQ7                 0x11c
+# define INTC_INTREQ7_IREQUEST224_OFFSET 0
+# define INTC_INTREQ7_IREQUEST224_SIZE 1
+#define INTC_INTPR8                  0x20
+# define INTC_INTPR8_INTLEV_OFFSET   30
+# define INTC_INTPR8_INTLEV_SIZE     2
+# define INTC_INTPR8_OFFSET_OFFSET   0
+# define INTC_INTPR8_OFFSET_SIZE     24
+#define INTC_INTREQ8                 0x120
+# define INTC_INTREQ8_IREQUEST256_OFFSET 0
+# define INTC_INTREQ8_IREQUEST256_SIZE 1
+#define INTC_INTPR9                  0x24
+# define INTC_INTPR9_INTLEV_OFFSET   30
+# define INTC_INTPR9_INTLEV_SIZE     2
+# define INTC_INTPR9_OFFSET_OFFSET   0
+# define INTC_INTPR9_OFFSET_SIZE     24
+#define INTC_INTREQ9                 0x124
+# define INTC_INTREQ9_IREQUEST288_OFFSET 0
+# define INTC_INTREQ9_IREQUEST288_SIZE 1
+#define INTC_INTPR10                 0x28
+# define INTC_INTPR10_INTLEV_OFFSET  30
+# define INTC_INTPR10_INTLEV_SIZE    2
+# define INTC_INTPR10_OFFSET_OFFSET  0
+# define INTC_INTPR10_OFFSET_SIZE    24
+#define INTC_INTREQ10                0x128
+# define INTC_INTREQ10_IREQUEST320_OFFSET 0
+# define INTC_INTREQ10_IREQUEST320_SIZE 1
+#define INTC_INTPR11                 0x2c
+# define INTC_INTPR11_INTLEV_OFFSET  30
+# define INTC_INTPR11_INTLEV_SIZE    2
+# define INTC_INTPR11_OFFSET_OFFSET  0
+# define INTC_INTPR11_OFFSET_SIZE    24
+#define INTC_INTREQ11                0x12c
+# define INTC_INTREQ11_IREQUEST352_OFFSET 0
+# define INTC_INTREQ11_IREQUEST352_SIZE 1
+#define INTC_INTPR12                 0x30
+# define INTC_INTPR12_INTLEV_OFFSET  30
+# define INTC_INTPR12_INTLEV_SIZE    2
+# define INTC_INTPR12_OFFSET_OFFSET  0
+# define INTC_INTPR12_OFFSET_SIZE    24
+#define INTC_INTREQ12                0x130
+# define INTC_INTREQ12_IREQUEST384_OFFSET 0
+# define INTC_INTREQ12_IREQUEST384_SIZE 1
+#define INTC_INTPR13                 0x34
+# define INTC_INTPR13_INTLEV_OFFSET  30
+# define INTC_INTPR13_INTLEV_SIZE    2
+# define INTC_INTPR13_OFFSET_OFFSET  0
+# define INTC_INTPR13_OFFSET_SIZE    24
+#define INTC_INTREQ13                0x134
+# define INTC_INTREQ13_IREQUEST416_OFFSET 0
+# define INTC_INTREQ13_IREQUEST416_SIZE 1
+#define INTC_INTPR14                 0x38
+# define INTC_INTPR14_INTLEV_OFFSET  30
+# define INTC_INTPR14_INTLEV_SIZE    2
+# define INTC_INTPR14_OFFSET_OFFSET  0
+# define INTC_INTPR14_OFFSET_SIZE    24
+#define INTC_INTREQ14                0x138
+# define INTC_INTREQ14_IREQUEST448_OFFSET 0
+# define INTC_INTREQ14_IREQUEST448_SIZE 1
+#define INTC_INTPR15                 0x3c
+# define INTC_INTPR15_INTLEV_OFFSET  30
+# define INTC_INTPR15_INTLEV_SIZE    2
+# define INTC_INTPR15_OFFSET_OFFSET  0
+# define INTC_INTPR15_OFFSET_SIZE    24
+#define INTC_INTREQ15                0x13c
+# define INTC_INTREQ15_IREQUEST480_OFFSET 0
+# define INTC_INTREQ15_IREQUEST480_SIZE 1
+#define INTC_INTPR16                 0x40
+# define INTC_INTPR16_INTLEV_OFFSET  30
+# define INTC_INTPR16_INTLEV_SIZE    2
+# define INTC_INTPR16_OFFSET_OFFSET  0
+# define INTC_INTPR16_OFFSET_SIZE    24
+#define INTC_INTREQ16                0x140
+# define INTC_INTREQ16_IREQUEST512_OFFSET 0
+# define INTC_INTREQ16_IREQUEST512_SIZE 1
+#define INTC_INTPR17                 0x44
+# define INTC_INTPR17_INTLEV_OFFSET  30
+# define INTC_INTPR17_INTLEV_SIZE    2
+# define INTC_INTPR17_OFFSET_OFFSET  0
+# define INTC_INTPR17_OFFSET_SIZE    24
+#define INTC_INTREQ17                0x144
+# define INTC_INTREQ17_IREQUEST544_OFFSET 0
+# define INTC_INTREQ17_IREQUEST544_SIZE 1
+#define INTC_INTPR18                 0x48
+# define INTC_INTPR18_INTLEV_OFFSET  30
+# define INTC_INTPR18_INTLEV_SIZE    2
+# define INTC_INTPR18_OFFSET_OFFSET  0
+# define INTC_INTPR18_OFFSET_SIZE    24
+#define INTC_INTREQ18                0x148
+# define INTC_INTREQ18_IREQUEST576_OFFSET 0
+# define INTC_INTREQ18_IREQUEST576_SIZE 1
+#define INTC_INTPR19                 0x4c
+# define INTC_INTPR19_INTLEV_OFFSET  30
+# define INTC_INTPR19_INTLEV_SIZE    2
+# define INTC_INTPR19_OFFSET_OFFSET  0
+# define INTC_INTPR19_OFFSET_SIZE    24
+#define INTC_INTREQ19                0x14c
+# define INTC_INTREQ19_IREQUEST608_OFFSET 0
+# define INTC_INTREQ19_IREQUEST608_SIZE 1
+# define INTC_INTREQ19_IREQUEST609_OFFSET 1
+# define INTC_INTREQ19_IREQUEST609_SIZE 1
+# define INTC_INTREQ19_IREQUEST610_OFFSET 2
+# define INTC_INTREQ19_IREQUEST610_SIZE 1
+# define INTC_INTREQ19_IREQUEST611_OFFSET 3
+# define INTC_INTREQ19_IREQUEST611_SIZE 1
+#define INTC_INTPR20                 0x50
+# define INTC_INTPR20_INTLEV_OFFSET  30
+# define INTC_INTPR20_INTLEV_SIZE    2
+# define INTC_INTPR20_OFFSET_OFFSET  0
+# define INTC_INTPR20_OFFSET_SIZE    24
+#define INTC_INTREQ20                0x150
+# define INTC_INTREQ20_IREQUEST640_OFFSET 0
+# define INTC_INTREQ20_IREQUEST640_SIZE 1
+#define INTC_INTPR21                 0x54
+# define INTC_INTPR21_INTLEV_OFFSET  30
+# define INTC_INTPR21_INTLEV_SIZE    2
+# define INTC_INTPR21_OFFSET_OFFSET  0
+# define INTC_INTPR21_OFFSET_SIZE    24
+#define INTC_INTREQ21                0x154
+# define INTC_INTREQ21_IREQUEST672_OFFSET 0
+# define INTC_INTREQ21_IREQUEST672_SIZE 1
+#define INTC_INTPR22                 0x58
+# define INTC_INTPR22_INTLEV_OFFSET  30
+# define INTC_INTPR22_INTLEV_SIZE    2
+# define INTC_INTPR22_OFFSET_OFFSET  0
+# define INTC_INTPR22_OFFSET_SIZE    24
+#define INTC_INTREQ22                0x158
+# define INTC_INTREQ22_IREQUEST704_OFFSET 0
+# define INTC_INTREQ22_IREQUEST704_SIZE 1
+# define INTC_INTREQ22_IREQUEST705_OFFSET 1
+# define INTC_INTREQ22_IREQUEST705_SIZE 1
+# define INTC_INTREQ22_IREQUEST706_OFFSET 2
+# define INTC_INTREQ22_IREQUEST706_SIZE 1
+#define INTC_INTPR23                 0x5c
+# define INTC_INTPR23_INTLEV_OFFSET  30
+# define INTC_INTPR23_INTLEV_SIZE    2
+# define INTC_INTPR23_OFFSET_OFFSET  0
+# define INTC_INTPR23_OFFSET_SIZE    24
+#define INTC_INTREQ23                0x15c
+# define INTC_INTREQ23_IREQUEST736_OFFSET 0
+# define INTC_INTREQ23_IREQUEST736_SIZE 1
+# define INTC_INTREQ23_IREQUEST737_OFFSET 1
+# define INTC_INTREQ23_IREQUEST737_SIZE 1
+# define INTC_INTREQ23_IREQUEST738_OFFSET 2
+# define INTC_INTREQ23_IREQUEST738_SIZE 1
+#define INTC_INTPR24                 0x60
+# define INTC_INTPR24_INTLEV_OFFSET  30
+# define INTC_INTPR24_INTLEV_SIZE    2
+# define INTC_INTPR24_OFFSET_OFFSET  0
+# define INTC_INTPR24_OFFSET_SIZE    24
+#define INTC_INTREQ24                0x160
+# define INTC_INTREQ24_IREQUEST768_OFFSET 0
+# define INTC_INTREQ24_IREQUEST768_SIZE 1
+#define INTC_INTPR25                 0x64
+# define INTC_INTPR25_INTLEV_OFFSET  30
+# define INTC_INTPR25_INTLEV_SIZE    2
+# define INTC_INTPR25_OFFSET_OFFSET  0
+# define INTC_INTPR25_OFFSET_SIZE    24
+#define INTC_INTREQ25                0x164
+# define INTC_INTREQ25_IREQUEST800_OFFSET 0
+# define INTC_INTREQ25_IREQUEST800_SIZE 1
+#define INTC_INTPR26                 0x68
+# define INTC_INTPR26_INTLEV_OFFSET  30
+# define INTC_INTPR26_INTLEV_SIZE    2
+# define INTC_INTPR26_OFFSET_OFFSET  0
+# define INTC_INTPR26_OFFSET_SIZE    24
+#define INTC_INTREQ26                0x168
+# define INTC_INTREQ26_IREQUEST832_OFFSET 0
+# define INTC_INTREQ26_IREQUEST832_SIZE 1
+#define INTC_INTPR27                 0x6c
+# define INTC_INTPR27_INTLEV_OFFSET  30
+# define INTC_INTPR27_INTLEV_SIZE    2
+# define INTC_INTPR27_OFFSET_OFFSET  0
+# define INTC_INTPR27_OFFSET_SIZE    24
+#define INTC_INTREQ27                0x16c
+# define INTC_INTREQ27_IREQUEST864_OFFSET 0
+# define INTC_INTREQ27_IREQUEST864_SIZE 1
+#define INTC_INTPR28                 0x70
+# define INTC_INTPR28_INTLEV_OFFSET  30
+# define INTC_INTPR28_INTLEV_SIZE    2
+# define INTC_INTPR28_OFFSET_OFFSET  0
+# define INTC_INTPR28_OFFSET_SIZE    24
+#define INTC_INTREQ28                0x170
+# define INTC_INTREQ28_IREQUEST896_OFFSET 0
+# define INTC_INTREQ28_IREQUEST896_SIZE 1
+#define INTC_INTPR29                 0x74
+# define INTC_INTPR29_INTLEV_OFFSET  30
+# define INTC_INTPR29_INTLEV_SIZE    2
+# define INTC_INTPR29_OFFSET_OFFSET  0
+# define INTC_INTPR29_OFFSET_SIZE    24
+#define INTC_INTREQ29                0x174
+# define INTC_INTREQ29_IREQUEST928_OFFSET 0
+# define INTC_INTREQ29_IREQUEST928_SIZE 1
+#define INTC_INTPR30                 0x78
+# define INTC_INTPR30_INTLEV_OFFSET  30
+# define INTC_INTPR30_INTLEV_SIZE    2
+# define INTC_INTPR30_OFFSET_OFFSET  0
+# define INTC_INTPR30_OFFSET_SIZE    24
+#define INTC_INTREQ30                0x178
+# define INTC_INTREQ30_IREQUEST960_OFFSET 0
+# define INTC_INTREQ30_IREQUEST960_SIZE 1
+#define INTC_INTPR31                 0x7c
+# define INTC_INTPR31_INTLEV_OFFSET  30
+# define INTC_INTPR31_INTLEV_SIZE    2
+# define INTC_INTPR31_OFFSET_OFFSET  0
+# define INTC_INTPR31_OFFSET_SIZE    24
+#define INTC_INTREQ31                0x17c
+# define INTC_INTREQ31_IREQUEST992_OFFSET 0
+# define INTC_INTREQ31_IREQUEST992_SIZE 1
+#define INTC_INTPR32                 0x80
+# define INTC_INTPR32_INTLEV_OFFSET  30
+# define INTC_INTPR32_INTLEV_SIZE    2
+# define INTC_INTPR32_OFFSET_OFFSET  0
+# define INTC_INTPR32_OFFSET_SIZE    24
+#define INTC_INTREQ32                0x180
+# define INTC_INTREQ32_IREQUEST1024_OFFSET 0
+# define INTC_INTREQ32_IREQUEST1024_SIZE 1
+#define INTC_INTCAUSE0               0x20c
+# define INTC_INTCAUSE0_CAUSEGRP_OFFSET 0
+# define INTC_INTCAUSE0_CAUSEGRP_SIZE 6
+#define INTC_INTCAUSE1               0x208
+# define INTC_INTCAUSE1_CAUSEGRP_OFFSET 0
+# define INTC_INTCAUSE1_CAUSEGRP_SIZE 6
+#define INTC_INTCAUSE2               0x204
+# define INTC_INTCAUSE2_CAUSEGRP_OFFSET 0
+# define INTC_INTCAUSE2_CAUSEGRP_SIZE 6
+#define INTC_INTCAUSE3               0x200
+# define INTC_INTCAUSE3_CAUSEGRP_OFFSET 0
+# define INTC_INTCAUSE3_CAUSEGRP_SIZE 6
+
+#define INTC_BIT(name)               (1 << INTC_##name##_OFFSET)
+#define INTC_MKBF(name, value)       (((value) & ((1 << INTC_##name##_SIZE) - 1)) << INTC_##name##_OFFSET)
+#define INTC_GETBF(name, value)      (((value) >> INTC_##name##_OFFSET) & ((1 << INTC_##name##_SIZE) - 1))
+
+#define intc_readl(port,reg)         readl((port)->regs + INTC_##reg)
+#define intc_writel(port,reg,value)  writel((value), (port)->regs + INTC_##reg)
+
+#endif /* __ASM_AVR32_PERIHP_INTC_H__ */
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
new file mode 100644
index 000000000000..d3aabfca8598
--- /dev/null
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -0,0 +1,118 @@
+/*
+ * Atmel PIO2 Port Multiplexer support
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+
+#include <asm/arch/portmux.h>
+
+#include "pio.h"
+
+#define MAX_NR_PIO_DEVICES		8
+
+struct pio_device {
+	void __iomem *regs;
+	const struct platform_device *pdev;
+	struct clk *clk;
+	u32 alloc_mask;
+	char name[32];
+};
+
+static struct pio_device pio_dev[MAX_NR_PIO_DEVICES];
+
+void portmux_set_func(unsigned int portmux_id, unsigned int pin_id,
+		      unsigned int function_id)
+{
+	struct pio_device *pio;
+	u32 mask = 1 << pin_id;
+
+	BUG_ON(portmux_id >= MAX_NR_PIO_DEVICES);
+
+	pio = &pio_dev[portmux_id];
+
+	if (function_id)
+		pio_writel(pio, BSR, mask);
+	else
+		pio_writel(pio, ASR, mask);
+	pio_writel(pio, PDR, mask);
+}
+
+static int __init pio_probe(struct platform_device *pdev)
+{
+	struct pio_device *pio = NULL;
+
+	BUG_ON(pdev->id >= MAX_NR_PIO_DEVICES);
+	pio = &pio_dev[pdev->id];
+	BUG_ON(!pio->regs);
+
+	/* TODO: Interrupts */
+
+	platform_set_drvdata(pdev, pio);
+
+	printk(KERN_INFO "%s: Atmel Port Multiplexer at 0x%p (irq %d)\n",
+	       pio->name, pio->regs, platform_get_irq(pdev, 0));
+
+	return 0;
+}
+
+static struct platform_driver pio_driver = {
+	.probe		= pio_probe,
+	.driver		= {
+		.name		= "pio",
+	},
+};
+
+static int __init pio_init(void)
+{
+	return platform_driver_register(&pio_driver);
+}
+subsys_initcall(pio_init);
+
+void __init at32_init_pio(struct platform_device *pdev)
+{
+	struct resource *regs;
+	struct pio_device *pio;
+
+	if (pdev->id > MAX_NR_PIO_DEVICES) {
+		dev_err(&pdev->dev, "only %d PIO devices supported\n",
+			MAX_NR_PIO_DEVICES);
+		return;
+	}
+
+	pio = &pio_dev[pdev->id];
+	snprintf(pio->name, sizeof(pio->name), "pio%d", pdev->id);
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		dev_err(&pdev->dev, "no mmio resource defined\n");
+		return;
+	}
+
+	pio->clk = clk_get(&pdev->dev, "mck");
+	if (IS_ERR(pio->clk))
+		/*
+		 * This is a fatal error, but if we continue we might
+		 * be so lucky that we manage to initialize the
+		 * console and display this message...
+		 */
+		dev_err(&pdev->dev, "no mck clock defined\n");
+	else
+		clk_enable(pio->clk);
+
+	pio->pdev = pdev;
+	pio->regs = ioremap(regs->start, regs->end - regs->start + 1);
+
+	pio_writel(pio, ODR, ~0UL);
+	pio_writel(pio, PER, ~0UL);
+}
diff --git a/arch/avr32/mach-at32ap/pio.h b/arch/avr32/mach-at32ap/pio.h
new file mode 100644
index 000000000000..cfea12351599
--- /dev/null
+++ b/arch/avr32/mach-at32ap/pio.h
@@ -0,0 +1,178 @@
+/*
+ * Atmel PIO2 Port Multiplexer support
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ARCH_AVR32_AT32AP_PIO_H__
+#define __ARCH_AVR32_AT32AP_PIO_H__
+
+/* PIO register offsets */
+#define PIO_PER                                0x0000
+#define PIO_PDR                                0x0004
+#define PIO_PSR                                0x0008
+#define PIO_OER                                0x0010
+#define PIO_ODR                                0x0014
+#define PIO_OSR                                0x0018
+#define PIO_IFER                               0x0020
+#define PIO_IFDR                               0x0024
+#define PIO_ISFR                               0x0028
+#define PIO_SODR                               0x0030
+#define PIO_CODR                               0x0034
+#define PIO_ODSR                               0x0038
+#define PIO_PDSR                               0x003c
+#define PIO_IER                                0x0040
+#define PIO_IDR                                0x0044
+#define PIO_IMR                                0x0048
+#define PIO_ISR                                0x004c
+#define PIO_MDER                               0x0050
+#define PIO_MDDR                               0x0054
+#define PIO_MDSR                               0x0058
+#define PIO_PUDR                               0x0060
+#define PIO_PUER                               0x0064
+#define PIO_PUSR                               0x0068
+#define PIO_ASR                                0x0070
+#define PIO_BSR                                0x0074
+#define PIO_ABSR                               0x0078
+#define PIO_OWER                               0x00a0
+#define PIO_OWDR                               0x00a4
+#define PIO_OWSR                               0x00a8
+
+/* Bitfields in PER */
+
+/* Bitfields in PDR */
+
+/* Bitfields in PSR */
+
+/* Bitfields in OER */
+
+/* Bitfields in ODR */
+
+/* Bitfields in OSR */
+
+/* Bitfields in IFER */
+
+/* Bitfields in IFDR */
+
+/* Bitfields in ISFR */
+
+/* Bitfields in SODR */
+
+/* Bitfields in CODR */
+
+/* Bitfields in ODSR */
+
+/* Bitfields in PDSR */
+
+/* Bitfields in IER */
+
+/* Bitfields in IDR */
+
+/* Bitfields in IMR */
+
+/* Bitfields in ISR */
+
+/* Bitfields in MDER */
+
+/* Bitfields in MDDR */
+
+/* Bitfields in MDSR */
+
+/* Bitfields in PUDR */
+
+/* Bitfields in PUER */
+
+/* Bitfields in PUSR */
+
+/* Bitfields in ASR */
+
+/* Bitfields in BSR */
+
+/* Bitfields in ABSR */
+#define PIO_P0_OFFSET                          0
+#define PIO_P0_SIZE                            1
+#define PIO_P1_OFFSET                          1
+#define PIO_P1_SIZE                            1
+#define PIO_P2_OFFSET                          2
+#define PIO_P2_SIZE                            1
+#define PIO_P3_OFFSET                          3
+#define PIO_P3_SIZE                            1
+#define PIO_P4_OFFSET                          4
+#define PIO_P4_SIZE                            1
+#define PIO_P5_OFFSET                          5
+#define PIO_P5_SIZE                            1
+#define PIO_P6_OFFSET                          6
+#define PIO_P6_SIZE                            1
+#define PIO_P7_OFFSET                          7
+#define PIO_P7_SIZE                            1
+#define PIO_P8_OFFSET                          8
+#define PIO_P8_SIZE                            1
+#define PIO_P9_OFFSET                          9
+#define PIO_P9_SIZE                            1
+#define PIO_P10_OFFSET                         10
+#define PIO_P10_SIZE                           1
+#define PIO_P11_OFFSET                         11
+#define PIO_P11_SIZE                           1
+#define PIO_P12_OFFSET                         12
+#define PIO_P12_SIZE                           1
+#define PIO_P13_OFFSET                         13
+#define PIO_P13_SIZE                           1
+#define PIO_P14_OFFSET                         14
+#define PIO_P14_SIZE                           1
+#define PIO_P15_OFFSET                         15
+#define PIO_P15_SIZE                           1
+#define PIO_P16_OFFSET                         16
+#define PIO_P16_SIZE                           1
+#define PIO_P17_OFFSET                         17
+#define PIO_P17_SIZE                           1
+#define PIO_P18_OFFSET                         18
+#define PIO_P18_SIZE                           1
+#define PIO_P19_OFFSET                         19
+#define PIO_P19_SIZE                           1
+#define PIO_P20_OFFSET                         20
+#define PIO_P20_SIZE                           1
+#define PIO_P21_OFFSET                         21
+#define PIO_P21_SIZE                           1
+#define PIO_P22_OFFSET                         22
+#define PIO_P22_SIZE                           1
+#define PIO_P23_OFFSET                         23
+#define PIO_P23_SIZE                           1
+#define PIO_P24_OFFSET                         24
+#define PIO_P24_SIZE                           1
+#define PIO_P25_OFFSET                         25
+#define PIO_P25_SIZE                           1
+#define PIO_P26_OFFSET                         26
+#define PIO_P26_SIZE                           1
+#define PIO_P27_OFFSET                         27
+#define PIO_P27_SIZE                           1
+#define PIO_P28_OFFSET                         28
+#define PIO_P28_SIZE                           1
+#define PIO_P29_OFFSET                         29
+#define PIO_P29_SIZE                           1
+#define PIO_P30_OFFSET                         30
+#define PIO_P30_SIZE                           1
+#define PIO_P31_OFFSET                         31
+#define PIO_P31_SIZE                           1
+
+/* Bitfields in OWER */
+
+/* Bitfields in OWDR */
+
+/* Bitfields in OWSR */
+
+/* Bit manipulation macros */
+#define PIO_BIT(name)                          (1 << PIO_##name##_OFFSET)
+#define PIO_BF(name,value)                     (((value) & ((1 << PIO_##name##_SIZE) - 1)) << PIO_##name##_OFFSET)
+#define PIO_BFEXT(name,value)                  (((value) >> PIO_##name##_OFFSET) & ((1 << PIO_##name##_SIZE) - 1))
+#define PIO_BFINS(name,value,old)              (((old) & ~(((1 << PIO_##name##_SIZE) - 1) << PIO_##name##_OFFSET)) | PIO_BF(name,value))
+
+/* Register access macros */
+#define pio_readl(port,reg)                    readl((port)->regs + PIO_##reg)
+#define pio_writel(port,reg,value)             writel((value), (port)->regs + PIO_##reg)
+
+void at32_init_pio(struct platform_device *pdev);
+
+#endif /* __ARCH_AVR32_AT32AP_PIO_H__ */
diff --git a/arch/avr32/mach-at32ap/sm.c b/arch/avr32/mach-at32ap/sm.c
new file mode 100644
index 000000000000..03306eb0345e
--- /dev/null
+++ b/arch/avr32/mach-at32ap/sm.c
@@ -0,0 +1,289 @@
+/*
+ * System Manager driver for AT32AP CPUs
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/random.h>
+#include <linux/spinlock.h>
+
+#include <asm/intc.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include <asm/arch/sm.h>
+
+#include "sm.h"
+
+#define SM_EIM_IRQ_RESOURCE	1
+#define SM_PM_IRQ_RESOURCE	2
+#define SM_RTC_IRQ_RESOURCE	3
+
+#define to_eim(irqc) container_of(irqc, struct at32_sm, irqc)
+
+struct at32_sm system_manager;
+
+int __init at32_sm_init(void)
+{
+	struct resource *regs;
+	struct at32_sm *sm = &system_manager;
+	int ret = -ENXIO;
+
+	regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0);
+	if (!regs)
+		goto fail;
+
+	spin_lock_init(&sm->lock);
+	sm->pdev = &at32_sm_device;
+
+	ret = -ENOMEM;
+	sm->regs = ioremap(regs->start, regs->end - regs->start + 1);
+	if (!sm->regs)
+		goto fail;
+
+	return 0;
+
+fail:
+	printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret);
+	return ret;
+}
+
+/*
+ * External Interrupt Module (EIM).
+ *
+ * EIM gets level- or edge-triggered interrupts of either polarity
+ * from the outside and converts it to active-high level-triggered
+ * interrupts that the internal interrupt controller can handle. EIM
+ * also provides masking/unmasking of interrupts, as well as
+ * acknowledging of edge-triggered interrupts.
+ */
+
+static irqreturn_t spurious_eim_interrupt(int irq, void *dev_id,
+					  struct pt_regs *regs)
+{
+	printk(KERN_WARNING "Spurious EIM interrupt %d\n", irq);
+	disable_irq(irq);
+	return IRQ_NONE;
+}
+
+static struct irqaction eim_spurious_action = {
+	.handler = spurious_eim_interrupt,
+};
+
+static irqreturn_t eim_handle_irq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct irq_controller * irqc = dev_id;
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned long pending;
+
+	/*
+	 * No need to disable interrupts globally.  The interrupt
+	 * level relevant to this group must be masked all the time,
+	 * so we know that this particular EIM instance will not be
+	 * re-entered.
+	 */
+	spin_lock(&sm->lock);
+
+	pending = intc_get_pending(sm->irqc.irq_group);
+	if (unlikely(!pending)) {
+		printk(KERN_ERR "EIM (group %u): No interrupts pending!\n",
+		       sm->irqc.irq_group);
+		goto unlock;
+	}
+
+	do {
+		struct irqaction *action;
+		unsigned int i;
+
+		i = fls(pending) - 1;
+		pending &= ~(1 << i);
+		action = sm->action[i];
+
+		/* Acknowledge the interrupt */
+		sm_writel(sm, EIM_ICR, 1 << i);
+
+		spin_unlock(&sm->lock);
+
+		if (action->flags & SA_INTERRUPT)
+			local_irq_disable();
+		action->handler(sm->irqc.first_irq + i, action->dev_id, regs);
+		local_irq_enable();
+		spin_lock(&sm->lock);
+		if (action->flags & SA_SAMPLE_RANDOM)
+			add_interrupt_randomness(sm->irqc.first_irq + i);
+	} while (pending);
+
+unlock:
+	spin_unlock(&sm->lock);
+	return IRQ_HANDLED;
+}
+
+static void eim_mask(struct irq_controller *irqc, unsigned int irq)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned int i;
+
+	i = irq - sm->irqc.first_irq;
+	sm_writel(sm, EIM_IDR, 1 << i);
+}
+
+static void eim_unmask(struct irq_controller *irqc, unsigned int irq)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned int i;
+
+	i = irq - sm->irqc.first_irq;
+	sm_writel(sm, EIM_IER, 1 << i);
+}
+
+static int eim_setup(struct irq_controller *irqc, unsigned int irq,
+		struct irqaction *action)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	sm->action[irq - sm->irqc.first_irq] = action;
+	/* Acknowledge earlier interrupts */
+	sm_writel(sm, EIM_ICR, (1<<(irq - sm->irqc.first_irq)));
+	eim_unmask(irqc, irq);
+	return 0;
+}
+
+static void eim_free(struct irq_controller *irqc, unsigned int irq,
+		void *dev)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	eim_mask(irqc, irq);
+	sm->action[irq - sm->irqc.first_irq] = &eim_spurious_action;
+}
+
+static int eim_set_type(struct irq_controller *irqc, unsigned int irq,
+			unsigned int type)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned long flags;
+	u32 value, pattern;
+
+	spin_lock_irqsave(&sm->lock, flags);
+
+	pattern = 1 << (irq - sm->irqc.first_irq);
+
+	value = sm_readl(sm, EIM_MODE);
+	if (type & IRQ_TYPE_LEVEL)
+		value |= pattern;
+	else
+		value &= ~pattern;
+	sm_writel(sm, EIM_MODE, value);
+	value = sm_readl(sm, EIM_EDGE);
+	if (type & IRQ_EDGE_RISING)
+		value |= pattern;
+	else
+		value &= ~pattern;
+	sm_writel(sm, EIM_EDGE, value);
+	value = sm_readl(sm, EIM_LEVEL);
+	if (type & IRQ_LEVEL_HIGH)
+		value |= pattern;
+	else
+		value &= ~pattern;
+	sm_writel(sm, EIM_LEVEL, value);
+
+	spin_unlock_irqrestore(&sm->lock, flags);
+
+	return 0;
+}
+
+static unsigned int eim_get_type(struct irq_controller *irqc,
+				 unsigned int irq)
+{
+	struct at32_sm *sm = to_eim(irqc);
+	unsigned long flags;
+	unsigned int type = 0;
+	u32 mode, edge, level, pattern;
+
+	pattern = 1 << (irq - sm->irqc.first_irq);
+
+	spin_lock_irqsave(&sm->lock, flags);
+	mode = sm_readl(sm, EIM_MODE);
+	edge = sm_readl(sm, EIM_EDGE);
+	level = sm_readl(sm, EIM_LEVEL);
+	spin_unlock_irqrestore(&sm->lock, flags);
+
+	if (mode & pattern)
+		type |= IRQ_TYPE_LEVEL;
+	if (edge & pattern)
+		type |= IRQ_EDGE_RISING;
+	if (level & pattern)
+		type |= IRQ_LEVEL_HIGH;
+
+	return type;
+}
+
+static struct irq_controller_class eim_irq_class = {
+	.typename	= "EIM",
+	.handle		= eim_handle_irq,
+	.setup		= eim_setup,
+	.free		= eim_free,
+	.mask		= eim_mask,
+	.unmask		= eim_unmask,
+	.set_type	= eim_set_type,
+	.get_type	= eim_get_type,
+};
+
+static int __init eim_init(void)
+{
+	struct at32_sm *sm = &system_manager;
+	unsigned int i;
+	u32 pattern;
+	int ret;
+
+	/*
+	 * The EIM is really the same module as SM, so register
+	 * mapping, etc. has been taken care of already.
+	 */
+
+	/*
+	 * Find out how many interrupt lines that are actually
+	 * implemented in hardware.
+	 */
+	sm_writel(sm, EIM_IDR, ~0UL);
+	sm_writel(sm, EIM_MODE, ~0UL);
+	pattern = sm_readl(sm, EIM_MODE);
+	sm->irqc.nr_irqs = fls(pattern);
+
+	ret = -ENOMEM;
+	sm->action = kmalloc(sizeof(*sm->action) * sm->irqc.nr_irqs,
+			     GFP_KERNEL);
+	if (!sm->action)
+		goto out;
+
+	for (i = 0; i < sm->irqc.nr_irqs; i++)
+		sm->action[i] = &eim_spurious_action;
+
+	spin_lock_init(&sm->lock);
+	sm->irqc.irq_group = sm->pdev->resource[SM_EIM_IRQ_RESOURCE].start;
+	sm->irqc.class = &eim_irq_class;
+
+	ret = intc_register_controller(&sm->irqc);
+	if (ret < 0)
+		goto out_free_actions;
+
+	printk("EIM: External Interrupt Module at 0x%p, IRQ group %u\n",
+	       sm->regs, sm->irqc.irq_group);
+	printk("EIM: Handling %u external IRQs, starting with IRQ%u\n",
+	       sm->irqc.nr_irqs, sm->irqc.first_irq);
+
+	return 0;
+
+out_free_actions:
+	kfree(sm->action);
+out:
+	return ret;
+}
+arch_initcall(eim_init);
diff --git a/arch/avr32/mach-at32ap/sm.h b/arch/avr32/mach-at32ap/sm.h
new file mode 100644
index 000000000000..27565822ae2a
--- /dev/null
+++ b/arch/avr32/mach-at32ap/sm.h
@@ -0,0 +1,240 @@
+/*
+ * Register definitions for SM
+ *
+ * System Manager
+ */
+#ifndef __ASM_AVR32_SM_H__
+#define __ASM_AVR32_SM_H__
+
+/* SM register offsets */
+#define SM_PM_MCCTRL                            0x0000
+#define SM_PM_CKSEL                             0x0004
+#define SM_PM_CPU_MASK                          0x0008
+#define SM_PM_HSB_MASK                          0x000c
+#define SM_PM_PBA_MASK                         0x0010
+#define SM_PM_PBB_MASK                         0x0014
+#define SM_PM_PLL0                              0x0020
+#define SM_PM_PLL1                              0x0024
+#define SM_PM_VCTRL                             0x0030
+#define SM_PM_VMREF                             0x0034
+#define SM_PM_VMV                               0x0038
+#define SM_PM_IER                               0x0040
+#define SM_PM_IDR                               0x0044
+#define SM_PM_IMR                               0x0048
+#define SM_PM_ISR                               0x004c
+#define SM_PM_ICR                               0x0050
+#define SM_PM_GCCTRL                            0x0060
+#define SM_RTC_CTRL                             0x0080
+#define SM_RTC_VAL                              0x0084
+#define SM_RTC_TOP                              0x0088
+#define SM_RTC_IER                              0x0090
+#define SM_RTC_IDR                              0x0094
+#define SM_RTC_IMR                              0x0098
+#define SM_RTC_ISR                              0x009c
+#define SM_RTC_ICR                              0x00a0
+#define SM_WDT_CTRL                             0x00b0
+#define SM_WDT_CLR                              0x00b4
+#define SM_WDT_EXT                              0x00b8
+#define SM_RC_RCAUSE                            0x00c0
+#define SM_EIM_IER                              0x0100
+#define SM_EIM_IDR                              0x0104
+#define SM_EIM_IMR                              0x0108
+#define SM_EIM_ISR                              0x010c
+#define SM_EIM_ICR                              0x0110
+#define SM_EIM_MODE                             0x0114
+#define SM_EIM_EDGE                             0x0118
+#define SM_EIM_LEVEL                            0x011c
+#define SM_EIM_TEST                             0x0120
+#define SM_EIM_NMIC                             0x0124
+
+/* Bitfields in PM_MCCTRL */
+
+/* Bitfields in PM_CKSEL */
+#define SM_CPUSEL_OFFSET                        0
+#define SM_CPUSEL_SIZE                          3
+#define SM_CPUDIV_OFFSET                        7
+#define SM_CPUDIV_SIZE                          1
+#define SM_HSBSEL_OFFSET                        8
+#define SM_HSBSEL_SIZE                          3
+#define SM_HSBDIV_OFFSET                        15
+#define SM_HSBDIV_SIZE                          1
+#define SM_PBASEL_OFFSET                       16
+#define SM_PBASEL_SIZE                         3
+#define SM_PBADIV_OFFSET                       23
+#define SM_PBADIV_SIZE                         1
+#define SM_PBBSEL_OFFSET                       24
+#define SM_PBBSEL_SIZE                         3
+#define SM_PBBDIV_OFFSET                       31
+#define SM_PBBDIV_SIZE                         1
+
+/* Bitfields in PM_CPU_MASK */
+
+/* Bitfields in PM_HSB_MASK */
+
+/* Bitfields in PM_PBA_MASK */
+
+/* Bitfields in PM_PBB_MASK */
+
+/* Bitfields in PM_PLL0 */
+#define SM_PLLEN_OFFSET                         0
+#define SM_PLLEN_SIZE                           1
+#define SM_PLLOSC_OFFSET                        1
+#define SM_PLLOSC_SIZE                          1
+#define SM_PLLOPT_OFFSET                        2
+#define SM_PLLOPT_SIZE                          3
+#define SM_PLLDIV_OFFSET                        8
+#define SM_PLLDIV_SIZE                          8
+#define SM_PLLMUL_OFFSET                        16
+#define SM_PLLMUL_SIZE                          8
+#define SM_PLLCOUNT_OFFSET                      24
+#define SM_PLLCOUNT_SIZE                        6
+#define SM_PLLTEST_OFFSET                       31
+#define SM_PLLTEST_SIZE                         1
+
+/* Bitfields in PM_PLL1 */
+
+/* Bitfields in PM_VCTRL */
+#define SM_VAUTO_OFFSET                         0
+#define SM_VAUTO_SIZE                           1
+#define SM_PM_VCTRL_VAL_OFFSET                  8
+#define SM_PM_VCTRL_VAL_SIZE                    7
+
+/* Bitfields in PM_VMREF */
+#define SM_REFSEL_OFFSET                        0
+#define SM_REFSEL_SIZE                          4
+
+/* Bitfields in PM_VMV */
+#define SM_PM_VMV_VAL_OFFSET                    0
+#define SM_PM_VMV_VAL_SIZE                      8
+
+/* Bitfields in PM_IER */
+
+/* Bitfields in PM_IDR */
+
+/* Bitfields in PM_IMR */
+
+/* Bitfields in PM_ISR */
+
+/* Bitfields in PM_ICR */
+#define SM_LOCK0_OFFSET                         0
+#define SM_LOCK0_SIZE                           1
+#define SM_LOCK1_OFFSET                         1
+#define SM_LOCK1_SIZE                           1
+#define SM_WAKE_OFFSET                          2
+#define SM_WAKE_SIZE                            1
+#define SM_VOK_OFFSET                           3
+#define SM_VOK_SIZE                             1
+#define SM_VMRDY_OFFSET                         4
+#define SM_VMRDY_SIZE                           1
+#define SM_CKRDY_OFFSET                         5
+#define SM_CKRDY_SIZE                           1
+
+/* Bitfields in PM_GCCTRL */
+#define SM_OSCSEL_OFFSET                        0
+#define SM_OSCSEL_SIZE                          1
+#define SM_PLLSEL_OFFSET                        1
+#define SM_PLLSEL_SIZE                          1
+#define SM_CEN_OFFSET                           2
+#define SM_CEN_SIZE                             1
+#define SM_CPC_OFFSET                           3
+#define SM_CPC_SIZE                             1
+#define SM_DIVEN_OFFSET                         4
+#define SM_DIVEN_SIZE                           1
+#define SM_DIV_OFFSET                           8
+#define SM_DIV_SIZE                             8
+
+/* Bitfields in RTC_CTRL */
+#define SM_PCLR_OFFSET                          1
+#define SM_PCLR_SIZE                            1
+#define SM_TOPEN_OFFSET                         2
+#define SM_TOPEN_SIZE                           1
+#define SM_CLKEN_OFFSET                         3
+#define SM_CLKEN_SIZE                           1
+#define SM_PSEL_OFFSET                          8
+#define SM_PSEL_SIZE                            16
+
+/* Bitfields in RTC_VAL */
+#define SM_RTC_VAL_VAL_OFFSET                   0
+#define SM_RTC_VAL_VAL_SIZE                     31
+
+/* Bitfields in RTC_TOP */
+#define SM_RTC_TOP_VAL_OFFSET                   0
+#define SM_RTC_TOP_VAL_SIZE                     32
+
+/* Bitfields in RTC_IER */
+
+/* Bitfields in RTC_IDR */
+
+/* Bitfields in RTC_IMR */
+
+/* Bitfields in RTC_ISR */
+
+/* Bitfields in RTC_ICR */
+#define SM_TOPI_OFFSET                          0
+#define SM_TOPI_SIZE                            1
+
+/* Bitfields in WDT_CTRL */
+#define SM_KEY_OFFSET                           24
+#define SM_KEY_SIZE                             8
+
+/* Bitfields in WDT_CLR */
+
+/* Bitfields in WDT_EXT */
+
+/* Bitfields in RC_RCAUSE */
+#define SM_POR_OFFSET                           0
+#define SM_POR_SIZE                             1
+#define SM_BOD_OFFSET                           1
+#define SM_BOD_SIZE                             1
+#define SM_EXT_OFFSET                           2
+#define SM_EXT_SIZE                             1
+#define SM_WDT_OFFSET                           3
+#define SM_WDT_SIZE                             1
+#define SM_NTAE_OFFSET                          4
+#define SM_NTAE_SIZE                            1
+#define SM_SERP_OFFSET                          5
+#define SM_SERP_SIZE                            1
+
+/* Bitfields in EIM_IER */
+
+/* Bitfields in EIM_IDR */
+
+/* Bitfields in EIM_IMR */
+
+/* Bitfields in EIM_ISR */
+
+/* Bitfields in EIM_ICR */
+
+/* Bitfields in EIM_MODE */
+
+/* Bitfields in EIM_EDGE */
+#define SM_INT0_OFFSET                          0
+#define SM_INT0_SIZE                            1
+#define SM_INT1_OFFSET                          1
+#define SM_INT1_SIZE                            1
+#define SM_INT2_OFFSET                          2
+#define SM_INT2_SIZE                            1
+#define SM_INT3_OFFSET                          3
+#define SM_INT3_SIZE                            1
+
+/* Bitfields in EIM_LEVEL */
+
+/* Bitfields in EIM_TEST */
+#define SM_TESTEN_OFFSET                        31
+#define SM_TESTEN_SIZE                          1
+
+/* Bitfields in EIM_NMIC */
+#define SM_EN_OFFSET                            0
+#define SM_EN_SIZE                              1
+
+/* Bit manipulation macros */
+#define SM_BIT(name)                            (1 << SM_##name##_OFFSET)
+#define SM_BF(name,value)                       (((value) & ((1 << SM_##name##_SIZE) - 1)) << SM_##name##_OFFSET)
+#define SM_BFEXT(name,value)                    (((value) >> SM_##name##_OFFSET) & ((1 << SM_##name##_SIZE) - 1))
+#define SM_BFINS(name,value,old)                (((old) & ~(((1 << SM_##name##_SIZE) - 1) << SM_##name##_OFFSET)) | SM_BF(name,value))
+
+/* Register access macros */
+#define sm_readl(port,reg)                      readl((port)->regs + SM_##reg)
+#define sm_writel(port,reg,value)               writel((value), (port)->regs + SM_##reg)
+
+#endif /* __ASM_AVR32_SM_H__ */
diff --git a/arch/avr32/mm/Makefile b/arch/avr32/mm/Makefile
new file mode 100644
index 000000000000..0066491f90d4
--- /dev/null
+++ b/arch/avr32/mm/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the Linux/AVR32 kernel.
+#
+
+obj-y				+= init.o clear_page.o copy_page.o dma-coherent.o
+obj-y				+= ioremap.o cache.o fault.o tlb.o
diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c
new file mode 100644
index 000000000000..450515b245a0
--- /dev/null
+++ b/arch/avr32/mm/cache.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/highmem.h>
+#include <linux/unistd.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachectl.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+/*
+ * If you attempt to flush anything more than this, you need superuser
+ * privileges.  The value is completely arbitrary.
+ */
+#define CACHEFLUSH_MAX_LEN	1024
+
+void invalidate_dcache_region(void *start, size_t size)
+{
+	unsigned long v, begin, end, linesz;
+
+	linesz = boot_cpu_data.dcache.linesz;
+
+	//printk("invalidate dcache: %p + %u\n", start, size);
+
+	/* You asked for it, you got it */
+	begin = (unsigned long)start & ~(linesz - 1);
+	end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
+
+	for (v = begin; v < end; v += linesz)
+		invalidate_dcache_line((void *)v);
+}
+
+void clean_dcache_region(void *start, size_t size)
+{
+	unsigned long v, begin, end, linesz;
+
+	linesz = boot_cpu_data.dcache.linesz;
+	begin = (unsigned long)start & ~(linesz - 1);
+	end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
+
+	for (v = begin; v < end; v += linesz)
+		clean_dcache_line((void *)v);
+	flush_write_buffer();
+}
+
+void flush_dcache_region(void *start, size_t size)
+{
+	unsigned long v, begin, end, linesz;
+
+	linesz = boot_cpu_data.dcache.linesz;
+	begin = (unsigned long)start & ~(linesz - 1);
+	end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
+
+	for (v = begin; v < end; v += linesz)
+		flush_dcache_line((void *)v);
+	flush_write_buffer();
+}
+
+void invalidate_icache_region(void *start, size_t size)
+{
+	unsigned long v, begin, end, linesz;
+
+	linesz = boot_cpu_data.icache.linesz;
+	begin = (unsigned long)start & ~(linesz - 1);
+	end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
+
+	for (v = begin; v < end; v += linesz)
+		invalidate_icache_line((void *)v);
+}
+
+static inline void __flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long v, linesz;
+
+	linesz = boot_cpu_data.dcache.linesz;
+	for (v = start; v < end; v += linesz) {
+		clean_dcache_line((void *)v);
+		invalidate_icache_line((void *)v);
+	}
+
+	flush_write_buffer();
+}
+
+/*
+ * This one is called after a module has been loaded.
+ */
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long linesz;
+
+	linesz = boot_cpu_data.dcache.linesz;
+	__flush_icache_range(start & ~(linesz - 1),
+			     (end + linesz - 1) & ~(linesz - 1));
+}
+
+/*
+ * This one is called from do_no_page(), do_swap_page() and install_page().
+ */
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	if (vma->vm_flags & VM_EXEC) {
+		void *v = kmap(page);
+		__flush_icache_range((unsigned long)v, (unsigned long)v + PAGE_SIZE);
+		kunmap(v);
+	}
+}
+
+/*
+ * This one is used by copy_to_user_page()
+ */
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	if (vma->vm_flags & VM_EXEC)
+		flush_icache_range(addr, addr + len);
+}
+
+asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len)
+{
+	int ret;
+
+	if (len > CACHEFLUSH_MAX_LEN) {
+		ret = -EPERM;
+		if (!capable(CAP_SYS_ADMIN))
+			goto out;
+	}
+
+	ret = -EFAULT;
+	if (!access_ok(VERIFY_WRITE, addr, len))
+		goto out;
+
+	switch (operation) {
+	case CACHE_IFLUSH:
+		flush_icache_range((unsigned long)addr,
+				   (unsigned long)addr + len);
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	return ret;
+}
diff --git a/arch/avr32/mm/clear_page.S b/arch/avr32/mm/clear_page.S
new file mode 100644
index 000000000000..5d70dca00699
--- /dev/null
+++ b/arch/avr32/mm/clear_page.S
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+/*
+ * clear_page
+ * r12: P1 address (to)
+ */
+	.text
+	.global clear_page
+clear_page:
+	sub	r9, r12, -PAGE_SIZE
+	mov     r10, 0
+	mov	r11, 0
+0:      st.d    r12++, r10
+	cp      r12, r9
+	brne	0b
+	mov     pc, lr
diff --git a/arch/avr32/mm/copy_page.S b/arch/avr32/mm/copy_page.S
new file mode 100644
index 000000000000..c2b3752946b8
--- /dev/null
+++ b/arch/avr32/mm/copy_page.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+/*
+ * copy_page
+ *
+ * r12		to (P1 address)
+ * r11		from (P1 address)
+ * r8-r10	scratch
+ */
+	.text
+	.global copy_page
+copy_page:
+	sub	r10, r11, -(1 << PAGE_SHIFT)
+	/* pref	r11[0] */
+1:	/* pref	r11[8] */
+	ld.d	r8, r11++
+	st.d	r12++, r8
+	cp	r11, r10
+	brlo	1b
+	mov	pc, lr
diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c
new file mode 100644
index 000000000000..44ab8a7bdae2
--- /dev/null
+++ b/arch/avr32/mm/dma-coherent.c
@@ -0,0 +1,139 @@
+/*
+ *  Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dma-mapping.h>
+
+#include <asm/addrspace.h>
+#include <asm/cacheflush.h>
+
+void dma_cache_sync(void *vaddr, size_t size, int direction)
+{
+	/*
+	 * No need to sync an uncached area
+	 */
+	if (PXSEG(vaddr) == P2SEG)
+		return;
+
+	switch (direction) {
+	case DMA_FROM_DEVICE:		/* invalidate only */
+		dma_cache_inv(vaddr, size);
+		break;
+	case DMA_TO_DEVICE:		/* writeback only */
+		dma_cache_wback(vaddr, size);
+		break;
+	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
+		dma_cache_wback_inv(vaddr, size);
+		break;
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(dma_cache_sync);
+
+static struct page *__dma_alloc(struct device *dev, size_t size,
+				dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page, *free, *end;
+	int order;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		return NULL;
+	split_page(page, order);
+
+	/*
+	 * When accessing physical memory with valid cache data, we
+	 * get a cache hit even if the virtual memory region is marked
+	 * as uncached.
+	 *
+	 * Since the memory is newly allocated, there is no point in
+	 * doing a writeback. If the previous owner cares, he should
+	 * have flushed the cache before releasing the memory.
+	 */
+	invalidate_dcache_region(phys_to_virt(page_to_phys(page)), size);
+
+	*handle = page_to_bus(page);
+	free = page + (size >> PAGE_SHIFT);
+	end = page + (1 << order);
+
+	/*
+	 * Free any unused pages
+	 */
+	while (free < end) {
+		__free_page(free);
+		free++;
+	}
+
+	return page;
+}
+
+static void __dma_free(struct device *dev, size_t size,
+		       struct page *page, dma_addr_t handle)
+{
+	struct page *end = page + (PAGE_ALIGN(size) >> PAGE_SHIFT);
+
+	while (page < end)
+		__free_page(page++);
+}
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page;
+	void *ret = NULL;
+
+	page = __dma_alloc(dev, size, handle, gfp);
+	if (page)
+		ret = phys_to_uncached(page_to_phys(page));
+
+	return ret;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size,
+		       void *cpu_addr, dma_addr_t handle)
+{
+	void *addr = phys_to_cached(uncached_to_phys(cpu_addr));
+	struct page *page;
+
+	pr_debug("dma_free_coherent addr %p (phys %08lx) size %u\n",
+		 cpu_addr, (unsigned long)handle, (unsigned)size);
+	BUG_ON(!virt_addr_valid(addr));
+	page = virt_to_page(addr);
+	__dma_free(dev, size, page, handle);
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+#if 0
+void *dma_alloc_writecombine(struct device *dev, size_t size,
+			     dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page;
+
+	page = __dma_alloc(dev, size, handle, gfp);
+
+	/* Now, map the page into P3 with write-combining turned on */
+	return __ioremap(page_to_phys(page), size, _PAGE_BUFFER);
+}
+EXPORT_SYMBOL(dma_alloc_writecombine);
+
+void dma_free_writecombine(struct device *dev, size_t size,
+			   void *cpu_addr, dma_addr_t handle)
+{
+	struct page *page;
+
+	iounmap(cpu_addr);
+
+	page = bus_to_page(handle);
+	__dma_free(dev, size, page, handle);
+}
+EXPORT_SYMBOL(dma_free_writecombine);
+#endif
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
new file mode 100644
index 000000000000..678557260a35
--- /dev/null
+++ b/arch/avr32/mm/fault.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/sh/mm/fault.c:
+ *   Copyright (C) 1999  Niibe Yutaka
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+
+#include <asm/kdebug.h>
+#include <asm/mmu_context.h>
+#include <asm/sysreg.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+
+#ifdef DEBUG
+static void dump_code(unsigned long pc)
+{
+	char *p = (char *)pc;
+	char val;
+	int i;
+
+
+	printk(KERN_DEBUG "Code:");
+	for (i = 0; i < 16; i++) {
+		if (__get_user(val, p + i))
+			break;
+		printk(" %02x", val);
+	}
+	printk("\n");
+}
+#endif
+
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
+/* Hook to register for page fault notifications */
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, struct pt_regs *regs,
+				    int trap, int sig)
+{
+	struct die_args args = {
+		.regs = regs,
+		.trapnr = trap,
+	};
+	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, struct pt_regs *regs,
+				    int trap, int sig)
+{
+	return NOTIFY_DONE;
+}
+#endif
+
+/*
+ * This routine handles page faults. It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+ *
+ * ecr is the Exception Cause Register. Possible values are:
+ *   5:  Page not found (instruction access)
+ *   6:  Protection fault (instruction access)
+ *   12: Page not found (read access)
+ *   13: Page not found (write access)
+ *   14: Protection fault (read access)
+ *   15: Protection fault (write access)
+ */
+asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	const struct exception_table_entry *fixup;
+	unsigned long address;
+	unsigned long page;
+	int writeaccess = 0;
+
+	if (notify_page_fault(DIE_PAGE_FAULT, regs,
+			      ecr, SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	address = sysreg_read(TLBEAR);
+
+	tsk = current;
+	mm = tsk->mm;
+
+	/*
+	 * If we're in an interrupt or have no user context, we must
+	 * not take the fault...
+	 */
+	if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
+		goto no_context;
+
+	local_irq_enable();
+
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+	/*
+	 * Ok, we have a good vm_area for this memory access, so we
+	 * can handle it...
+	 */
+good_area:
+	//pr_debug("good area: vm_flags = 0x%lx\n", vma->vm_flags);
+	switch (ecr) {
+	case ECR_PROTECTION_X:
+	case ECR_TLB_MISS_X:
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+		break;
+	case ECR_PROTECTION_R:
+	case ECR_TLB_MISS_R:
+		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+			goto bad_area;
+		break;
+	case ECR_PROTECTION_W:
+	case ECR_TLB_MISS_W:
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+		writeaccess = 1;
+		break;
+	default:
+		panic("Unhandled case %lu in do_page_fault!", ecr);
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault, make
+	 * sure we exit gracefully rather than endlessly redo the
+	 * fault.
+	 */
+survive:
+	switch (handle_mm_fault(mm, vma, address, writeaccess)) {
+	case VM_FAULT_MINOR:
+		tsk->min_flt++;
+		break;
+	case VM_FAULT_MAJOR:
+		tsk->maj_flt++;
+		break;
+	case VM_FAULT_SIGBUS:
+		goto do_sigbus;
+	case VM_FAULT_OOM:
+		goto out_of_memory;
+	default:
+		BUG();
+	}
+
+	up_read(&mm->mmap_sem);
+	return;
+
+	/*
+	 * Something tried to access memory that isn't in our memory
+	 * map. Fix it, but check if it's kernel or user first...
+	 */
+bad_area:
+	pr_debug("Bad area [%s:%u]: addr %08lx, ecr %lu\n",
+		 tsk->comm, tsk->pid, address, ecr);
+
+	up_read(&mm->mmap_sem);
+
+	if (user_mode(regs)) {
+		/* Hmm...we have to pass address and ecr somehow... */
+		/* tsk->thread.address = address;
+		   tsk->thread.error_code = ecr; */
+#ifdef DEBUG
+		show_regs(regs);
+		dump_code(regs->pc);
+
+		page = sysreg_read(PTBR);
+		printk("ptbr = %08lx", page);
+		if (page) {
+			page = ((unsigned long *)page)[address >> 22];
+			printk(" pgd = %08lx", page);
+			if (page & _PAGE_PRESENT) {
+				page &= PAGE_MASK;
+				address &= 0x003ff000;
+				page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT];
+				printk(" pte = %08lx\n", page);
+			}
+		}
+#endif
+		pr_debug("Sending SIGSEGV to PID %d...\n",
+			tsk->pid);
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+no_context:
+	pr_debug("No context\n");
+
+	/* Are we prepared to handle this kernel fault? */
+	fixup = search_exception_tables(regs->pc);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		pr_debug("Found fixup at %08lx\n", fixup->fixup);
+		return;
+	}
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have
+	 * to terminate things with extreme prejudice.
+	 */
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT
+		       "Unable to handle kernel NULL pointer dereference");
+	else
+		printk(KERN_ALERT
+		       "Unable to handle kernel paging request");
+	printk(" at virtual address %08lx\n", address);
+	printk(KERN_ALERT "pc = %08lx\n", regs->pc);
+
+	page = sysreg_read(PTBR);
+	printk(KERN_ALERT "ptbr = %08lx", page);
+	if (page) {
+		page = ((unsigned long *)page)[address >> 22];
+		printk(" pgd = %08lx", page);
+		if (page & _PAGE_PRESENT) {
+			page &= PAGE_MASK;
+			address &= 0x003ff000;
+			page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT];
+			printk(" pte = %08lx\n", page);
+		}
+	}
+	die("\nOops", regs, ecr);
+	do_exit(SIGKILL);
+
+	/*
+	 * We ran out of memory, or some other thing happened to us
+	 * that made us unable to handle the page fault gracefully.
+	 */
+out_of_memory:
+	printk("Out of memory\n");
+	up_read(&mm->mmap_sem);
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: Killing process %s\n", tsk->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel or
+	 * user mode.
+	 */
+	/* address, error_code, trap_no, ... */
+#ifdef DEBUG
+	show_regs(regs);
+	dump_code(regs->pc);
+#endif
+	pr_debug("Sending SIGBUS to PID %d...\n", tsk->pid);
+	force_sig(SIGBUS, tsk);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!user_mode(regs))
+		goto no_context;
+}
+
+asmlinkage void do_bus_error(unsigned long addr, int write_access,
+			     struct pt_regs *regs)
+{
+	printk(KERN_ALERT
+	       "Bus error at physical address 0x%08lx (%s access)\n",
+	       addr, write_access ? "write" : "read");
+	printk(KERN_INFO "DTLB dump:\n");
+	dump_dtlb();
+	die("Bus Error", regs, write_access);
+	do_exit(SIGKILL);
+}
+
+/*
+ * This functionality is currently not possible to implement because
+ * we're using segmentation to ensure a fixed mapping of the kernel
+ * virtual address space.
+ *
+ * It would be possible to implement this, but it would require us to
+ * disable segmentation at startup and load the kernel mappings into
+ * the TLB like any other pages. There will be lots of trickery to
+ * avoid recursive invocation of the TLB miss handler, though...
+ */
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+
+}
+EXPORT_SYMBOL(kernel_map_pages);
+#endif
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
new file mode 100644
index 000000000000..3e6c41039808
--- /dev/null
+++ b/arch/avr32/mm/init.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+#include <linux/pfn.h>
+#include <linux/nodemask.h>
+
+#include <asm/page.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/io.h>
+#include <asm/dma.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+struct page *empty_zero_page;
+
+/*
+ * Cache of MMU context last used.
+ */
+unsigned long mmu_context_cache = NO_CONTEXT;
+
+#define START_PFN	(NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT)
+#define MAX_LOW_PFN	(NODE_DATA(0)->bdata->node_low_pfn)
+
+void show_mem(void)
+{
+	int total = 0, reserved = 0, cached = 0;
+	int slab = 0, free = 0, shared = 0;
+	pg_data_t *pgdat;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+
+	for_each_online_pgdat(pgdat) {
+		struct page *page, *end;
+
+		page = pgdat->node_mem_map;
+		end = page + pgdat->node_spanned_pages;
+
+		do {
+			total++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (PageSlab(page))
+				slab++;
+			else if (!page_count(page))
+				free++;
+			else
+				shared += page_count(page) - 1;
+			page++;
+		} while (page < end);
+	}
+
+	printk ("%d pages of RAM\n", total);
+	printk ("%d free pages\n", free);
+	printk ("%d reserved pages\n", reserved);
+	printk ("%d slab pages\n", slab);
+	printk ("%d pages shared\n", shared);
+	printk ("%d pages swap cached\n", cached);
+}
+
+static void __init print_memory_map(const char *what,
+				    struct tag_mem_range *mem)
+{
+	printk ("%s:\n", what);
+	for (; mem; mem = mem->next) {
+		printk ("  %08lx - %08lx\n",
+			(unsigned long)mem->addr,
+			(unsigned long)(mem->addr + mem->size));
+	}
+}
+
+#define MAX_LOWMEM	HIGHMEM_START
+#define MAX_LOWMEM_PFN	PFN_DOWN(MAX_LOWMEM)
+
+/*
+ * Sort a list of memory regions in-place by ascending address.
+ *
+ * We're using bubble sort because we only have singly linked lists
+ * with few elements.
+ */
+static void __init sort_mem_list(struct tag_mem_range **pmem)
+{
+	int done;
+	struct tag_mem_range **a, **b;
+
+	if (!*pmem)
+		return;
+
+	do {
+		done = 1;
+		a = pmem, b = &(*pmem)->next;
+		while (*b) {
+			if ((*a)->addr > (*b)->addr) {
+				struct tag_mem_range *tmp;
+				tmp = (*b)->next;
+				(*b)->next = *a;
+				*a = *b;
+				*b = tmp;
+				done = 0;
+			}
+			a = &(*a)->next;
+			b = &(*a)->next;
+		}
+	} while (!done);
+}
+
+/*
+ * Find a free memory region large enough for storing the
+ * bootmem bitmap.
+ */
+static unsigned long __init
+find_bootmap_pfn(const struct tag_mem_range *mem)
+{
+	unsigned long bootmap_pages, bootmap_len;
+	unsigned long node_pages = PFN_UP(mem->size);
+	unsigned long bootmap_addr = mem->addr;
+	struct tag_mem_range *reserved = mem_reserved;
+	struct tag_mem_range *ramdisk = mem_ramdisk;
+	unsigned long kern_start = virt_to_phys(_stext);
+	unsigned long kern_end = virt_to_phys(_end);
+
+	bootmap_pages = bootmem_bootmap_pages(node_pages);
+	bootmap_len = bootmap_pages << PAGE_SHIFT;
+
+	/*
+	 * Find a large enough region without reserved pages for
+	 * storing the bootmem bitmap. We can take advantage of the
+	 * fact that all lists have been sorted.
+	 *
+	 * We have to check explicitly reserved regions as well as the
+	 * kernel image and any RAMDISK images...
+	 *
+	 * Oh, and we have to make sure we don't overwrite the taglist
+	 * since we're going to use it until the bootmem allocator is
+	 * fully up and running.
+	 */
+	while (1) {
+		if ((bootmap_addr < kern_end) &&
+		    ((bootmap_addr + bootmap_len) > kern_start))
+			bootmap_addr = kern_end;
+
+		while (reserved &&
+		       (bootmap_addr >= (reserved->addr + reserved->size)))
+			reserved = reserved->next;
+
+		if (reserved &&
+		    ((bootmap_addr + bootmap_len) >= reserved->addr)) {
+			bootmap_addr = reserved->addr + reserved->size;
+			continue;
+		}
+
+		while (ramdisk &&
+		       (bootmap_addr >= (ramdisk->addr + ramdisk->size)))
+			ramdisk = ramdisk->next;
+
+		if (!ramdisk ||
+		    ((bootmap_addr + bootmap_len) < ramdisk->addr))
+			break;
+
+		bootmap_addr = ramdisk->addr + ramdisk->size;
+	}
+
+	if ((PFN_UP(bootmap_addr) + bootmap_len) >= (mem->addr + mem->size))
+		return ~0UL;
+
+	return PFN_UP(bootmap_addr);
+}
+
+void __init setup_bootmem(void)
+{
+	unsigned bootmap_size;
+	unsigned long first_pfn, bootmap_pfn, pages;
+	unsigned long max_pfn, max_low_pfn;
+	unsigned long kern_start = virt_to_phys(_stext);
+	unsigned long kern_end = virt_to_phys(_end);
+	unsigned node = 0;
+	struct tag_mem_range *bank, *res;
+
+	sort_mem_list(&mem_phys);
+	sort_mem_list(&mem_reserved);
+
+	print_memory_map("Physical memory", mem_phys);
+	print_memory_map("Reserved memory", mem_reserved);
+
+	nodes_clear(node_online_map);
+
+	if (mem_ramdisk) {
+#ifdef CONFIG_BLK_DEV_INITRD
+		initrd_start = __va(mem_ramdisk->addr);
+		initrd_end = initrd_start + mem_ramdisk->size;
+
+		print_memory_map("RAMDISK images", mem_ramdisk);
+		if (mem_ramdisk->next)
+			printk(KERN_WARNING
+			       "Warning: Only the first RAMDISK image "
+			       "will be used\n");
+		sort_mem_list(&mem_ramdisk);
+#else
+		printk(KERN_WARNING "RAM disk image present, but "
+		       "no initrd support in kernel!\n");
+#endif
+	}
+
+	if (mem_phys->next)
+		printk(KERN_WARNING "Only using first memory bank\n");
+
+	for (bank = mem_phys; bank; bank = NULL) {
+		first_pfn = PFN_UP(bank->addr);
+		max_low_pfn = max_pfn = PFN_DOWN(bank->addr + bank->size);
+		bootmap_pfn = find_bootmap_pfn(bank);
+		if (bootmap_pfn > max_pfn)
+			panic("No space for bootmem bitmap!\n");
+
+		if (max_low_pfn > MAX_LOWMEM_PFN) {
+			max_low_pfn = MAX_LOWMEM_PFN;
+#ifndef CONFIG_HIGHMEM
+			/*
+			 * Lowmem is memory that can be addressed
+			 * directly through P1/P2
+			 */
+			printk(KERN_WARNING
+			       "Node %u: Only %ld MiB of memory will be used.\n",
+			       node, MAX_LOWMEM >> 20);
+			printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+#else
+#error HIGHMEM is not supported by AVR32 yet
+#endif
+		}
+
+		/* Initialize the boot-time allocator with low memory only. */
+		bootmap_size = init_bootmem_node(NODE_DATA(node), bootmap_pfn,
+						 first_pfn, max_low_pfn);
+
+		printk("Node %u: bdata = %p, bdata->node_bootmem_map = %p\n",
+		       node, NODE_DATA(node)->bdata,
+		       NODE_DATA(node)->bdata->node_bootmem_map);
+
+		/*
+		 * Register fully available RAM pages with the bootmem
+		 * allocator.
+		 */
+		pages = max_low_pfn - first_pfn;
+		free_bootmem_node (NODE_DATA(node), PFN_PHYS(first_pfn),
+				   PFN_PHYS(pages));
+
+		/*
+		 * Reserve space for the kernel image (if present in
+		 * this node)...
+		 */
+		if ((kern_start >= PFN_PHYS(first_pfn)) &&
+		    (kern_start < PFN_PHYS(max_pfn))) {
+			printk("Node %u: Kernel image %08lx - %08lx\n",
+			       node, kern_start, kern_end);
+			reserve_bootmem_node(NODE_DATA(node), kern_start,
+					     kern_end - kern_start);
+		}
+
+		/* ...the bootmem bitmap... */
+		reserve_bootmem_node(NODE_DATA(node),
+				     PFN_PHYS(bootmap_pfn),
+				     bootmap_size);
+
+		/* ...any RAMDISK images... */
+		for (res = mem_ramdisk; res; res = res->next) {
+			if (res->addr > PFN_PHYS(max_pfn))
+				break;
+
+			if (res->addr >= PFN_PHYS(first_pfn)) {
+				printk("Node %u: RAMDISK %08lx - %08lx\n",
+				       node,
+				       (unsigned long)res->addr,
+				       (unsigned long)(res->addr + res->size));
+				reserve_bootmem_node(NODE_DATA(node),
+						     res->addr, res->size);
+			}
+		}
+
+		/* ...and any other reserved regions. */
+		for (res = mem_reserved; res; res = res->next) {
+			if (res->addr > PFN_PHYS(max_pfn))
+				break;
+
+			if (res->addr >= PFN_PHYS(first_pfn)) {
+				printk("Node %u: Reserved %08lx - %08lx\n",
+				       node,
+				       (unsigned long)res->addr,
+				       (unsigned long)(res->addr + res->size));
+				reserve_bootmem_node(NODE_DATA(node),
+						     res->addr, res->size);
+			}
+		}
+
+		node_set_online(node);
+	}
+}
+
+/*
+ * paging_init() sets up the page tables
+ *
+ * This routine also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+void __init paging_init(void)
+{
+	extern unsigned long _evba;
+	void *zero_page;
+	int nid;
+
+	/*
+	 * Make sure we can handle exceptions before enabling
+	 * paging. Not that we should ever _get_ any exceptions this
+	 * early, but you never know...
+	 */
+	printk("Exception vectors start at %p\n", &_evba);
+	sysreg_write(EVBA, (unsigned long)&_evba);
+
+	/*
+	 * Since we are ready to handle exceptions now, we should let
+	 * the CPU generate them...
+	 */
+	__asm__ __volatile__ ("csrf %0" : : "i"(SR_EM_BIT));
+
+	/*
+	 * Allocate the zero page. The allocator will panic if it
+	 * can't satisfy the request, so no need to check.
+	 */
+	zero_page = alloc_bootmem_low_pages_node(NODE_DATA(0),
+						 PAGE_SIZE);
+
+	{
+		pgd_t *pg_dir;
+		int i;
+
+		pg_dir = swapper_pg_dir;
+		sysreg_write(PTBR, (unsigned long)pg_dir);
+
+		for (i = 0; i < PTRS_PER_PGD; i++)
+			pgd_val(pg_dir[i]) = 0;
+
+		enable_mmu();
+		printk ("CPU: Paging enabled\n");
+	}
+
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		unsigned long zones_size[MAX_NR_ZONES];
+		unsigned long low, start_pfn;
+
+		start_pfn = pgdat->bdata->node_boot_start;
+		start_pfn >>= PAGE_SHIFT;
+		low = pgdat->bdata->node_low_pfn;
+
+		memset(zones_size, 0, sizeof(zones_size));
+		zones_size[ZONE_NORMAL] = low - start_pfn;
+
+		printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
+		       nid, start_pfn, low);
+
+		free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL);
+
+		printk("Node %u: mem_map starts at %p\n",
+		       pgdat->node_id, pgdat->node_mem_map);
+	}
+
+	mem_map = NODE_DATA(0)->node_mem_map;
+
+	memset(zero_page, 0, PAGE_SIZE);
+	empty_zero_page = virt_to_page(zero_page);
+	flush_dcache_page(empty_zero_page);
+}
+
+void __init mem_init(void)
+{
+	int codesize, reservedpages, datasize, initsize;
+	int nid, i;
+
+	reservedpages = 0;
+	high_memory = NULL;
+
+	/* this will put all low memory onto the freelists */
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		unsigned long node_pages = 0;
+		void *node_high_memory;
+
+		num_physpages += pgdat->node_present_pages;
+
+		if (pgdat->node_spanned_pages != 0)
+			node_pages = free_all_bootmem_node(pgdat);
+
+		totalram_pages += node_pages;
+
+		for (i = 0; i < node_pages; i++)
+			if (PageReserved(pgdat->node_mem_map + i))
+				reservedpages++;
+
+		node_high_memory = (void *)((pgdat->node_start_pfn
+					     + pgdat->node_spanned_pages)
+					    << PAGE_SHIFT);
+		if (node_high_memory > high_memory)
+			high_memory = node_high_memory;
+	}
+
+	max_mapnr = MAP_NR(high_memory);
+
+	codesize = (unsigned long)_etext - (unsigned long)_text;
+	datasize = (unsigned long)_edata - (unsigned long)_data;
+	initsize = (unsigned long)__init_end - (unsigned long)__init_begin;
+
+	printk ("Memory: %luk/%luk available (%dk kernel code, "
+		"%dk reserved, %dk data, %dk init)\n",
+		(unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
+		totalram_pages << (PAGE_SHIFT - 10),
+		codesize >> 10,
+		reservedpages << (PAGE_SHIFT - 10),
+		datasize >> 10,
+		initsize >> 10);
+}
+
+static inline void free_area(unsigned long addr, unsigned long end, char *s)
+{
+	unsigned int size = (end - addr) >> 10;
+
+	for (; addr < end; addr += PAGE_SIZE) {
+		struct page *page = virt_to_page(addr);
+		ClearPageReserved(page);
+		init_page_count(page);
+		free_page(addr);
+		totalram_pages++;
+	}
+
+	if (size && s)
+		printk(KERN_INFO "Freeing %s memory: %dK (%lx - %lx)\n",
+		       s, size, end - (size << 10), end);
+}
+
+void free_initmem(void)
+{
+	free_area((unsigned long)__init_begin, (unsigned long)__init_end,
+		  "init");
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+static int keep_initrd;
+
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (!keep_initrd)
+		free_area(start, end, "initrd");
+}
+
+static int __init keepinitrd_setup(char *__unused)
+{
+	keep_initrd = 1;
+	return 1;
+}
+
+__setup("keepinitrd", keepinitrd_setup);
+#endif
diff --git a/arch/avr32/mm/ioremap.c b/arch/avr32/mm/ioremap.c
new file mode 100644
index 000000000000..536021877df6
--- /dev/null
+++ b/arch/avr32/mm/ioremap.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/addrspace.h>
+
+static inline int remap_area_pte(pte_t *pte, unsigned long address,
+				  unsigned long end, unsigned long phys_addr,
+				  pgprot_t prot)
+{
+	unsigned long pfn;
+
+	pfn = phys_addr >> PAGE_SHIFT;
+	do {
+		WARN_ON(!pte_none(*pte));
+
+		set_pte(pte, pfn_pte(pfn, prot));
+		address += PAGE_SIZE;
+		pfn++;
+		pte++;
+	} while (address && (address < end));
+
+	return 0;
+}
+
+static inline int remap_area_pmd(pmd_t *pmd, unsigned long address,
+				 unsigned long end, unsigned long phys_addr,
+				 pgprot_t prot)
+{
+	unsigned long next;
+
+	phys_addr -= address;
+
+	do {
+		pte_t *pte = pte_alloc_kernel(pmd, address);
+		if (!pte)
+			return -ENOMEM;
+
+		next = (address + PMD_SIZE) & PMD_MASK;
+		if (remap_area_pte(pte, address, next,
+				   address + phys_addr, prot))
+			return -ENOMEM;
+
+		address = next;
+		pmd++;
+	} while (address && (address < end));
+	return 0;
+}
+
+static int remap_area_pud(pud_t *pud, unsigned long address,
+			  unsigned long end, unsigned long phys_addr,
+			  pgprot_t prot)
+{
+	unsigned long next;
+
+	phys_addr -= address;
+
+	do {
+		pmd_t *pmd = pmd_alloc(&init_mm, pud, address);
+		if (!pmd)
+			return -ENOMEM;
+		next = (address + PUD_SIZE) & PUD_MASK;
+		if (remap_area_pmd(pmd, address, next,
+				   phys_addr + address, prot))
+			return -ENOMEM;
+
+		address = next;
+		pud++;
+	} while (address && address < end);
+
+	return 0;
+}
+
+static int remap_area_pages(unsigned long address, unsigned long phys_addr,
+			    size_t size, pgprot_t prot)
+{
+	unsigned long end = address + size;
+	unsigned long next;
+	pgd_t *pgd;
+	int err = 0;
+
+	phys_addr -= address;
+
+	pgd = pgd_offset_k(address);
+	flush_cache_all();
+	BUG_ON(address >= end);
+
+	spin_lock(&init_mm.page_table_lock);
+	do {
+		pud_t *pud = pud_alloc(&init_mm, pgd, address);
+
+		err = -ENOMEM;
+		if (!pud)
+			break;
+
+		next = (address + PGDIR_SIZE) & PGDIR_MASK;
+		if (next < address || next > end)
+			next = end;
+		err = remap_area_pud(pud, address, next,
+				     phys_addr + address, prot);
+		if (err)
+			break;
+
+		address = next;
+		pgd++;
+	} while (address && (address < end));
+
+	spin_unlock(&init_mm.page_table_lock);
+	flush_tlb_all();
+	return err;
+}
+
+/*
+ * Re-map an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access physical
+ * memory directly.
+ */
+void __iomem *__ioremap(unsigned long phys_addr, size_t size,
+			unsigned long flags)
+{
+	void *addr;
+	struct vm_struct *area;
+	unsigned long offset, last_addr;
+	pgprot_t prot;
+
+	/*
+	 * Check if we can simply use the P4 segment. This area is
+	 * uncacheable, so if caching/buffering is requested, we can't
+	 * use it.
+	 */
+	if ((phys_addr >= P4SEG) && (flags == 0))
+		return (void __iomem *)phys_addr;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/*
+	 * XXX: When mapping regular RAM, we'd better make damn sure
+	 * it's never used for anything else.  But this is really the
+	 * caller's responsibility...
+	 */
+	if (PHYSADDR(P2SEGADDR(phys_addr)) == phys_addr)
+		return (void __iomem *)P2SEGADDR(phys_addr);
+
+	/* Mappings have to be page-aligned */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr + 1) - phys_addr;
+
+	prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY
+			| _PAGE_ACCESSED | _PAGE_TYPE_SMALL | flags);
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP);
+	if (!area)
+		return NULL;
+	area->phys_addr = phys_addr;
+	addr = area->addr;
+	if (remap_area_pages((unsigned long)addr, phys_addr, size, prot)) {
+		vunmap(addr);
+		return NULL;
+	}
+
+	return (void __iomem *)(offset + (char *)addr);
+}
+EXPORT_SYMBOL(__ioremap);
+
+void __iounmap(void __iomem *addr)
+{
+	struct vm_struct *p;
+
+	if ((unsigned long)addr >= P4SEG)
+		return;
+
+	p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
+	if (unlikely(!p)) {
+		printk (KERN_ERR "iounmap: bad address %p\n", addr);
+		return;
+	}
+
+	kfree (p);
+}
+EXPORT_SYMBOL(__iounmap);
diff --git a/arch/avr32/mm/tlb.c b/arch/avr32/mm/tlb.c
new file mode 100644
index 000000000000..5d0523bbe298
--- /dev/null
+++ b/arch/avr32/mm/tlb.c
@@ -0,0 +1,378 @@
+/*
+ * AVR32 TLB operations
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+
+#include <asm/mmu_context.h>
+
+#define _TLBEHI_I	0x100
+
+void show_dtlb_entry(unsigned int index)
+{
+	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags;
+
+	local_irq_save(flags);
+	mmucr_save = sysreg_read(MMUCR);
+	tlbehi_save = sysreg_read(TLBEHI);
+	mmucr = mmucr_save & 0x13;
+	mmucr |= index << 14;
+	sysreg_write(MMUCR, mmucr);
+
+	asm volatile("tlbr" : : : "memory");
+	cpu_sync_pipeline();
+
+	tlbehi = sysreg_read(TLBEHI);
+	tlbelo = sysreg_read(TLBELO);
+
+	printk("%2u: %c %c %02x   %05x %05x %o  %o  %c %c %c %c\n",
+	       index,
+	       (tlbehi & 0x200)?'1':'0',
+	       (tlbelo & 0x100)?'1':'0',
+	       (tlbehi & 0xff),
+	       (tlbehi >> 12), (tlbelo >> 12),
+	       (tlbelo >> 4) & 7, (tlbelo >> 2) & 3,
+	       (tlbelo & 0x200)?'1':'0',
+	       (tlbelo & 0x080)?'1':'0',
+	       (tlbelo & 0x001)?'1':'0',
+	       (tlbelo & 0x002)?'1':'0');
+
+	sysreg_write(MMUCR, mmucr_save);
+	sysreg_write(TLBEHI, tlbehi_save);
+	cpu_sync_pipeline();
+	local_irq_restore(flags);
+}
+
+void dump_dtlb(void)
+{
+	unsigned int i;
+
+	printk("ID  V G ASID VPN   PFN   AP SZ C B W D\n");
+	for (i = 0; i < 32; i++)
+		show_dtlb_entry(i);
+}
+
+static unsigned long last_mmucr;
+
+static inline void set_replacement_pointer(unsigned shift)
+{
+	unsigned long mmucr, mmucr_save;
+
+	mmucr = mmucr_save = sysreg_read(MMUCR);
+
+	/* Does this mapping already exist? */
+	__asm__ __volatile__(
+		"	tlbs\n"
+		"	mfsr %0, %1"
+		: "=r"(mmucr)
+		: "i"(SYSREG_MMUCR));
+
+	if (mmucr & SYSREG_BIT(MMUCR_N)) {
+		/* Not found -- pick a not-recently-accessed entry */
+		unsigned long rp;
+		unsigned long tlbar = sysreg_read(TLBARLO);
+
+		rp = 32 - fls(tlbar);
+		if (rp == 32) {
+			rp = 0;
+			sysreg_write(TLBARLO, -1L);
+		}
+
+		mmucr &= 0x13;
+		mmucr |= (rp << shift);
+
+		sysreg_write(MMUCR, mmucr);
+	}
+
+	last_mmucr = mmucr;
+}
+
+static void update_dtlb(unsigned long address, pte_t pte, unsigned long asid)
+{
+	unsigned long vpn;
+
+	vpn = (address & MMU_VPN_MASK) | _TLBEHI_VALID | asid;
+	sysreg_write(TLBEHI, vpn);
+	cpu_sync_pipeline();
+
+	set_replacement_pointer(14);
+
+	sysreg_write(TLBELO, pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK);
+
+	/* Let's go */
+	asm volatile("nop\n\ttlbw" : : : "memory");
+	cpu_sync_pipeline();
+}
+
+void update_mmu_cache(struct vm_area_struct *vma,
+		      unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+
+	/* ptrace may call this routine */
+	if (vma && current->active_mm != vma->vm_mm)
+		return;
+
+	local_irq_save(flags);
+	update_dtlb(address, pte, get_asid());
+	local_irq_restore(flags);
+}
+
+void __flush_tlb_page(unsigned long asid, unsigned long page)
+{
+	unsigned long mmucr, tlbehi;
+
+	page |= asid;
+	sysreg_write(TLBEHI, page);
+	cpu_sync_pipeline();
+	asm volatile("tlbs");
+	mmucr = sysreg_read(MMUCR);
+
+	if (!(mmucr & SYSREG_BIT(MMUCR_N))) {
+		unsigned long tlbarlo;
+		unsigned long entry;
+
+		/* Clear the "valid" bit */
+		tlbehi = sysreg_read(TLBEHI);
+		tlbehi &= ~_TLBEHI_VALID;
+		sysreg_write(TLBEHI, tlbehi);
+		cpu_sync_pipeline();
+
+		/* mark the entry as "not accessed" */
+		entry = (mmucr >> 14) & 0x3f;
+		tlbarlo = sysreg_read(TLBARLO);
+		tlbarlo |= (0x80000000 >> entry);
+		sysreg_write(TLBARLO, tlbarlo);
+
+		/* update the entry with valid bit clear */
+		asm volatile("tlbw");
+		cpu_sync_pipeline();
+	}
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	if (vma->vm_mm && vma->vm_mm->context != NO_CONTEXT) {
+		unsigned long flags, asid;
+		unsigned long saved_asid = MMU_NO_ASID;
+
+		asid = vma->vm_mm->context & MMU_CONTEXT_ASID_MASK;
+		page &= PAGE_MASK;
+
+		local_irq_save(flags);
+		if (vma->vm_mm != current->mm) {
+			saved_asid = get_asid();
+			set_asid(asid);
+		}
+
+		__flush_tlb_page(asid, page);
+
+		if (saved_asid != MMU_NO_ASID)
+			set_asid(saved_asid);
+		local_irq_restore(flags);
+	}
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm->context != NO_CONTEXT) {
+		unsigned long flags;
+		int size;
+
+		local_irq_save(flags);
+		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */
+			mm->context = NO_CONTEXT;
+			if (mm == current->mm)
+				activate_context(mm);
+		} else {
+			unsigned long asid = mm->context & MMU_CONTEXT_ASID_MASK;
+			unsigned long saved_asid = MMU_NO_ASID;
+
+			start &= PAGE_MASK;
+			end += (PAGE_SIZE - 1);
+			end &= PAGE_MASK;
+			if (mm != current->mm) {
+				saved_asid = get_asid();
+				set_asid(asid);
+			}
+
+			while (start < end) {
+				__flush_tlb_page(asid, start);
+				start += PAGE_SIZE;
+			}
+			if (saved_asid != MMU_NO_ASID)
+				set_asid(saved_asid);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+/*
+ * TODO: If this is only called for addresses > TASK_SIZE, we can probably
+ * skip the ASID stuff and just use the Global bit...
+ */
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+	int size;
+
+	local_irq_save(flags);
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */
+		flush_tlb_all();
+	} else {
+		unsigned long asid = init_mm.context & MMU_CONTEXT_ASID_MASK;
+		unsigned long saved_asid = get_asid();
+
+		start &= PAGE_MASK;
+		end += (PAGE_SIZE - 1);
+		end &= PAGE_MASK;
+		set_asid(asid);
+		while (start < end) {
+			__flush_tlb_page(asid, start);
+			start += PAGE_SIZE;
+		}
+		set_asid(saved_asid);
+	}
+	local_irq_restore(flags);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	/* Invalidate all TLB entries of this process by getting a new ASID */
+	if (mm->context != NO_CONTEXT) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		mm->context = NO_CONTEXT;
+		if (mm == current->mm)
+			activate_context(mm);
+		local_irq_restore(flags);
+	}
+}
+
+void flush_tlb_all(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	sysreg_write(MMUCR, sysreg_read(MMUCR) | SYSREG_BIT(MMUCR_I));
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+
+static void *tlb_start(struct seq_file *tlb, loff_t *pos)
+{
+	static unsigned long tlb_index;
+
+	if (*pos >= 32)
+		return NULL;
+
+	tlb_index = 0;
+	return &tlb_index;
+}
+
+static void *tlb_next(struct seq_file *tlb, void *v, loff_t *pos)
+{
+	unsigned long *index = v;
+
+	if (*index >= 31)
+		return NULL;
+
+	++*pos;
+	++*index;
+	return index;
+}
+
+static void tlb_stop(struct seq_file *tlb, void *v)
+{
+
+}
+
+static int tlb_show(struct seq_file *tlb, void *v)
+{
+	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags;
+	unsigned long *index = v;
+
+	if (*index == 0)
+		seq_puts(tlb, "ID  V G ASID VPN   PFN   AP SZ C B W D\n");
+
+	BUG_ON(*index >= 32);
+
+	local_irq_save(flags);
+	mmucr_save = sysreg_read(MMUCR);
+	tlbehi_save = sysreg_read(TLBEHI);
+	mmucr = mmucr_save & 0x13;
+	mmucr |= *index << 14;
+	sysreg_write(MMUCR, mmucr);
+
+	asm volatile("tlbr" : : : "memory");
+	cpu_sync_pipeline();
+
+	tlbehi = sysreg_read(TLBEHI);
+	tlbelo = sysreg_read(TLBELO);
+
+	sysreg_write(MMUCR, mmucr_save);
+	sysreg_write(TLBEHI, tlbehi_save);
+	cpu_sync_pipeline();
+	local_irq_restore(flags);
+
+	seq_printf(tlb, "%2lu: %c %c %02x   %05x %05x %o  %o  %c %c %c %c\n",
+	       *index,
+	       (tlbehi & 0x200)?'1':'0',
+	       (tlbelo & 0x100)?'1':'0',
+	       (tlbehi & 0xff),
+	       (tlbehi >> 12), (tlbelo >> 12),
+	       (tlbelo >> 4) & 7, (tlbelo >> 2) & 3,
+	       (tlbelo & 0x200)?'1':'0',
+	       (tlbelo & 0x080)?'1':'0',
+	       (tlbelo & 0x001)?'1':'0',
+	       (tlbelo & 0x002)?'1':'0');
+
+	return 0;
+}
+
+static struct seq_operations tlb_ops = {
+	.start		= tlb_start,
+	.next		= tlb_next,
+	.stop		= tlb_stop,
+	.show		= tlb_show,
+};
+
+static int tlb_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &tlb_ops);
+}
+
+static struct file_operations proc_tlb_operations = {
+	.open		= tlb_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init proctlb_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry("tlb", 0, NULL);
+	if (entry)
+		entry->proc_fops = &proc_tlb_operations;
+	return 0;
+}
+late_initcall(proctlb_init);
+#endif /* CONFIG_PROC_FS */
diff --git a/include/asm-avr32/Kbuild b/include/asm-avr32/Kbuild
new file mode 100644
index 000000000000..8770e73ce938
--- /dev/null
+++ b/include/asm-avr32/Kbuild
@@ -0,0 +1,3 @@
+include include/asm-generic/Kbuild.asm
+
+headers-y	+= cachectl.h
diff --git a/include/asm-avr32/a.out.h b/include/asm-avr32/a.out.h
new file mode 100644
index 000000000000..50bf6e31a143
--- /dev/null
+++ b/include/asm-avr32/a.out.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_AVR32_A_OUT_H
+#define __ASM_AVR32_A_OUT_H
+
+struct exec
+{
+  unsigned long a_info;		/* Use macros N_MAGIC, etc for access */
+  unsigned a_text;		/* length of text, in bytes */
+  unsigned a_data;		/* length of data, in bytes */
+  unsigned a_bss;		/* length of uninitialized data area for file, in bytes */
+  unsigned a_syms;		/* length of symbol table data in file, in bytes */
+  unsigned a_entry;		/* start address */
+  unsigned a_trsize;		/* length of relocation info for text, in bytes */
+  unsigned a_drsize;		/* length of relocation info for data, in bytes */
+};
+
+#define N_TRSIZE(a)	((a).a_trsize)
+#define N_DRSIZE(a)	((a).a_drsize)
+#define N_SYMSIZE(a)	((a).a_syms)
+
+#ifdef __KERNEL__
+
+#define STACK_TOP	TASK_SIZE
+
+#endif
+
+#endif /* __ASM_AVR32_A_OUT_H */
diff --git a/include/asm-avr32/addrspace.h b/include/asm-avr32/addrspace.h
new file mode 100644
index 000000000000..366794858ec7
--- /dev/null
+++ b/include/asm-avr32/addrspace.h
@@ -0,0 +1,43 @@
+/*
+ * Defitions for the address spaces of the AVR32 CPUs. Heavily based on
+ * include/asm-sh/addrspace.h
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_ADDRSPACE_H
+#define __ASM_AVR32_ADDRSPACE_H
+
+#ifdef CONFIG_MMU
+
+/* Memory segments when segmentation is enabled */
+#define P0SEG		0x00000000
+#define P1SEG		0x80000000
+#define P2SEG		0xa0000000
+#define P3SEG		0xc0000000
+#define P4SEG		0xe0000000
+
+/* Returns the privileged segment base of a given address */
+#define PXSEG(a)	(((unsigned long)(a)) & 0xe0000000)
+
+/* Returns the physical address of a PnSEG (n=1,2) address */
+#define PHYSADDR(a)	(((unsigned long)(a)) & 0x1fffffff)
+
+/*
+ * Map an address to a certain privileged segment
+ */
+#define P1SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
+				      | P1SEG))
+#define P2SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
+				      | P2SEG))
+#define P3SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
+				      | P3SEG))
+#define P4SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \
+				      | P4SEG))
+
+#endif /* CONFIG_MMU */
+
+#endif /* __ASM_AVR32_ADDRSPACE_H */
diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h
new file mode 100644
index 000000000000..ce1150d4438d
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h
@@ -0,0 +1,36 @@
+/*
+ * include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h
+ *
+ * Copyright (C) 2005 Ivan Kokshaysky
+ * Copyright (C) SAN People
+ *
+ * Peripheral Data Controller (PDC) registers.
+ * Based on AT91RM9200 datasheet revision E.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef AT91RM9200_PDC_H
+#define AT91RM9200_PDC_H
+
+#define AT91_PDC_RPR		0x100	/* Receive Pointer Register */
+#define AT91_PDC_RCR		0x104	/* Receive Counter Register */
+#define AT91_PDC_TPR		0x108	/* Transmit Pointer Register */
+#define AT91_PDC_TCR		0x10c	/* Transmit Counter Register */
+#define AT91_PDC_RNPR		0x110	/* Receive Next Pointer Register */
+#define AT91_PDC_RNCR		0x114	/* Receive Next Counter Register */
+#define AT91_PDC_TNPR		0x118	/* Transmit Next Pointer Register */
+#define AT91_PDC_TNCR		0x11c	/* Transmit Next Counter Register */
+
+#define AT91_PDC_PTCR		0x120	/* Transfer Control Register */
+#define		AT91_PDC_RXTEN		(1 << 0)	/* Receiver Transfer Enable */
+#define		AT91_PDC_RXTDIS		(1 << 1)	/* Receiver Transfer Disable */
+#define		AT91_PDC_TXTEN		(1 << 8)	/* Transmitter Transfer Enable */
+#define		AT91_PDC_TXTDIS		(1 << 9)	/* Transmitter Transfer Disable */
+
+#define AT91_PDC_PTSR		0x124	/* Transfer Status Register */
+
+#endif
diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_usart.h b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h
new file mode 100644
index 000000000000..79f851e31b9c
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h
@@ -0,0 +1,123 @@
+/*
+ * include/asm-arm/arch-at91rm9200/at91rm9200_usart.h
+ *
+ * Copyright (C) 2005 Ivan Kokshaysky
+ * Copyright (C) SAN People
+ *
+ * USART registers.
+ * Based on AT91RM9200 datasheet revision E.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef AT91RM9200_USART_H
+#define AT91RM9200_USART_H
+
+#define AT91_US_CR		0x00			/* Control Register */
+#define		AT91_US_RSTRX		(1 <<  2)		/* Reset Receiver */
+#define		AT91_US_RSTTX		(1 <<  3)		/* Reset Transmitter */
+#define		AT91_US_RXEN		(1 <<  4)		/* Receiver Enable */
+#define		AT91_US_RXDIS		(1 <<  5)		/* Receiver Disable */
+#define		AT91_US_TXEN		(1 <<  6)		/* Transmitter Enable */
+#define		AT91_US_TXDIS		(1 <<  7)		/* Transmitter Disable */
+#define		AT91_US_RSTSTA		(1 <<  8)		/* Reset Status Bits */
+#define		AT91_US_STTBRK		(1 <<  9)		/* Start Break */
+#define		AT91_US_STPBRK		(1 << 10)		/* Stop Break */
+#define		AT91_US_STTTO		(1 << 11)		/* Start Time-out */
+#define		AT91_US_SENDA		(1 << 12)		/* Send Address */
+#define		AT91_US_RSTIT		(1 << 13)		/* Reset Iterations */
+#define		AT91_US_RSTNACK		(1 << 14)		/* Reset Non Acknowledge */
+#define		AT91_US_RETTO		(1 << 15)		/* Rearm Time-out */
+#define		AT91_US_DTREN		(1 << 16)		/* Data Terminal Ready Enable */
+#define		AT91_US_DTRDIS		(1 << 17)		/* Data Terminal Ready Disable */
+#define		AT91_US_RTSEN		(1 << 18)		/* Request To Send Enable */
+#define		AT91_US_RTSDIS		(1 << 19)		/* Request To Send Disable */
+
+#define AT91_US_MR		0x04			/* Mode Register */
+#define		AT91_US_USMODE		(0xf <<  0)		/* Mode of the USART */
+#define			AT91_US_USMODE_NORMAL		0
+#define			AT91_US_USMODE_RS485		1
+#define			AT91_US_USMODE_HWHS		2
+#define			AT91_US_USMODE_MODEM		3
+#define			AT91_US_USMODE_ISO7816_T0	4
+#define			AT91_US_USMODE_ISO7816_T1	6
+#define			AT91_US_USMODE_IRDA		8
+#define		AT91_US_USCLKS		(3   <<  4)		/* Clock Selection */
+#define		AT91_US_CHRL		(3   <<  6)		/* Character Length */
+#define			AT91_US_CHRL_5			(0 <<  6)
+#define			AT91_US_CHRL_6			(1 <<  6)
+#define			AT91_US_CHRL_7			(2 <<  6)
+#define			AT91_US_CHRL_8			(3 <<  6)
+#define		AT91_US_SYNC		(1 <<  8)		/* Synchronous Mode Select */
+#define		AT91_US_PAR		(7 <<  9)		/* Parity Type */
+#define			AT91_US_PAR_EVEN		(0 <<  9)
+#define			AT91_US_PAR_ODD			(1 <<  9)
+#define			AT91_US_PAR_SPACE		(2 <<  9)
+#define			AT91_US_PAR_MARK		(3 <<  9)
+#define			AT91_US_PAR_NONE		(4 <<  9)
+#define			AT91_US_PAR_MULTI_DROP		(6 <<  9)
+#define		AT91_US_NBSTOP		(3 << 12)		/* Number of Stop Bits */
+#define			AT91_US_NBSTOP_1		(0 << 12)
+#define			AT91_US_NBSTOP_1_5		(1 << 12)
+#define			AT91_US_NBSTOP_2		(2 << 12)
+#define		AT91_US_CHMODE		(3 << 14)		/* Channel Mode */
+#define			AT91_US_CHMODE_NORMAL		(0 << 14)
+#define			AT91_US_CHMODE_ECHO		(1 << 14)
+#define			AT91_US_CHMODE_LOC_LOOP		(2 << 14)
+#define			AT91_US_CHMODE_REM_LOOP		(3 << 14)
+#define		AT91_US_MSBF		(1 << 16)		/* Bit Order */
+#define		AT91_US_MODE9		(1 << 17)		/* 9-bit Character Length */
+#define		AT91_US_CLKO		(1 << 18)		/* Clock Output Select */
+#define		AT91_US_OVER		(1 << 19)		/* Oversampling Mode */
+#define		AT91_US_INACK		(1 << 20)		/* Inhibit Non Acknowledge */
+#define		AT91_US_DSNACK		(1 << 21)		/* Disable Successive NACK */
+#define		AT91_US_MAX_ITER	(7 << 24)		/* Max Iterations */
+#define		AT91_US_FILTER		(1 << 28)		/* Infrared Receive Line Filter */
+
+#define AT91_US_IER		0x08			/* Interrupt Enable Register */
+#define		AT91_US_RXRDY		(1 <<  0)		/* Receiver Ready */
+#define		AT91_US_TXRDY		(1 <<  1)		/* Transmitter Ready */
+#define		AT91_US_RXBRK		(1 <<  2)		/* Break Received / End of Break */
+#define		AT91_US_ENDRX		(1 <<  3)		/* End of Receiver Transfer */
+#define		AT91_US_ENDTX		(1 <<  4)		/* End of Transmitter Transfer */
+#define		AT91_US_OVRE		(1 <<  5)		/* Overrun Error */
+#define		AT91_US_FRAME		(1 <<  6)		/* Framing Error */
+#define		AT91_US_PARE		(1 <<  7)		/* Parity Error */
+#define		AT91_US_TIMEOUT		(1 <<  8)		/* Receiver Time-out */
+#define		AT91_US_TXEMPTY		(1 <<  9)		/* Transmitter Empty */
+#define		AT91_US_ITERATION	(1 << 10)		/* Max number of Repetitions Reached */
+#define		AT91_US_TXBUFE		(1 << 11)		/* Transmission Buffer Empty */
+#define		AT91_US_RXBUFF		(1 << 12)		/* Reception Buffer Full */
+#define		AT91_US_NACK		(1 << 13)		/* Non Acknowledge */
+#define		AT91_US_RIIC		(1 << 16)		/* Ring Indicator Input Change */
+#define		AT91_US_DSRIC		(1 << 17)		/* Data Set Ready Input Change */
+#define		AT91_US_DCDIC		(1 << 18)		/* Data Carrier Detect Input Change */
+#define		AT91_US_CTSIC		(1 << 19)		/* Clear to Send Input Change */
+#define		AT91_US_RI		(1 << 20)		/* RI */
+#define		AT91_US_DSR		(1 << 21)		/* DSR */
+#define		AT91_US_DCD		(1 << 22)		/* DCD */
+#define		AT91_US_CTS		(1 << 23)		/* CTS */
+
+#define AT91_US_IDR		0x0c			/* Interrupt Disable Register */
+#define AT91_US_IMR		0x10			/* Interrupt Mask Register */
+#define AT91_US_CSR		0x14			/* Channel Status Register */
+#define AT91_US_RHR		0x18			/* Receiver Holding Register */
+#define AT91_US_THR		0x1c			/* Transmitter Holding Register */
+
+#define AT91_US_BRGR		0x20			/* Baud Rate Generator Register */
+#define		AT91_US_CD		(0xffff << 0)		/* Clock Divider */
+
+#define AT91_US_RTOR		0x24			/* Receiver Time-out Register */
+#define		AT91_US_TO		(0xffff << 0)		/* Time-out Value */
+
+#define AT91_US_TTGR		0x28			/* Transmitter Timeguard Register */
+#define		AT91_US_TG		(0xff << 0)		/* Timeguard Value */
+
+#define AT91_US_FIDI		0x40			/* FI DI Ratio Register */
+#define AT91_US_NER		0x44			/* Number of Errors Register */
+#define AT91_US_IF		0x4c			/* IrDA Filter Register */
+
+#endif
diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h
new file mode 100644
index 000000000000..39368e18ab20
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/board.h
@@ -0,0 +1,35 @@
+/*
+ * Platform data definitions.
+ */
+#ifndef __ASM_ARCH_BOARD_H
+#define __ASM_ARCH_BOARD_H
+
+#include <linux/types.h>
+
+/* Add basic devices: system manager, interrupt controller, portmuxes, etc. */
+void at32_add_system_devices(void);
+
+#define AT91_NR_UART	4
+extern struct platform_device *at91_default_console_device;
+
+struct platform_device *at32_add_device_usart(unsigned int id);
+
+struct eth_platform_data {
+	u8	valid;
+	u8	mii_phy_addr;
+	u8	is_rmii;
+	u8	hw_addr[6];
+};
+struct platform_device *
+at32_add_device_eth(unsigned int id, struct eth_platform_data *data);
+
+struct platform_device *at32_add_device_spi(unsigned int id);
+
+struct lcdc_platform_data {
+	unsigned long fbmem_start;
+	unsigned long fbmem_size;
+};
+struct platform_device *
+at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data);
+
+#endif /* __ASM_ARCH_BOARD_H */
diff --git a/include/asm-avr32/arch-at32ap/init.h b/include/asm-avr32/arch-at32ap/init.h
new file mode 100644
index 000000000000..43722634e069
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/init.h
@@ -0,0 +1,21 @@
+/*
+ * AT32AP platform initialization calls.
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_AT32AP_INIT_H__
+#define __ASM_AVR32_AT32AP_INIT_H__
+
+void setup_platform(void);
+
+/* Called by setup_platform */
+void at32_clock_init(void);
+void at32_portmux_init(void);
+
+void at32_setup_serial_console(unsigned int usart_id);
+
+#endif /* __ASM_AVR32_AT32AP_INIT_H__ */
diff --git a/include/asm-avr32/arch-at32ap/portmux.h b/include/asm-avr32/arch-at32ap/portmux.h
new file mode 100644
index 000000000000..4d50421262a1
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/portmux.h
@@ -0,0 +1,16 @@
+/*
+ * AT32 portmux interface.
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_AT32_PORTMUX_H__
+#define __ASM_AVR32_AT32_PORTMUX_H__
+
+void portmux_set_func(unsigned int portmux_id, unsigned int pin_id,
+		      unsigned int function_id);
+
+#endif /* __ASM_AVR32_AT32_PORTMUX_H__ */
diff --git a/include/asm-avr32/arch-at32ap/sm.h b/include/asm-avr32/arch-at32ap/sm.h
new file mode 100644
index 000000000000..265a9ead20bf
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/sm.h
@@ -0,0 +1,27 @@
+/*
+ * AT32 System Manager interface.
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_AT32_SM_H__
+#define __ASM_AVR32_AT32_SM_H__
+
+struct irq_chip;
+struct platform_device;
+
+struct at32_sm {
+	spinlock_t lock;
+	void __iomem *regs;
+	struct irq_chip *eim_chip;
+	unsigned int eim_first_irq;
+	struct platform_device *pdev;
+};
+
+extern struct platform_device at32_sm_device;
+extern struct at32_sm system_manager;
+
+#endif /* __ASM_AVR32_AT32_SM_H__ */
diff --git a/include/asm-avr32/asm.h b/include/asm-avr32/asm.h
new file mode 100644
index 000000000000..515c7618952b
--- /dev/null
+++ b/include/asm-avr32/asm.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_ASM_H__
+#define __ASM_AVR32_ASM_H__
+
+#include <asm/sysreg.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+
+#define mask_interrupts		ssrf	SR_GM_BIT
+#define mask_exceptions		ssrf	SR_EM_BIT
+#define unmask_interrupts	csrf	SR_GM_BIT
+#define unmask_exceptions	csrf	SR_EM_BIT
+
+#ifdef CONFIG_FRAME_POINTER
+	.macro	save_fp
+	st.w	--sp, r7
+	.endm
+	.macro	restore_fp
+	ld.w	r7, sp++
+	.endm
+	.macro	zero_fp
+	mov	r7, 0
+	.endm
+#else
+	.macro	save_fp
+	.endm
+	.macro	restore_fp
+	.endm
+	.macro	zero_fp
+	.endm
+#endif
+	.macro	get_thread_info reg
+	mov	\reg, sp
+	andl	\reg, ~(THREAD_SIZE - 1) & 0xffff
+	.endm
+
+	/* Save and restore registers */
+	.macro	save_min sr, tmp=lr
+	pushm	lr
+	mfsr	\tmp, \sr
+	zero_fp
+	st.w	--sp, \tmp
+	.endm
+
+	.macro	restore_min sr, tmp=lr
+	ld.w	\tmp, sp++
+	mtsr	\sr, \tmp
+	popm	lr
+	.endm
+
+	.macro	save_half sr, tmp=lr
+	save_fp
+	pushm	r8-r9,r10,r11,r12,lr
+	zero_fp
+	mfsr	\tmp, \sr
+	st.w	--sp, \tmp
+	.endm
+
+	.macro	restore_half sr, tmp=lr
+	ld.w	\tmp, sp++
+	mtsr	\sr, \tmp
+	popm	r8-r9,r10,r11,r12,lr
+	restore_fp
+	.endm
+
+	.macro	save_full_user sr, tmp=lr
+	stmts	--sp, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr
+	st.w	--sp, lr
+	zero_fp
+	mfsr	\tmp, \sr
+	st.w	--sp, \tmp
+	.endm
+
+	.macro	restore_full_user sr, tmp=lr
+	ld.w	\tmp, sp++
+	mtsr	\sr, \tmp
+	ld.w	lr, sp++
+	ldmts	sp++, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr
+	.endm
+
+	/* uaccess macros */
+	.macro branch_if_kernel scratch, label
+	get_thread_info \scratch
+	ld.w	\scratch, \scratch[TI_flags]
+	bld	\scratch, TIF_USERSPACE
+	brcc	\label
+	.endm
+
+	.macro ret_if_privileged scratch, addr, size, ret
+	sub	\scratch, \size, 1
+	add	\scratch, \addr
+	retcs	\ret
+	retmi	\ret
+	.endm
+
+#endif /* __ASM_AVR32_ASM_H__ */
diff --git a/include/asm-avr32/atomic.h b/include/asm-avr32/atomic.h
new file mode 100644
index 000000000000..e0b9c44c126c
--- /dev/null
+++ b/include/asm-avr32/atomic.h
@@ -0,0 +1,201 @@
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc.
+ *
+ * But use these as seldom as possible since they are slower than
+ * regular operations.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_ATOMIC_H
+#define __ASM_AVR32_ATOMIC_H
+
+#include <asm/system.h>
+
+typedef struct { volatile int counter; } atomic_t;
+#define ATOMIC_INIT(i)  { (i) }
+
+#define atomic_read(v)		((v)->counter)
+#define atomic_set(v, i)	(((v)->counter) = i)
+
+/*
+ * atomic_sub_return - subtract the atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v. Returns the resulting value.
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	int result;
+
+	asm volatile(
+		"/* atomic_sub_return */\n"
+		"1:	ssrf	5\n"
+		"	ld.w	%0, %2\n"
+		"	sub	%0, %3\n"
+		"	stcond	%1, %0\n"
+		"	brne	1b"
+		: "=&r"(result), "=o"(v->counter)
+		: "m"(v->counter), "ir"(i)
+		: "cc");
+
+	return result;
+}
+
+/*
+ * atomic_add_return - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v. Returns the resulting value.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int result;
+
+	if (__builtin_constant_p(i))
+		result = atomic_sub_return(-i, v);
+	else
+		asm volatile(
+			"/* atomic_add_return */\n"
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %1\n"
+			"	add	%0, %3\n"
+			"	stcond	%2, %0\n"
+			"	brne	1b"
+			: "=&r"(result), "=o"(v->counter)
+			: "m"(v->counter), "r"(i)
+			: "cc", "memory");
+
+	return result;
+}
+
+/*
+ * atomic_sub_unless - sub unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * If the atomic value v is not equal to u, this function subtracts a
+ * from v, and returns non zero. If v is equal to u then it returns
+ * zero. This is done as an atomic operation.
+*/
+static inline int atomic_sub_unless(atomic_t *v, int a, int u)
+{
+	int tmp, result = 0;
+
+	asm volatile(
+		"/* atomic_sub_unless */\n"
+		"1:	ssrf	5\n"
+		"	ld.w	%0, %3\n"
+		"	cp.w	%0, %5\n"
+		"	breq	1f\n"
+		"	sub	%0, %4\n"
+		"	stcond	%2, %0\n"
+		"	brne	1b\n"
+		"	mov	%1, 1\n"
+		"1:"
+		: "=&r"(tmp), "=&r"(result), "=o"(v->counter)
+		: "m"(v->counter), "ir"(a), "ir"(u)
+		: "cc", "memory");
+
+	return result;
+}
+
+/*
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * If the atomic value v is not equal to u, this function adds a to v,
+ * and returns non zero. If v is equal to u then it returns zero. This
+ * is done as an atomic operation.
+*/
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int tmp, result;
+
+	if (__builtin_constant_p(a))
+		result = atomic_sub_unless(v, -a, u);
+	else {
+		result = 0;
+		asm volatile(
+			"/* atomic_add_unless */\n"
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %3\n"
+			"	cp.w	%0, %5\n"
+			"	breq	1f\n"
+			"	add	%0, %4\n"
+			"	stcond	%2, %0\n"
+			"	brne	1b\n"
+			"	mov	%1, 1\n"
+			"1:"
+			: "=&r"(tmp), "=&r"(result), "=o"(v->counter)
+			: "m"(v->counter), "r"(a), "ir"(u)
+			: "cc", "memory");
+	}
+
+	return result;
+}
+
+/*
+ * atomic_sub_if_positive - conditionally subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically test @v and subtract @i if @v is greater or equal than @i.
+ * The function returns the old value of @v minus @i.
+ */
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+	int result;
+
+	asm volatile(
+		"/* atomic_sub_if_positive */\n"
+		"1:	ssrf	5\n"
+		"	ld.w	%0, %2\n"
+		"	sub	%0, %3\n"
+		"	brlt	1f\n"
+		"	stcond	%1, %0\n"
+		"	brne	1b\n"
+		"1:"
+		: "=&r"(result), "=o"(v->counter)
+		: "m"(v->counter), "ir"(i)
+		: "cc", "memory");
+
+	return result;
+}
+
+#define atomic_xchg(v, new)	(xchg(&((v)->counter), new))
+#define atomic_cmpxchg(v, o, n)	((int)cmpxchg(&((v)->counter), (o), (n)))
+
+#define atomic_sub(i, v)	(void)atomic_sub_return(i, v)
+#define atomic_add(i, v)	(void)atomic_add_return(i, v)
+#define atomic_dec(v)		atomic_sub(1, (v))
+#define atomic_inc(v)		atomic_add(1, (v))
+
+#define atomic_dec_return(v)	atomic_sub_return(1, v)
+#define atomic_inc_return(v)	atomic_add_return(1, v)
+
+#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
+#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
+#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
+#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
+
+#define atomic_inc_not_zero(v)	atomic_add_unless(v, 1, 0)
+#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v)
+
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+#include <asm-generic/atomic.h>
+
+#endif /*  __ASM_AVR32_ATOMIC_H */
diff --git a/include/asm-avr32/auxvec.h b/include/asm-avr32/auxvec.h
new file mode 100644
index 000000000000..d5dd435bf8f4
--- /dev/null
+++ b/include/asm-avr32/auxvec.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_AVR32_AUXVEC_H
+#define __ASM_AVR32_AUXVEC_H
+
+#endif /* __ASM_AVR32_AUXVEC_H */
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
new file mode 100644
index 000000000000..5299f8c8e11d
--- /dev/null
+++ b/include/asm-avr32/bitops.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_BITOPS_H
+#define __ASM_AVR32_BITOPS_H
+
+#include <asm/byteorder.h>
+#include <asm/system.h>
+
+/*
+ * clear_bit() doesn't provide any barrier for the compiler
+ */
+#define smp_mb__before_clear_bit()	barrier()
+#define smp_mb__after_clear_bit()	barrier()
+
+/*
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long tmp;
+
+	if (__builtin_constant_p(nr)) {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %2\n"
+			"	sbr	%0, %3\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p)
+			: "m"(*p), "i"(nr)
+			: "cc");
+	} else {
+		unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %2\n"
+			"	or	%0, %3\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p)
+			: "m"(*p), "r"(mask)
+			: "cc");
+	}
+}
+
+/*
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static inline void clear_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long tmp;
+
+	if (__builtin_constant_p(nr)) {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %2\n"
+			"	cbr	%0, %3\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p)
+			: "m"(*p), "i"(nr)
+			: "cc");
+	} else {
+		unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %2\n"
+			"	andn	%0, %3\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p)
+			: "m"(*p), "r"(mask)
+			: "cc");
+	}
+}
+
+/*
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void change_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long tmp;
+
+	asm volatile(
+		"1:	ssrf	5\n"
+		"	ld.w	%0, %2\n"
+		"	eor	%0, %3\n"
+		"	stcond	%1, %0\n"
+		"	brne	1b"
+		: "=&r"(tmp), "=o"(*p)
+		: "m"(*p), "r"(mask)
+		: "cc");
+}
+
+/*
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long tmp, old;
+
+	if (__builtin_constant_p(nr)) {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %3\n"
+			"	mov	%2, %0\n"
+			"	sbr	%0, %4\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p), "=&r"(old)
+			: "m"(*p), "i"(nr)
+			: "memory", "cc");
+	} else {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%2, %3\n"
+			"	or	%0, %2, %4\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p), "=&r"(old)
+			: "m"(*p), "r"(mask)
+			: "memory", "cc");
+	}
+
+	return (old & mask) != 0;
+}
+
+/*
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long tmp, old;
+
+	if (__builtin_constant_p(nr)) {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %3\n"
+			"	mov	%2, %0\n"
+			"	cbr	%0, %4\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p), "=&r"(old)
+			: "m"(*p), "i"(nr)
+			: "memory", "cc");
+	} else {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %3\n"
+			"	mov	%2, %0\n"
+			"	andn	%0, %4\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p), "=&r"(old)
+			: "m"(*p), "r"(mask)
+			: "memory", "cc");
+	}
+
+	return (old & mask) != 0;
+}
+
+/*
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_change_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long tmp, old;
+
+	asm volatile(
+		"1:	ssrf	5\n"
+		"	ld.w	%2, %3\n"
+		"	eor	%0, %2, %4\n"
+		"	stcond	%1, %0\n"
+		"	brne	1b"
+		: "=&r"(tmp), "=o"(*p), "=&r"(old)
+		: "m"(*p), "r"(mask)
+		: "memory", "cc");
+
+	return (old & mask) != 0;
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+/* Find First bit Set */
+static inline unsigned long __ffs(unsigned long word)
+{
+	unsigned long result;
+
+	asm("brev %1\n\t"
+	    "clz %0,%1"
+	    : "=r"(result), "=&r"(word)
+	    : "1"(word));
+	return result;
+}
+
+/* Find First Zero */
+static inline unsigned long ffz(unsigned long word)
+{
+	return __ffs(~word);
+}
+
+/* Find Last bit Set */
+static inline int fls(unsigned long word)
+{
+	unsigned long result;
+
+	asm("clz %0,%1" : "=r"(result) : "r"(word));
+	return 32 - result;
+}
+
+unsigned long find_first_zero_bit(const unsigned long *addr,
+				  unsigned long size);
+unsigned long find_next_zero_bit(const unsigned long *addr,
+				 unsigned long size,
+				 unsigned long offset);
+unsigned long find_first_bit(const unsigned long *addr,
+			     unsigned long size);
+unsigned long find_next_bit(const unsigned long *addr,
+				 unsigned long size,
+				 unsigned long offset);
+
+/*
+ * ffs: find first bit set. This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ *
+ * The difference is that bit numbering starts at 1, and if no bit is set,
+ * the function returns 0.
+ */
+static inline int ffs(unsigned long word)
+{
+	if(word == 0)
+		return 0;
+	return __ffs(word) + 1;
+}
+
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/hweight.h>
+
+#include <asm-generic/bitops/ext2-non-atomic.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+#include <asm-generic/bitops/minix-le.h>
+
+#endif /* __ASM_AVR32_BITOPS_H */
diff --git a/include/asm-avr32/bug.h b/include/asm-avr32/bug.h
new file mode 100644
index 000000000000..521766bc9366
--- /dev/null
+++ b/include/asm-avr32/bug.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_BUG_H
+#define __ASM_AVR32_BUG_H
+
+#ifdef CONFIG_BUG
+
+/*
+ * According to our Chief Architect, this compact opcode is very
+ * unlikely to ever be implemented.
+ */
+#define AVR32_BUG_OPCODE	0x5df0
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define BUG()								\
+	do {								\
+		asm volatile(".hword	%0\n\t"				\
+			     ".hword	%1\n\t"				\
+			     ".long	%2"				\
+			     :						\
+			     : "n"(AVR32_BUG_OPCODE),			\
+			       "i"(__LINE__), "X"(__FILE__));		\
+	} while (0)
+
+#else
+
+#define BUG()								\
+	do {								\
+		asm volatile(".hword	%0\n\t"				\
+			     : : "n"(AVR32_BUG_OPCODE));		\
+	} while (0)
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define HAVE_ARCH_BUG
+
+#endif /* CONFIG_BUG */
+
+#include <asm-generic/bug.h>
+
+#endif /* __ASM_AVR32_BUG_H */
diff --git a/include/asm-avr32/bugs.h b/include/asm-avr32/bugs.h
new file mode 100644
index 000000000000..7635e770622e
--- /dev/null
+++ b/include/asm-avr32/bugs.h
@@ -0,0 +1,15 @@
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *      void check_bugs(void);
+ */
+#ifndef __ASM_AVR32_BUGS_H
+#define __ASM_AVR32_BUGS_H
+
+static void __init check_bugs(void)
+{
+	cpu_data->loops_per_jiffy = loops_per_jiffy;
+}
+
+#endif /* __ASM_AVR32_BUGS_H */
diff --git a/include/asm-avr32/byteorder.h b/include/asm-avr32/byteorder.h
new file mode 100644
index 000000000000..402ff4125cdc
--- /dev/null
+++ b/include/asm-avr32/byteorder.h
@@ -0,0 +1,25 @@
+/*
+ * AVR32 endian-conversion functions.
+ */
+#ifndef __ASM_AVR32_BYTEORDER_H
+#define __ASM_AVR32_BYTEORDER_H
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+#ifdef __CHECKER__
+extern unsigned long __builtin_bswap_32(unsigned long x);
+extern unsigned short __builtin_bswap_16(unsigned short x);
+#endif
+
+#define __arch__swab32(x) __builtin_bswap_32(x)
+#define __arch__swab16(x) __builtin_bswap_16(x)
+
+#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+# define __BYTEORDER_HAS_U64__
+# define __SWAB_64_THRU_32__
+#endif
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* __ASM_AVR32_BYTEORDER_H */
diff --git a/include/asm-avr32/cache.h b/include/asm-avr32/cache.h
new file mode 100644
index 000000000000..dabb955f3c00
--- /dev/null
+++ b/include/asm-avr32/cache.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_AVR32_CACHE_H
+#define __ASM_AVR32_CACHE_H
+
+#define L1_CACHE_SHIFT 5
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#ifndef __ASSEMBLER__
+struct cache_info {
+	unsigned int ways;
+	unsigned int sets;
+	unsigned int linesz;
+};
+#endif /* __ASSEMBLER */
+
+/* Cache operation constants */
+#define ICACHE_FLUSH		0x00
+#define ICACHE_INVALIDATE	0x01
+#define ICACHE_LOCK		0x02
+#define ICACHE_UNLOCK		0x03
+#define ICACHE_PREFETCH		0x04
+
+#define DCACHE_FLUSH		0x08
+#define DCACHE_LOCK		0x09
+#define DCACHE_UNLOCK		0x0a
+#define DCACHE_INVALIDATE	0x0b
+#define DCACHE_CLEAN		0x0c
+#define DCACHE_CLEAN_INVAL	0x0d
+
+#endif /* __ASM_AVR32_CACHE_H */
diff --git a/include/asm-avr32/cachectl.h b/include/asm-avr32/cachectl.h
new file mode 100644
index 000000000000..4faf1ce60061
--- /dev/null
+++ b/include/asm-avr32/cachectl.h
@@ -0,0 +1,11 @@
+#ifndef __ASM_AVR32_CACHECTL_H
+#define __ASM_AVR32_CACHECTL_H
+
+/*
+ * Operations that can be performed through the cacheflush system call
+ */
+
+/* Clean the data cache, then invalidate the icache */
+#define CACHE_IFLUSH	0
+
+#endif /* __ASM_AVR32_CACHECTL_H */
diff --git a/include/asm-avr32/cacheflush.h b/include/asm-avr32/cacheflush.h
new file mode 100644
index 000000000000..f1bf1708980e
--- /dev/null
+++ b/include/asm-avr32/cacheflush.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_CACHEFLUSH_H
+#define __ASM_AVR32_CACHEFLUSH_H
+
+/* Keep includes the same across arches.  */
+#include <linux/mm.h>
+
+#define CACHE_OP_ICACHE_INVALIDATE	0x01
+#define CACHE_OP_DCACHE_INVALIDATE	0x0b
+#define CACHE_OP_DCACHE_CLEAN		0x0c
+#define CACHE_OP_DCACHE_CLEAN_INVAL	0x0d
+
+/*
+ * Invalidate any cacheline containing virtual address vaddr without
+ * writing anything back to memory.
+ *
+ * Note that this function may corrupt unrelated data structures when
+ * applied on buffers that are not cacheline aligned in both ends.
+ */
+static inline void invalidate_dcache_line(void *vaddr)
+{
+	asm volatile("cache %0[0], %1"
+		     :
+		     : "r"(vaddr), "n"(CACHE_OP_DCACHE_INVALIDATE)
+		     : "memory");
+}
+
+/*
+ * Make sure any cacheline containing virtual address vaddr is written
+ * to memory.
+ */
+static inline void clean_dcache_line(void *vaddr)
+{
+	asm volatile("cache %0[0], %1"
+		     :
+		     : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN)
+		     : "memory");
+}
+
+/*
+ * Make sure any cacheline containing virtual address vaddr is written
+ * to memory and then invalidate it.
+ */
+static inline void flush_dcache_line(void *vaddr)
+{
+	asm volatile("cache %0[0], %1"
+		     :
+		     : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN_INVAL)
+		     : "memory");
+}
+
+/*
+ * Invalidate any instruction cacheline containing virtual address
+ * vaddr.
+ */
+static inline void invalidate_icache_line(void *vaddr)
+{
+	asm volatile("cache %0[0], %1"
+		     :
+		     : "r"(vaddr), "n"(CACHE_OP_ICACHE_INVALIDATE)
+		     : "memory");
+}
+
+/*
+ * Applies the above functions on all lines that are touched by the
+ * specified virtual address range.
+ */
+void invalidate_dcache_region(void *start, size_t len);
+void clean_dcache_region(void *start, size_t len);
+void flush_dcache_region(void *start, size_t len);
+void invalidate_icache_region(void *start, size_t len);
+
+/*
+ * Make sure any pending writes are completed before continuing.
+ */
+#define flush_write_buffer() asm volatile("sync 0" : : : "memory")
+
+/*
+ * The following functions are called when a virtual mapping changes.
+ * We do not need to flush anything in this case.
+ */
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
+/*
+ * I think we need to implement this one to be able to reliably
+ * execute pages from RAMDISK. However, if we implement the
+ * flush_dcache_*() functions, it might not be needed anymore.
+ *
+ * #define flush_icache_page(vma, page)		do { } while (0)
+ */
+extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
+
+/*
+ * These are (I think) related to D-cache aliasing.  We might need to
+ * do something here, but only for certain configurations.  No such
+ * configurations exist at this time.
+ */
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(page)		do { } while (0)
+#define flush_dcache_mmap_unlock(page)		do { } while (0)
+
+/*
+ * These are for I/D cache coherency. In this case, we do need to
+ * flush with all configurations.
+ */
+extern void flush_icache_range(unsigned long start, unsigned long end);
+extern void flush_icache_user_range(struct vm_area_struct *vma,
+				    struct page *page,
+				    unsigned long addr, int len);
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) do {	\
+	memcpy(dst, src, len);					\
+	flush_icache_user_range(vma, page, vaddr, len);		\
+} while(0)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len)	\
+	memcpy(dst, src, len)
+
+#endif /* __ASM_AVR32_CACHEFLUSH_H */
diff --git a/include/asm-avr32/checksum.h b/include/asm-avr32/checksum.h
new file mode 100644
index 000000000000..41b7af09edc4
--- /dev/null
+++ b/include/asm-avr32/checksum.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_CHECKSUM_H
+#define __ASM_AVR32_CHECKSUM_H
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+unsigned int csum_partial(const unsigned char * buff, int len,
+			  unsigned int sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums, and handles user-space pointer exceptions correctly, when needed.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
+				       int sum, int *src_err_ptr,
+				       int *dst_err_ptr);
+
+/*
+ *	Note: when you get a NULL pointer exception here this means someone
+ *	passed in an incorrect kernel address to one of these functions.
+ *
+ *	If you use these functions directly please don't forget the
+ *	verify_area().
+ */
+static inline
+unsigned int csum_partial_copy_nocheck(const char *src, char *dst,
+				       int len, int sum)
+{
+	return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+}
+
+static inline
+unsigned int csum_partial_copy_from_user (const char __user *src, char *dst,
+					  int len, int sum, int *err_ptr)
+{
+	return csum_partial_copy_generic((const char __force *)src, dst, len,
+					 sum, err_ptr, NULL);
+}
+
+/*
+ *	This is a version of ip_compute_csum() optimized for IP headers,
+ *	which always checksum on 4 octet boundaries.
+ */
+static inline unsigned short ip_fast_csum(unsigned char *iph,
+					  unsigned int ihl)
+{
+	unsigned int sum, tmp;
+
+	__asm__ __volatile__(
+		"	ld.w	%0, %1++\n"
+		"	ld.w	%3, %1++\n"
+		"	sub	%2, 4\n"
+		"	add	%0, %3\n"
+		"	ld.w	%3, %1++\n"
+		"	adc	%0, %0, %3\n"
+		"	ld.w	%3, %1++\n"
+		"	adc	%0, %0, %3\n"
+		"	acr	%0\n"
+		"1:	ld.w	%3, %1++\n"
+		"	add	%0, %3\n"
+		"	acr	%0\n"
+		"	sub	%2, 1\n"
+		"	brne	1b\n"
+		"	lsl	%3, %0, 16\n"
+		"	andl	%0, 0\n"
+		"	mov	%2, 0xffff\n"
+		"	add	%0, %3\n"
+		"	adc	%0, %0, %2\n"
+		"	com	%0\n"
+		"	lsr	%0, 16\n"
+		: "=r"(sum), "=r"(iph), "=r"(ihl), "=r"(tmp)
+		: "1"(iph), "2"(ihl)
+		: "memory", "cc");
+	return sum;
+}
+
+/*
+ *	Fold a partial checksum
+ */
+
+static inline unsigned int csum_fold(unsigned int sum)
+{
+	unsigned int tmp;
+
+	asm("	bfextu	%1, %0, 0, 16\n"
+	    "	lsr	%0, 16\n"
+	    "	add	%0, %1\n"
+	    "	bfextu	%1, %0, 16, 16\n"
+	    "	add	%0, %1"
+	    : "=&r"(sum), "=&r"(tmp)
+	    : "0"(sum));
+
+	return ~sum;
+}
+
+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
+					       unsigned long daddr,
+					       unsigned short len,
+					       unsigned short proto,
+					       unsigned int sum)
+{
+	asm("	add	%0, %1\n"
+	    "	adc	%0, %0, %2\n"
+	    "	adc	%0, %0, %3\n"
+	    "	acr	%0"
+	    : "=r"(sum)
+	    : "r"(daddr), "r"(saddr), "r"(ntohs(len) | (proto << 16)),
+	      "0"(sum)
+	    : "cc");
+
+	return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline unsigned short int csum_tcpudp_magic(unsigned long saddr,
+						   unsigned long daddr,
+						   unsigned short len,
+						   unsigned short proto,
+						   unsigned int sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+
+static inline unsigned short ip_compute_csum(unsigned char * buff, int len)
+{
+    return csum_fold(csum_partial(buff, len, 0));
+}
+
+#endif /* __ASM_AVR32_CHECKSUM_H */
diff --git a/include/asm-avr32/cputime.h b/include/asm-avr32/cputime.h
new file mode 100644
index 000000000000..e87e0f81cbeb
--- /dev/null
+++ b/include/asm-avr32/cputime.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_CPUTIME_H
+#define __ASM_AVR32_CPUTIME_H
+
+#include <asm-generic/cputime.h>
+
+#endif /* __ASM_AVR32_CPUTIME_H */
diff --git a/include/asm-avr32/current.h b/include/asm-avr32/current.h
new file mode 100644
index 000000000000..c7b0549eab8a
--- /dev/null
+++ b/include/asm-avr32/current.h
@@ -0,0 +1,15 @@
+#ifndef __ASM_AVR32_CURRENT_H
+#define __ASM_AVR32_CURRENT_H
+
+#include <linux/thread_info.h>
+
+struct task_struct;
+
+inline static struct task_struct * get_current(void)
+{
+	return current_thread_info()->task;
+}
+
+#define current get_current()
+
+#endif /* __ASM_AVR32_CURRENT_H */
diff --git a/include/asm-avr32/delay.h b/include/asm-avr32/delay.h
new file mode 100644
index 000000000000..cc3b2e3343b3
--- /dev/null
+++ b/include/asm-avr32/delay.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_AVR32_DELAY_H
+#define __ASM_AVR32_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/avr32/lib/delay.c
+ */
+
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __const_udelay(unsigned long usecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \
+	__udelay(n))
+
+#define ndelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+	__ndelay(n))
+
+#endif /* __ASM_AVR32_DELAY_H */
diff --git a/include/asm-avr32/div64.h b/include/asm-avr32/div64.h
new file mode 100644
index 000000000000..d7ddd4fdeca6
--- /dev/null
+++ b/include/asm-avr32/div64.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_DIV64_H
+#define __ASM_AVR32_DIV64_H
+
+#include <asm-generic/div64.h>
+
+#endif /* __ASM_AVR32_DIV64_H */
diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h
new file mode 100644
index 000000000000..4c40cb41cdf8
--- /dev/null
+++ b/include/asm-avr32/dma-mapping.h
@@ -0,0 +1,320 @@
+#ifndef __ASM_AVR32_DMA_MAPPING_H
+#define __ASM_AVR32_DMA_MAPPING_H
+
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <asm/scatterlist.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+
+extern void dma_cache_sync(void *vaddr, size_t size, int direction);
+
+/*
+ * Return whether the given device DMA address mask can be supported
+ * properly.  For example, if your device can only drive the low 24-bits
+ * during bus mastering, then you would pass 0x00ffffff as the mask
+ * to this function.
+ */
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+	/* Fix when needed. I really don't know of any limitations */
+	return 1;
+}
+
+static inline int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+		return -EIO;
+
+	*dev->dma_mask = dma_mask;
+	return 0;
+}
+
+/**
+ * dma_alloc_coherent - allocate consistent memory for DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: required memory size
+ * @handle: bus-specific DMA address
+ *
+ * Allocate some uncached, unbuffered memory for a device for
+ * performing DMA.  This function allocates pages, and will
+ * return the CPU-viewed address, and sets @handle to be the
+ * device-viewed address.
+ */
+extern void *dma_alloc_coherent(struct device *dev, size_t size,
+				dma_addr_t *handle, gfp_t gfp);
+
+/**
+ * dma_free_coherent - free memory allocated by dma_alloc_coherent
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: size of memory originally requested in dma_alloc_coherent
+ * @cpu_addr: CPU-view address returned from dma_alloc_coherent
+ * @handle: device-view address returned from dma_alloc_coherent
+ *
+ * Free (and unmap) a DMA buffer previously allocated by
+ * dma_alloc_coherent().
+ *
+ * References to memory and mappings associated with cpu_addr/handle
+ * during and after this call executing are illegal.
+ */
+extern void dma_free_coherent(struct device *dev, size_t size,
+			      void *cpu_addr, dma_addr_t handle);
+
+/**
+ * dma_alloc_writecombine - allocate write-combining memory for DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: required memory size
+ * @handle: bus-specific DMA address
+ *
+ * Allocate some uncached, buffered memory for a device for
+ * performing DMA.  This function allocates pages, and will
+ * return the CPU-viewed address, and sets @handle to be the
+ * device-viewed address.
+ */
+extern void *dma_alloc_writecombine(struct device *dev, size_t size,
+				    dma_addr_t *handle, gfp_t gfp);
+
+/**
+ * dma_free_coherent - free memory allocated by dma_alloc_writecombine
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: size of memory originally requested in dma_alloc_writecombine
+ * @cpu_addr: CPU-view address returned from dma_alloc_writecombine
+ * @handle: device-view address returned from dma_alloc_writecombine
+ *
+ * Free (and unmap) a DMA buffer previously allocated by
+ * dma_alloc_writecombine().
+ *
+ * References to memory and mappings associated with cpu_addr/handle
+ * during and after this call executing are illegal.
+ */
+extern void dma_free_writecombine(struct device *dev, size_t size,
+				  void *cpu_addr, dma_addr_t handle);
+
+/**
+ * dma_map_single - map a single buffer for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @cpu_addr: CPU direct mapped address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed.  The CPU
+ * can regain ownership by calling dma_unmap_single() or dma_sync_single().
+ */
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+	       enum dma_data_direction direction)
+{
+	dma_cache_sync(cpu_addr, size, direction);
+	return virt_to_bus(cpu_addr);
+}
+
+/**
+ * dma_unmap_single - unmap a single buffer previously mapped
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Unmap a single streaming mode DMA translation.  The handle and size
+ * must match what was provided in the previous dma_map_single() call.
+ * All other usages are undefined.
+ *
+ * After this call, reads by the CPU to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+
+}
+
+/**
+ * dma_map_page - map a portion of a page for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed.  The CPU
+ * can regain ownership by calling dma_unmap_page() or dma_sync_single().
+ */
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size,
+	     enum dma_data_direction direction)
+{
+	return dma_map_single(dev, page_address(page) + offset,
+			      size, direction);
+}
+
+/**
+ * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Unmap a single streaming mode DMA translation.  The handle and size
+ * must match what was provided in the previous dma_map_single() call.
+ * All other usages are undefined.
+ *
+ * After this call, reads by the CPU to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	dma_unmap_single(dev, dma_address, size, direction);
+}
+
+/**
+ * dma_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Map a set of buffers described by scatterlist in streaming
+ * mode for DMA.  This is the scatter-gather version of the
+ * above pci_map_single interface.  Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length.  They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	   enum dma_data_direction direction)
+{
+	int i;
+
+	for (i = 0; i < nents; i++) {
+		char *virt;
+
+		sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
+		virt = page_address(sg[i].page) + sg[i].offset;
+		dma_cache_sync(virt, sg[i].length, direction);
+	}
+
+	return nents;
+}
+
+/**
+ * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Unmap a set of streaming mode DMA translations.
+ * Again, CPU read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+	     enum dma_data_direction direction)
+{
+
+}
+
+/**
+ * dma_sync_single_for_cpu
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Make physical memory consistent for a single streaming mode DMA
+ * translation after a transfer.
+ *
+ * If you perform a dma_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the DMA mapping,
+ * you must call this function before doing so.  At the next point you
+ * give the DMA address back to the card, you must first perform a
+ * dma_sync_single_for_device, and then the device again owns the
+ * buffer.
+ */
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+			size_t size, enum dma_data_direction direction)
+{
+	dma_cache_sync(bus_to_virt(dma_handle), size, direction);
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+			   size_t size, enum dma_data_direction direction)
+{
+	dma_cache_sync(bus_to_virt(dma_handle), size, direction);
+}
+
+/**
+ * dma_sync_sg_for_cpu
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Make physical memory consistent for a set of streaming
+ * mode DMA translations after a transfer.
+ *
+ * The same as dma_sync_single_for_* but for a scatter-gather list,
+ * same rules and usage.
+ */
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+		    int nents, enum dma_data_direction direction)
+{
+	int i;
+
+	for (i = 0; i < nents; i++) {
+		dma_cache_sync(page_address(sg[i].page) + sg[i].offset,
+			       sg[i].length, direction);
+	}
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+		       int nents, enum dma_data_direction direction)
+{
+	int i;
+
+	for (i = 0; i < nents; i++) {
+		dma_cache_sync(page_address(sg[i].page) + sg[i].offset,
+			       sg[i].length, direction);
+	}
+}
+
+/* Now for the API extensions over the pci_ one */
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+static inline int dma_is_consistent(dma_addr_t dma_addr)
+{
+	return 1;
+}
+
+static inline int dma_get_cache_alignment(void)
+{
+	return boot_cpu_data.dcache.linesz;
+}
+
+#endif /* __ASM_AVR32_DMA_MAPPING_H */
diff --git a/include/asm-avr32/dma.h b/include/asm-avr32/dma.h
new file mode 100644
index 000000000000..9e91205590ac
--- /dev/null
+++ b/include/asm-avr32/dma.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_AVR32_DMA_H
+#define __ASM_AVR32_DMA_H
+
+/* The maximum address that we can perform a DMA transfer to on this platform.
+ * Not really applicable to AVR32, but some functions need it. */
+#define MAX_DMA_ADDRESS		0xffffffff
+
+#endif /* __ASM_AVR32_DMA_H */
diff --git a/include/asm-avr32/elf.h b/include/asm-avr32/elf.h
new file mode 100644
index 000000000000..d334b4994d2d
--- /dev/null
+++ b/include/asm-avr32/elf.h
@@ -0,0 +1,110 @@
+#ifndef __ASM_AVR32_ELF_H
+#define __ASM_AVR32_ELF_H
+
+/* AVR32 relocation numbers */
+#define R_AVR32_NONE		0
+#define R_AVR32_32		1
+#define R_AVR32_16		2
+#define R_AVR32_8		3
+#define R_AVR32_32_PCREL	4
+#define R_AVR32_16_PCREL	5
+#define R_AVR32_8_PCREL		6
+#define R_AVR32_DIFF32		7
+#define R_AVR32_DIFF16		8
+#define R_AVR32_DIFF8		9
+#define R_AVR32_GOT32		10
+#define R_AVR32_GOT16		11
+#define R_AVR32_GOT8		12
+#define R_AVR32_21S		13
+#define R_AVR32_16U		14
+#define R_AVR32_16S		15
+#define R_AVR32_8S		16
+#define R_AVR32_8S_EXT		17
+#define R_AVR32_22H_PCREL	18
+#define R_AVR32_18W_PCREL	19
+#define R_AVR32_16B_PCREL	20
+#define R_AVR32_16N_PCREL	21
+#define R_AVR32_14UW_PCREL	22
+#define R_AVR32_11H_PCREL	23
+#define R_AVR32_10UW_PCREL	24
+#define R_AVR32_9H_PCREL	25
+#define R_AVR32_9UW_PCREL	26
+#define R_AVR32_HI16		27
+#define R_AVR32_LO16		28
+#define R_AVR32_GOTPC		29
+#define R_AVR32_GOTCALL		30
+#define R_AVR32_LDA_GOT		31
+#define R_AVR32_GOT21S		32
+#define R_AVR32_GOT18SW		33
+#define R_AVR32_GOT16S		34
+#define R_AVR32_GOT7UW		35
+#define R_AVR32_32_CPENT	36
+#define R_AVR32_CPCALL		37
+#define R_AVR32_16_CP		38
+#define R_AVR32_9W_CP		39
+#define R_AVR32_RELATIVE	40
+#define R_AVR32_GLOB_DAT	41
+#define R_AVR32_JMP_SLOT	42
+#define R_AVR32_ALIGN		43
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct pt_regs) / sizeof (elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_fpu_struct elf_fpregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ( (x)->e_machine == EM_AVR32 )
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS32
+#ifdef __LITTLE_ENDIAN__
+#define ELF_DATA	ELFDATA2LSB
+#else
+#define ELF_DATA	ELFDATA2MSB
+#endif
+#define ELF_ARCH	EM_AVR32
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE	4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this CPU supports.  This could be done in user space,
+   but it's not easy, and we've already done it here.  */
+
+#define ELF_HWCAP	(0)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.
+
+   For the moment, we have only optimizations for the Intel generations,
+   but that could change... */
+
+#define ELF_PLATFORM  (NULL)
+
+#ifdef __KERNEL__
+#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT)
+#endif
+
+#endif /* __ASM_AVR32_ELF_H */
diff --git a/include/asm-avr32/emergency-restart.h b/include/asm-avr32/emergency-restart.h
new file mode 100644
index 000000000000..3e7e014776ba
--- /dev/null
+++ b/include/asm-avr32/emergency-restart.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_EMERGENCY_RESTART_H
+#define __ASM_AVR32_EMERGENCY_RESTART_H
+
+#include <asm-generic/emergency-restart.h>
+
+#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */
diff --git a/include/asm-avr32/errno.h b/include/asm-avr32/errno.h
new file mode 100644
index 000000000000..558a7249f06d
--- /dev/null
+++ b/include/asm-avr32/errno.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_ERRNO_H
+#define __ASM_AVR32_ERRNO_H
+
+#include <asm-generic/errno.h>
+
+#endif /* __ASM_AVR32_ERRNO_H */
diff --git a/include/asm-avr32/fcntl.h b/include/asm-avr32/fcntl.h
new file mode 100644
index 000000000000..14c0c4402b11
--- /dev/null
+++ b/include/asm-avr32/fcntl.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_FCNTL_H
+#define __ASM_AVR32_FCNTL_H
+
+#include <asm-generic/fcntl.h>
+
+#endif /* __ASM_AVR32_FCNTL_H */
diff --git a/include/asm-avr32/futex.h b/include/asm-avr32/futex.h
new file mode 100644
index 000000000000..10419f14a68a
--- /dev/null
+++ b/include/asm-avr32/futex.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_FUTEX_H
+#define __ASM_AVR32_FUTEX_H
+
+#include <asm-generic/futex.h>
+
+#endif /* __ASM_AVR32_FUTEX_H */
diff --git a/include/asm-avr32/hardirq.h b/include/asm-avr32/hardirq.h
new file mode 100644
index 000000000000..267354356f60
--- /dev/null
+++ b/include/asm-avr32/hardirq.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_AVR32_HARDIRQ_H
+#define __ASM_AVR32_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <asm/irq.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cache.h>
+
+/* entry.S is sensitive to the offsets of these fields */
+typedef struct {
+	unsigned int __softirq_pending;
+} ____cacheline_aligned irq_cpustat_t;
+
+void ack_bad_irq(unsigned int irq);
+
+/* Standard mappings for irq_cpustat_t above */
+#include <linux/irq_cpustat.h>
+
+#endif /* __ASSEMBLY__ */
+
+#define HARDIRQ_BITS	12
+
+/*
+ * The hardirq mask has to be large enough to have
+ * space for potentially all IRQ sources in the system
+ * nesting on a single CPU:
+ */
+#if (1 << HARDIRQ_BITS) < NR_IRQS
+# error HARDIRQ_BITS is too low!
+#endif
+
+#endif /* __ASM_AVR32_HARDIRQ_H */
diff --git a/include/asm-avr32/hw_irq.h b/include/asm-avr32/hw_irq.h
new file mode 100644
index 000000000000..218b0a6bfd1b
--- /dev/null
+++ b/include/asm-avr32/hw_irq.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_AVR32_HW_IRQ_H
+#define __ASM_AVR32_HW_IRQ_H
+
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
+{
+	/* Nothing to do */
+}
+
+#endif /* __ASM_AVR32_HW_IRQ_H */
diff --git a/include/asm-avr32/intc.h b/include/asm-avr32/intc.h
new file mode 100644
index 000000000000..1ac9ca75e8fd
--- /dev/null
+++ b/include/asm-avr32/intc.h
@@ -0,0 +1,128 @@
+#ifndef __ASM_AVR32_INTC_H
+#define __ASM_AVR32_INTC_H
+
+#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+
+struct irq_controller;
+struct irqaction;
+struct pt_regs;
+
+struct platform_device;
+
+/* Information about the internal interrupt controller */
+struct intc_device {
+	/* ioremapped address of configuration block */
+	void __iomem *regs;
+
+	/* the physical device */
+	struct platform_device *pdev;
+
+	/* Number of interrupt lines per group. */
+	unsigned int irqs_per_group;
+
+	/* The highest group ID + 1 */
+	unsigned int nr_groups;
+
+	/*
+	 * Bitfield indicating which groups are actually in use.  The
+	 * size of the array is
+	 * ceil(group_max / (8 * sizeof(unsigned int))).
+	 */
+	unsigned int group_mask[];
+};
+
+struct irq_controller_class {
+	/*
+	 * A short name identifying this kind of controller.
+	 */
+	const char *typename;
+	/*
+	 * Handle the IRQ.  Must do any necessary acking and masking.
+	 */
+	irqreturn_t (*handle)(int irq, void *dev_id, struct pt_regs *regs);
+	/*
+	 * Register a new IRQ handler.
+	 */
+	int (*setup)(struct irq_controller *ctrl, unsigned int irq,
+		     struct irqaction *action);
+	/*
+	 * Unregister a IRQ handler.
+	 */
+	void (*free)(struct irq_controller *ctrl, unsigned int irq,
+		     void *dev_id);
+	/*
+	 * Mask the IRQ in the interrupt controller.
+	 */
+	void (*mask)(struct irq_controller *ctrl, unsigned int irq);
+	/*
+	 * Unmask the IRQ in the interrupt controller.
+	 */
+	void (*unmask)(struct irq_controller *ctrl, unsigned int irq);
+	/*
+	 * Set the type of the IRQ. See below for possible types.
+	 * Return -EINVAL if a given type is not supported
+	 */
+	int (*set_type)(struct irq_controller *ctrl, unsigned int irq,
+			unsigned int type);
+	/*
+	 * Return the IRQ type currently set
+	 */
+	unsigned int (*get_type)(struct irq_controller *ctrl, unsigned int irq);
+};
+
+struct irq_controller {
+	struct irq_controller_class *class;
+	unsigned int irq_group;
+	unsigned int first_irq;
+	unsigned int nr_irqs;
+	struct list_head list;
+};
+
+struct intc_group_desc {
+	struct irq_controller *ctrl;
+	irqreturn_t (*handle)(int, void *, struct pt_regs *);
+	unsigned long flags;
+	void *dev_id;
+	const char *devname;
+};
+
+/*
+ * The internal interrupt controller.  Defined in board/part-specific
+ * devices.c.
+ * TODO: Should probably be defined per-cpu.
+ */
+extern struct intc_device intc;
+
+extern int request_internal_irq(unsigned int irq,
+				irqreturn_t (*handler)(int, void *, struct pt_regs *),
+				unsigned long irqflags,
+				const char *devname, void *dev_id);
+extern void free_internal_irq(unsigned int irq);
+
+/* Only used by time_init() */
+extern int setup_internal_irq(unsigned int irq, struct intc_group_desc *desc);
+
+/*
+ * Set interrupt priority for a given group. `group' can be found by
+ * using irq_to_group(irq). Priority can be from 0 (lowest) to 3
+ * (highest). Higher-priority interrupts will preempt lower-priority
+ * interrupts (unless interrupts are masked globally).
+ *
+ * This function does not check for conflicts within a group.
+ */
+extern int intc_set_priority(unsigned int group,
+			     unsigned int priority);
+
+/*
+ * Returns a bitmask of pending interrupts in a group.
+ */
+extern unsigned long intc_get_pending(unsigned int group);
+
+/*
+ * Register a new external interrupt controller.  Returns the first
+ * external IRQ number that is assigned to the new controller.
+ */
+extern int intc_register_controller(struct irq_controller *ctrl);
+
+#endif /* __ASM_AVR32_INTC_H */
diff --git a/include/asm-avr32/io.h b/include/asm-avr32/io.h
new file mode 100644
index 000000000000..2fc8f111dce9
--- /dev/null
+++ b/include/asm-avr32/io.h
@@ -0,0 +1,253 @@
+#ifndef __ASM_AVR32_IO_H
+#define __ASM_AVR32_IO_H
+
+#include <linux/string.h>
+
+#ifdef __KERNEL__
+
+#include <asm/addrspace.h>
+#include <asm/byteorder.h>
+
+/* virt_to_phys will only work when address is in P1 or P2 */
+static __inline__ unsigned long virt_to_phys(volatile void *address)
+{
+	return PHYSADDR(address);
+}
+
+static __inline__ void * phys_to_virt(unsigned long address)
+{
+	return (void *)P1SEGADDR(address);
+}
+
+#define cached_to_phys(addr)	((unsigned long)PHYSADDR(addr))
+#define uncached_to_phys(addr)	((unsigned long)PHYSADDR(addr))
+#define phys_to_cached(addr)	((void *)P1SEGADDR(addr))
+#define phys_to_uncached(addr)	((void *)P2SEGADDR(addr))
+
+/*
+ * Generic IO read/write.  These perform native-endian accesses.  Note
+ * that some architectures will want to re-define __raw_{read,write}w.
+ */
+extern void __raw_writesb(unsigned int addr, const void *data, int bytelen);
+extern void __raw_writesw(unsigned int addr, const void *data, int wordlen);
+extern void __raw_writesl(unsigned int addr, const void *data, int longlen);
+
+extern void __raw_readsb(unsigned int addr, void *data, int bytelen);
+extern void __raw_readsw(unsigned int addr, void *data, int wordlen);
+extern void __raw_readsl(unsigned int addr, void *data, int longlen);
+
+static inline void writeb(unsigned char b, volatile void __iomem *addr)
+{
+	*(volatile unsigned char __force *)addr = b;
+}
+static inline void writew(unsigned short b, volatile void __iomem *addr)
+{
+	*(volatile unsigned short __force *)addr = b;
+}
+static inline void writel(unsigned int b, volatile void __iomem *addr)
+{
+	*(volatile unsigned int __force *)addr = b;
+}
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+static inline unsigned char readb(const volatile void __iomem *addr)
+{
+	return *(const volatile unsigned char __force *)addr;
+}
+static inline unsigned short readw(const volatile void __iomem *addr)
+{
+	return *(const volatile unsigned short __force *)addr;
+}
+static inline unsigned int readl(const volatile void __iomem *addr)
+{
+	return *(const volatile unsigned int __force *)addr;
+}
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+#define writesb(p, d, l)	__raw_writesb((unsigned int)p, d, l)
+#define writesw(p, d, l)	__raw_writesw((unsigned int)p, d, l)
+#define writesl(p, d, l)	__raw_writesl((unsigned int)p, d, l)
+
+#define readsb(p, d, l)		__raw_readsb((unsigned int)p, d, l)
+#define readsw(p, d, l)		__raw_readsw((unsigned int)p, d, l)
+#define readsl(p, d, l)		__raw_readsl((unsigned int)p, d, l)
+
+/*
+ * These two are only here because ALSA _thinks_ it needs them...
+ */
+static inline void memcpy_fromio(void * to, const volatile void __iomem *from,
+				 unsigned long count)
+{
+	char *p = to;
+	while (count) {
+		count--;
+		*p = readb(from);
+		p++;
+		from++;
+	}
+}
+
+static inline void  memcpy_toio(volatile void __iomem *to, const void * from,
+				unsigned long count)
+{
+	const char *p = from;
+	while (count) {
+		count--;
+		writeb(*p, to);
+		p++;
+		to++;
+	}
+}
+
+static inline void memset_io(volatile void __iomem *addr, unsigned char val,
+			     unsigned long count)
+{
+	memset((void __force *)addr, val, count);
+}
+
+/*
+ * Bad read/write accesses...
+ */
+extern void __readwrite_bug(const char *fn);
+
+#define IO_SPACE_LIMIT	0xffffffff
+
+/* Convert I/O port address to virtual address */
+#define __io(p)		((void __iomem *)phys_to_uncached(p))
+
+/*
+ *  IO port access primitives
+ *  -------------------------
+ *
+ * The AVR32 doesn't have special IO access instructions; all IO is memory
+ * mapped. Note that these are defined to perform little endian accesses
+ * only. Their primary purpose is to access PCI and ISA peripherals.
+ *
+ * Note that for a big endian machine, this implies that the following
+ * big endian mode connectivity is in place.
+ *
+ * The machine specific io.h include defines __io to translate an "IO"
+ * address to a memory address.
+ *
+ * Note that we prevent GCC re-ordering or caching values in expressions
+ * by introducing sequence points into the in*() definitions.  Note that
+ * __raw_* do not guarantee this behaviour.
+ *
+ * The {in,out}[bwl] macros are for emulating x86-style PCI/ISA IO space.
+ */
+#define outb(v, p)		__raw_writeb(v, __io(p))
+#define outw(v, p)		__raw_writew(cpu_to_le16(v), __io(p))
+#define outl(v, p)		__raw_writel(cpu_to_le32(v), __io(p))
+
+#define inb(p)			__raw_readb(__io(p))
+#define inw(p)			le16_to_cpu(__raw_readw(__io(p)))
+#define inl(p)			le32_to_cpu(__raw_readl(__io(p)))
+
+static inline void __outsb(unsigned long port, void *addr, unsigned int count)
+{
+	while (count--) {
+		outb(*(u8 *)addr, port);
+		addr++;
+	}
+}
+
+static inline void __insb(unsigned long port, void *addr, unsigned int count)
+{
+	while (count--) {
+		*(u8 *)addr = inb(port);
+		addr++;
+	}
+}
+
+static inline void __outsw(unsigned long port, void *addr, unsigned int count)
+{
+	while (count--) {
+		outw(*(u16 *)addr, port);
+		addr += 2;
+	}
+}
+
+static inline void __insw(unsigned long port, void *addr, unsigned int count)
+{
+	while (count--) {
+		*(u16 *)addr = inw(port);
+		addr += 2;
+	}
+}
+
+static inline void __outsl(unsigned long port, void *addr, unsigned int count)
+{
+	while (count--) {
+		outl(*(u32 *)addr, port);
+		addr += 4;
+	}
+}
+
+static inline void __insl(unsigned long port, void *addr, unsigned int count)
+{
+	while (count--) {
+		*(u32 *)addr = inl(port);
+		addr += 4;
+	}
+}
+
+#define outsb(port, addr, count)	__outsb(port, addr, count)
+#define insb(port, addr, count)		__insb(port, addr, count)
+#define outsw(port, addr, count)	__outsw(port, addr, count)
+#define insw(port, addr, count)		__insw(port, addr, count)
+#define outsl(port, addr, count)	__outsl(port, addr, count)
+#define insl(port, addr, count)		__insl(port, addr, count)
+
+extern void __iomem *__ioremap(unsigned long offset, size_t size,
+			       unsigned long flags);
+extern void __iounmap(void __iomem *addr);
+
+/*
+ * ioremap	-   map bus memory into CPU space
+ * @offset	bus address of the memory
+ * @size	size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to make
+ * bus memory CPU accessible via the readb/.../writel functions and
+ * the other mmio helpers. The returned address is not guaranteed to
+ * be usable directly as a virtual address.
+ */
+#define ioremap(offset, size)			\
+	__ioremap((offset), (size), 0)
+
+#define iounmap(addr)				\
+	__iounmap(addr)
+
+#define cached(addr) P1SEGADDR(addr)
+#define uncached(addr) P2SEGADDR(addr)
+
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+#define page_to_bus page_to_phys
+#define bus_to_page phys_to_page
+
+#define dma_cache_wback_inv(_start, _size)	\
+	flush_dcache_region(_start, _size)
+#define dma_cache_inv(_start, _size)		\
+	invalidate_dcache_region(_start, _size)
+#define dma_cache_wback(_start, _size)		\
+	clean_dcache_region(_start, _size)
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)    __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p)   p
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_AVR32_IO_H */
diff --git a/include/asm-avr32/ioctl.h b/include/asm-avr32/ioctl.h
new file mode 100644
index 000000000000..c8472c1398ef
--- /dev/null
+++ b/include/asm-avr32/ioctl.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_IOCTL_H
+#define __ASM_AVR32_IOCTL_H
+
+#include <asm-generic/ioctl.h>
+
+#endif /* __ASM_AVR32_IOCTL_H */
diff --git a/include/asm-avr32/ioctls.h b/include/asm-avr32/ioctls.h
new file mode 100644
index 000000000000..0500426b7186
--- /dev/null
+++ b/include/asm-avr32/ioctls.h
@@ -0,0 +1,83 @@
+#ifndef __ASM_AVR32_IOCTLS_H
+#define __ASM_AVR32_IOCTLS_H
+
+#include <asm/ioctl.h>
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS		0x5401
+#define TCSETS		0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */
+#define TCSETSW		0x5403
+#define TCSETSF		0x5404
+#define TCGETA		0x5405
+#define TCSETA		0x5406
+#define TCSETAW		0x5407
+#define TCSETAF		0x5408
+#define TCSBRK		0x5409
+#define TCXONC		0x540A
+#define TCFLSH		0x540B
+#define TIOCEXCL	0x540C
+#define TIOCNXCL	0x540D
+#define TIOCSCTTY	0x540E
+#define TIOCGPGRP	0x540F
+#define TIOCSPGRP	0x5410
+#define TIOCOUTQ	0x5411
+#define TIOCSTI		0x5412
+#define TIOCGWINSZ	0x5413
+#define TIOCSWINSZ	0x5414
+#define TIOCMGET	0x5415
+#define TIOCMBIS	0x5416
+#define TIOCMBIC	0x5417
+#define TIOCMSET	0x5418
+#define TIOCGSOFTCAR	0x5419
+#define TIOCSSOFTCAR	0x541A
+#define FIONREAD	0x541B
+#define TIOCINQ		FIONREAD
+#define TIOCLINUX	0x541C
+#define TIOCCONS	0x541D
+#define TIOCGSERIAL	0x541E
+#define TIOCSSERIAL	0x541F
+#define TIOCPKT		0x5420
+#define FIONBIO		0x5421
+#define TIOCNOTTY	0x5422
+#define TIOCSETD	0x5423
+#define TIOCGETD	0x5424
+#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
+/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */
+#define TIOCSBRK	0x5427  /* BSD compatibility */
+#define TIOCCBRK	0x5428  /* BSD compatibility */
+#define TIOCGSID	0x5429  /* Return the session ID of FD */
+#define TIOCGPTN	_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK	_IOW('T',0x31, int)  /* Lock/unlock Pty */
+
+#define FIONCLEX	0x5450
+#define FIOCLEX		0x5451
+#define FIOASYNC	0x5452
+#define TIOCSERCONFIG	0x5453
+#define TIOCSERGWILD	0x5454
+#define TIOCSERSWILD	0x5455
+#define TIOCGLCKTRMIOS	0x5456
+#define TIOCSLCKTRMIOS	0x5457
+#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
+#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
+#define TIOCGHAYESESP   0x545E  /* Get Hayes ESP configuration */
+#define TIOCSHAYESESP   0x545F  /* Set Hayes ESP configuration */
+#define FIOQSIZE	0x5460
+
+/* Used for packet mode */
+#define TIOCPKT_DATA		 0
+#define TIOCPKT_FLUSHREAD	 1
+#define TIOCPKT_FLUSHWRITE	 2
+#define TIOCPKT_STOP		 4
+#define TIOCPKT_START		 8
+#define TIOCPKT_NOSTOP		16
+#define TIOCPKT_DOSTOP		32
+
+#define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
+
+#endif /* __ASM_AVR32_IOCTLS_H */
diff --git a/include/asm-avr32/ipcbuf.h b/include/asm-avr32/ipcbuf.h
new file mode 100644
index 000000000000..1552c9698f5e
--- /dev/null
+++ b/include/asm-avr32/ipcbuf.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_AVR32_IPCBUF_H
+#define __ASM_AVR32_IPCBUF_H
+
+/*
+* The user_ipc_perm structure for AVR32 architecture.
+* Note extra padding because this structure is passed back and forth
+* between kernel and user space.
+*
+* Pad space is left for:
+* - 32-bit mode_t and seq
+* - 2 miscellaneous 32-bit values
+*/
+
+struct ipc64_perm
+{
+        __kernel_key_t          key;
+        __kernel_uid32_t        uid;
+        __kernel_gid32_t        gid;
+        __kernel_uid32_t        cuid;
+        __kernel_gid32_t        cgid;
+        __kernel_mode_t         mode;
+        unsigned short          __pad1;
+        unsigned short          seq;
+        unsigned short          __pad2;
+        unsigned long           __unused1;
+        unsigned long           __unused2;
+};
+
+#endif /* __ASM_AVR32_IPCBUF_H */
diff --git a/include/asm-avr32/irq.h b/include/asm-avr32/irq.h
new file mode 100644
index 000000000000..f7e725707dd7
--- /dev/null
+++ b/include/asm-avr32/irq.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_AVR32_IRQ_H
+#define __ASM_AVR32_IRQ_H
+
+#define NR_INTERNAL_IRQS	64
+#define NR_EXTERNAL_IRQS	64
+#define NR_IRQS			(NR_INTERNAL_IRQS + NR_EXTERNAL_IRQS)
+
+#define irq_canonicalize(i)	(i)
+
+#endif /* __ASM_AVR32_IOCTLS_H */
diff --git a/include/asm-avr32/irqflags.h b/include/asm-avr32/irqflags.h
new file mode 100644
index 000000000000..93570daac38a
--- /dev/null
+++ b/include/asm-avr32/irqflags.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_IRQFLAGS_H
+#define __ASM_AVR32_IRQFLAGS_H
+
+#include <asm/sysreg.h>
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+	return sysreg_read(SR);
+}
+
+#define raw_local_save_flags(x)					\
+	do { (x) = __raw_local_save_flags(); } while (0)
+
+/*
+ * This will restore ALL status register flags, not only the interrupt
+ * mask flag.
+ *
+ * The empty asm statement informs the compiler of this fact while
+ * also serving as a barrier.
+ */
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+	sysreg_write(SR, flags);
+	asm volatile("" : : : "memory", "cc");
+}
+
+static inline void raw_local_irq_disable(void)
+{
+	asm volatile("ssrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory");
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	asm volatile("csrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory");
+}
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & SYSREG_BIT(GM)) != 0;
+}
+
+static inline int raw_irqs_disabled(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	return raw_irqs_disabled_flags(flags);
+}
+
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_disable();
+
+	return flags;
+}
+
+#define raw_local_irq_save(flags)				\
+	do { (flags) = __raw_local_irq_save(); } while (0)
+
+#endif /* __ASM_AVR32_IRQFLAGS_H */
diff --git a/include/asm-avr32/kdebug.h b/include/asm-avr32/kdebug.h
new file mode 100644
index 000000000000..f583b643ffb2
--- /dev/null
+++ b/include/asm-avr32/kdebug.h
@@ -0,0 +1,38 @@
+#ifndef __ASM_AVR32_KDEBUG_H
+#define __ASM_AVR32_KDEBUG_H
+
+#include <linux/notifier.h>
+
+struct pt_regs;
+
+struct die_args {
+	struct pt_regs *regs;
+	int trapnr;
+};
+
+int register_die_notifier(struct notifier_block *nb);
+int unregister_die_notifier(struct notifier_block *nb);
+int register_page_fault_notifier(struct notifier_block *nb);
+int unregister_page_fault_notifier(struct notifier_block *nb);
+extern struct atomic_notifier_head avr32_die_chain;
+
+/* Grossly misnamed. */
+enum die_val {
+	DIE_FAULT,
+	DIE_BREAKPOINT,
+	DIE_SSTEP,
+	DIE_PAGE_FAULT,
+};
+
+static inline int notify_die(enum die_val val, struct pt_regs *regs,
+			     int trap, int sig)
+{
+	struct die_args args = {
+		.regs = regs,
+		.trapnr = trap,
+	};
+
+	return atomic_notifier_call_chain(&avr32_die_chain, val, &args);
+}
+
+#endif /* __ASM_AVR32_KDEBUG_H */
diff --git a/include/asm-avr32/kmap_types.h b/include/asm-avr32/kmap_types.h
new file mode 100644
index 000000000000..b7f5c6870107
--- /dev/null
+++ b/include/asm-avr32/kmap_types.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_AVR32_KMAP_TYPES_H
+#define __ASM_AVR32_KMAP_TYPES_H
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
+
+enum km_type {
+D(0)	KM_BOUNCE_READ,
+D(1)	KM_SKB_SUNRPC_DATA,
+D(2)	KM_SKB_DATA_SOFTIRQ,
+D(3)	KM_USER0,
+D(4)	KM_USER1,
+D(5)	KM_BIO_SRC_IRQ,
+D(6)	KM_BIO_DST_IRQ,
+D(7)	KM_PTE0,
+D(8)	KM_PTE1,
+D(9)	KM_PTE2,
+D(10)	KM_IRQ0,
+D(11)	KM_IRQ1,
+D(12)	KM_SOFTIRQ0,
+D(13)	KM_SOFTIRQ1,
+D(14)	KM_TYPE_NR
+};
+
+#undef D
+
+#endif /* __ASM_AVR32_KMAP_TYPES_H */
diff --git a/include/asm-avr32/kprobes.h b/include/asm-avr32/kprobes.h
new file mode 100644
index 000000000000..09a5cbe2f896
--- /dev/null
+++ b/include/asm-avr32/kprobes.h
@@ -0,0 +1,34 @@
+/*
+ * Kernel Probes (KProbes)
+ *
+ * Copyright (C) 2005-2006 Atmel Corporation
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_KPROBES_H
+#define __ASM_AVR32_KPROBES_H
+
+#include <linux/types.h>
+
+typedef u16	kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0xd673	/* breakpoint */
+#define MAX_INSN_SIZE		2
+
+#define ARCH_INACTIVE_KPROBE_COUNT 1
+
+#define arch_remove_kprobe(p)	do { } while (0)
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	kprobe_opcode_t	insn[MAX_INSN_SIZE];
+};
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+				    unsigned long val, void *data);
+
+#define flush_insn_slot(p)	do { } while (0)
+
+#endif /* __ASM_AVR32_KPROBES_H */
diff --git a/include/asm-avr32/linkage.h b/include/asm-avr32/linkage.h
new file mode 100644
index 000000000000..f7b285e910d4
--- /dev/null
+++ b/include/asm-avr32/linkage.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN .balign 2
+#define __ALIGN_STR ".balign 2"
+
+#endif /* __ASM_LINKAGE_H */
diff --git a/include/asm-avr32/local.h b/include/asm-avr32/local.h
new file mode 100644
index 000000000000..1c1619694da3
--- /dev/null
+++ b/include/asm-avr32/local.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_LOCAL_H
+#define __ASM_AVR32_LOCAL_H
+
+#include <asm-generic/local.h>
+
+#endif /* __ASM_AVR32_LOCAL_H */
diff --git a/include/asm-avr32/mach/serial_at91.h b/include/asm-avr32/mach/serial_at91.h
new file mode 100644
index 000000000000..1290bb32802d
--- /dev/null
+++ b/include/asm-avr32/mach/serial_at91.h
@@ -0,0 +1,33 @@
+/*
+ *  linux/include/asm-arm/mach/serial_at91.h
+ *
+ *  Based on serial_sa1100.h  by Nicolas Pitre
+ *
+ *  Copyright (C) 2002 ATMEL Rousset
+ *
+ *  Low level machine dependent UART functions.
+ */
+
+struct uart_port;
+
+/*
+ * This is a temporary structure for registering these
+ * functions; it is intended to be discarded after boot.
+ */
+struct at91_port_fns {
+	void	(*set_mctrl)(struct uart_port *, u_int);
+	u_int	(*get_mctrl)(struct uart_port *);
+	void	(*enable_ms)(struct uart_port *);
+	void	(*pm)(struct uart_port *, u_int, u_int);
+	int	(*set_wake)(struct uart_port *, u_int);
+	int	(*open)(struct uart_port *);
+	void	(*close)(struct uart_port *);
+};
+
+#if defined(CONFIG_SERIAL_AT91)
+void at91_register_uart_fns(struct at91_port_fns *fns);
+#else
+#define at91_register_uart_fns(fns) do { } while (0)
+#endif
+
+
diff --git a/include/asm-avr32/mman.h b/include/asm-avr32/mman.h
new file mode 100644
index 000000000000..648f91e7187a
--- /dev/null
+++ b/include/asm-avr32/mman.h
@@ -0,0 +1,17 @@
+#ifndef __ASM_AVR32_MMAN_H__
+#define __ASM_AVR32_MMAN_H__
+
+#include <asm-generic/mman.h>
+
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_LOCKED	0x2000		/* pages are locked */
+#define MAP_NORESERVE	0x4000		/* don't check for reservations */
+#define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
+#define MAP_NONBLOCK	0x10000		/* do not block on IO */
+
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+
+#endif /* __ASM_AVR32_MMAN_H__ */
diff --git a/include/asm-avr32/mmu.h b/include/asm-avr32/mmu.h
new file mode 100644
index 000000000000..60c2d2650d32
--- /dev/null
+++ b/include/asm-avr32/mmu.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_AVR32_MMU_H
+#define __ASM_AVR32_MMU_H
+
+/* Default "unsigned long" context */
+typedef unsigned long mm_context_t;
+
+#define MMU_ITLB_ENTRIES	64
+#define MMU_DTLB_ENTRIES	64
+
+#endif /* __ASM_AVR32_MMU_H */
diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h
new file mode 100644
index 000000000000..31add1ae8089
--- /dev/null
+++ b/include/asm-avr32/mmu_context.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * ASID handling taken from SH implementation.
+ *   Copyright (C) 1999 Niibe Yutaka
+ *   Copyright (C) 2003 Paul Mundt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_MMU_CONTEXT_H
+#define __ASM_AVR32_MMU_CONTEXT_H
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/sysreg.h>
+
+/*
+ * The MMU "context" consists of two things:
+ *    (a) TLB cache version
+ *    (b) ASID (Address Space IDentifier)
+ */
+#define MMU_CONTEXT_ASID_MASK		0x000000ff
+#define MMU_CONTEXT_VERSION_MASK	0xffffff00
+#define MMU_CONTEXT_FIRST_VERSION       0x00000100
+#define NO_CONTEXT			0
+
+#define MMU_NO_ASID			0x100
+
+/* Virtual Page Number mask */
+#define MMU_VPN_MASK	0xfffff000
+
+/* Cache of MMU context last used */
+extern unsigned long mmu_context_cache;
+
+/*
+ * Get MMU context if needed
+ */
+static inline void
+get_mmu_context(struct mm_struct *mm)
+{
+	unsigned long mc = mmu_context_cache;
+
+	if (((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0)
+		/* It's up to date, do nothing */
+		return;
+
+	/* It's old, we need to get new context with new version */
+	mc = ++mmu_context_cache;
+	if (!(mc & MMU_CONTEXT_ASID_MASK)) {
+		/*
+		 * We have exhausted all ASIDs of this version.
+		 * Flush the TLB and start new cycle.
+		 */
+		flush_tlb_all();
+		/*
+		 * Fix version. Note that we avoid version #0
+		 * to distinguish NO_CONTEXT.
+		 */
+		if (!mc)
+			mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION;
+	}
+	mm->context = mc;
+}
+
+/*
+ * Initialize the context related info for a new mm_struct
+ * instance.
+ */
+static inline int init_new_context(struct task_struct *tsk,
+				       struct mm_struct *mm)
+{
+	mm->context = NO_CONTEXT;
+	return 0;
+}
+
+/*
+ * Destroy context related info for an mm_struct that is about
+ * to be put to rest.
+ */
+static inline void destroy_context(struct mm_struct *mm)
+{
+	/* Do nothing */
+}
+
+static inline void set_asid(unsigned long asid)
+{
+	/* XXX: We're destroying TLBEHI[8:31] */
+	sysreg_write(TLBEHI, asid & MMU_CONTEXT_ASID_MASK);
+	cpu_sync_pipeline();
+}
+
+static inline unsigned long get_asid(void)
+{
+	unsigned long asid;
+
+	asid = sysreg_read(TLBEHI);
+	return asid & MMU_CONTEXT_ASID_MASK;
+}
+
+static inline void activate_context(struct mm_struct *mm)
+{
+	get_mmu_context(mm);
+	set_asid(mm->context & MMU_CONTEXT_ASID_MASK);
+}
+
+static inline void switch_mm(struct mm_struct *prev,
+				 struct mm_struct *next,
+				 struct task_struct *tsk)
+{
+	if (likely(prev != next)) {
+		unsigned long __pgdir = (unsigned long)next->pgd;
+
+		sysreg_write(PTBR, __pgdir);
+		activate_context(next);
+	}
+}
+
+#define deactivate_mm(tsk,mm) do { } while(0)
+
+#define activate_mm(prev, next) switch_mm((prev), (next), NULL)
+
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+
+static inline void enable_mmu(void)
+{
+	sysreg_write(MMUCR, (SYSREG_BIT(MMUCR_S)
+			     | SYSREG_BIT(E)
+			     | SYSREG_BIT(MMUCR_I)));
+	nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
+
+	if (mmu_context_cache == NO_CONTEXT)
+		mmu_context_cache = MMU_CONTEXT_FIRST_VERSION;
+
+	set_asid(mmu_context_cache & MMU_CONTEXT_ASID_MASK);
+}
+
+static inline void disable_mmu(void)
+{
+	sysreg_write(MMUCR, SYSREG_BIT(MMUCR_S));
+}
+
+#endif /* __ASM_AVR32_MMU_CONTEXT_H */
diff --git a/include/asm-avr32/module.h b/include/asm-avr32/module.h
new file mode 100644
index 000000000000..451444538a1b
--- /dev/null
+++ b/include/asm-avr32/module.h
@@ -0,0 +1,28 @@
+#ifndef __ASM_AVR32_MODULE_H
+#define __ASM_AVR32_MODULE_H
+
+struct mod_arch_syminfo {
+	unsigned long got_offset;
+	int got_initialized;
+};
+
+struct mod_arch_specific {
+	/* Starting offset of got in the module core memory. */
+	unsigned long got_offset;
+	/* Size of the got. */
+	unsigned long got_size;
+	/* Number of symbols in syminfo. */
+	int nsyms;
+	/* Additional symbol information (got offsets). */
+	struct mod_arch_syminfo *syminfo;
+};
+
+#define Elf_Shdr		Elf32_Shdr
+#define Elf_Sym			Elf32_Sym
+#define Elf_Ehdr		Elf32_Ehdr
+
+#define MODULE_PROC_FAMILY "AVR32v1"
+
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
+
+#endif /* __ASM_AVR32_MODULE_H */
diff --git a/include/asm-avr32/msgbuf.h b/include/asm-avr32/msgbuf.h
new file mode 100644
index 000000000000..ac18bc4da7f7
--- /dev/null
+++ b/include/asm-avr32/msgbuf.h
@@ -0,0 +1,31 @@
+#ifndef __ASM_AVR32_MSGBUF_H
+#define __ASM_AVR32_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	__kernel_time_t msg_stime;	/* last msgsnd time */
+	unsigned long	__unused1;
+	__kernel_time_t msg_rtime;	/* last msgrcv time */
+	unsigned long	__unused2;
+	__kernel_time_t msg_ctime;	/* last change time */
+	unsigned long	__unused3;
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;
+	unsigned long  __unused5;
+};
+
+#endif /* __ASM_AVR32_MSGBUF_H */
diff --git a/include/asm-avr32/mutex.h b/include/asm-avr32/mutex.h
new file mode 100644
index 000000000000..458c1f7fbc18
--- /dev/null
+++ b/include/asm-avr32/mutex.h
@@ -0,0 +1,9 @@
+/*
+ * Pull in the generic implementation for the mutex fastpath.
+ *
+ * TODO: implement optimized primitives instead, or leave the generic
+ * implementation in place, or pick the atomic_xchg() based generic
+ * implementation. (see asm-generic/mutex-xchg.h for details)
+ */
+
+#include <asm-generic/mutex-dec.h>
diff --git a/include/asm-avr32/namei.h b/include/asm-avr32/namei.h
new file mode 100644
index 000000000000..f0a26de06cab
--- /dev/null
+++ b/include/asm-avr32/namei.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_AVR32_NAMEI_H
+#define __ASM_AVR32_NAMEI_H
+
+/* This dummy routine may be changed to something useful */
+#define __emul_prefix() NULL
+
+#endif /* __ASM_AVR32_NAMEI_H */
diff --git a/include/asm-avr32/numnodes.h b/include/asm-avr32/numnodes.h
new file mode 100644
index 000000000000..0b864d7ce330
--- /dev/null
+++ b/include/asm-avr32/numnodes.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_AVR32_NUMNODES_H
+#define __ASM_AVR32_NUMNODES_H
+
+/* Max 4 nodes */
+#define NODES_SHIFT	2
+
+#endif /* __ASM_AVR32_NUMNODES_H */
diff --git a/include/asm-avr32/ocd.h b/include/asm-avr32/ocd.h
new file mode 100644
index 000000000000..46f73180a127
--- /dev/null
+++ b/include/asm-avr32/ocd.h
@@ -0,0 +1,78 @@
+/*
+ * AVR32 OCD Registers
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_OCD_H
+#define __ASM_AVR32_OCD_H
+
+/* Debug Registers */
+#define DBGREG_DID		  0
+#define DBGREG_DC		  8
+#define DBGREG_DS		 16
+#define DBGREG_RWCS		 28
+#define DBGREG_RWA		 36
+#define DBGREG_RWD		 40
+#define DBGREG_WT		 44
+#define DBGREG_DTC		 52
+#define DBGREG_DTSA0		 56
+#define DBGREG_DTSA1		 60
+#define DBGREG_DTEA0		 72
+#define DBGREG_DTEA1		 76
+#define DBGREG_BWC0A		 88
+#define DBGREG_BWC0B		 92
+#define DBGREG_BWC1A		 96
+#define DBGREG_BWC1B		100
+#define DBGREG_BWC2A		104
+#define DBGREG_BWC2B		108
+#define DBGREG_BWC3A		112
+#define DBGREG_BWC3B		116
+#define DBGREG_BWA0A		120
+#define DBGREG_BWA0B		124
+#define DBGREG_BWA1A		128
+#define DBGREG_BWA1B		132
+#define DBGREG_BWA2A		136
+#define DBGREG_BWA2B		140
+#define DBGREG_BWA3A		144
+#define DBGREG_BWA3B		148
+#define DBGREG_BWD3A		153
+#define DBGREG_BWD3B		156
+
+#define DBGREG_PID		284
+
+#define SABAH_OCD		0x01
+#define SABAH_ICACHE		0x02
+#define SABAH_MEM_CACHED	0x04
+#define SABAH_MEM_UNCACHED	0x05
+
+/* Fields in the Development Control register */
+#define DC_SS_BIT		8
+
+#define DC_SS			(1 <<  DC_SS_BIT)
+#define DC_DBE			(1 << 13)
+#define DC_RID			(1 << 27)
+#define DC_ORP			(1 << 28)
+#define DC_MM			(1 << 29)
+#define DC_RES			(1 << 30)
+
+/* Fields in the Development Status register */
+#define DS_SSS			(1 <<  0)
+#define DS_SWB			(1 <<  1)
+#define DS_HWB			(1 <<  2)
+#define DS_BP_SHIFT		8
+#define DS_BP_MASK		(0xff << DS_BP_SHIFT)
+
+#define __mfdr(addr)							\
+({									\
+	register unsigned long value;					\
+	asm volatile("mfdr	%0, %1" : "=r"(value) : "i"(addr));	\
+	value;								\
+})
+#define __mtdr(addr, value)						\
+	asm volatile("mtdr	%0, %1" : : "i"(addr), "r"(value))
+
+#endif /* __ASM_AVR32_OCD_H */
diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h
new file mode 100644
index 000000000000..0f630b3e9932
--- /dev/null
+++ b/include/asm-avr32/page.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PAGE_H
+#define __ASM_AVR32_PAGE_H
+
+#ifdef __KERNEL__
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT	12
+#ifdef __ASSEMBLY__
+#define PAGE_SIZE	(1 << PAGE_SHIFT)
+#else
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#endif
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+#define PTE_MASK	PAGE_MASK
+
+#ifndef __ASSEMBLY__
+
+#include <asm/addrspace.h>
+
+extern void clear_page(void *to);
+extern void copy_page(void *to, void *from);
+
+#define clear_user_page(page, vaddr, pg)	clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+#define pte_val(x)		((x).pte)
+#define pgd_val(x)		((x).pgd)
+#define pgprot_val(x)		((x).pgprot)
+
+#define __pte(x)		((pte_t) { (x) })
+#define __pgd(x)		((pgd_t) { (x) })
+#define __pgprot(x)		((pgprot_t) { (x) })
+
+/* FIXME: These should be removed soon */
+extern unsigned long memory_start, memory_end;
+
+/* Pure 2^n version of get_order */
+static inline int get_order(unsigned long size)
+{
+	unsigned lz;
+
+	size = (size - 1) >> PAGE_SHIFT;
+	asm("clz %0, %1" : "=r"(lz) : "r"(size));
+	return 32 - lz;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+/* Align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+
+/*
+ * The hardware maps the virtual addresses 0x80000000 -> 0x9fffffff
+ * permanently to the physical addresses 0x00000000 -> 0x1fffffff when
+ * segmentation is enabled. We want to make use of this in order to
+ * minimize TLB pressure.
+ */
+#define PAGE_OFFSET		(0x80000000UL)
+
+/*
+ * ALSA uses virt_to_page() on DMA pages, which I'm not entirely sure
+ * is a good idea. Anyway, we can't simply subtract PAGE_OFFSET here
+ * in that case, so we'll have to mask out the three most significant
+ * bits of the address instead...
+ *
+ * What's the difference between __pa() and virt_to_phys() anyway?
+ */
+#define __pa(x)		PHYSADDR(x)
+#define __va(x)		((void *)(P1SEGADDR(x)))
+
+#define MAP_NR(addr)	(((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT)
+
+#define phys_to_page(phys)	(pfn_to_page(phys >> PAGE_SHIFT))
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+
+#define PHYS_PFN_OFFSET		(CONFIG_PHYS_OFFSET >> PAGE_SHIFT)
+
+#define pfn_to_page(pfn)	(mem_map + ((pfn) - PHYS_PFN_OFFSET))
+#define page_to_pfn(page)	((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET)
+#define pfn_valid(pfn)		((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE |	\
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+/*
+ * Memory above this physical address will be considered highmem.
+ */
+#define HIGHMEM_START		0x20000000UL
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_AVR32_PAGE_H */
diff --git a/include/asm-avr32/param.h b/include/asm-avr32/param.h
new file mode 100644
index 000000000000..34bc8d4c3b29
--- /dev/null
+++ b/include/asm-avr32/param.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_AVR32_PARAM_H
+#define __ASM_AVR32_PARAM_H
+
+#ifdef __KERNEL__
+# define HZ		CONFIG_HZ
+# define USER_HZ	100		/* User interfaces are in "ticks" */
+# define CLOCKS_PER_SEC	(USER_HZ)	/* frequency at which times() counts */
+#endif
+
+#ifndef HZ
+# define HZ		100
+#endif
+
+/* TODO: Should be configurable */
+#define EXEC_PAGESIZE	4096
+
+#ifndef NOGROUP
+# define NOGROUP	(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64
+
+#endif /* __ASM_AVR32_PARAM_H */
diff --git a/include/asm-avr32/pci.h b/include/asm-avr32/pci.h
new file mode 100644
index 000000000000..0f5f134b896a
--- /dev/null
+++ b/include/asm-avr32/pci.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_AVR32_PCI_H__
+#define __ASM_AVR32_PCI_H__
+
+/* We don't support PCI yet, but some drivers require this file anyway */
+
+#define PCI_DMA_BUS_IS_PHYS	(1)
+
+#endif /* __ASM_AVR32_PCI_H__ */
diff --git a/include/asm-avr32/percpu.h b/include/asm-avr32/percpu.h
new file mode 100644
index 000000000000..69227b4cd0d4
--- /dev/null
+++ b/include/asm-avr32/percpu.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_PERCPU_H
+#define __ASM_AVR32_PERCPU_H
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_AVR32_PERCPU_H */
diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h
new file mode 100644
index 000000000000..7492cfb92ced
--- /dev/null
+++ b/include/asm-avr32/pgalloc.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PGALLOC_H
+#define __ASM_AVR32_PGALLOC_H
+
+#include <asm/processor.h>
+#include <linux/threads.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#define pmd_populate_kernel(mm, pmd, pte) \
+	set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
+
+static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				    struct page *pte)
+{
+	set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte)));
+}
+
+/*
+ * Allocate and free page tables
+ */
+static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	unsigned int pgd_size = (USER_PTRS_PER_PGD * sizeof(pgd_t));
+	pgd_t *pgd = (pgd_t *)kmalloc(pgd_size, GFP_KERNEL);
+
+	if (pgd)
+		memset(pgd, 0, pgd_size);
+
+	return pgd;
+}
+
+static inline void pgd_free(pgd_t *pgd)
+{
+	kfree(pgd);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
+{
+	int count = 0;
+	pte_t *pte;
+
+	do {
+		pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT);
+		if (pte)
+			clear_page(pte);
+		else {
+			current->state = TASK_UNINTERRUPTIBLE;
+			schedule_timeout(HZ);
+		}
+	} while (!pte && (count++ < 10));
+
+	return pte;
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+					 unsigned long address)
+{
+	int count = 0;
+	struct page *pte;
+
+	do {
+		pte = alloc_pages(GFP_KERNEL, 0);
+		if (pte)
+			clear_page(page_address(pte));
+		else {
+			current->state = TASK_UNINTERRUPTIBLE;
+			schedule_timeout(HZ);
+		}
+	} while (!pte && (count++ < 10));
+
+	return pte;
+}
+
+static inline void pte_free_kernel(pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct page *pte)
+{
+	__free_page(pte);
+}
+
+#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+
+#define check_pgt_cache() do { } while(0)
+
+#endif /* __ASM_AVR32_PGALLOC_H */
diff --git a/include/asm-avr32/pgtable-2level.h b/include/asm-avr32/pgtable-2level.h
new file mode 100644
index 000000000000..425dd567b5b9
--- /dev/null
+++ b/include/asm-avr32/pgtable-2level.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PGTABLE_2LEVEL_H
+#define __ASM_AVR32_PGTABLE_2LEVEL_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+/*
+ * Traditional 2-level paging structure
+ */
+#define PGDIR_SHIFT	22
+#define PTRS_PER_PGD	1024
+
+#define PTRS_PER_PTE	1024
+
+#ifndef __ASSEMBLY__
+#define pte_ERROR(e) \
+	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep, pteval)
+
+/*
+ * (pmds are folded into pgds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
+
+#define pte_pfn(x)		((unsigned long)(((x).pte >> PAGE_SHIFT)))
+#define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#define pfn_pmd(pfn, prot)	__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_PGTABLE_2LEVEL_H */
diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h
new file mode 100644
index 000000000000..6b8ca9db2bd5
--- /dev/null
+++ b/include/asm-avr32/pgtable.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PGTABLE_H
+#define __ASM_AVR32_PGTABLE_H
+
+#include <asm/addrspace.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Use two-level page tables just as the i386 (without PAE)
+ */
+#include <asm/pgtable-2level.h>
+
+/*
+ * The following code might need some cleanup when the values are
+ * final...
+ */
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+#define FIRST_USER_ADDRESS	0
+
+#define PTE_PHYS_MASK	0x1ffff000
+
+#ifndef __ASSEMBLY__
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern void paging_init(void);
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used for
+ * zero-mapped memory areas etc.
+ */
+extern struct page *empty_zero_page;
+#define ZERO_PAGE(vaddr) (empty_zero_page)
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8 MiB value just means that there will be a 8 MiB "hole"
+ * after the uncached physical memory (P2 segment) until the vmalloc
+ * area starts. That means that any out-of-bounds memory accesses will
+ * hopefully be caught; we don't know if the end of the P1/P2 segments
+ * are actually used for anything, but it is anyway safer to let the
+ * MMU catch these kinds of errors than to rely on the memory bus.
+ *
+ * A "hole" of the same size is added to the end of the P3 segment as
+ * well. It might seem wasteful to use 16 MiB of virtual address space
+ * on this, but we do have 512 MiB of it...
+ *
+ * The vmalloc() routines leave a hole of 4 KiB between each vmalloced
+ * area for the same reason.
+ */
+#define VMALLOC_OFFSET	(8 * 1024 * 1024)
+#define VMALLOC_START	(P3SEG + VMALLOC_OFFSET)
+#define VMALLOC_END	(P4SEG - VMALLOC_OFFSET)
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Page flags. Some of these flags are not directly supported by
+ * hardware, so we have to emulate them.
+ */
+#define _TLBEHI_BIT_VALID	9
+#define _TLBEHI_VALID		(1 << _TLBEHI_BIT_VALID)
+
+#define _PAGE_BIT_WT		0  /* W-bit   : write-through */
+#define _PAGE_BIT_DIRTY		1  /* D-bit   : page changed */
+#define _PAGE_BIT_SZ0		2  /* SZ0-bit : Size of page */
+#define _PAGE_BIT_SZ1		3  /* SZ1-bit : Size of page */
+#define _PAGE_BIT_EXECUTE	4  /* X-bit   : execute access allowed */
+#define _PAGE_BIT_RW		5  /* AP0-bit : write access allowed */
+#define _PAGE_BIT_USER		6  /* AP1-bit : user space access allowed */
+#define _PAGE_BIT_BUFFER	7  /* B-bit   : bufferable */
+#define _PAGE_BIT_GLOBAL	8  /* G-bit   : global (ignore ASID) */
+#define _PAGE_BIT_CACHABLE	9  /* C-bit   : cachable */
+
+/* If we drop support for 1K pages, we get two extra bits */
+#define _PAGE_BIT_PRESENT	10
+#define _PAGE_BIT_ACCESSED	11 /* software: page was accessed */
+
+/* The following flags are only valid when !PRESENT */
+#define _PAGE_BIT_FILE		0 /* software: pagecache or swap? */
+
+#define _PAGE_WT		(1 << _PAGE_BIT_WT)
+#define _PAGE_DIRTY		(1 << _PAGE_BIT_DIRTY)
+#define _PAGE_EXECUTE		(1 << _PAGE_BIT_EXECUTE)
+#define _PAGE_RW		(1 << _PAGE_BIT_RW)
+#define _PAGE_USER		(1 << _PAGE_BIT_USER)
+#define _PAGE_BUFFER		(1 << _PAGE_BIT_BUFFER)
+#define _PAGE_GLOBAL		(1 << _PAGE_BIT_GLOBAL)
+#define _PAGE_CACHABLE		(1 << _PAGE_BIT_CACHABLE)
+
+/* Software flags */
+#define _PAGE_ACCESSED		(1 << _PAGE_BIT_ACCESSED)
+#define _PAGE_PRESENT		(1 << _PAGE_BIT_PRESENT)
+#define _PAGE_FILE		(1 << _PAGE_BIT_FILE)
+
+/*
+ * Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is
+ * usually called _PAGE_PROTNONE on other architectures.
+ *
+ * XXX: Find out if _PAGE_PROTNONE is equivalent with !_PAGE_USER. If
+ * so, we can encode all possible page sizes (although we can't really
+ * support 1K pages anyway due to the _PAGE_PRESENT and _PAGE_ACCESSED
+ * bits)
+ *
+ */
+#define _PAGE_TYPE_MASK		((1 << _PAGE_BIT_SZ0) | (1 << _PAGE_BIT_SZ1))
+#define _PAGE_TYPE_NONE		(0 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_SMALL	(1 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_MEDIUM	(2 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_LARGE	(3 << _PAGE_BIT_SZ0)
+
+/*
+ * Mask which drop software flags. We currently can't handle more than
+ * 512 MiB of physical memory, so we can use bits 29-31 for other
+ * stuff.  With a fixed 4K page size, we can use bits 10-11 as well as
+ * bits 2-3 (SZ)
+ */
+#define _PAGE_FLAGS_HARDWARE_MASK	0xfffff3ff
+
+#define _PAGE_FLAGS_CACHE_MASK	(_PAGE_CACHABLE | _PAGE_BUFFER | _PAGE_WT)
+
+/* TODO: Check for saneness */
+/* User-mode page table flags (to be set in a pgd or pmd entry) */
+#define _PAGE_TABLE		(_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \
+				 | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+/* Kernel-mode page table flags */
+#define _KERNPG_TABLE		(_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \
+				 | _PAGE_ACCESSED | _PAGE_DIRTY)
+/* Flags that may be modified by software */
+#define _PAGE_CHG_MASK		(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY \
+				 | _PAGE_FLAGS_CACHE_MASK)
+
+#define _PAGE_FLAGS_READ	(_PAGE_CACHABLE	| _PAGE_BUFFER)
+#define _PAGE_FLAGS_WRITE	(_PAGE_FLAGS_READ | _PAGE_RW | _PAGE_DIRTY)
+
+#define _PAGE_NORMAL(x)	__pgprot((x) | _PAGE_PRESENT | _PAGE_TYPE_SMALL	\
+				 | _PAGE_ACCESSED)
+
+#define PAGE_NONE	(_PAGE_ACCESSED | _PAGE_TYPE_NONE)
+#define PAGE_READ	(_PAGE_FLAGS_READ | _PAGE_USER)
+#define PAGE_EXEC	(_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_USER)
+#define PAGE_WRITE	(_PAGE_FLAGS_WRITE | _PAGE_USER)
+#define PAGE_KERNEL	_PAGE_NORMAL(_PAGE_FLAGS_WRITE | _PAGE_EXECUTE | _PAGE_GLOBAL)
+#define PAGE_KERNEL_RO	_PAGE_NORMAL(_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_GLOBAL)
+
+#define _PAGE_P(x)	_PAGE_NORMAL((x) & ~(_PAGE_RW | _PAGE_DIRTY))
+#define _PAGE_S(x)	_PAGE_NORMAL(x)
+
+#define PAGE_COPY	_PAGE_P(PAGE_WRITE | PAGE_READ)
+
+#ifndef __ASSEMBLY__
+/*
+ * The hardware supports flags for write- and execute access. Read is
+ * always allowed if the page is loaded into the TLB, so the "-w-",
+ * "--x" and "-wx" mappings are implemented as "rw-", "r-x" and "rwx",
+ * respectively.
+ *
+ * The "---" case is handled by software; the page will simply not be
+ * loaded into the TLB if the page type is _PAGE_TYPE_NONE.
+ */
+
+#define __P000	__pgprot(PAGE_NONE)
+#define __P001	_PAGE_P(PAGE_READ)
+#define __P010	_PAGE_P(PAGE_WRITE)
+#define __P011	_PAGE_P(PAGE_WRITE | PAGE_READ)
+#define __P100	_PAGE_P(PAGE_EXEC)
+#define __P101	_PAGE_P(PAGE_EXEC | PAGE_READ)
+#define __P110	_PAGE_P(PAGE_EXEC | PAGE_WRITE)
+#define __P111	_PAGE_P(PAGE_EXEC | PAGE_WRITE | PAGE_READ)
+
+#define __S000	__pgprot(PAGE_NONE)
+#define __S001	_PAGE_S(PAGE_READ)
+#define __S010	_PAGE_S(PAGE_WRITE)
+#define __S011	_PAGE_S(PAGE_WRITE | PAGE_READ)
+#define __S100	_PAGE_S(PAGE_EXEC)
+#define __S101	_PAGE_S(PAGE_EXEC | PAGE_READ)
+#define __S110	_PAGE_S(PAGE_EXEC | PAGE_WRITE)
+#define __S111	_PAGE_S(PAGE_EXEC | PAGE_WRITE | PAGE_READ)
+
+#define pte_none(x)	(!pte_val(x))
+#define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
+
+#define pte_clear(mm,addr,xp)					\
+	do {							\
+		set_pte_at(mm, addr, xp, __pte(0));		\
+	} while (0)
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_read(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_USER;
+}
+static inline int pte_write(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_RW;
+}
+static inline int pte_exec(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_EXECUTE;
+}
+static inline int pte_dirty(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_DIRTY;
+}
+static inline int pte_young(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_ACCESSED;
+}
+
+/*
+ * The following only work if pte_present() is not true.
+ */
+static inline int pte_file(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_FILE;
+}
+
+/* Mutator functions for PTE bits */
+static inline pte_t pte_rdprotect(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER));
+	return pte;
+}
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW));
+	return pte;
+}
+static inline pte_t pte_exprotect(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_EXECUTE));
+	return pte;
+}
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY));
+	return pte;
+}
+static inline pte_t pte_mkold(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED));
+	return pte;
+}
+static inline pte_t pte_mkread(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER));
+	return pte;
+}
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW));
+	return pte;
+}
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE));
+	return pte;
+}
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY));
+	return pte;
+}
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED));
+	return pte;
+}
+
+#define pmd_none(x)	(!pmd_val(x))
+#define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
+#define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER))	\
+			 != _KERNPG_TABLE)
+
+/*
+ * Permanent address of a page. We don't support highmem, so this is
+ * trivial.
+ */
+#define pages_to_mb(x)	((x) >> (20-PAGE_SHIFT))
+#define pte_page(x) 	phys_to_page(pte_val(x) & PTE_PHYS_MASK)
+
+/*
+ * Mark the prot value as uncacheable and unbufferable
+ */
+#define pgprot_noncached(prot)						\
+	__pgprot(pgprot_val(prot) & ~(_PAGE_BUFFER | _PAGE_CACHABLE))
+
+/*
+ * Mark the prot value as uncacheable but bufferable
+ */
+#define pgprot_writecombine(prot)					\
+	__pgprot((pgprot_val(prot) & ~_PAGE_CACHABLE) | _PAGE_BUFFER)
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * extern pte_t mk_pte(struct page *page, pgprot_t pgprot)
+ */
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK)
+			    | pgprot_val(newprot)));
+	return pte;
+}
+
+#define page_pte(page)	page_pte_prot(page, __pgprot(0))
+
+#define pmd_page_vaddr(pmd)					\
+	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+#define pmd_page(pmd)	(phys_to_page(pmd_val(pmd)))
+
+/* to find an entry in a page-table-directory. */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+#define pgd_offset_current(address)				\
+	((pgd_t *)__mfsr(SYSREG_PTBR) + pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* Find an entry in the third-level page table.. */
+#define pte_index(address)				\
+	((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, address)					\
+	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
+#define pte_offset_kernel(dir, address)					\
+	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
+#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
+#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address)
+#define pte_unmap(pte)		do { } while (0)
+#define pte_unmap_nested(pte)	do { } while (0)
+
+struct vm_area_struct;
+extern void update_mmu_cache(struct vm_area_struct * vma,
+			     unsigned long address, pte_t pte);
+
+/*
+ * Encode and decode a swap entry
+ *
+ * Constraints:
+ *   _PAGE_FILE at bit 0
+ *   _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE)
+ *   _PAGE_PRESENT at bit 10
+ *
+ * We encode the type into bits 4-9 and offset into bits 11-31. This
+ * gives us a 21 bits offset, or 2**21 * 4K = 8G usable swap space per
+ * device, and 64 possible types.
+ *
+ * NOTE: We should set ZEROs at the position of _PAGE_PRESENT
+ *       and _PAGE_PROTNONE bits
+ */
+#define __swp_type(x)		(((x).val >> 4) & 0x3f)
+#define __swp_offset(x)		((x).val >> 11)
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 4) | ((offset) << 11) })
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
+
+/*
+ * Encode and decode a nonlinear file mapping entry. We have to
+ * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't
+ * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?)
+ */
+#define PTE_FILE_MAX_BITS	30
+#define pte_to_pgoff(pte)	(((pte_val(pte) >> 1) & 0x1ff)		\
+				 | ((pte_val(pte) >> 11) << 9))
+#define pgoff_to_pte(off)	((pte_t) { ((((off) & 0x1ff) << 1)	\
+					    | (((off) >> 9) << 11)	\
+					    | _PAGE_FILE) })
+
+typedef pte_t *pte_addr_t;
+
+#define kern_addr_valid(addr)	(1)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)	\
+	remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+#define MK_IOSPACE_PFN(space, pfn)	(pfn)
+#define GET_IOSPACE(pfn)		0
+#define GET_PFN(pfn)			(pfn)
+
+/* No page table caches to initialize (?) */
+#define pgtable_cache_init()	do { } while(0)
+
+#include <asm-generic/pgtable.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_PGTABLE_H */
diff --git a/include/asm-avr32/poll.h b/include/asm-avr32/poll.h
new file mode 100644
index 000000000000..736e29755dfc
--- /dev/null
+++ b/include/asm-avr32/poll.h
@@ -0,0 +1,27 @@
+#ifndef __ASM_AVR32_POLL_H
+#define __ASM_AVR32_POLL_H
+
+/* These are specified by iBCS2 */
+#define POLLIN		0x0001
+#define POLLPRI		0x0002
+#define POLLOUT		0x0004
+#define POLLERR		0x0008
+#define POLLHUP		0x0010
+#define POLLNVAL	0x0020
+
+/* The rest seem to be more-or-less nonstandard. Check them! */
+#define POLLRDNORM	0x0040
+#define POLLRDBAND	0x0080
+#define POLLWRNORM	0x0100
+#define POLLWRBAND	0x0200
+#define POLLMSG		0x0400
+#define POLLREMOVE	0x1000
+#define POLLRDHUP	0x2000
+
+struct pollfd {
+	int fd;
+	short events;
+	short revents;
+};
+
+#endif /* __ASM_AVR32_POLL_H */
diff --git a/include/asm-avr32/posix_types.h b/include/asm-avr32/posix_types.h
new file mode 100644
index 000000000000..2831b039b349
--- /dev/null
+++ b/include/asm-avr32/posix_types.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_POSIX_TYPES_H
+#define __ASM_AVR32_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned long   __kernel_ino_t;
+typedef unsigned short  __kernel_mode_t;
+typedef unsigned short  __kernel_nlink_t;
+typedef long            __kernel_off_t;
+typedef int             __kernel_pid_t;
+typedef unsigned short  __kernel_ipc_pid_t;
+typedef unsigned int	__kernel_uid_t;
+typedef unsigned int	__kernel_gid_t;
+typedef unsigned long	__kernel_size_t;
+typedef int             __kernel_ssize_t;
+typedef int             __kernel_ptrdiff_t;
+typedef long            __kernel_time_t;
+typedef long            __kernel_suseconds_t;
+typedef long            __kernel_clock_t;
+typedef int             __kernel_timer_t;
+typedef int             __kernel_clockid_t;
+typedef int             __kernel_daddr_t;
+typedef char *          __kernel_caddr_t;
+typedef unsigned short  __kernel_uid16_t;
+typedef unsigned short  __kernel_gid16_t;
+typedef unsigned int    __kernel_uid32_t;
+typedef unsigned int    __kernel_gid32_t;
+
+typedef unsigned short  __kernel_old_uid_t;
+typedef unsigned short  __kernel_old_gid_t;
+typedef unsigned short  __kernel_old_dev_t;
+
+#ifdef __GNUC__
+typedef long long       __kernel_loff_t;
+#endif
+
+typedef struct {
+#if defined(__KERNEL__) || defined(__USE_ALL)
+    int     val[2];
+#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */
+    int     __val[2];
+#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */
+} __kernel_fsid_t;
+
+#if defined(__KERNEL__)
+
+#undef  __FD_SET
+static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+    unsigned long __tmp = __fd / __NFDBITS;
+    unsigned long __rem = __fd % __NFDBITS;
+    __fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
+}
+
+#undef  __FD_CLR
+static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+    unsigned long __tmp = __fd / __NFDBITS;
+    unsigned long __rem = __fd % __NFDBITS;
+    __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
+}
+
+
+#undef  __FD_ISSET
+static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
+{
+    unsigned long __tmp = __fd / __NFDBITS;
+    unsigned long __rem = __fd % __NFDBITS;
+    return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef  __FD_ZERO
+static __inline__ void __FD_ZERO(__kernel_fd_set *__p)
+{
+    unsigned long *__tmp = __p->fds_bits;
+    int __i;
+
+    if (__builtin_constant_p(__FDSET_LONGS)) {
+        switch (__FDSET_LONGS) {
+            case 16:
+                __tmp[ 0] = 0; __tmp[ 1] = 0;
+                __tmp[ 2] = 0; __tmp[ 3] = 0;
+                __tmp[ 4] = 0; __tmp[ 5] = 0;
+                __tmp[ 6] = 0; __tmp[ 7] = 0;
+                __tmp[ 8] = 0; __tmp[ 9] = 0;
+                __tmp[10] = 0; __tmp[11] = 0;
+                __tmp[12] = 0; __tmp[13] = 0;
+                __tmp[14] = 0; __tmp[15] = 0;
+                return;
+
+            case 8:
+                __tmp[ 0] = 0; __tmp[ 1] = 0;
+                __tmp[ 2] = 0; __tmp[ 3] = 0;
+                __tmp[ 4] = 0; __tmp[ 5] = 0;
+                __tmp[ 6] = 0; __tmp[ 7] = 0;
+                return;
+
+            case 4:
+                __tmp[ 0] = 0; __tmp[ 1] = 0;
+                __tmp[ 2] = 0; __tmp[ 3] = 0;
+                return;
+        }
+    }
+    __i = __FDSET_LONGS;
+    while (__i) {
+        __i--;
+        *__tmp = 0;
+        __tmp++;
+    }
+}
+
+#endif /* defined(__KERNEL__) */
+
+#endif /* __ASM_AVR32_POSIX_TYPES_H */
diff --git a/include/asm-avr32/processor.h b/include/asm-avr32/processor.h
new file mode 100644
index 000000000000..f6913778a45f
--- /dev/null
+++ b/include/asm-avr32/processor.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PROCESSOR_H
+#define __ASM_AVR32_PROCESSOR_H
+
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#define TASK_SIZE	0x80000000
+
+#ifndef __ASSEMBLY__
+
+static inline void *current_text_addr(void)
+{
+	register void *pc asm("pc");
+	return pc;
+}
+
+enum arch_type {
+	ARCH_AVR32A,
+	ARCH_AVR32B,
+	ARCH_MAX
+};
+
+enum cpu_type {
+	CPU_MORGAN,
+	CPU_AT32AP,
+	CPU_MAX
+};
+
+enum tlb_config {
+	TLB_NONE,
+	TLB_SPLIT,
+	TLB_UNIFIED,
+	TLB_INVALID
+};
+
+struct avr32_cpuinfo {
+	struct clk *clk;
+	unsigned long loops_per_jiffy;
+	enum arch_type arch_type;
+	enum cpu_type cpu_type;
+	unsigned short arch_revision;
+	unsigned short cpu_revision;
+	enum tlb_config tlb_config;
+
+	struct cache_info icache;
+	struct cache_info dcache;
+};
+
+extern struct avr32_cpuinfo boot_cpu_data;
+
+#ifdef CONFIG_SMP
+extern struct avr32_cpuinfo cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's
+ */
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
+
+#define cpu_relax()		barrier()
+#define cpu_sync_pipeline()	asm volatile("sub pc, -2" : : : "memory")
+
+struct cpu_context {
+	unsigned long sr;
+	unsigned long pc;
+	unsigned long ksp;	/* Kernel stack pointer */
+	unsigned long r7;
+	unsigned long r6;
+	unsigned long r5;
+	unsigned long r4;
+	unsigned long r3;
+	unsigned long r2;
+	unsigned long r1;
+	unsigned long r0;
+};
+
+/* This struct contains the CPU context as stored by switch_to() */
+struct thread_struct {
+	struct cpu_context cpu_context;
+	unsigned long single_step_addr;
+	u16 single_step_insn;
+};
+
+#define INIT_THREAD {						\
+	.cpu_context = {					\
+		.ksp = sizeof(init_stack) + (long)&init_stack,	\
+	},							\
+}
+
+/*
+ * Do necessary setup to start up a newly executed thread.
+ */
+#define start_thread(regs, new_pc, new_sp)	 \
+	do {					 \
+		set_fs(USER_DS);		 \
+		memset(regs, 0, sizeof(*regs));	 \
+		regs->sr = MODE_USER;		 \
+		regs->pc = new_pc & ~1;		 \
+		regs->sp = new_sp;		 \
+	} while(0)
+
+struct task_struct;
+
+/* Free all resources held by a thread */
+extern void release_thread(struct task_struct *);
+
+/* Create a kernel thread without removing it from tasklists */
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Prepare to copy thread state - unlazy all lazy status */
+#define prepare_to_copy(tsk) do { } while(0)
+
+/* Return saved PC of a blocked thread */
+#define thread_saved_pc(tsk)    ((tsk)->thread.cpu_context.pc)
+
+struct pt_regs;
+void show_trace(struct task_struct *task, unsigned long *stack,
+		struct pt_regs *regs);
+
+extern unsigned long get_wchan(struct task_struct *p);
+
+#define KSTK_EIP(tsk)	((tsk)->thread.cpu_context.pc)
+#define KSTK_ESP(tsk)	((tsk)->thread.cpu_context.ksp)
+
+#define ARCH_HAS_PREFETCH
+
+static inline void prefetch(const void *x)
+{
+	const char *c = x;
+	asm volatile("pref %0" : : "r"(c));
+}
+#define PREFETCH_STRIDE	L1_CACHE_BYTES
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_PROCESSOR_H */
diff --git a/include/asm-avr32/ptrace.h b/include/asm-avr32/ptrace.h
new file mode 100644
index 000000000000..60f0f19a81f1
--- /dev/null
+++ b/include/asm-avr32/ptrace.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_PTRACE_H
+#define __ASM_AVR32_PTRACE_H
+
+#define PTRACE_GETREGS		12
+#define PTRACE_SETREGS		13
+
+/*
+ * Status Register bits
+ */
+#define SR_H		0x40000000
+#define SR_R		0x20000000
+#define SR_J		0x10000000
+#define SR_DM		0x08000000
+#define SR_D		0x04000000
+#define MODE_NMI	0x01c00000
+#define MODE_EXCEPTION	0x01800000
+#define MODE_INT3	0x01400000
+#define MODE_INT2	0x01000000
+#define MODE_INT1	0x00c00000
+#define MODE_INT0	0x00800000
+#define MODE_SUPERVISOR	0x00400000
+#define MODE_USER	0x00000000
+#define MODE_MASK	0x01c00000
+#define SR_EM		0x00200000
+#define SR_I3M		0x00100000
+#define SR_I2M		0x00080000
+#define SR_I1M		0x00040000
+#define SR_I0M		0x00020000
+#define SR_GM		0x00010000
+
+#define SR_H_BIT	30
+#define SR_R_BIT	29
+#define SR_J_BIT	28
+#define SR_DM_BIT	27
+#define SR_D_BIT	26
+#define MODE_SHIFT	22
+#define SR_EM_BIT	21
+#define SR_I3M_BIT	20
+#define SR_I2M_BIT	19
+#define SR_I1M_BIT	18
+#define SR_I0M_BIT	17
+#define SR_GM_BIT	16
+
+/* The user-visible part */
+#define SR_L		0x00000020
+#define SR_Q		0x00000010
+#define SR_V		0x00000008
+#define SR_N		0x00000004
+#define SR_Z		0x00000002
+#define SR_C		0x00000001
+
+#define SR_L_BIT	5
+#define SR_Q_BIT	4
+#define SR_V_BIT	3
+#define SR_N_BIT	2
+#define SR_Z_BIT	1
+#define SR_C_BIT	0
+
+/*
+ * The order is defined by the stmts instruction. r0 is stored first,
+ * so it gets the highest address.
+ *
+ * Registers 0-12 are general-purpose registers (r12 is normally used for
+ * the function return value).
+ * Register 13 is the stack pointer
+ * Register 14 is the link register
+ * Register 15 is the program counter (retrieved from the RAR sysreg)
+ */
+#define FRAME_SIZE_FULL 72
+#define REG_R12_ORIG	68
+#define REG_R0		64
+#define REG_R1		60
+#define REG_R2		56
+#define REG_R3		52
+#define REG_R4		48
+#define REG_R5		44
+#define REG_R6		40
+#define REG_R7		36
+#define REG_R8		32
+#define REG_R9		28
+#define REG_R10		24
+#define REG_R11		20
+#define REG_R12		16
+#define REG_SP		12
+#define REG_LR		 8
+
+#define FRAME_SIZE_MIN	 8
+#define REG_PC		 4
+#define REG_SR		 0
+
+#ifndef __ASSEMBLY__
+struct pt_regs {
+	/* These are always saved */
+	unsigned long sr;
+	unsigned long pc;
+
+	/* These are sometimes saved */
+	unsigned long lr;
+	unsigned long sp;
+	unsigned long r12;
+	unsigned long r11;
+	unsigned long r10;
+	unsigned long r9;
+	unsigned long r8;
+	unsigned long r7;
+	unsigned long r6;
+	unsigned long r5;
+	unsigned long r4;
+	unsigned long r3;
+	unsigned long r2;
+	unsigned long r1;
+	unsigned long r0;
+
+	/* Only saved on system call */
+	unsigned long r12_orig;
+};
+
+#ifdef __KERNEL__
+# define user_mode(regs) (((regs)->sr & MODE_MASK) == MODE_USER)
+extern void show_regs (struct pt_regs *);
+
+static __inline__ int valid_user_regs(struct pt_regs *regs)
+{
+	/*
+	 * Some of the Java bits might be acceptable if/when we
+	 * implement some support for that stuff...
+	 */
+	if ((regs->sr & 0xffff0000) == 0)
+		return 1;
+
+	/*
+	 * Force status register flags to be sane and report this
+	 * illegal behaviour...
+	 */
+	regs->sr &= 0x0000ffff;
+	return 0;
+}
+
+#define instruction_pointer(regs) ((regs)->pc)
+
+#define profile_pc(regs) instruction_pointer(regs)
+
+#endif /* __KERNEL__ */
+
+#endif /* ! __ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_PTRACE_H */
diff --git a/include/asm-avr32/resource.h b/include/asm-avr32/resource.h
new file mode 100644
index 000000000000..c6dd101472b1
--- /dev/null
+++ b/include/asm-avr32/resource.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_RESOURCE_H
+#define __ASM_AVR32_RESOURCE_H
+
+#include <asm-generic/resource.h>
+
+#endif /* __ASM_AVR32_RESOURCE_H */
diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h
new file mode 100644
index 000000000000..bfe7d753423c
--- /dev/null
+++ b/include/asm-avr32/scatterlist.h
@@ -0,0 +1,21 @@
+#ifndef __ASM_AVR32_SCATTERLIST_H
+#define __ASM_AVR32_SCATTERLIST_H
+
+struct scatterlist {
+    struct page		*page;
+    unsigned int	offset;
+    dma_addr_t		dma_address;
+    unsigned int	length;
+};
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+#define sg_dma_len(sg)		((sg)->length)
+
+#define ISA_DMA_THRESHOLD (0xffffffff)
+
+#endif /* __ASM_AVR32_SCATTERLIST_H */
diff --git a/include/asm-avr32/sections.h b/include/asm-avr32/sections.h
new file mode 100644
index 000000000000..aa14252e4181
--- /dev/null
+++ b/include/asm-avr32/sections.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_SECTIONS_H
+#define __ASM_AVR32_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+#endif /* __ASM_AVR32_SECTIONS_H */
diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h
new file mode 100644
index 000000000000..ef99ddccc10c
--- /dev/null
+++ b/include/asm-avr32/semaphore.h
@@ -0,0 +1,109 @@
+/*
+ * SMP- and interrupt-safe semaphores.
+ *
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * Based on include/asm-i386/semaphore.h
+ *   Copyright (C) 1996 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SEMAPHORE_H
+#define __ASM_AVR32_SEMAPHORE_H
+
+#include <linux/linkage.h>
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <linux/wait.h>
+#include <linux/rwsem.h>
+
+struct semaphore {
+	atomic_t count;
+	int sleepers;
+	wait_queue_head_t wait;
+};
+
+#define __SEMAPHORE_INITIALIZER(name, n)				\
+{									\
+	.count		= ATOMIC_INIT(n),				\
+	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
+}
+
+#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
+	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
+
+#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
+#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
+
+static inline void sema_init (struct semaphore *sem, int val)
+{
+	atomic_set(&sem->count, val);
+	sem->sleepers = 0;
+	init_waitqueue_head(&sem->wait);
+}
+
+static inline void init_MUTEX (struct semaphore *sem)
+{
+	sema_init(sem, 1);
+}
+
+static inline void init_MUTEX_LOCKED (struct semaphore *sem)
+{
+	sema_init(sem, 0);
+}
+
+void __down(struct semaphore * sem);
+int  __down_interruptible(struct semaphore * sem);
+void __up(struct semaphore * sem);
+
+/*
+ * This is ugly, but we want the default case to fall through.
+ * "__down_failed" is a special asm handler that calls the C
+ * routine that actually waits. See arch/i386/kernel/semaphore.c
+ */
+static inline void down(struct semaphore * sem)
+{
+	might_sleep();
+	if (unlikely(atomic_dec_return (&sem->count) < 0))
+		__down (sem);
+}
+
+/*
+ * Interruptible try to acquire a semaphore.  If we obtained
+ * it, return zero.  If we were interrupted, returns -EINTR
+ */
+static inline int down_interruptible(struct semaphore * sem)
+{
+	int ret = 0;
+
+	might_sleep();
+	if (unlikely(atomic_dec_return (&sem->count) < 0))
+		ret = __down_interruptible (sem);
+	return ret;
+}
+
+/*
+ * Non-blockingly attempt to down() a semaphore.
+ * Returns zero if we acquired it
+ */
+static inline int down_trylock(struct semaphore * sem)
+{
+	return atomic_dec_if_positive(&sem->count) < 0;
+}
+
+/*
+ * Note! This is subtle. We jump to wake people up only if
+ * the semaphore was negative (== somebody was waiting on it).
+ * The default case (no contention) will result in NO
+ * jumps for both down() and up().
+ */
+static inline void up(struct semaphore * sem)
+{
+	if (unlikely(atomic_inc_return (&sem->count) <= 0))
+		__up (sem);
+}
+
+#endif /*__ASM_AVR32_SEMAPHORE_H */
diff --git a/include/asm-avr32/sembuf.h b/include/asm-avr32/sembuf.h
new file mode 100644
index 000000000000..e472216e0c97
--- /dev/null
+++ b/include/asm-avr32/sembuf.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_AVR32_SEMBUF_H
+#define __ASM_AVR32_SEMBUF_H
+
+/*
+* The semid64_ds structure for AVR32 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+        struct ipc64_perm sem_perm;             /* permissions .. see ipc.h */
+        __kernel_time_t sem_otime;              /* last semop time */
+        unsigned long   __unused1;
+        __kernel_time_t sem_ctime;              /* last change time */
+        unsigned long   __unused2;
+        unsigned long   sem_nsems;              /* no. of semaphores in array */
+        unsigned long   __unused3;
+        unsigned long   __unused4;
+};
+
+#endif /* __ASM_AVR32_SEMBUF_H */
diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h
new file mode 100644
index 000000000000..10193da4113b
--- /dev/null
+++ b/include/asm-avr32/setup.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/include/asm-arm/setup.h
+ *   Copyright (C) 1997-1999 Russel King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SETUP_H__
+#define __ASM_AVR32_SETUP_H__
+
+#define COMMAND_LINE_SIZE 256
+
+/* Magic number indicating that a tag table is present */
+#define ATAG_MAGIC	0xa2a25441
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Generic memory range, used by several tags.
+ *
+ *   addr is always physical.
+ *   size is measured in bytes.
+ *   next is for use by the OS, e.g. for grouping regions into
+ *        linked lists.
+ */
+struct tag_mem_range {
+	u32			addr;
+	u32			size;
+	struct tag_mem_range *	next;
+};
+
+/* The list ends with an ATAG_NONE node. */
+#define ATAG_NONE	0x00000000
+
+struct tag_header {
+	u32 size;
+	u32 tag;
+};
+
+/* The list must start with an ATAG_CORE node */
+#define ATAG_CORE	0x54410001
+
+struct tag_core {
+	u32 flags;
+	u32 pagesize;
+	u32 rootdev;
+};
+
+/* it is allowed to have multiple ATAG_MEM nodes */
+#define ATAG_MEM	0x54410002
+/* ATAG_MEM uses tag_mem_range */
+
+/* command line: \0 terminated string */
+#define ATAG_CMDLINE	0x54410003
+
+struct tag_cmdline {
+	char	cmdline[1];	/* this is the minimum size */
+};
+
+/* Ramdisk image (may be compressed) */
+#define ATAG_RDIMG	0x54410004
+/* ATAG_RDIMG uses tag_mem_range */
+
+/* Information about various clocks present in the system */
+#define ATAG_CLOCK	0x54410005
+
+struct tag_clock {
+	u32	clock_id;	/* Which clock are we talking about? */
+	u32	clock_flags;	/* Special features */
+	u64	clock_hz;	/* Clock speed in Hz */
+};
+
+/* The clock types we know about */
+#define CLOCK_BOOTCPU	0
+
+/* Memory reserved for the system (e.g. the bootloader) */
+#define ATAG_RSVD_MEM	0x54410006
+/* ATAG_RSVD_MEM uses tag_mem_range */
+
+/* Ethernet information */
+
+#define ATAG_ETHERNET	0x54410007
+
+struct tag_ethernet {
+	u8	mac_index;
+	u8	mii_phy_addr;
+	u8	hw_address[6];
+};
+
+#define ETH_INVALID_PHY	0xff
+
+struct tag {
+	struct tag_header hdr;
+	union {
+		struct tag_core core;
+		struct tag_mem_range mem_range;
+		struct tag_cmdline cmdline;
+		struct tag_clock clock;
+		struct tag_ethernet ethernet;
+	} u;
+};
+
+struct tagtable {
+	u32	tag;
+	int	(*parse)(struct tag *);
+};
+
+#define __tag __attribute_used__ __attribute__((__section__(".taglist")))
+#define __tagtable(tag, fn)						\
+	static struct tagtable __tagtable_##fn __tag = { tag, fn }
+
+#define tag_member_present(tag,member)					\
+	((unsigned long)(&((struct tag *)0L)->member + 1)		\
+	 <= (tag)->hdr.size * 4)
+
+#define tag_next(t)	((struct tag *)((u32 *)(t) + (t)->hdr.size))
+#define tag_size(type)	((sizeof(struct tag_header) + sizeof(struct type)) >> 2)
+
+#define for_each_tag(t,base)						\
+	for (t = base; t->hdr.size; t = tag_next(t))
+
+extern struct tag_mem_range *mem_phys;
+extern struct tag_mem_range *mem_reserved;
+extern struct tag_mem_range *mem_ramdisk;
+
+extern struct tag *bootloader_tags;
+
+extern void setup_bootmem(void);
+extern void setup_processor(void);
+extern void board_setup_fbmem(unsigned long fbmem_start,
+			      unsigned long fbmem_size);
+
+/* Chip-specific hook to enable the use of SDRAM */
+void chip_enable_sdram(void);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_SETUP_H__ */
diff --git a/include/asm-avr32/shmbuf.h b/include/asm-avr32/shmbuf.h
new file mode 100644
index 000000000000..c62fba41739a
--- /dev/null
+++ b/include/asm-avr32/shmbuf.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_AVR32_SHMBUF_H
+#define __ASM_AVR32_SHMBUF_H
+
+/*
+ * The shmid64_ds structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_time_t		shm_atime;	/* last attach time */
+	unsigned long		__unused1;
+	__kernel_time_t		shm_dtime;	/* last detach time */
+	unsigned long		__unused2;
+	__kernel_time_t		shm_ctime;	/* last change time */
+	unsigned long		__unused3;
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused4;
+	unsigned long		__unused5;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* __ASM_AVR32_SHMBUF_H */
diff --git a/include/asm-avr32/shmparam.h b/include/asm-avr32/shmparam.h
new file mode 100644
index 000000000000..3681266c77f7
--- /dev/null
+++ b/include/asm-avr32/shmparam.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_SHMPARAM_H
+#define __ASM_AVR32_SHMPARAM_H
+
+#define SHMLBA PAGE_SIZE	/* attach addr a multiple of this */
+
+#endif /* __ASM_AVR32_SHMPARAM_H */
diff --git a/include/asm-avr32/sigcontext.h b/include/asm-avr32/sigcontext.h
new file mode 100644
index 000000000000..e04062b5f39f
--- /dev/null
+++ b/include/asm-avr32/sigcontext.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SIGCONTEXT_H
+#define __ASM_AVR32_SIGCONTEXT_H
+
+struct sigcontext {
+	unsigned long	oldmask;
+
+	/* CPU registers */
+	unsigned long	sr;
+	unsigned long	pc;
+	unsigned long	lr;
+	unsigned long	sp;
+	unsigned long	r12;
+	unsigned long	r11;
+	unsigned long	r10;
+	unsigned long	r9;
+	unsigned long	r8;
+	unsigned long	r7;
+	unsigned long	r6;
+	unsigned long	r5;
+	unsigned long	r4;
+	unsigned long	r3;
+	unsigned long	r2;
+	unsigned long	r1;
+	unsigned long	r0;
+};
+
+#endif /* __ASM_AVR32_SIGCONTEXT_H */
diff --git a/include/asm-avr32/siginfo.h b/include/asm-avr32/siginfo.h
new file mode 100644
index 000000000000..5ee93f40a8a8
--- /dev/null
+++ b/include/asm-avr32/siginfo.h
@@ -0,0 +1,6 @@
+#ifndef _AVR32_SIGINFO_H
+#define _AVR32_SIGINFO_H
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/include/asm-avr32/signal.h b/include/asm-avr32/signal.h
new file mode 100644
index 000000000000..caffefeeba1f
--- /dev/null
+++ b/include/asm-avr32/signal.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SIGNAL_H
+#define __ASM_AVR32_SIGNAL_H
+
+#include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+#ifdef __KERNEL__
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+#define _NSIG		64
+#define _NSIG_BPW	32
+#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
+
+typedef unsigned long old_sigset_t;		/* at least 32 bits */
+
+typedef struct {
+	unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#else
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG		32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#define SIGRTMAX	(_NSIG-1)
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_NOCLDSTOP		flag to turn off SIGCHLD when children stop.
+ * SA_NOCLDWAIT		flag on SIGCHLD to inhibit zombies.
+ * SA_SIGINFO		deliver the signal with SIGINFO structs
+ * SA_ONSTACK		indicates that a registered stack_t will be used.
+ * SA_RESTART		flag to get restarting signals (which were the default long ago)
+ * SA_NODEFER		prevents the current signal from being masked in the handler.
+ * SA_RESETHAND		clears the handler when the signal is delivered.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP	0x00000001
+#define SA_NOCLDWAIT	0x00000002
+#define SA_SIGINFO	0x00000004
+#define SA_RESTORER	0x04000000
+#define SA_ONSTACK	0x08000000
+#define SA_RESTART	0x10000000
+#define SA_NODEFER	0x40000000
+#define SA_RESETHAND	0x80000000
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK	1
+#define SS_DISABLE	2
+
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+
+#include <asm-generic/signal.h>
+
+#ifdef __KERNEL__
+struct old_sigaction {
+	__sighandler_t sa_handler;
+	old_sigset_t sa_mask;
+	unsigned long sa_flags;
+	__sigrestore_t sa_restorer;
+};
+
+struct sigaction {
+	__sighandler_t sa_handler;
+	unsigned long sa_flags;
+	__sigrestore_t sa_restorer;
+	sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+struct k_sigaction {
+	struct sigaction sa;
+};
+#else
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+struct sigaction {
+	union {
+		__sighandler_t _sa_handler;
+		void (*_sa_sigaction)(int, struct siginfo *, void *);
+	} _u;
+	sigset_t sa_mask;
+	unsigned long sa_flags;
+	void (*sa_restorer)(void);
+};
+
+#define sa_handler	_u._sa_handler
+#define sa_sigaction	_u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+
+#include <asm/sigcontext.h>
+#undef __HAVE_ARCH_SIG_BITOPS
+
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h
new file mode 100644
index 000000000000..543229de8173
--- /dev/null
+++ b/include/asm-avr32/socket.h
@@ -0,0 +1,53 @@
+#ifndef __ASM_AVR32_SOCKET_H
+#define __ASM_AVR32_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32
+#define SO_RCVBUFFORCE	33
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+/* To add :#define SO_REUSEPORT 15 */
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO	20
+#define SO_SNDTIMEO	21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE	25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER        26
+#define SO_DETACH_FILTER        27
+
+#define SO_PEERNAME		28
+#define SO_TIMESTAMP		29
+#define SCM_TIMESTAMP		SO_TIMESTAMP
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC		31
+#define SO_PASSSEC		34
+
+#endif /* __ASM_AVR32_SOCKET_H */
diff --git a/include/asm-avr32/sockios.h b/include/asm-avr32/sockios.h
new file mode 100644
index 000000000000..84f3d65b3b3b
--- /dev/null
+++ b/include/asm-avr32/sockios.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_AVR32_SOCKIOS_H
+#define __ASM_AVR32_SOCKIOS_H
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN 	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP	0x8906		/* Get stamp */
+
+#endif /* __ASM_AVR32_SOCKIOS_H */
diff --git a/include/asm-avr32/stat.h b/include/asm-avr32/stat.h
new file mode 100644
index 000000000000..e72881e10230
--- /dev/null
+++ b/include/asm-avr32/stat.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_STAT_H
+#define __ASM_AVR32_STAT_H
+
+struct __old_kernel_stat {
+        unsigned short st_dev;
+        unsigned short st_ino;
+        unsigned short st_mode;
+        unsigned short st_nlink;
+        unsigned short st_uid;
+        unsigned short st_gid;
+        unsigned short st_rdev;
+        unsigned long  st_size;
+        unsigned long  st_atime;
+        unsigned long  st_mtime;
+        unsigned long  st_ctime;
+};
+
+struct stat {
+        unsigned long st_dev;
+        unsigned long st_ino;
+        unsigned short st_mode;
+        unsigned short st_nlink;
+        unsigned short st_uid;
+        unsigned short st_gid;
+        unsigned long  st_rdev;
+        unsigned long  st_size;
+        unsigned long  st_blksize;
+        unsigned long  st_blocks;
+        unsigned long  st_atime;
+        unsigned long  st_atime_nsec;
+        unsigned long  st_mtime;
+        unsigned long  st_mtime_nsec;
+        unsigned long  st_ctime;
+        unsigned long  st_ctime_nsec;
+        unsigned long  __unused4;
+        unsigned long  __unused5;
+};
+
+#define STAT_HAVE_NSEC 1
+
+struct stat64 {
+	unsigned long long st_dev;
+
+	unsigned long long st_ino;
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+
+	unsigned long	st_uid;
+	unsigned long	st_gid;
+
+	unsigned long long st_rdev;
+
+	long long	st_size;
+	unsigned long	__pad1;		/* align 64-bit st_blocks */
+	unsigned long	st_blksize;
+
+	unsigned long long st_blocks;	/* Number 512-byte blocks allocated. */
+
+	unsigned long	st_atime;
+	unsigned long	st_atime_nsec;
+
+	unsigned long	st_mtime;
+	unsigned long	st_mtime_nsec;
+
+	unsigned long	st_ctime;
+	unsigned long	st_ctime_nsec;
+
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+};
+
+#endif /* __ASM_AVR32_STAT_H */
diff --git a/include/asm-avr32/statfs.h b/include/asm-avr32/statfs.h
new file mode 100644
index 000000000000..2961bd18c50e
--- /dev/null
+++ b/include/asm-avr32/statfs.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_STATFS_H
+#define __ASM_AVR32_STATFS_H
+
+#include <asm-generic/statfs.h>
+
+#endif /* __ASM_AVR32_STATFS_H */
diff --git a/include/asm-avr32/string.h b/include/asm-avr32/string.h
new file mode 100644
index 000000000000..c91a623cd585
--- /dev/null
+++ b/include/asm-avr32/string.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_STRING_H
+#define __ASM_AVR32_STRING_H
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *b, int c, size_t len);
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *to, const void *from, size_t len);
+
+#endif /* __ASM_AVR32_STRING_H */
diff --git a/include/asm-avr32/sysreg.h b/include/asm-avr32/sysreg.h
new file mode 100644
index 000000000000..f91975f330f6
--- /dev/null
+++ b/include/asm-avr32/sysreg.h
@@ -0,0 +1,332 @@
+/*
+ * AVR32 System Registers
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SYSREG_H__
+#define __ASM_AVR32_SYSREG_H__
+
+/* sysreg register offsets */
+#define SYSREG_SR                               0x0000
+#define SYSREG_EVBA                             0x0004
+#define SYSREG_ACBA                             0x0008
+#define SYSREG_CPUCR                            0x000c
+#define SYSREG_ECR                              0x0010
+#define SYSREG_RSR_SUP                          0x0014
+#define SYSREG_RSR_INT0                         0x0018
+#define SYSREG_RSR_INT1                         0x001c
+#define SYSREG_RSR_INT2                         0x0020
+#define SYSREG_RSR_INT3                         0x0024
+#define SYSREG_RSR_EX                           0x0028
+#define SYSREG_RSR_NMI                          0x002c
+#define SYSREG_RSR_DBG                          0x0030
+#define SYSREG_RAR_SUP                          0x0034
+#define SYSREG_RAR_INT0                         0x0038
+#define SYSREG_RAR_INT1                         0x003c
+#define SYSREG_RAR_INT2                         0x0040
+#define SYSREG_RAR_INT3                         0x0044
+#define SYSREG_RAR_EX                           0x0048
+#define SYSREG_RAR_NMI                          0x004c
+#define SYSREG_RAR_DBG                          0x0050
+#define SYSREG_JECR                             0x0054
+#define SYSREG_JOSP                             0x0058
+#define SYSREG_JAVA_LV0                         0x005c
+#define SYSREG_JAVA_LV1                         0x0060
+#define SYSREG_JAVA_LV2                         0x0064
+#define SYSREG_JAVA_LV3                         0x0068
+#define SYSREG_JAVA_LV4                         0x006c
+#define SYSREG_JAVA_LV5                         0x0070
+#define SYSREG_JAVA_LV6                         0x0074
+#define SYSREG_JAVA_LV7                         0x0078
+#define SYSREG_JTBA                             0x007c
+#define SYSREG_JBCR                             0x0080
+#define SYSREG_CONFIG0                          0x0100
+#define SYSREG_CONFIG1                          0x0104
+#define SYSREG_COUNT                            0x0108
+#define SYSREG_COMPARE                          0x010c
+#define SYSREG_TLBEHI                           0x0110
+#define SYSREG_TLBELO                           0x0114
+#define SYSREG_PTBR                             0x0118
+#define SYSREG_TLBEAR                           0x011c
+#define SYSREG_MMUCR                            0x0120
+#define SYSREG_TLBARLO                          0x0124
+#define SYSREG_TLBARHI                          0x0128
+#define SYSREG_PCCNT                            0x012c
+#define SYSREG_PCNT0                            0x0130
+#define SYSREG_PCNT1                            0x0134
+#define SYSREG_PCCR                             0x0138
+#define SYSREG_BEAR                             0x013c
+
+/* Bitfields in SR */
+#define SYSREG_SR_C_OFFSET                      0
+#define SYSREG_SR_C_SIZE                        1
+#define SYSREG_Z_OFFSET                         1
+#define SYSREG_Z_SIZE                           1
+#define SYSREG_SR_N_OFFSET                      2
+#define SYSREG_SR_N_SIZE                        1
+#define SYSREG_SR_V_OFFSET                      3
+#define SYSREG_SR_V_SIZE                        1
+#define SYSREG_Q_OFFSET                         4
+#define SYSREG_Q_SIZE                           1
+#define SYSREG_GM_OFFSET                        16
+#define SYSREG_GM_SIZE                          1
+#define SYSREG_I0M_OFFSET                       17
+#define SYSREG_I0M_SIZE                         1
+#define SYSREG_I1M_OFFSET                       18
+#define SYSREG_I1M_SIZE                         1
+#define SYSREG_I2M_OFFSET                       19
+#define SYSREG_I2M_SIZE                         1
+#define SYSREG_I3M_OFFSET                       20
+#define SYSREG_I3M_SIZE                         1
+#define SYSREG_EM_OFFSET                        21
+#define SYSREG_EM_SIZE                          1
+#define SYSREG_M0_OFFSET                        22
+#define SYSREG_M0_SIZE                          1
+#define SYSREG_M1_OFFSET                        23
+#define SYSREG_M1_SIZE                          1
+#define SYSREG_M2_OFFSET                        24
+#define SYSREG_M2_SIZE                          1
+#define SYSREG_SR_D_OFFSET                      26
+#define SYSREG_SR_D_SIZE                        1
+#define SYSREG_DM_OFFSET                        27
+#define SYSREG_DM_SIZE                          1
+#define SYSREG_SR_J_OFFSET                      28
+#define SYSREG_SR_J_SIZE                        1
+#define SYSREG_R_OFFSET                         29
+#define SYSREG_R_SIZE                           1
+#define SYSREG_H_OFFSET                         30
+#define SYSREG_H_SIZE                           1
+
+/* Bitfields in EVBA */
+
+/* Bitfields in ACBA */
+
+/* Bitfields in CPUCR */
+#define SYSREG_BI_OFFSET                        0
+#define SYSREG_BI_SIZE                          1
+#define SYSREG_BE_OFFSET                        1
+#define SYSREG_BE_SIZE                          1
+#define SYSREG_FE_OFFSET                        2
+#define SYSREG_FE_SIZE                          1
+#define SYSREG_RE_OFFSET                        3
+#define SYSREG_RE_SIZE                          1
+#define SYSREG_IBE_OFFSET                       4
+#define SYSREG_IBE_SIZE                         1
+#define SYSREG_IEE_OFFSET                       5
+#define SYSREG_IEE_SIZE                         1
+
+/* Bitfields in ECR */
+#define SYSREG_ECR_OFFSET                       0
+#define SYSREG_ECR_SIZE                         32
+
+/* Bitfields in RSR_SUP */
+
+/* Bitfields in RSR_INT0 */
+
+/* Bitfields in RSR_INT1 */
+
+/* Bitfields in RSR_INT2 */
+
+/* Bitfields in RSR_INT3 */
+
+/* Bitfields in RSR_EX */
+
+/* Bitfields in RSR_NMI */
+
+/* Bitfields in RSR_DBG */
+
+/* Bitfields in RAR_SUP */
+
+/* Bitfields in RAR_INT0 */
+
+/* Bitfields in RAR_INT1 */
+
+/* Bitfields in RAR_INT2 */
+
+/* Bitfields in RAR_INT3 */
+
+/* Bitfields in RAR_EX */
+
+/* Bitfields in RAR_NMI */
+
+/* Bitfields in RAR_DBG */
+
+/* Bitfields in JECR */
+
+/* Bitfields in JOSP */
+
+/* Bitfields in JAVA_LV0 */
+
+/* Bitfields in JAVA_LV1 */
+
+/* Bitfields in JAVA_LV2 */
+
+/* Bitfields in JAVA_LV3 */
+
+/* Bitfields in JAVA_LV4 */
+
+/* Bitfields in JAVA_LV5 */
+
+/* Bitfields in JAVA_LV6 */
+
+/* Bitfields in JAVA_LV7 */
+
+/* Bitfields in JTBA */
+
+/* Bitfields in JBCR */
+
+/* Bitfields in CONFIG0 */
+#define SYSREG_CONFIG0_D_OFFSET                 1
+#define SYSREG_CONFIG0_D_SIZE                   1
+#define SYSREG_CONFIG0_S_OFFSET                 2
+#define SYSREG_CONFIG0_S_SIZE                   1
+#define SYSREG_O_OFFSET                         3
+#define SYSREG_O_SIZE                           1
+#define SYSREG_P_OFFSET                         4
+#define SYSREG_P_SIZE                           1
+#define SYSREG_CONFIG0_J_OFFSET                 5
+#define SYSREG_CONFIG0_J_SIZE                   1
+#define SYSREG_F_OFFSET                         6
+#define SYSREG_F_SIZE                           1
+#define SYSREG_MMUT_OFFSET                      7
+#define SYSREG_MMUT_SIZE                        3
+#define SYSREG_AR_OFFSET                        10
+#define SYSREG_AR_SIZE                          3
+#define SYSREG_AT_OFFSET                        13
+#define SYSREG_AT_SIZE                          3
+#define SYSREG_PROCESSORREVISION_OFFSET         16
+#define SYSREG_PROCESSORREVISION_SIZE           8
+#define SYSREG_PROCESSORID_OFFSET               24
+#define SYSREG_PROCESSORID_SIZE                 8
+
+/* Bitfields in CONFIG1 */
+#define SYSREG_DASS_OFFSET                      0
+#define SYSREG_DASS_SIZE                        3
+#define SYSREG_DLSZ_OFFSET                      3
+#define SYSREG_DLSZ_SIZE                        3
+#define SYSREG_DSET_OFFSET                      6
+#define SYSREG_DSET_SIZE                        4
+#define SYSREG_IASS_OFFSET                      10
+#define SYSREG_IASS_SIZE                        2
+#define SYSREG_ILSZ_OFFSET                      13
+#define SYSREG_ILSZ_SIZE                        3
+#define SYSREG_ISET_OFFSET                      16
+#define SYSREG_ISET_SIZE                        4
+#define SYSREG_DMMUSZ_OFFSET                    20
+#define SYSREG_DMMUSZ_SIZE                      6
+#define SYSREG_IMMUSZ_OFFSET                    26
+#define SYSREG_IMMUSZ_SIZE                      6
+
+/* Bitfields in COUNT */
+
+/* Bitfields in COMPARE */
+
+/* Bitfields in TLBEHI */
+#define SYSREG_ASID_OFFSET                      0
+#define SYSREG_ASID_SIZE                        8
+#define SYSREG_TLBEHI_I_OFFSET                  8
+#define SYSREG_TLBEHI_I_SIZE                    1
+#define SYSREG_TLBEHI_V_OFFSET                  9
+#define SYSREG_TLBEHI_V_SIZE                    1
+#define SYSREG_VPN_OFFSET                       10
+#define SYSREG_VPN_SIZE                         22
+
+/* Bitfields in TLBELO */
+#define SYSREG_W_OFFSET                         0
+#define SYSREG_W_SIZE                           1
+#define SYSREG_TLBELO_D_OFFSET                  1
+#define SYSREG_TLBELO_D_SIZE                    1
+#define SYSREG_SZ_OFFSET                        2
+#define SYSREG_SZ_SIZE                          2
+#define SYSREG_AP_OFFSET                        4
+#define SYSREG_AP_SIZE                          3
+#define SYSREG_B_OFFSET                         7
+#define SYSREG_B_SIZE                           1
+#define SYSREG_G_OFFSET                         8
+#define SYSREG_G_SIZE                           1
+#define SYSREG_TLBELO_C_OFFSET                  9
+#define SYSREG_TLBELO_C_SIZE                    1
+#define SYSREG_PFN_OFFSET                       10
+#define SYSREG_PFN_SIZE                         22
+
+/* Bitfields in PTBR */
+
+/* Bitfields in TLBEAR */
+
+/* Bitfields in MMUCR */
+#define SYSREG_E_OFFSET                         0
+#define SYSREG_E_SIZE                           1
+#define SYSREG_M_OFFSET                         1
+#define SYSREG_M_SIZE                           1
+#define SYSREG_MMUCR_I_OFFSET                   2
+#define SYSREG_MMUCR_I_SIZE                     1
+#define SYSREG_MMUCR_N_OFFSET                   3
+#define SYSREG_MMUCR_N_SIZE                     1
+#define SYSREG_MMUCR_S_OFFSET                   4
+#define SYSREG_MMUCR_S_SIZE                     1
+#define SYSREG_DLA_OFFSET                       8
+#define SYSREG_DLA_SIZE                         6
+#define SYSREG_DRP_OFFSET                       14
+#define SYSREG_DRP_SIZE                         6
+#define SYSREG_ILA_OFFSET                       20
+#define SYSREG_ILA_SIZE                         6
+#define SYSREG_IRP_OFFSET                       26
+#define SYSREG_IRP_SIZE                         6
+
+/* Bitfields in TLBARLO */
+
+/* Bitfields in TLBARHI */
+
+/* Bitfields in PCCNT */
+
+/* Bitfields in PCNT0 */
+
+/* Bitfields in PCNT1 */
+
+/* Bitfields in PCCR */
+
+/* Bitfields in BEAR */
+
+/* Constants for ECR */
+#define ECR_UNRECOVERABLE                       0
+#define ECR_TLB_MULTIPLE                        1
+#define ECR_BUS_ERROR_WRITE                     2
+#define ECR_BUS_ERROR_READ                      3
+#define ECR_NMI                                 4
+#define ECR_ADDR_ALIGN_X                        5
+#define ECR_PROTECTION_X                        6
+#define ECR_DEBUG                               7
+#define ECR_ILLEGAL_OPCODE                      8
+#define ECR_UNIMPL_INSTRUCTION                  9
+#define ECR_PRIVILEGE_VIOLATION                 10
+#define ECR_FPE                                 11
+#define ECR_COPROC_ABSENT                       12
+#define ECR_ADDR_ALIGN_R                        13
+#define ECR_ADDR_ALIGN_W                        14
+#define ECR_PROTECTION_R                        15
+#define ECR_PROTECTION_W                        16
+#define ECR_DTLB_MODIFIED                       17
+#define ECR_TLB_MISS_X                          20
+#define ECR_TLB_MISS_R                          24
+#define ECR_TLB_MISS_W                          28
+
+/* Bit manipulation macros */
+#define SYSREG_BIT(name)                        (1 << SYSREG_##name##_OFFSET)
+#define SYSREG_BF(name,value)                   (((value) & ((1 << SYSREG_##name##_SIZE) - 1)) << SYSREG_##name##_OFFSET)
+#define SYSREG_BFEXT(name,value)                (((value) >> SYSREG_##name##_OFFSET) & ((1 << SYSREG_##name##_SIZE) - 1))
+#define SYSREG_BFINS(name,value,old)            (((old) & ~(((1 << SYSREG_##name##_SIZE) - 1) << SYSREG_##name##_OFFSET)) | SYSREG_BF(name,value))
+
+#ifdef __CHECKER__
+extern unsigned long __builtin_mfsr(unsigned long reg);
+extern void __builtin_mtsr(unsigned long reg, unsigned long value);
+#endif
+
+/* Register access macros */
+#define sysreg_read(reg)                        __builtin_mfsr(SYSREG_##reg)
+#define sysreg_write(reg, value)                __builtin_mtsr(SYSREG_##reg, value)
+
+#endif /* __ASM_AVR32_SYSREG_H__ */
diff --git a/include/asm-avr32/system.h b/include/asm-avr32/system.h
new file mode 100644
index 000000000000..ac596058697d
--- /dev/null
+++ b/include/asm-avr32/system.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SYSTEM_H
+#define __ASM_AVR32_SYSTEM_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+
+#define xchg(ptr,x) \
+	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+
+#define nop() asm volatile("nop")
+
+#define mb()			asm volatile("" : : : "memory")
+#define rmb()			mb()
+#define wmb()			asm volatile("sync 0" : : : "memory")
+#define read_barrier_depends()  do { } while(0)
+#define set_mb(var, value)      do { var = value; mb(); } while(0)
+
+/*
+ * Help PathFinder and other Nexus-compliant debuggers keep track of
+ * the current PID by emitting an Ownership Trace Message each time we
+ * switch task.
+ */
+#ifdef CONFIG_OWNERSHIP_TRACE
+#include <asm/ocd.h>
+#define finish_arch_switch(prev)			\
+	do {						\
+		__mtdr(DBGREG_PID, prev->pid);		\
+		__mtdr(DBGREG_PID, current->pid);	\
+	} while(0)
+#endif
+
+/*
+ * switch_to(prev, next, last) should switch from task `prev' to task
+ * `next'. `prev' will never be the same as `next'.
+ *
+ * We just delegate everything to the __switch_to assembly function,
+ * which is implemented in arch/avr32/kernel/switch_to.S
+ *
+ * mb() tells GCC not to cache `current' across this call.
+ */
+struct cpu_context;
+struct task_struct;
+extern struct task_struct *__switch_to(struct task_struct *,
+				       struct cpu_context *,
+				       struct cpu_context *);
+#define switch_to(prev, next, last)					\
+	do {								\
+		last = __switch_to(prev, &prev->thread.cpu_context + 1,	\
+				   &next->thread.cpu_context);		\
+	} while (0)
+
+#ifdef CONFIG_SMP
+# error "The AVR32 port does not support SMP"
+#else
+# define smp_mb()		barrier()
+# define smp_rmb()		barrier()
+# define smp_wmb()		barrier()
+# define smp_read_barrier_depends() do { } while(0)
+#endif
+
+#include <linux/irqflags.h>
+
+extern void __xchg_called_with_bad_pointer(void);
+
+#ifdef __CHECKER__
+extern unsigned long __builtin_xchg(void *ptr, unsigned long x);
+#endif
+
+#define xchg_u32(val, m) __builtin_xchg((void *)m, val)
+
+static inline unsigned long __xchg(unsigned long x,
+				       volatile void *ptr,
+				       int size)
+{
+	switch(size) {
+	case 4:
+		return xchg_u32(x, ptr);
+	default:
+		__xchg_called_with_bad_pointer();
+		return x;
+	}
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+					  unsigned long new)
+{
+	__u32 ret;
+
+	asm volatile(
+		"1:	ssrf	5\n"
+		"	ld.w	%[ret], %[m]\n"
+		"	cp.w	%[ret], %[old]\n"
+		"	brne	2f\n"
+		"	stcond	%[m], %[new]\n"
+		"	brne	1b\n"
+		"2:\n"
+		: [ret] "=&r"(ret), [m] "=m"(*m)
+		: "m"(m), [old] "ir"(old), [new] "r"(new)
+		: "memory", "cc");
+	return ret;
+}
+
+extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
+        volatile int * m, unsigned long old, unsigned long new);
+#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels
+
+/* This function doesn't exist, so you'll get a linker error
+   if something tries to do an invalid cmpxchg().  */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	switch (size) {
+	case 4:
+		return __cmpxchg_u32(ptr, old, new);
+	case 8:
+		return __cmpxchg_u64(ptr, old, new);
+	}
+
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define cmpxchg(ptr, old, new)					\
+	((typeof(*(ptr)))__cmpxchg((ptr), (unsigned long)(old),	\
+				   (unsigned long)(new),	\
+				   sizeof(*(ptr))))
+
+struct pt_regs;
+extern void __die(const char *, struct pt_regs *, unsigned long,
+		  const char *, const char *, unsigned long);
+extern void __die_if_kernel(const char *, struct pt_regs *, unsigned long,
+			    const char *, const char *, unsigned long);
+
+#define die(msg, regs, err)					\
+	__die(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__)
+#define die_if_kernel(msg, regs, err)					\
+	__die_if_kernel(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__)
+
+#define arch_align_stack(x)	(x)
+
+#endif /* __ASM_AVR32_SYSTEM_H */
diff --git a/include/asm-avr32/termbits.h b/include/asm-avr32/termbits.h
new file mode 100644
index 000000000000..9dc6eacafa33
--- /dev/null
+++ b/include/asm-avr32/termbits.h
@@ -0,0 +1,173 @@
+#ifndef __ASM_AVR32_TERMBITS_H
+#define __ASM_AVR32_TERMBITS_H
+
+#include <linux/posix_types.h>
+
+typedef unsigned char	cc_t;
+typedef unsigned int	speed_t;
+typedef unsigned int	tcflag_t;
+
+#define NCCS 19
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+/* c_iflag bits */
+#define IGNBRK	0000001
+#define BRKINT	0000002
+#define IGNPAR	0000004
+#define PARMRK	0000010
+#define INPCK	0000020
+#define ISTRIP	0000040
+#define INLCR	0000100
+#define IGNCR	0000200
+#define ICRNL	0000400
+#define IUCLC	0001000
+#define IXON	0002000
+#define IXANY	0004000
+#define IXOFF	0010000
+#define IMAXBEL	0020000
+#define IUTF8	0040000
+
+/* c_oflag bits */
+#define OPOST	0000001
+#define OLCUC	0000002
+#define ONLCR	0000004
+#define OCRNL	0000010
+#define ONOCR	0000020
+#define ONLRET	0000040
+#define OFILL	0000100
+#define OFDEL	0000200
+#define NLDLY	0000400
+#define   NL0	0000000
+#define   NL1	0000400
+#define CRDLY	0003000
+#define   CR0	0000000
+#define   CR1	0001000
+#define   CR2	0002000
+#define   CR3	0003000
+#define TABDLY	0014000
+#define   TAB0	0000000
+#define   TAB1	0004000
+#define   TAB2	0010000
+#define   TAB3	0014000
+#define   XTABS	0014000
+#define BSDLY	0020000
+#define   BS0	0000000
+#define   BS1	0020000
+#define VTDLY	0040000
+#define   VT0	0000000
+#define   VT1	0040000
+#define FFDLY	0100000
+#define   FF0	0000000
+#define   FF1	0100000
+
+/* c_cflag bit meaning */
+#define CBAUD	0010017
+#define  B0	0000000		/* hang up */
+#define  B50	0000001
+#define  B75	0000002
+#define  B110	0000003
+#define  B134	0000004
+#define  B150	0000005
+#define  B200	0000006
+#define  B300	0000007
+#define  B600	0000010
+#define  B1200	0000011
+#define  B1800	0000012
+#define  B2400	0000013
+#define  B4800	0000014
+#define  B9600	0000015
+#define  B19200	0000016
+#define  B38400	0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE	0000060
+#define   CS5	0000000
+#define   CS6	0000020
+#define   CS7	0000040
+#define   CS8	0000060
+#define CSTOPB	0000100
+#define CREAD	0000200
+#define PARENB	0000400
+#define PARODD	0001000
+#define HUPCL	0002000
+#define CLOCAL	0004000
+#define CBAUDEX 0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD	  002003600000	/* input baud rate (not used) */
+#define CMSPAR	  010000000000		/* mark or space (stick) parity */
+#define CRTSCTS	  020000000000		/* flow control */
+
+/* c_lflag bits */
+#define ISIG	0000001
+#define ICANON	0000002
+#define XCASE	0000004
+#define ECHO	0000010
+#define ECHOE	0000020
+#define ECHOK	0000040
+#define ECHONL	0000100
+#define NOFLSH	0000200
+#define TOSTOP	0000400
+#define ECHOCTL	0001000
+#define ECHOPRT	0002000
+#define ECHOKE	0004000
+#define FLUSHO	0010000
+#define PENDIN	0040000
+#define IEXTEN	0100000
+
+/* tcflow() and TCXONC use these */
+#define	TCOOFF		0
+#define	TCOON		1
+#define	TCIOFF		2
+#define	TCION		3
+
+/* tcflush() and TCFLSH use these */
+#define	TCIFLUSH	0
+#define	TCOFLUSH	1
+#define	TCIOFLUSH	2
+
+/* tcsetattr uses these */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif /* __ASM_AVR32_TERMBITS_H */
diff --git a/include/asm-avr32/termios.h b/include/asm-avr32/termios.h
new file mode 100644
index 000000000000..615bc0639e5c
--- /dev/null
+++ b/include/asm-avr32/termios.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TERMIOS_H
+#define __ASM_AVR32_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+/* line disciplines */
+#define N_TTY		0
+#define N_SLIP		1
+#define N_MOUSE		2
+#define N_PPP		3
+#define N_STRIP		4
+#define N_AX25		5
+#define N_X25		6	/* X.25 async */
+#define N_6PACK		7
+#define N_MASC		8	/* Reserved for Mobitex module <kaz@cafe.net> */
+#define N_R3964		9	/* Reserved for Simatic R3964 module */
+#define N_PROFIBUS_FDL	10	/* Reserved for Profibus <Dave@mvhi.com> */
+#define N_IRDA		11	/* Linux IR - http://irda.sourceforge.net/ */
+#define N_SMSBLOCK	12	/* SMS block mode - for talking to GSM data cards about SMS messages */
+#define N_HDLC		13	/* synchronous HDLC */
+#define N_SYNC_PPP	14	/* synchronous PPP */
+#define N_HCI		15  /* Bluetooth HCI UART */
+
+#ifdef __KERNEL__
+/*	intr=^C		quit=^\		erase=del	kill=^U
+	eof=^D		vtime=\0	vmin=\1		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		eol=\0
+	reprint=^R	discard=^U	werase=^W	lnext=^V
+	eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+#include <asm-generic/termios.h>
+
+#endif	/* __KERNEL__ */
+
+#endif	/* __ASM_AVR32_TERMIOS_H */
diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h
new file mode 100644
index 000000000000..d1f5b35ebd54
--- /dev/null
+++ b/include/asm-avr32/thread_info.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_THREAD_INFO_H
+#define __ASM_AVR32_THREAD_INFO_H
+
+#include <asm/page.h>
+
+#define THREAD_SIZE_ORDER	1
+#define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+
+struct task_struct;
+struct exec_domain;
+
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	struct exec_domain	*exec_domain;	/* execution domain */
+	unsigned long		flags;		/* low level flags */
+	__u32			cpu;
+	__s32			preempt_count;	/* 0 => preemptable, <0 => BUG */
+	struct restart_block	restart_block;
+	__u8			supervisor_stack[0];
+};
+
+#define INIT_THREAD_INFO(tsk)						\
+{									\
+	.task		= &tsk,						\
+	.exec_domain	= &default_exec_domain,				\
+	.flags		= 0,						\
+	.cpu		= 0,						\
+	.preempt_count	= 1,						\
+	.restart_block	= {						\
+		.fn	= do_no_restart_syscall				\
+	}								\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+/*
+ * Get the thread information struct from C.
+ * We do the usual trick and use the lower end of the stack for this
+ */
+static inline struct thread_info *current_thread_info(void)
+{
+	unsigned long addr = ~(THREAD_SIZE - 1);
+
+	asm("and %0, sp" : "=r"(addr) : "0"(addr));
+	return (struct thread_info *)addr;
+}
+
+/* thread information allocation */
+#define alloc_thread_info(ti) \
+	((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
+#define free_thread_info(ti) free_pages((unsigned long)(ti), 1)
+#define get_thread_info(ti) get_task_struct((ti)->task)
+#define put_thread_info(ti) put_task_struct((ti)->task)
+
+#endif /* !__ASSEMBLY__ */
+
+#define PREEMPT_ACTIVE		0x40000000
+
+/*
+ * Thread information flags
+ * - these are process state flags that various assembly files may need to access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SYSCALL_TRACE       0       /* syscall trace active */
+#define TIF_NOTIFY_RESUME       1       /* resumption notification requested */
+#define TIF_SIGPENDING          2       /* signal pending */
+#define TIF_NEED_RESCHED        3       /* rescheduling necessary */
+#define TIF_POLLING_NRFLAG      4       /* true if poll_idle() is polling
+					   TIF_NEED_RESCHED */
+#define TIF_BREAKPOINT		5	/* true if we should break after return */
+#define TIF_SINGLE_STEP		6	/* single step after next break */
+#define TIF_MEMDIE		7
+#define TIF_RESTORE_SIGMASK	8	/* restore signal mask in do_signal */
+#define TIF_USERSPACE		31      /* true if FS sets userspace */
+
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
+#define _TIF_BREAKPOINT		(1 << TIF_BREAKPOINT)
+#define _TIF_SINGLE_STEP	(1 << TIF_SINGLE_STEP)
+#define _TIF_MEMDIE		(1 << TIF_MEMDIE)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+
+/* XXX: These two masks must never span more than 16 bits! */
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK		0x0000013e
+/* work to do on any return to userspace */
+#define _TIF_ALLWORK_MASK	0x0000013f
+/* work to do on return from debug mode */
+#define _TIF_DBGWORK_MASK	0x0000017e
+
+#endif /* __ASM_AVR32_THREAD_INFO_H */
diff --git a/include/asm-avr32/timex.h b/include/asm-avr32/timex.h
new file mode 100644
index 000000000000..5e44ecb3ce0c
--- /dev/null
+++ b/include/asm-avr32/timex.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TIMEX_H
+#define __ASM_AVR32_TIMEX_H
+
+/*
+ * This is the frequency of the timer used for Linux's timer interrupt.
+ * The value should be defined as accurate as possible or under certain
+ * circumstances Linux timekeeping might become inaccurate or fail.
+ *
+ * For many system the exact clockrate of the timer isn't known but due to
+ * the way this value is used we can get away with a wrong value as long
+ * as this value is:
+ *
+ *  - a multiple of HZ
+ *  - a divisor of the actual rate
+ *
+ * 500000 is a good such cheat value.
+ *
+ * The obscure number 1193182 is the same as used by the original i8254
+ * time in legacy PC hardware; the chip is never found in AVR32 systems.
+ */
+#define CLOCK_TICK_RATE		500000	/* Underlying HZ */
+
+typedef unsigned long cycles_t;
+
+static inline cycles_t get_cycles (void)
+{
+	return 0;
+}
+
+extern int read_current_timer(unsigned long *timer_value);
+#define ARCH_HAS_READ_CURRENT_TIMER	1
+
+#endif /* __ASM_AVR32_TIMEX_H */
diff --git a/include/asm-avr32/tlb.h b/include/asm-avr32/tlb.h
new file mode 100644
index 000000000000..5c55f9ce7c7d
--- /dev/null
+++ b/include/asm-avr32/tlb.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TLB_H
+#define __ASM_AVR32_TLB_H
+
+#define tlb_start_vma(tlb, vma) \
+	flush_cache_range(vma, vma->vm_start, vma->vm_end)
+
+#define tlb_end_vma(tlb, vma) \
+	flush_tlb_range(vma, vma->vm_start, vma->vm_end)
+
+#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while(0)
+
+/*
+ * Flush whole TLB for MM
+ */
+#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+/*
+ * For debugging purposes
+ */
+extern void show_dtlb_entry(unsigned int index);
+extern void dump_dtlb(void);
+
+#endif /* __ASM_AVR32_TLB_H */
diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h
new file mode 100644
index 000000000000..730e268f81f3
--- /dev/null
+++ b/include/asm-avr32/tlbflush.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TLBFLUSH_H
+#define __ASM_AVR32_TLBFLUSH_H
+
+#include <asm/mmu.h>
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes' TLB entries
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ */
+extern void flush_tlb(void);
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void __flush_tlb_page(unsigned long asid, unsigned long page);
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+				      unsigned long start, unsigned long end)
+{
+	/* Nothing to do */
+}
+
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+#endif /* __ASM_AVR32_TLBFLUSH_H */
diff --git a/include/asm-avr32/topology.h b/include/asm-avr32/topology.h
new file mode 100644
index 000000000000..5b766cbb4806
--- /dev/null
+++ b/include/asm-avr32/topology.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_AVR32_TOPOLOGY_H
+#define __ASM_AVR32_TOPOLOGY_H
+
+#include <asm-generic/topology.h>
+
+#endif /* __ASM_AVR32_TOPOLOGY_H */
diff --git a/include/asm-avr32/traps.h b/include/asm-avr32/traps.h
new file mode 100644
index 000000000000..6a8fb944f414
--- /dev/null
+++ b/include/asm-avr32/traps.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TRAPS_H
+#define __ASM_AVR32_TRAPS_H
+
+#include <linux/list.h>
+
+struct undef_hook {
+	struct list_head node;
+	u32 insn_mask;
+	u32 insn_val;
+	int (*fn)(struct pt_regs *regs, u32 insn);
+};
+
+void register_undef_hook(struct undef_hook *hook);
+void unregister_undef_hook(struct undef_hook *hook);
+
+#endif /* __ASM_AVR32_TRAPS_H */
diff --git a/include/asm-avr32/types.h b/include/asm-avr32/types.h
new file mode 100644
index 000000000000..3f47db9675af
--- /dev/null
+++ b/include/asm-avr32/types.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_TYPES_H
+#define __ASM_AVR32_TYPES_H
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned short umode_t;
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+#ifdef __KERNEL__
+
+#define BITS_PER_LONG 32
+
+#ifndef __ASSEMBLY__
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+/* Dma addresses are 32-bits wide.  */
+
+typedef u32 dma_addr_t;
+
+#ifdef CONFIG_LBD
+typedef u64 sector_t;
+#define HAVE_SECTOR_T
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+
+#endif /* __ASM_AVR32_TYPES_H */
diff --git a/include/asm-avr32/uaccess.h b/include/asm-avr32/uaccess.h
new file mode 100644
index 000000000000..821deb5a9d28
--- /dev/null
+++ b/include/asm-avr32/uaccess.h
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_UACCESS_H
+#define __ASM_AVR32_UACCESS_H
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+typedef struct {
+	unsigned int is_user_space;
+} mm_segment_t;
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons (Data Segment Register?), these macros are misnamed.
+ */
+#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
+#define segment_eq(a,b)	((a).is_user_space == (b).is_user_space)
+
+#define USER_ADDR_LIMIT 0x80000000
+
+#define KERNEL_DS	MAKE_MM_SEG(0)
+#define USER_DS		MAKE_MM_SEG(1)
+
+#define get_ds()	(KERNEL_DS)
+
+static inline mm_segment_t get_fs(void)
+{
+	return MAKE_MM_SEG(test_thread_flag(TIF_USERSPACE));
+}
+
+static inline void set_fs(mm_segment_t s)
+{
+	if (s.is_user_space)
+		set_thread_flag(TIF_USERSPACE);
+	else
+		clear_thread_flag(TIF_USERSPACE);
+}
+
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 0 if the range is valid, nonzero otherwise.
+ *
+ * We do the following checks:
+ *   1. Is the access from kernel space?
+ *   2. Does (addr + size) set the carry bit?
+ *   3. Is (addr + size) a negative number (i.e. >= 0x80000000)?
+ *
+ * If yes on the first check, access is granted.
+ * If no on any of the others, access is denied.
+ */
+#define __range_ok(addr, size)						\
+	(test_thread_flag(TIF_USERSPACE)				\
+	 && (((unsigned long)(addr) >= 0x80000000)			\
+	     || ((unsigned long)(size) > 0x80000000)			\
+	     || (((unsigned long)(addr) + (unsigned long)(size)) > 0x80000000)))
+
+#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0))
+
+static inline int
+verify_area(int type, const void __user *addr, unsigned long size)
+{
+	return access_ok(type, addr, size) ? 0 : -EFAULT;
+}
+
+/* Generic arbitrary sized copy. Return the number of bytes NOT copied */
+extern __kernel_size_t __copy_user(void *to, const void *from,
+				   __kernel_size_t n);
+
+extern __kernel_size_t copy_to_user(void __user *to, const void *from,
+				    __kernel_size_t n);
+extern __kernel_size_t copy_from_user(void *to, const void __user *from,
+				      __kernel_size_t n);
+
+static inline __kernel_size_t __copy_to_user(void __user *to, const void *from,
+					     __kernel_size_t n)
+{
+	return __copy_user((void __force *)to, from, n);
+}
+static inline __kernel_size_t __copy_from_user(void *to,
+					       const void __user *from,
+					       __kernel_size_t n)
+{
+	return __copy_user(to, (const void __force *)from, n);
+}
+
+#define __copy_to_user_inatomic __copy_to_user
+#define __copy_from_user_inatomic __copy_from_user
+
+/*
+ * put_user: - Write a simple value into user space.
+ * @x:   Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define put_user(x,ptr)	\
+	__put_user_check((x),(ptr),sizeof(*(ptr)))
+
+/*
+ * get_user: - Get a simple variable from user space.
+ * @x:   Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define get_user(x,ptr) \
+	__get_user_check((x),(ptr),sizeof(*(ptr)))
+
+/*
+ * __put_user: - Write a simple value into user space, with less checking.
+ * @x:   Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define __put_user(x,ptr) \
+	__put_user_nocheck((x),(ptr),sizeof(*(ptr)))
+
+/*
+ * __get_user: - Get a simple variable from user space, with less checking.
+ * @x:   Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define __get_user(x,ptr) \
+	__get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+
+extern int __get_user_bad(void);
+extern int __put_user_bad(void);
+
+#define __get_user_nocheck(x, ptr, size)				\
+({									\
+	typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0;		\
+	int __gu_err = 0;						\
+									\
+	switch (size) {							\
+	case 1: __get_user_asm("ub", __gu_val, ptr, __gu_err); break;	\
+	case 2: __get_user_asm("uh", __gu_val, ptr, __gu_err); break;	\
+	case 4: __get_user_asm("w", __gu_val, ptr, __gu_err); break;	\
+	case 8: __get_user_asm("d", __gu_val, ptr, __gu_err); break;	\
+	default: __gu_err = __get_user_bad(); break;			\
+	}								\
+									\
+	x = __gu_val;							\
+	__gu_err;							\
+})
+
+#define __get_user_check(x, ptr, size)					\
+({									\
+	typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0;		\
+	const typeof(*(ptr)) __user * __gu_addr = (ptr);		\
+	int __gu_err = 0;						\
+									\
+	if (access_ok(VERIFY_READ, __gu_addr, size)) {			\
+		switch (size) {						\
+		case 1:							\
+			__get_user_asm("ub", __gu_val, __gu_addr,	\
+				       __gu_err);			\
+			break;						\
+		case 2:							\
+			__get_user_asm("uh", __gu_val, __gu_addr,	\
+				       __gu_err);			\
+			break;						\
+		case 4:							\
+			__get_user_asm("w", __gu_val, __gu_addr,	\
+				       __gu_err);			\
+			break;						\
+		case 8:							\
+			__get_user_asm("d", __gu_val, __gu_addr,	\
+				       __gu_err);			\
+			break;						\
+		default:						\
+			__gu_err = __get_user_bad();			\
+			break;						\
+		}							\
+	} else {							\
+		__gu_err = -EFAULT;					\
+	}								\
+	x = __gu_val;							\
+	__gu_err;							\
+})
+
+#define __get_user_asm(suffix, __gu_val, ptr, __gu_err)			\
+	asm volatile(							\
+		"1:	ld." suffix "	%1, %3			\n"	\
+		"2:						\n"	\
+		"	.section .fixup, \"ax\"			\n"	\
+		"3:	mov	%0, %4				\n"	\
+		"	rjmp	2b				\n"	\
+		"	.previous				\n"	\
+		"	.section __ex_table, \"a\"		\n"	\
+		"	.long	1b, 3b				\n"	\
+		"	.previous				\n"	\
+		: "=r"(__gu_err), "=r"(__gu_val)			\
+		: "0"(__gu_err), "m"(*(ptr)), "i"(-EFAULT))
+
+#define __put_user_nocheck(x, ptr, size)				\
+({									\
+	typeof(*(ptr)) __pu_val;					\
+	int __pu_err = 0;						\
+									\
+	__pu_val = (x);							\
+	switch (size) {							\
+	case 1: __put_user_asm("b", ptr, __pu_val, __pu_err); break;	\
+	case 2: __put_user_asm("h", ptr, __pu_val, __pu_err); break;	\
+	case 4: __put_user_asm("w", ptr, __pu_val, __pu_err); break;	\
+	case 8: __put_user_asm("d", ptr, __pu_val, __pu_err); break;	\
+	default: __pu_err = __put_user_bad(); break;			\
+	}								\
+	__pu_err;							\
+})
+
+#define __put_user_check(x, ptr, size)					\
+({									\
+	typeof(*(ptr)) __pu_val;					\
+	typeof(*(ptr)) __user *__pu_addr = (ptr);			\
+	int __pu_err = 0;						\
+									\
+	__pu_val = (x);							\
+	if (access_ok(VERIFY_WRITE, __pu_addr, size)) {			\
+		switch (size) {						\
+		case 1:							\
+			__put_user_asm("b", __pu_addr, __pu_val,	\
+				       __pu_err);			\
+			break;						\
+		case 2:							\
+			__put_user_asm("h", __pu_addr, __pu_val,	\
+				       __pu_err);			\
+			break;						\
+		case 4:							\
+			__put_user_asm("w", __pu_addr, __pu_val,	\
+				       __pu_err);			\
+			break;						\
+		case 8:							\
+			__put_user_asm("d", __pu_addr, __pu_val,		\
+				       __pu_err);			\
+			break;						\
+		default:						\
+			__pu_err = __put_user_bad();			\
+			break;						\
+		}							\
+	} else {							\
+		__pu_err = -EFAULT;					\
+	}								\
+	__pu_err;							\
+})
+
+#define __put_user_asm(suffix, ptr, __pu_val, __gu_err)			\
+	asm volatile(							\
+		"1:	st." suffix "	%1, %3			\n"	\
+		"2:						\n"	\
+		"	.section .fixup, \"ax\"			\n"	\
+		"3:	mov	%0, %4				\n"	\
+		"	rjmp	2b				\n"	\
+		"	.previous				\n"	\
+		"	.section __ex_table, \"a\"		\n"	\
+		"	.long	1b, 3b				\n"	\
+		"	.previous				\n"	\
+		: "=r"(__gu_err), "=m"(*(ptr))				\
+		: "0"(__gu_err), "r"(__pu_val), "i"(-EFAULT))
+
+extern __kernel_size_t clear_user(void __user *addr, __kernel_size_t size);
+extern __kernel_size_t __clear_user(void __user *addr, __kernel_size_t size);
+
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern long __strncpy_from_user(char *dst, const char __user *src, long count);
+
+extern long strnlen_user(const char __user *__s, long __n);
+extern long __strnlen_user(const char __user *__s, long __n);
+
+#define strlen_user(s) strnlen_user(s, ~0UL >> 1)
+
+struct exception_table_entry
+{
+	unsigned long insn, fixup;
+};
+
+#endif /* __ASM_AVR32_UACCESS_H */
diff --git a/include/asm-avr32/ucontext.h b/include/asm-avr32/ucontext.h
new file mode 100644
index 000000000000..ac7259c2a799
--- /dev/null
+++ b/include/asm-avr32/ucontext.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_AVR32_UCONTEXT_H
+#define __ASM_AVR32_UCONTEXT_H
+
+struct ucontext {
+	unsigned long		uc_flags;
+	struct ucontext	*	uc_link;
+	stack_t			uc_stack;
+	struct sigcontext	uc_mcontext;
+	sigset_t		uc_sigmask;
+};
+
+#endif /* __ASM_AVR32_UCONTEXT_H */
diff --git a/include/asm-avr32/unaligned.h b/include/asm-avr32/unaligned.h
new file mode 100644
index 000000000000..3042723fcbfd
--- /dev/null
+++ b/include/asm-avr32/unaligned.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_AVR32_UNALIGNED_H
+#define __ASM_AVR32_UNALIGNED_H
+
+/*
+ * AVR32 can handle some unaligned accesses, depending on the
+ * implementation.  The AVR32 AP implementation can handle unaligned
+ * words, but halfwords must be halfword-aligned, and doublewords must
+ * be word-aligned.
+ *
+ * TODO: Make all this CPU-specific and optimize.
+ */
+
+#include <linux/string.h>
+
+/* Use memmove here, so gcc does not insert a __builtin_memcpy. */
+
+#define get_unaligned(ptr) \
+  ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; })
+
+#define put_unaligned(val, ptr)				\
+  ({ __typeof__(*(ptr)) __tmp = (val);			\
+     memmove((ptr), &__tmp, sizeof(*(ptr)));		\
+     (void)0; })
+
+#endif /* __ASM_AVR32_UNALIGNED_H */
diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h
new file mode 100644
index 000000000000..1f528f92690d
--- /dev/null
+++ b/include/asm-avr32/unistd.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_UNISTD_H
+#define __ASM_AVR32_UNISTD_H
+
+/*
+ * This file contains the system call numbers.
+ */
+
+#define __NR_restart_syscall      0
+#define __NR_exit		  1
+#define __NR_fork		  2
+#define __NR_read		  3
+#define __NR_write		  4
+#define __NR_open		  5
+#define __NR_close		  6
+#define __NR_umask		  7
+#define __NR_creat		  8
+#define __NR_link		  9
+#define __NR_unlink		 10
+#define __NR_execve		 11
+#define __NR_chdir		 12
+#define __NR_time		 13
+#define __NR_mknod		 14
+#define __NR_chmod		 15
+#define __NR_chown		 16
+#define __NR_lchown		 17
+#define __NR_lseek		 18
+#define __NR__llseek		 19
+#define __NR_getpid		 20
+#define __NR_mount		 21
+#define __NR_umount2		 22
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_ptrace		 26
+#define __NR_alarm		 27
+#define __NR_pause		 28
+#define __NR_utime		 29
+#define __NR_stat		 30
+#define __NR_fstat		 31
+#define __NR_lstat		 32
+#define __NR_access		 33
+#define __NR_chroot		 34
+#define __NR_sync		 35
+#define __NR_fsync		 36
+#define __NR_kill		 37
+#define __NR_rename		 38
+#define __NR_mkdir		 39
+#define __NR_rmdir		 40
+#define __NR_dup		 41
+#define __NR_pipe		 42
+#define __NR_times		 43
+#define __NR_clone		 44
+#define __NR_brk		 45
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+#define __NR_getcwd		 48
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_acct		 51
+#define __NR_setfsuid		 52
+#define __NR_setfsgid		 53
+#define __NR_ioctl		 54
+#define __NR_fcntl		 55
+#define __NR_setpgid		 56
+#define __NR_mremap		 57
+#define __NR_setresuid		 58
+#define __NR_getresuid		 59
+#define __NR_setreuid		 60
+#define __NR_setregid		 61
+#define __NR_ustat		 62
+#define __NR_dup2		 63
+#define __NR_getppid		 64
+#define __NR_getpgrp		 65
+#define __NR_setsid		 66
+#define __NR_rt_sigaction	 67
+#define __NR_rt_sigreturn	 68
+#define __NR_rt_sigprocmask	 69
+#define __NR_rt_sigpending	 70
+#define __NR_rt_sigtimedwait	 71
+#define __NR_rt_sigqueueinfo	 72
+#define __NR_rt_sigsuspend	 73
+#define __NR_sethostname	 74
+#define __NR_setrlimit		 75
+#define __NR_getrlimit		 76	/* SuS compliant getrlimit */
+#define __NR_getrusage		 77
+#define __NR_gettimeofday	 78
+#define __NR_settimeofday	 79
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+#define __NR_select		 82
+#define __NR_symlink		 83
+#define __NR_fchdir		 84
+#define __NR_readlink		 85
+#define __NR_pread		 86
+#define __NR_pwrite		 87
+#define __NR_swapon		 88
+#define __NR_reboot		 89
+#define __NR_mmap2		 90
+#define __NR_munmap		 91
+#define __NR_truncate		 92
+#define __NR_ftruncate		 93
+#define __NR_fchmod		 94
+#define __NR_fchown		 95
+#define __NR_getpriority	 96
+#define __NR_setpriority	 97
+#define __NR_wait4		 98
+#define __NR_statfs		 99
+#define __NR_fstatfs		100
+#define __NR_vhangup		101
+#define __NR_sigaltstack	102
+#define __NR_syslog		103
+#define __NR_setitimer		104
+#define __NR_getitimer		105
+#define __NR_swapoff		106
+#define __NR_sysinfo		107
+#define __NR_ipc		108
+#define __NR_sendfile		109
+#define __NR_setdomainname	110
+#define __NR_uname		111
+#define __NR_adjtimex		112
+#define __NR_mprotect		113
+#define __NR_vfork		114
+#define __NR_init_module	115
+#define __NR_delete_module	116
+#define __NR_quotactl		117
+#define __NR_getpgid		118
+#define __NR_bdflush		119
+#define __NR_sysfs		120
+#define __NR_personality	121
+#define __NR_afs_syscall	122 /* Syscall for Andrew File System */
+#define __NR_getdents		123
+#define __NR_flock		124
+#define __NR_msync		125
+#define __NR_readv		126
+#define __NR_writev		127
+#define __NR_getsid		128
+#define __NR_fdatasync		129
+#define __NR__sysctl		130
+#define __NR_mlock		131
+#define __NR_munlock		132
+#define __NR_mlockall		133
+#define __NR_munlockall		134
+#define __NR_sched_setparam		135
+#define __NR_sched_getparam		136
+#define __NR_sched_setscheduler		137
+#define __NR_sched_getscheduler		138
+#define __NR_sched_yield		139
+#define __NR_sched_get_priority_max	140
+#define __NR_sched_get_priority_min	141
+#define __NR_sched_rr_get_interval	142
+#define __NR_nanosleep		143
+#define __NR_poll		144
+#define __NR_nfsservctl		145
+#define __NR_setresgid		146
+#define __NR_getresgid		147
+#define __NR_prctl              148
+#define __NR_socket		149
+#define __NR_bind		150
+#define __NR_connect		151
+#define __NR_listen		152
+#define __NR_accept		153
+#define __NR_getsockname	154
+#define __NR_getpeername	155
+#define __NR_socketpair		156
+#define __NR_send		157
+#define __NR_recv		158
+#define __NR_sendto		159
+#define __NR_recvfrom		160
+#define __NR_shutdown		161
+#define __NR_setsockopt		162
+#define __NR_getsockopt		163
+#define __NR_sendmsg		164
+#define __NR_recvmsg		165
+#define __NR_truncate64		166
+#define __NR_ftruncate64	167
+#define __NR_stat64		168
+#define __NR_lstat64		169
+#define __NR_fstat64		170
+#define __NR_pivot_root		171
+#define __NR_mincore		172
+#define __NR_madvise		173
+#define __NR_getdents64		174
+#define __NR_fcntl64		175
+#define __NR_gettid		176
+#define __NR_readahead		177
+#define __NR_setxattr		178
+#define __NR_lsetxattr		179
+#define __NR_fsetxattr		180
+#define __NR_getxattr		181
+#define __NR_lgetxattr		182
+#define __NR_fgetxattr		183
+#define __NR_listxattr		184
+#define __NR_llistxattr		185
+#define __NR_flistxattr		186
+#define __NR_removexattr	187
+#define __NR_lremovexattr	188
+#define __NR_fremovexattr	189
+#define __NR_tkill		190
+#define __NR_sendfile64		191
+#define __NR_futex		192
+#define __NR_sched_setaffinity	193
+#define __NR_sched_getaffinity	194
+#define __NR_capget		195
+#define __NR_capset		196
+#define __NR_io_setup		197
+#define __NR_io_destroy		198
+#define __NR_io_getevents	199
+#define __NR_io_submit		200
+#define __NR_io_cancel		201
+#define __NR_fadvise64		202
+#define __NR_exit_group		203
+#define __NR_lookup_dcookie	204
+#define __NR_epoll_create	205
+#define __NR_epoll_ctl		206
+#define __NR_epoll_wait		207
+#define __NR_remap_file_pages	208
+#define __NR_set_tid_address	209
+
+#define __NR_timer_create	210
+#define __NR_timer_settime	211
+#define __NR_timer_gettime	212
+#define __NR_timer_getoverrun	213
+#define __NR_timer_delete	214
+#define __NR_clock_settime	215
+#define __NR_clock_gettime	216
+#define __NR_clock_getres	217
+#define __NR_clock_nanosleep	218
+#define __NR_statfs64		219
+#define __NR_fstatfs64		220
+#define __NR_tgkill		221
+				/* 222 reserved for tux */
+#define __NR_utimes		223
+#define __NR_fadvise64_64	224
+
+#define __NR_cacheflush		225
+
+#define __NR_vserver		226
+#define __NR_mq_open		227
+#define __NR_mq_unlink		228
+#define __NR_mq_timedsend	229
+#define __NR_mq_timedreceive	230
+#define __NR_mq_notify		231
+#define __NR_mq_getsetattr	232
+#define __NR_kexec_load		233
+#define __NR_waitid		234
+#define __NR_add_key		235
+#define __NR_request_key	236
+#define __NR_keyctl		237
+#define __NR_ioprio_set		238
+#define __NR_ioprio_get		239
+#define __NR_inotify_init	240
+#define __NR_inotify_add_watch	241
+#define __NR_inotify_rm_watch	242
+#define __NR_openat		243
+#define __NR_mkdirat		244
+#define __NR_mknodat		245
+#define __NR_fchownat		246
+#define __NR_futimesat		247
+#define __NR_fstatat64		248
+#define __NR_unlinkat		249
+#define __NR_renameat		250
+#define __NR_linkat		251
+#define __NR_symlinkat		252
+#define __NR_readlinkat		253
+#define __NR_fchmodat		254
+#define __NR_faccessat		255
+#define __NR_pselect6		256
+#define __NR_ppoll		257
+#define __NR_unshare		258
+#define __NR_set_robust_list	259
+#define __NR_get_robust_list	260
+#define __NR_splice		261
+#define __NR_sync_file_range	262
+#define __NR_tee		263
+#define __NR_vmsplice		264
+
+#define NR_syscalls		265
+
+
+/*
+ * AVR32 calling convention for system calls:
+ *   - System call number in r8
+ *   - Parameters in r12 and downwards to r9 as well as r6 and r5.
+ *   - Return value in r12
+ */
+
+/*
+ * user-visible error numbers are in the range -1 - -124: see
+ * <asm-generic/errno.h>
+ */
+
+#define __syscall_return(type, res) do {				\
+		if ((unsigned long)(res) >= (unsigned long)(-125)) {	\
+			errno = -(res);					\
+			res = -1;					\
+		}							\
+		return (type) (res);					\
+	} while (0)
+
+#ifdef __KERNEL__
+#define __ARCH_WANT_IPC_PARSE_VERSION
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_WAITPID
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#endif
+
+#if defined(__KERNEL_SYSCALLS__) || defined(__CHECKER__)
+
+#include <linux/types.h>
+#include <linux/linkage.h>
+#include <asm/signal.h>
+
+struct pt_regs;
+
+/*
+ * we need this inline - forking from kernel space will result
+ * in NO COPY ON WRITE (!!!), until an execve is executed. This
+ * is no problem, but for the stack. This is handled by not letting
+ * main() use the stack at all after fork(). Thus, no function
+ * calls - which means inline code for fork too, as otherwise we
+ * would use the stack upon exit from 'fork()'.
+ *
+ * Actually only pause and fork are needed inline, so that there
+ * won't be any messing with the stack from main(), but we define
+ * some others too.
+ */
+static inline int execve(const char *file, char **argv, char **envp)
+{
+	register long scno asm("r8") = __NR_execve;
+	register long sc1 asm("r12") = (long)file;
+	register long sc2 asm("r11") = (long)argv;
+	register long sc3 asm("r10") = (long)envp;
+	int res;
+
+	asm volatile("scall"
+		     : "=r"(sc1)
+		     : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3)
+		     : "lr", "memory");
+	res = sc1;
+	__syscall_return(int, res);
+}
+
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
+asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+			       struct pt_regs *regs);
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
+asmlinkage int sys_pipe(unsigned long __user *filedes);
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, off_t offset);
+asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len);
+asmlinkage int sys_fork(struct pt_regs *regs);
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long parent_tidptr,
+			 unsigned long child_tidptr, struct pt_regs *regs);
+asmlinkage int sys_vfork(struct pt_regs *regs);
+asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
+			  char __user *__user *uenvp, struct pt_regs *regs);
+
+#endif
+
+/*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall");
+
+#endif /* __ASM_AVR32_UNISTD_H */
diff --git a/include/asm-avr32/user.h b/include/asm-avr32/user.h
new file mode 100644
index 000000000000..060fb3acee49
--- /dev/null
+++ b/include/asm-avr32/user.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Note: We may not need these definitions for AVR32, as we don't
+ * support a.out.
+ */
+#ifndef __ASM_AVR32_USER_H
+#define __ASM_AVR32_USER_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+
+/*
+ * Core file format: The core file is written in such a way that gdb
+ * can understand it and provide useful information to the user (under
+ * linux we use the `trad-core' bfd).  The file contents are as follows:
+ *
+ *  upage: 1 page consisting of a user struct that tells gdb
+ *	what is present in the file.  Directly after this is a
+ *	copy of the task_struct, which is currently not used by gdb,
+ *	but it may come in handy at some point.  All of the registers
+ *	are stored as part of the upage.  The upage should always be
+ *	only one page long.
+ *  data: The data segment follows next.  We use current->end_text to
+ *	current->brk to pick up all of the user variables, plus any memory
+ *	that may have been sbrk'ed.  No attempt is made to determine if a
+ *	page is demand-zero or if a page is totally unused, we just cover
+ *	the entire range.  All of the addresses are rounded in such a way
+ *	that an integral number of pages is written.
+ *  stack: We need the stack information in order to get a meaningful
+ *	backtrace.  We need to write the data from usp to
+ *	current->start_stack, so we round each of these in order to be able
+ *	to write an integer number of pages.
+ */
+
+struct user_fpu_struct {
+	/* We have no FPU (yet) */
+};
+
+struct user {
+	struct pt_regs	regs;			/* entire machine state */
+	size_t		u_tsize;		/* text size (pages) */
+	size_t		u_dsize;		/* data size (pages) */
+	size_t		u_ssize;		/* stack size (pages) */
+	unsigned long	start_code;		/* text starting address */
+	unsigned long	start_data;		/* data starting address */
+	unsigned long	start_stack;		/* stack starting address */
+	long int	signal;			/* signal causing core dump */
+	struct regs *	u_ar0;			/* help gdb find registers */
+	unsigned long	magic;			/* identifies a core file */
+	char		u_comm[32];		/* user command name */
+};
+
+#define NBPG			PAGE_SIZE
+#define UPAGES			1
+#define HOST_TEXT_START_ADDR	(u.start_code)
+#define HOST_DATA_START_ADDR	(u.start_data)
+#define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
+
+#endif /* __ASM_AVR32_USER_H */
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 6a5796c81c90..666e0a5f00fc 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -31,6 +31,7 @@
 #define EM_M32R		88	/* Renesas M32R */
 #define EM_H8_300	46	/* Renesas H8/300,300H,H8S */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */
+#define EM_AVR32	0x18ad	/* Atmel AVR32 */
 
 /*
  * This is an interim value that we will use until the committee comes
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 554ee688a9f8..3f21cc79a134 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -277,7 +277,7 @@ config DEBUG_HIGHMEM
 config DEBUG_BUGVERBOSE
 	bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
 	depends on BUG
-	depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
+	depends on ARM || ARM26 || AVR32 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
 	default !EMBEDDED
 	help
 	  Say Y here to make BUG() panics output the file name and line number
@@ -315,7 +315,7 @@ config DEBUG_VM
 
 config FRAME_POINTER
 	bool "Compile the kernel with frame pointers"
-	depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390)
+	depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32)
 	default y if DEBUG_INFO && UML
 	help
 	  If you say Y here the resulting kernel image will be slightly larger
-- 
cgit v1.2.3


From 9c9b8b388296ad5a306ab238dc677cfe6ff4cb12 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Mon, 25 Sep 2006 23:32:26 -0700
Subject: [PATCH] x86: put .note.* sections into a PT_NOTE segment in vmlinux

This patch will pack any .note.* section into a PT_NOTE segment in the output
file.

To do this, we tell ld that we need a PT_NOTE segment.  This requires us to
start explicitly mapping sections to segments, so we also need to explicitly
create PT_LOAD segments for text and data, and map the sections to them
appropriately.  Fortunately, each section will default to its previous
section's segment, so it doesn't take many changes to vmlinux.lds.S.

This only changes i386 for now, but I presume the corresponding changes for
other architectures will be as simple.

This change also adds <linux/elfnote.h>, which defines C and Assembler macros
for actually creating ELF notes.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/vmlinux.lds.S    | 12 +++++-
 include/asm-generic/vmlinux.lds.h |  3 ++
 include/linux/elfnote.h           | 88 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/elfnote.h

(limited to 'include/linux')

diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 2d4f1386e2b1..1e7ac1c44ddc 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
 ENTRY(phys_startup_32)
 jiffies = jiffies_64;
+
+PHDRS {
+	text PT_LOAD FLAGS(5);	/* R_E */
+	data PT_LOAD FLAGS(7);	/* RWE */
+	note PT_NOTE FLAGS(4);	/* R__ */
+}
 SECTIONS
 {
   . = __KERNEL_START;
@@ -26,7 +32,7 @@ SECTIONS
 	KPROBES_TEXT
 	*(.fixup)
 	*(.gnu.warning)
-	} = 0x9090
+	} :text = 0x9090
 
   _etext = .;			/* End of text section */
 
@@ -48,7 +54,7 @@ SECTIONS
   .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
 	*(.data)
 	CONSTRUCTORS
-	}
+	} :data
 
   . = ALIGN(4096);
   __nosave_begin = .;
@@ -184,4 +190,6 @@ SECTIONS
   STABS_DEBUG
 
   DWARF_DEBUG
+
+  NOTES
 }
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db5a3732f106..253ae1328271 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -194,3 +194,6 @@
 		.stab.index 0 : { *(.stab.index) }			\
 		.stab.indexstr 0 : { *(.stab.indexstr) }		\
 		.comment 0 : { *(.comment) }
+
+#define NOTES								\
+		.notes : { *(.note.*) } :note
diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
new file mode 100644
index 000000000000..16f9f8ebffd9
--- /dev/null
+++ b/include/linux/elfnote.h
@@ -0,0 +1,88 @@
+#ifndef _LINUX_ELFNOTE_H
+#define _LINUX_ELFNOTE_H
+/*
+ * Helper macros to generate ELF Note structures, which are put into a
+ * PT_NOTE segment of the final vmlinux image.  These are useful for
+ * including name-value pairs of metadata into the kernel binary (or
+ * modules?) for use by external programs.
+ *
+ * Each note has three parts: a name, a type and a desc.  The name is
+ * intended to distinguish the note's originator, so it would be a
+ * company, project, subsystem, etc; it must be in a suitable form for
+ * use in a section name.  The type is an integer which is used to tag
+ * the data, and is considered to be within the "name" namespace (so
+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42).  The
+ * "desc" field is the actual data.  There are no constraints on the
+ * desc field's contents, though typically they're fairly small.
+ *
+ * All notes from a given NAME are put into a section named
+ * .note.NAME.  When the kernel image is finally linked, all the notes
+ * are packed into a single .notes section, which is mapped into the
+ * PT_NOTE segment.  Because notes for a given name are grouped into
+ * the same section, they'll all be adjacent the output file.
+ *
+ * This file defines macros for both C and assembler use.  Their
+ * syntax is slightly different, but they're semantically similar.
+ *
+ * See the ELF specification for more detail about ELF notes.
+ */
+
+#ifdef __ASSEMBLER__
+/*
+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
+ * turn out to be the same size and shape), followed by the name and
+ * desc data with appropriate padding.  The 'desc' argument includes
+ * the assembler pseudo op defining the type of the data: .asciz
+ * "hello, world"
+ */
+.macro ELFNOTE name type desc:vararg
+.pushsection ".note.\name"
+  .align 4
+  .long 2f - 1f			/* namesz */
+  .long 4f - 3f			/* descsz */
+  .long \type
+1:.asciz "\name"
+2:.align 4
+3:\desc
+4:.align 4
+.popsection
+.endm
+#else	/* !__ASSEMBLER__ */
+#include <linux/elf.h>
+/*
+ * Use an anonymous structure which matches the shape of
+ * Elf{32,64}_Nhdr, but includes the name and desc data.  The size and
+ * type of name and desc depend on the macro arguments.  "name" must
+ * be a literal string, and "desc" must be passed by value.  You may
+ * only define one note per line, since __LINE__ is used to generate
+ * unique symbols.
+ */
+#define _ELFNOTE_PASTE(a,b)	a##b
+#define _ELFNOTE(size, name, unique, type, desc)			\
+	static const struct {						\
+		struct elf##size##_note _nhdr;				\
+		unsigned char _name[sizeof(name)]			\
+		__attribute__((aligned(sizeof(Elf##size##_Word))));	\
+		typeof(desc) _desc					\
+			     __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+	} _ELFNOTE_PASTE(_note_, unique)				\
+		__attribute_used__					\
+		__attribute__((section(".note." name),			\
+			       aligned(sizeof(Elf##size##_Word)),	\
+			       unused)) = {				\
+		{							\
+			sizeof(name),					\
+			sizeof(desc),					\
+			type,						\
+		},							\
+		name,							\
+		desc							\
+	}
+#define ELFNOTE(size, name, type, desc)		\
+	_ELFNOTE(size, name, __LINE__, type, desc)
+
+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
+#endif	/* __ASSEMBLER__ */
+
+#endif /* _LINUX_ELFNOTE_H */
-- 
cgit v1.2.3


From 5091e746848f74c9a2c0579b4ef8d8cd1a6b135d Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@xensource.com>
Date: Mon, 25 Sep 2006 23:32:28 -0700
Subject: [PATCH] Translate asm version of ELFNOTE macro into preprocessor
 macro

I've come across some problems with the assembly version of the ELFNOTE
macro currently in -mm. (in
x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch)

The first is that older gas does not support :varargs in .macro
definitions (in my testing 2.17 does while 2.15 does not, I don't know
when it became supported). The Changes file says binutils >= 2.12 so I
think we need to avoid using it. There are no other uses in mainline or
-mm. Old gas appears to just ignore it so you get "too many arguments"
type errors.

Secondly it seems that passing strings as arguments to assembler macros
is broken without varargs. It looks like they get unquoted or each
character is treated as a separate argument or something and this causes
all manner of grief. I think this is because of the use of -traditional
when compiling assembly files.

Therefore I have translated the assembler macro into a pre-processor
macro.

I added the desctype as a separate argument instead of including it with
the descdata as the previous version did since -traditional means the
ELFNOTE definition after the #else needs to have the same number of
arguments (I think so anyway, the -traditional CPP semantics are pretty
fscking strange!).

With this patch I am able to define elfnotes in assembly like this with
both old and new assemblers.

	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "linux")
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "2.6")
	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long,  __PAGE_OFFSET)

Which seems reasonable enough.

Signed-off-by: Ian Campbell <ian.campbell@xensource.com>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/elfnote.h | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
index 16f9f8ebffd9..67396db141e8 100644
--- a/include/linux/elfnote.h
+++ b/include/linux/elfnote.h
@@ -31,22 +31,24 @@
 /*
  * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
  * turn out to be the same size and shape), followed by the name and
- * desc data with appropriate padding.  The 'desc' argument includes
- * the assembler pseudo op defining the type of the data: .asciz
- * "hello, world"
+ * desc data with appropriate padding.  The 'desctype' argument is the
+ * assembler pseudo op defining the type of the data e.g. .asciz while
+ * 'descdata' is the data itself e.g.  "hello, world".
+ *
+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
+ *      ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
  */
-.macro ELFNOTE name type desc:vararg
-.pushsection ".note.\name"
-  .align 4
-  .long 2f - 1f			/* namesz */
-  .long 4f - 3f			/* descsz */
-  .long \type
-1:.asciz "\name"
-2:.align 4
-3:\desc
-4:.align 4
-.popsection
-.endm
+#define ELFNOTE(name, type, desctype, descdata)	\
+.pushsection .note.name			;	\
+  .align 4				;	\
+  .long 2f - 1f		/* namesz */	;	\
+  .long 4f - 3f		/* descsz */	;	\
+  .long type				;	\
+1:.asciz "name"				;	\
+2:.align 4				;	\
+3:desctype descdata			;	\
+4:.align 4				;	\
+.popsection				;
 #else	/* !__ASSEMBLER__ */
 #include <linux/elf.h>
 /*
-- 
cgit v1.2.3


From a3bc0dbc81d36fd38991b4373f6de8e1a507605a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 25 Sep 2006 23:32:33 -0700
Subject: [PATCH] smp_call_function_single() cleanup

If we're going to implement smp_call_function_single() on three architecture
with the same prototype then it should have a declaration in a
non-arch-specific header file.

Move it into <linux/smp.h>.

Cc: Stephane Eranian <eranian@hpl.hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/smp.c              | 4 ++--
 arch/ia64/kernel/perfmon.c          | 1 +
 arch/ia64/sn/kernel/sn2/sn_hwperf.c | 3 ++-
 arch/x86_64/kernel/smpboot.c        | 3 ++-
 include/asm-ia64/smp.h              | 2 --
 include/asm-x86_64/smp.h            | 2 --
 include/linux/smp.h                 | 3 +++
 7 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 304243ed7a30..465188e2d701 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -683,8 +683,8 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info,
  * or is or has executed.
  */
 
-int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
-	int nonatomic, int wait)
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+			int nonatomic, int wait)
 {
 	/* prevent preemption and reschedule on another processor */
 	int me = get_cpu();
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 84a7e52f56f6..7bb7696e4ce2 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -34,6 +34,7 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/vfs.h>
+#include <linux/smp.h>
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/bitops.h>
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 9a8a29339d2d..b632b9c1e3b3 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -32,9 +32,10 @@
 #include <linux/cpumask.h>
 #include <linux/smp_lock.h>
 #include <linux/nodemask.h>
+#include <linux/smp.h>
+
 #include <asm/processor.h>
 #include <asm/topology.h>
-#include <asm/smp.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 #include <asm/sal.h>
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 975380207b46..3ae9ffddddc0 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -46,9 +46,10 @@
 #include <linux/bootmem.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
-
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
+#include <linux/smp.h>
+
 #include <asm/mtrr.h>
 #include <asm/pgalloc.h>
 #include <asm/desc.h>
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 719ff309ce09..74bde1c2bb1a 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -122,8 +122,6 @@ extern void __init smp_build_cpu_map(void);
 extern void __init init_smp_config (void);
 extern void smp_do_timer (struct pt_regs *regs);
 
-extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info,
-				     int retry, int wait);
 extern void smp_send_reschedule (int cpu);
 extern void lock_ipi_calllock(void);
 extern void unlock_ipi_calllock(void);
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 6805e1feb300..ce97f65e1d10 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -48,8 +48,6 @@ extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
 void smp_stop_cpu(void);
-extern int smp_call_function_single(int cpuid, void (*func) (void *info),
-				void *info, int retry, int wait);
 
 extern cpumask_t cpu_sibling_map[NR_CPUS];
 extern cpumask_t cpu_core_map[NR_CPUS];
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 837e8bce1349..51649987f691 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus);
  */
 int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
 
+int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
+				int retry, int wait);
+
 /*
  * Call a function on all processors
  */
-- 
cgit v1.2.3


From 930631edd4b1fe2781d9fe90edbe35d89dfc94cc Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 25 Sep 2006 23:32:40 -0700
Subject: [PATCH] add DIV_ROUND_UP()

Add the DIV_ROUND_UP() helper macro: divide `n' by `d', rounding up.

Stolen from the gfs2 tree(!) because the swsusp patches need it.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kernel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b2ae4fdce8b..e44a37e2c71c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@ extern const char linux_banner[];
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
 #define	KERN_EMERG	"<0>"	/* system is unusable			*/
-- 
cgit v1.2.3


From ab954160350c91c77ae03740ef90458c3ad5412c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 25 Sep 2006 23:32:42 -0700
Subject: [PATCH] swsusp: write speedup

Switch the swsusp writeout code from 4k-at-a-time to 4MB-at-a-time.

Crufty old PIII testbox:
	12.9 MB/s -> 20.9 MB/s

Sony Vaio:
	14.7 MB/s -> 26.5 MB/s

The implementation is crude.  A better one would use larger BIOs, but wouldn't
gain any performance.

The memcpys will be mostly pipelined with the IO and basically come for free.

The ENOMEM path has not been tested.  It should be.

Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h |  5 ++-
 kernel/power/swap.c  | 93 ++++++++++++++++++++++++++++++++++++++++++----------
 mm/page_io.c         | 25 ++++++++++----
 3 files changed, 98 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a2f5ad7c2d2e..3d434cbffe26 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -12,6 +12,8 @@
 
 struct notifier_block;
 
+struct bio;
+
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
@@ -216,7 +218,8 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
+extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
+				struct bio **bio_chain);
 
 /* linux/mm/swap_state.c */
 extern struct address_space swapper_space;
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 79b66e734bdb..2dc883d361d5 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -49,18 +49,16 @@ static int mark_swapfiles(swp_entry_t start)
 {
 	int error;
 
-	rw_swap_page_sync(READ,
-			  swp_entry(root_swap, 0),
-			  virt_to_page((unsigned long)&swsusp_header));
+	rw_swap_page_sync(READ, swp_entry(root_swap, 0),
+			  virt_to_page((unsigned long)&swsusp_header), NULL);
 	if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
 	    !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
 		memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
 		memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
 		swsusp_header.image = start;
-		error = rw_swap_page_sync(WRITE,
-					  swp_entry(root_swap, 0),
-					  virt_to_page((unsigned long)
-						       &swsusp_header));
+		error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0),
+				virt_to_page((unsigned long)&swsusp_header),
+				NULL);
 	} else {
 		pr_debug("swsusp: Partition is not swap space.\n");
 		error = -ENODEV;
@@ -88,16 +86,37 @@ static int swsusp_swap_check(void) /* This is called before saving image */
  *	write_page - Write one page to given swap location.
  *	@buf:		Address we're writing.
  *	@offset:	Offset of the swap page we're writing to.
+ *	@bio_chain:	Link the next write BIO here
  */
 
-static int write_page(void *buf, unsigned long offset)
+static int write_page(void *buf, unsigned long offset, struct bio **bio_chain)
 {
 	swp_entry_t entry;
 	int error = -ENOSPC;
 
 	if (offset) {
+		struct page *page = virt_to_page(buf);
+
+		if (bio_chain) {
+			/*
+			 * Whether or not we successfully allocated a copy page,
+			 * we take a ref on the page here.  It gets undone in
+			 * wait_on_bio_chain().
+			 */
+			struct page *page_copy;
+			page_copy = alloc_page(GFP_ATOMIC);
+			if (page_copy == NULL) {
+				WARN_ON_ONCE(1);
+				bio_chain = NULL;	/* Go synchronous */
+				get_page(page);
+			} else {
+				memcpy(page_address(page_copy),
+					page_address(page), PAGE_SIZE);
+				page = page_copy;
+			}
+		}
 		entry = swp_entry(root_swap, offset);
-		error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf));
+		error = rw_swap_page_sync(WRITE, entry, page, bio_chain);
 	}
 	return error;
 }
@@ -185,37 +204,68 @@ static int get_swap_writer(struct swap_map_handle *handle)
 	return 0;
 }
 
-static int swap_write_page(struct swap_map_handle *handle, void *buf)
+static int wait_on_bio_chain(struct bio **bio_chain)
 {
-	int error;
+	struct bio *bio;
+	struct bio *next_bio;
+	int ret = 0;
+
+	if (bio_chain == NULL)
+		return 0;
+
+	bio = *bio_chain;
+	while (bio) {
+		struct page *page;
+
+		next_bio = bio->bi_private;
+		page = bio->bi_io_vec[0].bv_page;
+		wait_on_page_locked(page);
+		if (!PageUptodate(page) || PageError(page))
+			ret = -EIO;
+		put_page(page);
+		bio_put(bio);
+		bio = next_bio;
+	}
+	*bio_chain = NULL;
+	return ret;
+}
+
+static int swap_write_page(struct swap_map_handle *handle, void *buf,
+				struct bio **bio_chain)
+{
+	int error = 0;
 	unsigned long offset;
 
 	if (!handle->cur)
 		return -EINVAL;
 	offset = alloc_swap_page(root_swap, handle->bitmap);
-	error = write_page(buf, offset);
+	error = write_page(buf, offset, bio_chain);
 	if (error)
 		return error;
 	handle->cur->entries[handle->k++] = offset;
 	if (handle->k >= MAP_PAGE_ENTRIES) {
+		error = wait_on_bio_chain(bio_chain);
+		if (error)
+			goto out;
 		offset = alloc_swap_page(root_swap, handle->bitmap);
 		if (!offset)
 			return -ENOSPC;
 		handle->cur->next_swap = offset;
-		error = write_page(handle->cur, handle->cur_swap);
+		error = write_page(handle->cur, handle->cur_swap, NULL);
 		if (error)
-			return error;
+			goto out;
 		memset(handle->cur, 0, PAGE_SIZE);
 		handle->cur_swap = offset;
 		handle->k = 0;
 	}
-	return 0;
+out:
+	return error;
 }
 
 static int flush_swap_writer(struct swap_map_handle *handle)
 {
 	if (handle->cur && handle->cur_swap)
-		return write_page(handle->cur, handle->cur_swap);
+		return write_page(handle->cur, handle->cur_swap, NULL);
 	else
 		return -EINVAL;
 }
@@ -232,6 +282,8 @@ static int save_image(struct swap_map_handle *handle,
 	int ret;
 	int error = 0;
 	int nr_pages;
+	int err2;
+	struct bio *bio;
 	struct timeval start;
 	struct timeval stop;
 
@@ -240,11 +292,13 @@ static int save_image(struct swap_map_handle *handle,
 	if (!m)
 		m = 1;
 	nr_pages = 0;
+	bio = NULL;
 	do_gettimeofday(&start);
 	do {
 		ret = snapshot_read_next(snapshot, PAGE_SIZE);
 		if (ret > 0) {
-			error = swap_write_page(handle, data_of(*snapshot));
+			error = swap_write_page(handle, data_of(*snapshot),
+						&bio);
 			if (error)
 				break;
 			if (!(nr_pages % m))
@@ -252,7 +306,10 @@ static int save_image(struct swap_map_handle *handle,
 			nr_pages++;
 		}
 	} while (ret > 0);
+	err2 = wait_on_bio_chain(&bio);
 	do_gettimeofday(&stop);
+	if (!error)
+		error = err2;
 	if (!error)
 		printk("\b\b\b\bdone\n");
 	show_speed(&start, &stop, nr_to_write, "Wrote");
@@ -307,7 +364,7 @@ int swsusp_write(void)
 	error = get_swap_writer(&handle);
 	if (!error) {
 		unsigned long start = handle.cur_swap;
-		error = swap_write_page(&handle, header);
+		error = swap_write_page(&handle, header, NULL);
 		if (!error)
 			error = save_image(&handle, &snapshot,
 					header->pages - 1);
diff --git a/mm/page_io.c b/mm/page_io.c
index d2f0a5783370..f46a9862b7ef 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -156,10 +156,12 @@ out:
  * We use end_swap_bio_read() even for writes, because it happens to do what
  * we want.
  */
-int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page)
+int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
+			struct bio **bio_chain)
 {
 	struct bio *bio;
 	int ret = 0;
+	int bio_rw;
 
 	lock_page(page);
 
@@ -170,11 +172,22 @@ int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page)
 		goto out;
 	}
 
-	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
-	wait_on_page_locked(page);
-
-	if (!PageUptodate(page) || PageError(page))
-		ret = -EIO;
+	bio_rw = rw;
+	if (!bio_chain)
+		bio_rw |= (1 << BIO_RW_SYNC);
+	if (bio_chain)
+		bio_get(bio);
+	submit_bio(bio_rw, bio);
+	if (bio_chain == NULL) {
+		wait_on_page_locked(page);
+
+		if (!PageUptodate(page) || PageError(page))
+			ret = -EIO;
+	}
+	if (bio_chain) {
+		bio->bi_private = *bio_chain;
+		*bio_chain = bio;
+	}
 out:
 	return ret;
 }
-- 
cgit v1.2.3


From 546e0d271941dd1ff6961e2a1f7eac75f1fc277e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 25 Sep 2006 23:32:44 -0700
Subject: [PATCH] swsusp: read speedup

Implement async reads for swsusp resuming.

Crufty old PIII testbox:
	15.7 MB/s -> 20.3 MB/s

Sony Vaio:
	14.6 MB/s -> 33.3 MB/s

I didn't implement the post-resume bio_set_pages_dirty().  I don't really
understand why resume needs to run set_page_dirty() against these pages.

It might be a worry that this code modifies PG_Uptodate, PG_Error and
PG_Locked against the image pages.  Can this possibly affect the resumed-into
kernel?  Hopefully not, if we're atomically restoring its mem_map?

Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Jens Axboe <axboe@suse.de>
Cc: Laurent Riffard <laurent.riffard@free.fr>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h    |   1 +
 kernel/power/power.h    |   1 +
 kernel/power/snapshot.c |  11 ++--
 kernel/power/swap.c     | 138 ++++++++++++++++++++++++------------------------
 mm/page_io.c            |   2 +-
 5 files changed, 81 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3d434cbffe26..e7c36ba2a2db 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -220,6 +220,7 @@ extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
 				struct bio **bio_chain);
+extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
 
 /* linux/mm/swap_state.c */
 extern struct address_space swapper_space;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 57a792982fb9..59ce712f082d 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -58,6 +58,7 @@ struct snapshot_handle {
 	struct pbe	*pbe, *last_pbe;
 	void		*buffer;
 	unsigned int	buf_offset;
+	int		sync_read;
 };
 
 #define data_of(handle)	((handle).buffer + (handle).buf_offset)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 75d4886e648e..591301ae8b7d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -314,7 +314,7 @@ static unsigned int unsafe_pages;
  *	and we count them using unsafe_pages
  */
 
-static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
+static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 {
 	void *res;
 
@@ -828,13 +828,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 	}
 	if (!handle->offset)
 		handle->buffer = buffer;
+	handle->sync_read = 1;
 	if (handle->prev < handle->page) {
 		if (!handle->prev) {
-			error = load_header(handle, (struct swsusp_info *)buffer);
+			error = load_header(handle,
+					(struct swsusp_info *)buffer);
 			if (error)
 				return error;
 		} else if (handle->prev <= nr_meta_pages) {
-			handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
+			handle->pbe = unpack_orig_addresses(buffer,
+							handle->pbe);
 			if (!handle->pbe) {
 				error = prepare_image(handle);
 				if (error)
@@ -842,10 +845,12 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 				handle->pbe = pagedir_nosave;
 				handle->last_pbe = NULL;
 				handle->buffer = get_buffer(handle);
+				handle->sync_read = 0;
 			}
 		} else {
 			handle->pbe = handle->pbe->next;
 			handle->buffer = get_buffer(handle);
+			handle->sync_read = 0;
 		}
 		handle->prev = handle->page;
 	}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 9ab989572164..8309d20b2563 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -22,6 +22,7 @@
 #include <linux/device.h>
 #include <linux/buffer_head.h>
 #include <linux/bio.h>
+#include <linux/blkdev.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/pm.h>
@@ -214,6 +215,8 @@ static int wait_on_bio_chain(struct bio **bio_chain)
 		return 0;
 
 	bio = *bio_chain;
+	if (bio == NULL)
+		return 0;
 	while (bio) {
 		struct page *page;
 
@@ -349,7 +352,8 @@ int swsusp_write(void)
 	int error;
 
 	if ((error = swsusp_swap_check())) {
-		printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
+		printk(KERN_ERR "swsusp: Cannot find swap device, try "
+				"swapon -a.\n");
 		return error;
 	}
 	memset(&snapshot, 0, sizeof(struct snapshot_handle));
@@ -381,27 +385,6 @@ int swsusp_write(void)
 	return error;
 }
 
-/*
- *	Using bio to read from swap.
- *	This code requires a bit more work than just using buffer heads
- *	but, it is the recommended way for 2.5/2.6.
- *	The following are to signal the beginning and end of I/O. Bios
- *	finish asynchronously, while we want them to happen synchronously.
- *	A simple atomic_t, and a wait loop take care of this problem.
- */
-
-static atomic_t io_done = ATOMIC_INIT(0);
-
-static int end_io(struct bio *bio, unsigned int num, int err)
-{
-	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-		printk(KERN_ERR "I/O error reading swsusp image.\n");
-		return -EIO;
-	}
-	atomic_set(&io_done, 0);
-	return 0;
-}
-
 static struct block_device *resume_bdev;
 
 /**
@@ -409,15 +392,15 @@ static struct block_device *resume_bdev;
  *	@rw:	READ or WRITE.
  *	@off	physical offset of page.
  *	@page:	page we're reading or writing.
+ *	@bio_chain: list of pending biod (for async reading)
  *
  *	Straight from the textbook - allocate and initialize the bio.
- *	If we're writing, make sure the page is marked as dirty.
- *	Then submit it and wait.
+ *	If we're reading, make sure the page is marked as dirty.
+ *	Then submit it and, if @bio_chain == NULL, wait.
  */
-
-static int submit(int rw, pgoff_t page_off, void *page)
+static int submit(int rw, pgoff_t page_off, struct page *page,
+			struct bio **bio_chain)
 {
-	int error = 0;
 	struct bio *bio;
 
 	bio = bio_alloc(GFP_ATOMIC, 1);
@@ -425,33 +408,40 @@ static int submit(int rw, pgoff_t page_off, void *page)
 		return -ENOMEM;
 	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
 	bio->bi_bdev = resume_bdev;
-	bio->bi_end_io = end_io;
+	bio->bi_end_io = end_swap_bio_read;
 
-	if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
-		printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
-		error = -EFAULT;
-		goto Done;
+	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+		printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
+		bio_put(bio);
+		return -EFAULT;
 	}
 
-	atomic_set(&io_done, 1);
-	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
-	while (atomic_read(&io_done))
-		yield();
-	if (rw == READ)
-		bio_set_pages_dirty(bio);
- Done:
-	bio_put(bio);
-	return error;
+	lock_page(page);
+	bio_get(bio);
+
+	if (bio_chain == NULL) {
+		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+		wait_on_page_locked(page);
+		if (rw == READ)
+			bio_set_pages_dirty(bio);
+		bio_put(bio);
+	} else {
+		get_page(page);
+		bio->bi_private = *bio_chain;
+		*bio_chain = bio;
+		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+	}
+	return 0;
 }
 
-static int bio_read_page(pgoff_t page_off, void *page)
+static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
 {
-	return submit(READ, page_off, page);
+	return submit(READ, page_off, virt_to_page(addr), bio_chain);
 }
 
-static int bio_write_page(pgoff_t page_off, void *page)
+static int bio_write_page(pgoff_t page_off, void *addr)
 {
-	return submit(WRITE, page_off, page);
+	return submit(WRITE, page_off, virt_to_page(addr), NULL);
 }
 
 /**
@@ -476,7 +466,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
 	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
 	if (!handle->cur)
 		return -ENOMEM;
-	error = bio_read_page(swp_offset(start), handle->cur);
+	error = bio_read_page(swp_offset(start), handle->cur, NULL);
 	if (error) {
 		release_swap_reader(handle);
 		return error;
@@ -485,7 +475,8 @@ static int get_swap_reader(struct swap_map_handle *handle,
 	return 0;
 }
 
-static int swap_read_page(struct swap_map_handle *handle, void *buf)
+static int swap_read_page(struct swap_map_handle *handle, void *buf,
+				struct bio **bio_chain)
 {
 	unsigned long offset;
 	int error;
@@ -495,16 +486,17 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf)
 	offset = handle->cur->entries[handle->k];
 	if (!offset)
 		return -EFAULT;
-	error = bio_read_page(offset, buf);
+	error = bio_read_page(offset, buf, bio_chain);
 	if (error)
 		return error;
 	if (++handle->k >= MAP_PAGE_ENTRIES) {
+		error = wait_on_bio_chain(bio_chain);
 		handle->k = 0;
 		offset = handle->cur->next_swap;
 		if (!offset)
 			release_swap_reader(handle);
-		else
-			error = bio_read_page(offset, handle->cur);
+		else if (!error)
+			error = bio_read_page(offset, handle->cur, NULL);
 	}
 	return error;
 }
@@ -517,38 +509,48 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf)
 
 static int load_image(struct swap_map_handle *handle,
                       struct snapshot_handle *snapshot,
-                      unsigned int nr_pages)
+                      unsigned int nr_to_read)
 {
 	unsigned int m;
-	int ret;
 	int error = 0;
 	struct timeval start;
 	struct timeval stop;
+	struct bio *bio;
+	int err2;
+	unsigned nr_pages;
 
-	printk("Loading image data pages (%u pages) ...     ", nr_pages);
-	m = nr_pages / 100;
+	printk("Loading image data pages (%u pages) ...     ", nr_to_read);
+	m = nr_to_read / 100;
 	if (!m)
 		m = 1;
 	nr_pages = 0;
+	bio = NULL;
 	do_gettimeofday(&start);
-	do {
-		ret = snapshot_write_next(snapshot, PAGE_SIZE);
-		if (ret > 0) {
-			error = swap_read_page(handle, data_of(*snapshot));
-			if (error)
-				break;
-			if (!(nr_pages % m))
-				printk("\b\b\b\b%3d%%", nr_pages / m);
-			nr_pages++;
-		}
-	} while (ret > 0);
+	for ( ; ; ) {
+		error = snapshot_write_next(snapshot, PAGE_SIZE);
+		if (error <= 0)
+			break;
+		error = swap_read_page(handle, data_of(*snapshot), &bio);
+		if (error)
+			break;
+		if (snapshot->sync_read)
+			error = wait_on_bio_chain(&bio);
+		if (error)
+			break;
+		if (!(nr_pages % m))
+			printk("\b\b\b\b%3d%%", nr_pages / m);
+		nr_pages++;
+	}
+	err2 = wait_on_bio_chain(&bio);
 	do_gettimeofday(&stop);
+	if (!error)
+		error = err2;
 	if (!error) {
 		printk("\b\b\b\bdone\n");
 		if (!snapshot_image_loaded(snapshot))
 			error = -ENODATA;
 	}
-	show_speed(&start, &stop, nr_pages, "Read");
+	show_speed(&start, &stop, nr_to_read, "Read");
 	return error;
 }
 
@@ -571,7 +573,7 @@ int swsusp_read(void)
 	header = (struct swsusp_info *)data_of(snapshot);
 	error = get_swap_reader(&handle, swsusp_header.image);
 	if (!error)
-		error = swap_read_page(&handle, header);
+		error = swap_read_page(&handle, header, NULL);
 	if (!error)
 		error = load_image(&handle, &snapshot, header->pages - 1);
 	release_swap_reader(&handle);
@@ -597,7 +599,7 @@ int swsusp_check(void)
 	if (!IS_ERR(resume_bdev)) {
 		set_blocksize(resume_bdev, PAGE_SIZE);
 		memset(&swsusp_header, 0, sizeof(swsusp_header));
-		if ((error = bio_read_page(0, &swsusp_header)))
+		if ((error = bio_read_page(0, &swsusp_header, NULL)))
 			return error;
 		if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
 			memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
diff --git a/mm/page_io.c b/mm/page_io.c
index f46a9862b7ef..d4840ecbf8f9 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -74,7 +74,7 @@ static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err)
 	return 0;
 }
 
-static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
+int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct page *page = bio->bi_io_vec[0].bv_page;
-- 
cgit v1.2.3


From e3920fb42c8ddfe63befb54d95c0e13eabacea9b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:48 -0700
Subject: [PATCH] Disable CPU hotplug during suspend

The current suspend code has to be run on one CPU, so we use the CPU
hotplug to take the non-boot CPUs offline on SMP machines.  However, we
should also make sure that these CPUs will not be enabled by someone else
after we have disabled them.

The functions disable_nonboot_cpus() and enable_nonboot_cpus() are moved to
kernel/cpu.c, because they now refer to some stuff in there that should
better be static.  Also it's better if disable_nonboot_cpus() returns an
error instead of panicking if something goes wrong, and
enable_nonboot_cpus() has no reason to panic(), because the CPUs may have
been enabled by the userland before it tries to take them online.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/cpu.h     |   8 +++
 include/linux/suspend.h |   8 ---
 kernel/cpu.c            | 138 +++++++++++++++++++++++++++++++++++++++++-------
 kernel/power/Makefile   |   2 -
 kernel/power/disk.c     |   7 ++-
 kernel/power/main.c     |  10 ++--
 kernel/power/smp.c      |  62 ----------------------
 kernel/power/user.c     |  14 +++--
 8 files changed, 145 insertions(+), 104 deletions(-)
 delete mode 100644 kernel/power/smp.c

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 8fb344a9abd8..3fef7d67aedc 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
 static inline int cpu_is_offline(int cpu) { return 0; }
 #endif
 
+#ifdef CONFIG_SUSPEND_SMP
+extern int disable_nonboot_cpus(void);
+extern void enable_nonboot_cpus(void);
+#else
+static inline int disable_nonboot_cpus(void) { return 0; }
+static inline void enable_nonboot_cpus(void) {}
+#endif
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 96e31aa64cc7..6e8a06c950f4 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -57,14 +57,6 @@ static inline int software_suspend(void)
 }
 #endif /* CONFIG_PM */
 
-#ifdef CONFIG_SUSPEND_SMP
-extern void disable_nonboot_cpus(void);
-extern void enable_nonboot_cpus(void);
-#else
-static inline void disable_nonboot_cpus(void) {}
-static inline void enable_nonboot_cpus(void) {}
-#endif
-
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f230f9ae01c2..32c96628463e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
+/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ * Should always be manipulated under cpu_add_remove_lock
+ */
+static int cpu_hotplug_disabled;
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
 	return 0;
 }
 
-int cpu_down(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int _cpu_down(unsigned int cpu)
 {
 	int err;
 	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (num_online_cpus() == 1) {
-		err = -EBUSY;
-		goto out;
-	}
+	if (num_online_cpus() == 1)
+		return -EBUSY;
 
-	if (!cpu_online(cpu)) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (!cpu_online(cpu))
+		return -EINVAL;
 
 	err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
 						(void *)(long)cpu);
 	if (err == NOTIFY_BAD) {
 		printk("%s: attempt to take down CPU %u failed\n",
 				__FUNCTION__, cpu);
-		err = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* Ensure that we are not runnable on dying cpu */
@@ -179,22 +179,32 @@ out_thread:
 	err = kthread_stop(p);
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
-out:
+	return err;
+}
+
+int cpu_down(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EBUSY;
+	else
+		err = _cpu_down(cpu);
+
 	mutex_unlock(&cpu_add_remove_lock);
 	return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
 
-int __devinit cpu_up(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int __devinit _cpu_up(unsigned int cpu)
 {
 	int ret;
 	void *hcpu = (void *)(long)cpu;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (cpu_online(cpu) || !cpu_present(cpu)) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (cpu_online(cpu) || !cpu_present(cpu))
+		return -EINVAL;
 
 	ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
 	if (ret == NOTIFY_BAD) {
@@ -219,7 +229,95 @@ out_notify:
 	if (ret != 0)
 		blocking_notifier_call_chain(&cpu_chain,
 				CPU_UP_CANCELED, hcpu);
+
+	return ret;
+}
+
+int __devinit cpu_up(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EBUSY;
+	else
+		err = _cpu_up(cpu);
+
+	mutex_unlock(&cpu_add_remove_lock);
+	return err;
+}
+
+#ifdef CONFIG_SUSPEND_SMP
+static cpumask_t frozen_cpus;
+
+int disable_nonboot_cpus(void)
+{
+	int cpu, first_cpu, error;
+
+	mutex_lock(&cpu_add_remove_lock);
+	first_cpu = first_cpu(cpu_present_map);
+	if (!cpu_online(first_cpu)) {
+		error = _cpu_up(first_cpu);
+		if (error) {
+			printk(KERN_ERR "Could not bring CPU%d up.\n",
+				first_cpu);
+			goto out;
+		}
+	}
+	error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
+	if (error) {
+		printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
+		goto out;
+	}
+	/* We take down all of the non-boot CPUs in one shot to avoid races
+	 * with the userspace trying to use the CPU hotplug at the same time
+	 */
+	cpus_clear(frozen_cpus);
+	printk("Disabling non-boot CPUs ...\n");
+	for_each_online_cpu(cpu) {
+		if (cpu == first_cpu)
+			continue;
+		error = _cpu_down(cpu);
+		if (!error) {
+			cpu_set(cpu, frozen_cpus);
+			printk("CPU%d is down\n", cpu);
+		} else {
+			printk(KERN_ERR "Error taking CPU%d down: %d\n",
+				cpu, error);
+			break;
+		}
+	}
+	if (!error) {
+		BUG_ON(num_online_cpus() > 1);
+		/* Make sure the CPUs won't be enabled by someone else */
+		cpu_hotplug_disabled = 1;
+	} else {
+		printk(KERN_ERR "Non-boot CPUs are not disabled");
+	}
 out:
 	mutex_unlock(&cpu_add_remove_lock);
-	return ret;
+	return error;
+}
+
+void enable_nonboot_cpus(void)
+{
+	int cpu, error;
+
+	/* Allow everyone to use the CPU hotplug again */
+	mutex_lock(&cpu_add_remove_lock);
+	cpu_hotplug_disabled = 0;
+	mutex_unlock(&cpu_add_remove_lock);
+
+	printk("Enabling non-boot CPUs ...\n");
+	for_each_cpu_mask(cpu, frozen_cpus) {
+		error = cpu_up(cpu);
+		if (!error) {
+			printk("CPU%d is up\n", cpu);
+			continue;
+		}
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
+			cpu, error);
+	}
+	cpus_clear(frozen_cpus);
 }
+#endif
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 8d0af3d37a4b..38725f526afc 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,6 +7,4 @@ obj-y				:= main.o process.o console.o
 obj-$(CONFIG_PM_LEGACY)		+= pm.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o disk.o snapshot.o swap.o user.o
 
-obj-$(CONFIG_SUSPEND_SMP)	+= smp.o
-
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e13e74067845..7c7b9b65e365 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pm.h>
+#include <linux/cpu.h>
 
 #include "power.h"
 
@@ -72,7 +73,10 @@ static int prepare_processes(void)
 	int error;
 
 	pm_prepare_console();
-	disable_nonboot_cpus();
+
+	error = disable_nonboot_cpus();
+	if (error)
+		goto enable_cpus;
 
 	if (freeze_processes()) {
 		error = -EBUSY;
@@ -84,6 +88,7 @@ static int prepare_processes(void)
 		return 0;
 thaw:
 	thaw_processes();
+enable_cpus:
 	enable_nonboot_cpus();
 	pm_restore_console();
 	return error;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6d295c776794..4d403323a7bb 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/console.h>
+#include <linux/cpu.h>
 
 #include "power.h"
 
@@ -51,7 +52,7 @@ void pm_set_ops(struct pm_ops * ops)
 
 static int suspend_prepare(suspend_state_t state)
 {
-	int error = 0;
+	int error;
 	unsigned int free_pages;
 
 	if (!pm_ops || !pm_ops->enter)
@@ -59,12 +60,9 @@ static int suspend_prepare(suspend_state_t state)
 
 	pm_prepare_console();
 
-	disable_nonboot_cpus();
-
-	if (num_online_cpus() != 1) {
-		error = -EPERM;
+	error = disable_nonboot_cpus();
+	if (error)
 		goto Enable_cpu;
-	}
 
 	if (freeze_processes()) {
 		error = -EAGAIN;
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
deleted file mode 100644
index 5957312b2d68..000000000000
--- a/kernel/power/smp.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * drivers/power/smp.c - Functions for stopping other CPUs.
- *
- * Copyright 2004 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
- *
- * This file is released under the GPLv2.
- */
-
-#undef DEBUG
-
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/suspend.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <asm/atomic.h>
-#include <asm/tlbflush.h>
-
-/* This is protected by pm_sem semaphore */
-static cpumask_t frozen_cpus;
-
-void disable_nonboot_cpus(void)
-{
-	int cpu, error;
-
-	error = 0;
-	cpus_clear(frozen_cpus);
-	printk("Freezing cpus ...\n");
-	for_each_online_cpu(cpu) {
-		if (cpu == 0)
-			continue;
-		error = cpu_down(cpu);
-		if (!error) {
-			cpu_set(cpu, frozen_cpus);
-			printk("CPU%d is down\n", cpu);
-			continue;
-		}
-		printk("Error taking cpu %d down: %d\n", cpu, error);
-	}
-	BUG_ON(raw_smp_processor_id() != 0);
-	if (error)
-		panic("cpus not sleeping");
-}
-
-void enable_nonboot_cpus(void)
-{
-	int cpu, error;
-
-	printk("Thawing cpus ...\n");
-	for_each_cpu_mask(cpu, frozen_cpus) {
-		error = cpu_up(cpu);
-		if (!error) {
-			printk("CPU%d is up\n", cpu);
-			continue;
-		}
-		printk("Error taking cpu %d up: %d\n", cpu, error);
-		panic("Not enough cpus");
-	}
-	cpus_clear(frozen_cpus);
-}
-
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 3f1539fbe48a..0ef5e4ba39e5 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -19,6 +19,7 @@
 #include <linux/swapops.h>
 #include <linux/pm.h>
 #include <linux/fs.h>
+#include <linux/cpu.h>
 
 #include <asm/uaccess.h>
 
@@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 		if (data->frozen)
 			break;
 		down(&pm_sem);
-		disable_nonboot_cpus();
-		if (freeze_processes()) {
-			thaw_processes();
-			enable_nonboot_cpus();
-			error = -EBUSY;
+		error = disable_nonboot_cpus();
+		if (!error) {
+			error = freeze_processes();
+			if (error) {
+				thaw_processes();
+				error = -EBUSY;
+			}
 		}
+		enable_nonboot_cpus();
 		up(&pm_sem);
 		if (!error)
 			data->frozen = 1;
-- 
cgit v1.2.3


From dcbb5a54f6e3984efa24772394f2225b11495c55 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:51 -0700
Subject: [PATCH] swsusp: clean up suspend header

Remove some things that are no longer used or defined elsewhere from suspend.h
and make the inline version of software_suspend() return the right error code.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 6e8a06c950f4..c11cacf1a13b 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -10,11 +10,11 @@
 #include <linux/pm.h>
 
 /* page backup entry */
-typedef struct pbe {
+struct pbe {
 	unsigned long address;		/* address of the copy */
 	unsigned long orig_address;	/* original address of page */
 	struct pbe *next;
-} suspend_pagedir_t;
+};
 
 #define for_each_pbe(pbe, pblist) \
 	for (pbe = pblist ; pbe ; pbe = pbe->next)
@@ -25,15 +25,6 @@ typedef struct pbe {
 #define for_each_pb_page(pbe, pblist) \
 	for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
 
-
-#define SWAP_FILENAME_MAXLENGTH	32
-
-
-extern dev_t swsusp_resume_device;
-   
-/* mm/vmscan.c */
-extern int shrink_mem(void);
-
 /* mm/page_alloc.c */
 extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
@@ -53,7 +44,7 @@ static inline void pm_restore_console(void) {}
 static inline int software_suspend(void)
 {
 	printk("Warning: fake suspend called\n");
-	return -EPERM;
+	return -ENOSYS;
 }
 #endif /* CONFIG_PM */
 
-- 
cgit v1.2.3


From 940864ddabdb180e02041c4dcd46ba6f9eee732f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:55 -0700
Subject: [PATCH] swsusp: Use memory bitmaps during resume

Make swsusp use memory bitmaps to store its internal information during the
resume phase of the suspend-resume cycle.

If the pfns of saveable pages are saved during the suspend phase instead of
the kernel virtual addresses of these pages, we can use them during the resume
phase directly to set the corresponding bits in a memory bitmap.  Then, this
bitmap is used to mark the page frames corresponding to the pages that were
saveable before the suspend (aka "unsafe" page frames).

Next, we allocate as many page frames as needed to store the entire suspend
image and make sure that there will be some extra free "safe" page frames for
the list of PBEs constructed later.  Subsequently, the image is loaded and, if
possible, the data loaded from it are written into their "original" page
frames (ie.  the ones they had occupied before the suspend).

The image data that cannot be written into their "original" page frames are
loaded into "safe" page frames and their "original" kernel virtual addresses,
as well as the addresses of the "safe" pages containing their copies, are
stored in a list of PBEs.  Finally, the list of PBEs is used to copy the
remaining image data into their "original" page frames (this is done
atomically, by the architecture-dependent parts of swsusp).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h |   9 --
 kernel/power/power.h    |  11 +-
 kernel/power/snapshot.c | 416 +++++++++++++++++++++---------------------------
 kernel/power/swap.c     |   4 +-
 kernel/power/user.c     |   1 +
 5 files changed, 186 insertions(+), 255 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index c11cacf1a13b..b1237f16ecde 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -16,15 +16,6 @@ struct pbe {
 	struct pbe *next;
 };
 
-#define for_each_pbe(pbe, pblist) \
-	for (pbe = pblist ; pbe ; pbe = pbe->next)
-
-#define PBES_PER_PAGE      (PAGE_SIZE/sizeof(struct pbe))
-#define PB_PAGE_SKIP       (PBES_PER_PAGE-1)
-
-#define for_each_pb_page(pbe, pblist) \
-	for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
-
 /* mm/page_alloc.c */
 extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 6e9e2acc34f8..bfe999f7b272 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -81,16 +81,6 @@ struct snapshot_handle {
 	unsigned int	prev;	/* number of the block of PAGE_SIZE bytes that
 				 * was the current one previously
 				 */
-	struct pbe	*pbe;	/* PBE that corresponds to 'buffer' */
-	struct pbe	*last_pbe;	/* When the image is restored (eg. read
-					 * from disk) we can store some image
-					 * data directly in the page frames
-					 * in which they were before suspend.
-					 * In such a case the PBEs that
-					 * correspond to them will be unused.
-					 * This is the last PBE, so far, that
-					 * does not correspond to such data.
-					 */
 	void		*buffer;	/* address of the block to read from
 					 * or write to
 					 */
@@ -113,6 +103,7 @@ extern unsigned int snapshot_additional_pages(struct zone *zone);
 extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
 extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
 extern int snapshot_image_loaded(struct snapshot_handle *handle);
+extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
 
 #define SNAPSHOT_IOC_MAGIC	'3'
 #define SNAPSHOT_FREEZE			_IO(SNAPSHOT_IOC_MAGIC, 1)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 852e0df41719..1b84313cbab5 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -39,7 +39,7 @@ struct pbe *restore_pblist;
 
 static unsigned int nr_copy_pages;
 static unsigned int nr_meta_pages;
-static unsigned long *buffer;
+static void *buffer;
 
 #ifdef CONFIG_HIGHMEM
 unsigned int count_highmem_pages(void)
@@ -172,7 +172,7 @@ static inline int restore_highmem(void) {return 0;}
 #define PG_UNSAFE_CLEAR	1
 #define PG_UNSAFE_KEEP	0
 
-static unsigned int unsafe_pages;
+static unsigned int allocated_unsafe_pages;
 
 static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 {
@@ -183,7 +183,7 @@ static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 		while (res && PageNosaveFree(virt_to_page(res))) {
 			/* The page is unsafe, mark it for swsusp_free() */
 			SetPageNosave(virt_to_page(res));
-			unsafe_pages++;
+			allocated_unsafe_pages++;
 			res = (void *)get_zeroed_page(gfp_mask);
 		}
 	if (res) {
@@ -772,101 +772,10 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
 }
 
 /**
- *	free_pagedir - free pages allocated with alloc_pagedir()
- */
-
-static void free_pagedir(struct pbe *pblist, int clear_nosave_free)
-{
-	struct pbe *pbe;
-
-	while (pblist) {
-		pbe = (pblist + PB_PAGE_SKIP)->next;
-		free_image_page(pblist, clear_nosave_free);
-		pblist = pbe;
-	}
-}
-
-/**
- *	fill_pb_page - Create a list of PBEs on a given memory page
- */
-
-static inline void fill_pb_page(struct pbe *pbpage, unsigned int n)
-{
-	struct pbe *p;
-
-	p = pbpage;
-	pbpage += n - 1;
-	do
-		p->next = p + 1;
-	while (++p < pbpage);
-}
-
-/**
- *	create_pbe_list - Create a list of PBEs on top of a given chain
- *	of memory pages allocated with alloc_pagedir()
+ *	swsusp_free - free pages allocated for the suspend.
  *
- *	This function assumes that pages allocated by alloc_image_page() will
- *	always be zeroed.
- */
-
-static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
-{
-	struct pbe *pbpage;
-	unsigned int num = PBES_PER_PAGE;
-
-	for_each_pb_page (pbpage, pblist) {
-		if (num >= nr_pages)
-			break;
-
-		fill_pb_page(pbpage, PBES_PER_PAGE);
-		num += PBES_PER_PAGE;
-	}
-	if (pbpage) {
-		num -= PBES_PER_PAGE;
-		fill_pb_page(pbpage, nr_pages - num);
-	}
-}
-
-/**
- *	alloc_pagedir - Allocate the page directory.
- *
- *	First, determine exactly how many pages we need and
- *	allocate them.
- *
- *	We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
- *	struct pbe elements (pbes) and the last element in the page points
- *	to the next page.
- *
- *	On each page we set up a list of struct_pbe elements.
- */
-
-static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask,
-				 int safe_needed)
-{
-	unsigned int num;
-	struct pbe *pblist, *pbe;
-
-	if (!nr_pages)
-		return NULL;
-
-	pblist = alloc_image_page(gfp_mask, safe_needed);
-	pbe = pblist;
-	for (num = PBES_PER_PAGE; num < nr_pages; num += PBES_PER_PAGE) {
-		if (!pbe) {
-			free_pagedir(pblist, PG_UNSAFE_CLEAR);
-			return NULL;
-		}
-		pbe += PB_PAGE_SKIP;
-		pbe->next = alloc_image_page(gfp_mask, safe_needed);
-		pbe = pbe->next;
-	}
-	create_pbe_list(pblist, nr_pages);
-	return pblist;
-}
-
-/**
- * Free pages we allocated for suspend. Suspend pages are alocated
- * before atomic copy, so we need to free them after resume.
+ *	Suspend pages are alocated before the atomic copy is made, so we
+ *	need to release them after the resume.
  */
 
 void swsusp_free(void)
@@ -904,14 +813,18 @@ void swsusp_free(void)
 static int enough_free_mem(unsigned int nr_pages)
 {
 	struct zone *zone;
-	unsigned int n = 0;
+	unsigned int free = 0, meta = 0;
 
 	for_each_zone (zone)
-		if (!is_highmem(zone))
-			n += zone->free_pages;
-	pr_debug("swsusp: available memory: %u pages\n", n);
-	return n > (nr_pages + PAGES_FOR_IO +
-		(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
+		if (!is_highmem(zone)) {
+			free += zone->free_pages;
+			meta += snapshot_additional_pages(zone);
+		}
+
+	pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n",
+		nr_pages, PAGES_FOR_IO, meta, free);
+
+	return free > nr_pages + PAGES_FOR_IO + meta;
 }
 
 static int
@@ -961,11 +874,6 @@ asmlinkage int swsusp_save(void)
 	nr_pages = count_data_pages();
 	printk("swsusp: Need to copy %u pages\n", nr_pages);
 
-	pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
-		 nr_pages,
-		 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
-		 PAGES_FOR_IO, nr_free_pages());
-
 	if (!enough_free_mem(nr_pages)) {
 		printk(KERN_ERR "swsusp: Not enough free memory\n");
 		return -ENOMEM;
@@ -1007,22 +915,19 @@ static void init_header(struct swsusp_info *info)
 }
 
 /**
- *	pack_addresses - the addresses corresponding to pfns found in the
- *	bitmap @bm are stored in the array @buf[] (1 page)
+ *	pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
+ *	are stored in the array @buf[] (1 page at a time)
  */
 
 static inline void
-pack_addresses(unsigned long *buf, struct memory_bitmap *bm)
+pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 {
 	int j;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
-		unsigned long pfn = memory_bm_next_pfn(bm);
-
-		if (unlikely(pfn == BM_END_OF_MAP))
+		buf[j] = memory_bm_next_pfn(bm);
+		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
-
-		buf[j] = (unsigned long)page_address(pfn_to_page(pfn));
 	}
 }
 
@@ -1068,7 +973,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
 	if (handle->prev < handle->cur) {
 		if (handle->cur <= nr_meta_pages) {
 			memset(buffer, 0, PAGE_SIZE);
-			pack_addresses(buffer, &orig_bm);
+			pack_pfns(buffer, &orig_bm);
 		} else {
 			unsigned long pfn = memory_bm_next_pfn(&copy_bm);
 
@@ -1094,14 +999,10 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
  *	had been used before suspend
  */
 
-static int mark_unsafe_pages(struct pbe *pblist)
+static int mark_unsafe_pages(struct memory_bitmap *bm)
 {
 	struct zone *zone;
 	unsigned long pfn, max_zone_pfn;
-	struct pbe *p;
-
-	if (!pblist) /* a sanity check */
-		return -EINVAL;
 
 	/* Clear page flags */
 	for_each_zone (zone) {
@@ -1111,30 +1012,37 @@ static int mark_unsafe_pages(struct pbe *pblist)
 				ClearPageNosaveFree(pfn_to_page(pfn));
 	}
 
-	/* Mark orig addresses */
-	for_each_pbe (p, pblist) {
-		if (virt_addr_valid(p->orig_address))
-			SetPageNosaveFree(virt_to_page(p->orig_address));
-		else
-			return -EFAULT;
-	}
+	/* Mark pages that correspond to the "original" pfns as "unsafe" */
+	memory_bm_position_reset(bm);
+	do {
+		pfn = memory_bm_next_pfn(bm);
+		if (likely(pfn != BM_END_OF_MAP)) {
+			if (likely(pfn_valid(pfn)))
+				SetPageNosaveFree(pfn_to_page(pfn));
+			else
+				return -EFAULT;
+		}
+	} while (pfn != BM_END_OF_MAP);
 
-	unsafe_pages = 0;
+	allocated_unsafe_pages = 0;
 
 	return 0;
 }
 
-static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
+static void
+duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
 {
-	/* We assume both lists contain the same number of elements */
-	while (src) {
-		dst->orig_address = src->orig_address;
-		dst = dst->next;
-		src = src->next;
+	unsigned long pfn;
+
+	memory_bm_position_reset(src);
+	pfn = memory_bm_next_pfn(src);
+	while (pfn != BM_END_OF_MAP) {
+		memory_bm_set_bit(dst, pfn);
+		pfn = memory_bm_next_pfn(src);
 	}
 }
 
-static int check_header(struct swsusp_info *info)
+static inline int check_header(struct swsusp_info *info)
 {
 	char *reason = NULL;
 
@@ -1161,19 +1069,14 @@ static int check_header(struct swsusp_info *info)
  *	load header - check the image header and copy data from it
  */
 
-static int load_header(struct snapshot_handle *handle,
-                              struct swsusp_info *info)
+static int
+load_header(struct swsusp_info *info)
 {
 	int error;
-	struct pbe *pblist;
 
+	restore_pblist = NULL;
 	error = check_header(info);
 	if (!error) {
-		pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, PG_ANY);
-		if (!pblist)
-			return -ENOMEM;
-		restore_pblist = pblist;
-		handle->pbe = pblist;
 		nr_copy_pages = info->image_pages;
 		nr_meta_pages = info->pages - info->image_pages - 1;
 	}
@@ -1181,108 +1084,137 @@ static int load_header(struct snapshot_handle *handle,
 }
 
 /**
- *	unpack_orig_addresses - copy the elements of @buf[] (1 page) to
- *	the PBEs in the list starting at @pbe
+ *	unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
+ *	the corresponding bit in the memory bitmap @bm
  */
 
-static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
-                                                struct pbe *pbe)
+static inline void
+unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
 {
 	int j;
 
-	for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
-		pbe->orig_address = buf[j];
-		pbe = pbe->next;
+	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
+		if (unlikely(buf[j] == BM_END_OF_MAP))
+			break;
+
+		memory_bm_set_bit(bm, buf[j]);
 	}
-	return pbe;
 }
 
 /**
- *	prepare_image - use metadata contained in the PBE list
- *	pointed to by restore_pblist to mark the pages that will
- *	be overwritten in the process of restoring the system
- *	memory state from the image ("unsafe" pages) and allocate
- *	memory for the image
+ *	prepare_image - use the memory bitmap @bm to mark the pages that will
+ *	be overwritten in the process of restoring the system memory state
+ *	from the suspend image ("unsafe" pages) and allocate memory for the
+ *	image.
  *
- *	The idea is to allocate the PBE list first and then
- *	allocate as many pages as it's needed for the image data,
- *	but not to assign these pages to the PBEs initially.
- *	Instead, we just mark them as allocated and create a list
- *	of "safe" which will be used later
+ *	The idea is to allocate a new memory bitmap first and then allocate
+ *	as many pages as needed for the image data, but not to assign these
+ *	pages to specific tasks initially.  Instead, we just mark them as
+ *	allocated and create a list of "safe" pages that will be used later.
  */
 
-static struct linked_page *safe_pages;
+#define PBES_PER_LINKED_PAGE	(LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
+
+static struct linked_page *safe_pages_list;
 
-static int prepare_image(struct snapshot_handle *handle)
+static int
+prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 {
-	int error = 0;
-	unsigned int nr_pages = nr_copy_pages;
-	struct pbe *p, *pblist = NULL;
+	unsigned int nr_pages;
+	struct linked_page *sp_list, *lp;
+	int error;
 
-	p = restore_pblist;
-	error = mark_unsafe_pages(p);
-	if (!error) {
-		pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, PG_SAFE);
-		if (pblist)
-			copy_page_backup_list(pblist, p);
-		free_pagedir(p, PG_UNSAFE_KEEP);
-		if (!pblist)
+	error = mark_unsafe_pages(bm);
+	if (error)
+		goto Free;
+
+	error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(new_bm, bm);
+	memory_bm_free(bm, PG_UNSAFE_KEEP);
+	/* Reserve some safe pages for potential later use.
+	 *
+	 * NOTE: This way we make sure there will be enough safe pages for the
+	 * chain_alloc() in get_buffer().  It is a bit wasteful, but
+	 * nr_copy_pages cannot be greater than 50% of the memory anyway.
+	 */
+	sp_list = NULL;
+	/* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
+	nr_pages = nr_copy_pages - allocated_unsafe_pages;
+	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
+	while (nr_pages > 0) {
+		lp = alloc_image_page(GFP_ATOMIC, PG_SAFE);
+		if (!lp) {
 			error = -ENOMEM;
+			goto Free;
+		}
+		lp->next = sp_list;
+		sp_list = lp;
+		nr_pages--;
 	}
-	safe_pages = NULL;
-	if (!error && nr_pages > unsafe_pages) {
-		nr_pages -= unsafe_pages;
-		while (nr_pages--) {
-			struct linked_page *ptr;
-
-			ptr = (void *)get_zeroed_page(GFP_ATOMIC);
-			if (!ptr) {
-				error = -ENOMEM;
-				break;
-			}
-			if (!PageNosaveFree(virt_to_page(ptr))) {
-				/* The page is "safe", add it to the list */
-				ptr->next = safe_pages;
-				safe_pages = ptr;
-			}
-			/* Mark the page as allocated */
-			SetPageNosave(virt_to_page(ptr));
-			SetPageNosaveFree(virt_to_page(ptr));
+	/* Preallocate memory for the image */
+	safe_pages_list = NULL;
+	nr_pages = nr_copy_pages - allocated_unsafe_pages;
+	while (nr_pages > 0) {
+		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
+		if (!lp) {
+			error = -ENOMEM;
+			goto Free;
+		}
+		if (!PageNosaveFree(virt_to_page(lp))) {
+			/* The page is "safe", add it to the list */
+			lp->next = safe_pages_list;
+			safe_pages_list = lp;
 		}
+		/* Mark the page as allocated */
+		SetPageNosave(virt_to_page(lp));
+		SetPageNosaveFree(virt_to_page(lp));
+		nr_pages--;
 	}
-	if (!error) {
-		restore_pblist = pblist;
-	} else {
-		handle->pbe = NULL;
-		swsusp_free();
+	/* Free the reserved safe pages so that chain_alloc() can use them */
+	while (sp_list) {
+		lp = sp_list->next;
+		free_image_page(sp_list, PG_UNSAFE_CLEAR);
+		sp_list = lp;
 	}
+	return 0;
+
+Free:
+	swsusp_free();
 	return error;
 }
 
-static void *get_buffer(struct snapshot_handle *handle)
+/**
+ *	get_buffer - compute the address that snapshot_write_next() should
+ *	set for its caller to write to.
+ */
+
+static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
 {
-	struct pbe *pbe = handle->pbe, *last = handle->last_pbe;
-	struct page *page = virt_to_page(pbe->orig_address);
+	struct pbe *pbe;
+	struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
 
-	if (PageNosave(page) && PageNosaveFree(page)) {
-		/*
-		 * We have allocated the "original" page frame and we can
-		 * use it directly to store the read page
+	if (PageNosave(page) && PageNosaveFree(page))
+		/* We have allocated the "original" page frame and we can
+		 * use it directly to store the loaded page.
 		 */
-		pbe->address = 0;
-		if (last && last->next)
-			last->next = NULL;
-		return (void *)pbe->orig_address;
-	}
-	/*
-	 * The "original" page frame has not been allocated and we have to
-	 * use a "safe" page frame to store the read page
+		return page_address(page);
+
+	/* The "original" page frame has not been allocated and we have to
+	 * use a "safe" page frame to store the loaded page.
 	 */
-	pbe->address = (unsigned long)safe_pages;
-	safe_pages = safe_pages->next;
-	if (last)
-		last->next = pbe;
-	handle->last_pbe = pbe;
+	pbe = chain_alloc(ca, sizeof(struct pbe));
+	if (!pbe) {
+		swsusp_free();
+		return NULL;
+	}
+	pbe->orig_address = (unsigned long)page_address(page);
+	pbe->address = (unsigned long)safe_pages_list;
+	safe_pages_list = safe_pages_list->next;
+	pbe->next = restore_pblist;
+	restore_pblist = pbe;
 	return (void *)pbe->address;
 }
 
@@ -1310,10 +1242,13 @@ static void *get_buffer(struct snapshot_handle *handle)
 
 int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 {
+	static struct chain_allocator ca;
 	int error = 0;
 
+	/* Check if we have already loaded the entire image */
 	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
 		return 0;
+
 	if (!buffer) {
 		/* This makes the buffer be freed by swsusp_free() */
 		buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
@@ -1324,26 +1259,32 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 		handle->buffer = buffer;
 	handle->sync_read = 1;
 	if (handle->prev < handle->cur) {
-		if (!handle->prev) {
-			error = load_header(handle,
-					(struct swsusp_info *)buffer);
+		if (handle->prev == 0) {
+			error = load_header(buffer);
+			if (error)
+				return error;
+
+			error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
 			if (error)
 				return error;
+
 		} else if (handle->prev <= nr_meta_pages) {
-			handle->pbe = unpack_orig_addresses(buffer,
-							handle->pbe);
-			if (!handle->pbe) {
-				error = prepare_image(handle);
+			unpack_orig_pfns(buffer, &copy_bm);
+			if (handle->prev == nr_meta_pages) {
+				error = prepare_image(&orig_bm, &copy_bm);
 				if (error)
 					return error;
-				handle->pbe = restore_pblist;
-				handle->last_pbe = NULL;
-				handle->buffer = get_buffer(handle);
+
+				chain_init(&ca, GFP_ATOMIC, PG_SAFE);
+				memory_bm_position_reset(&orig_bm);
+				restore_pblist = NULL;
+				handle->buffer = get_buffer(&orig_bm, &ca);
 				handle->sync_read = 0;
+				if (!handle->buffer)
+					return -ENOMEM;
 			}
 		} else {
-			handle->pbe = handle->pbe->next;
-			handle->buffer = get_buffer(handle);
+			handle->buffer = get_buffer(&orig_bm, &ca);
 			handle->sync_read = 0;
 		}
 		handle->prev = handle->cur;
@@ -1362,6 +1303,13 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 
 int snapshot_image_loaded(struct snapshot_handle *handle)
 {
-	return !(!handle->pbe || handle->pbe->next || !nr_copy_pages ||
-		handle->cur <= nr_meta_pages + nr_copy_pages);
+	return !(!nr_copy_pages ||
+			handle->cur <= nr_meta_pages + nr_copy_pages);
+}
+
+void snapshot_free_unused_memory(struct snapshot_handle *handle)
+{
+	/* Free only if we have loaded the image entirely */
+	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
+		memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
 }
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 8309d20b2563..9b2ee5344dee 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -331,8 +331,7 @@ static int enough_swap(unsigned int nr_pages)
 	unsigned int free_swap = count_swap_pages(root_swap, 1);
 
 	pr_debug("swsusp: free swap pages: %u\n", free_swap);
-	return free_swap > (nr_pages + PAGES_FOR_IO +
-		(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
+	return free_swap > nr_pages + PAGES_FOR_IO;
 }
 
 /**
@@ -547,6 +546,7 @@ static int load_image(struct swap_map_handle *handle,
 		error = err2;
 	if (!error) {
 		printk("\b\b\b\bdone\n");
+		snapshot_free_unused_memory(snapshot);
 		if (!snapshot_image_loaded(snapshot))
 			error = -ENODATA;
 	}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 0ef5e4ba39e5..2e4499f3e4d9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -193,6 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
 			error = -EPERM;
 			break;
 		}
+		snapshot_free_unused_memory(&data->handle);
 		down(&pm_sem);
 		pm_prepare_console();
 		error = device_suspend(PMSG_FREEZE);
-- 
cgit v1.2.3


From c8eb8b4025175f967af0ba8e933f23aa9954dc35 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:56 -0700
Subject: [PATCH] PM: make it possible to disable console suspending

Change suspend_console() so that it waits for all consoles to flush the
remaining messages and make it possible to switch the console suspending off
with the help of a Kconfig option.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Stefan Seyfried <seife@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/console.h |  5 +++++
 kernel/power/Kconfig    | 11 +++++++++++
 kernel/printk.c         |  3 +++
 3 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/console.h b/include/linux/console.h
index 3bdf2155e565..76a1807726eb 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -120,9 +120,14 @@ extern void console_stop(struct console *);
 extern void console_start(struct console *);
 extern int is_console_locked(void);
 
+#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
 /* Suspend and resume console messages over PM events */
 extern void suspend_console(void);
 extern void resume_console(void);
+#else
+static inline void suspend_console(void) {}
+static inline void resume_console(void) {}
+#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
 
 /* Some debug stub to catch some of the obvious races in the VT code */
 #if 1
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 619ecabf7c58..4b6e2f18e056 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -36,6 +36,17 @@ config PM_DEBUG
 	code. This is helpful when debugging and reporting various PM bugs, 
 	like suspend support.
 
+config DISABLE_CONSOLE_SUSPEND
+	bool "Keep console(s) enabled during suspend/resume (DANGEROUS)"
+	depends on PM && PM_DEBUG
+	default n
+	---help---
+	This option turns off the console suspend mechanism that prevents
+	debug messages from reaching the console during the suspend/resume
+	operations.  This may be helpful when debugging device drivers'
+	suspend/resume routines, but may itself lead to problems, for example
+	if netconsole is used.
+
 config PM_TRACE
 	bool "Suspend/resume event tracing"
 	depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL
diff --git a/kernel/printk.c b/kernel/printk.c
index 1149365e989e..771f5e861bcd 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -721,6 +721,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
 	return 0;
 }
 
+#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
 /**
  * suspend_console - suspend the console subsystem
  *
@@ -728,6 +729,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
  */
 void suspend_console(void)
 {
+	printk("Suspending console(s)\n");
 	acquire_console_sem();
 	console_suspended = 1;
 }
@@ -737,6 +739,7 @@ void resume_console(void)
 	console_suspended = 0;
 	release_console_sem();
 }
+#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
 
 /**
  * acquire_console_sem - lock the console system for exclusive use.
-- 
cgit v1.2.3


From c5c6ba4e08ab9c9e390a0f3a7d9a5c332f5cc6ef Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:58 -0700
Subject: [PATCH] PM: Add pm_trace switch

Add the pm_trace attribute in /sys/power which has to be explicitly set to
one to really enable the "PM tracing" code compiled in when CONFIG_PM_TRACE
is set (which modifies the machine's CMOS clock in unpredictable ways).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/power/interface.txt | 15 +++++++++++++++
 include/linux/resume-trace.h      | 24 ++++++++++++++----------
 kernel/power/main.c               | 30 ++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index 4117802af0f8..a66bec222b16 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -52,3 +52,18 @@ suspend image will be as small as possible.
 
 Reading from this file will display the current image size limit, which
 is set to 500 MB by default.
+
+/sys/power/pm_trace controls the code which saves the last PM event point in
+the RTC across reboots, so that you can debug a machine that just hangs
+during suspend (or more commonly, during resume).  Namely, the RTC is only
+used to save the last PM event point if this file contains '1'.  Initially it
+contains '0' which may be changed to '1' by writing a string representing a
+nonzero integer into it.
+
+To use this debugging feature you should attempt to suspend the machine, then
+reboot it and run
+
+	dmesg -s 1000000 | grep 'hash matches'
+
+CAUTION: Using it will cause your machine's real-time (CMOS) clock to be
+set to a random invalid time after a resume.
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index a376bd4ade39..81e9299ca148 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,21 +3,25 @@
 
 #ifdef CONFIG_PM_TRACE
 
+extern int pm_trace_enabled;
+
 struct device;
 extern void set_trace_device(struct device *);
 extern void generate_resume_trace(void *tracedata, unsigned int user);
 
 #define TRACE_DEVICE(dev) set_trace_device(dev)
-#define TRACE_RESUME(user) do {				\
-	void *tracedata;				\
-	asm volatile("movl $1f,%0\n"			\
-		".section .tracedata,\"a\"\n"		\
-		"1:\t.word %c1\n"			\
-		"\t.long %c2\n"				\
-		".previous"				\
-		:"=r" (tracedata)			\
-		: "i" (__LINE__), "i" (__FILE__));	\
-	generate_resume_trace(tracedata, user);		\
+#define TRACE_RESUME(user) do {					\
+	if (pm_trace_enabled) {					\
+		void *tracedata;				\
+		asm volatile("movl $1f,%0\n"			\
+			".section .tracedata,\"a\"\n"		\
+			"1:\t.word %c1\n"			\
+			"\t.long %c2\n"				\
+			".previous"				\
+			:"=r" (tracedata)			\
+			: "i" (__LINE__), "i" (__FILE__));	\
+		generate_resume_trace(tracedata, user);		\
+	}							\
 } while (0)
 
 #else
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 4d403323a7bb..873228c71dab 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -17,6 +17,7 @@
 #include <linux/pm.h>
 #include <linux/console.h>
 #include <linux/cpu.h>
+#include <linux/resume-trace.h>
 
 #include "power.h"
 
@@ -281,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n
 
 power_attr(state);
 
+#ifdef CONFIG_PM_TRACE
+int pm_trace_enabled;
+
+static ssize_t pm_trace_show(struct subsystem * subsys, char * buf)
+{
+	return sprintf(buf, "%d\n", pm_trace_enabled);
+}
+
+static ssize_t
+pm_trace_store(struct subsystem * subsys, const char * buf, size_t n)
+{
+	int val;
+
+	if (sscanf(buf, "%d", &val) == 1) {
+		pm_trace_enabled = !!val;
+		return n;
+	}
+	return -EINVAL;
+}
+
+power_attr(pm_trace);
+
+static struct attribute * g[] = {
+	&state_attr.attr,
+	&pm_trace_attr.attr,
+	NULL,
+};
+#else
 static struct attribute * g[] = {
 	&state_attr.attr,
 	NULL,
 };
+#endif /* CONFIG_PM_TRACE */
 
 static struct attribute_group attr_group = {
 	.attrs = g,
-- 
cgit v1.2.3


From eb2a2fd91f7c8a53b15063d6f08cf22b9a56cbfb Mon Sep 17 00:00:00 2001
From: Krzysztof Halasa <khc@pm.waw.pl>
Date: Tue, 26 Sep 2006 23:23:45 +0200
Subject: [PATCH] Modularize generic HDLC

This patch enables building of individual WAN protocol support
routines (parts of generic HDLC) as separate modules.
All protocol-private definitions are moved from hdlc.h file
to protocol drivers. User-space interface and interface
between generic HDLC and underlying low-level HDLC drivers
are unchanged.

Signed-off-by: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/wan/Kconfig        |  12 +-
 drivers/net/wan/Makefile       |  19 +-
 drivers/net/wan/hdlc.c         | 368 ++++++++++++++++++++++++++++++++++++++
 drivers/net/wan/hdlc_cisco.c   | 198 ++++++++++++++-------
 drivers/net/wan/hdlc_fr.c      | 389 +++++++++++++++++++++++++----------------
 drivers/net/wan/hdlc_generic.c | 339 -----------------------------------
 drivers/net/wan/hdlc_ppp.c     |  77 ++++++--
 drivers/net/wan/hdlc_raw.c     |  50 +++++-
 drivers/net/wan/hdlc_raw_eth.c |  49 +++++-
 drivers/net/wan/hdlc_x25.c     |  54 ++++--
 include/linux/hdlc.h           | 201 +++++----------------
 include/linux/hdlc/ioctl.h     |  33 ++++
 12 files changed, 1012 insertions(+), 777 deletions(-)
 create mode 100644 drivers/net/wan/hdlc.c
 delete mode 100644 drivers/net/wan/hdlc_generic.c

(limited to 'include/linux')

diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 54b8e492ef97..58b7efbb0750 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -154,7 +154,7 @@ config HDLC
 	  If unsure, say N.
 
 config HDLC_RAW
-	bool "Raw HDLC support"
+	tristate "Raw HDLC support"
 	depends on HDLC
 	help
 	  Generic HDLC driver supporting raw HDLC over WAN connections.
@@ -162,7 +162,7 @@ config HDLC_RAW
 	  If unsure, say N.
 
 config HDLC_RAW_ETH
-	bool "Raw HDLC Ethernet device support"
+	tristate "Raw HDLC Ethernet device support"
 	depends on HDLC
 	help
 	  Generic HDLC driver supporting raw HDLC Ethernet device emulation
@@ -173,7 +173,7 @@ config HDLC_RAW_ETH
 	  If unsure, say N.
 
 config HDLC_CISCO
-	bool "Cisco HDLC support"
+	tristate "Cisco HDLC support"
 	depends on HDLC
 	help
 	  Generic HDLC driver supporting Cisco HDLC over WAN connections.
@@ -181,7 +181,7 @@ config HDLC_CISCO
 	  If unsure, say N.
 
 config HDLC_FR
-	bool "Frame Relay support"
+	tristate "Frame Relay support"
 	depends on HDLC
 	help
 	  Generic HDLC driver supporting Frame Relay over WAN connections.
@@ -189,7 +189,7 @@ config HDLC_FR
 	  If unsure, say N.
 
 config HDLC_PPP
-	bool "Synchronous Point-to-Point Protocol (PPP) support"
+	tristate "Synchronous Point-to-Point Protocol (PPP) support"
 	depends on HDLC
 	help
 	  Generic HDLC driver supporting PPP over WAN connections.
@@ -197,7 +197,7 @@ config HDLC_PPP
 	  If unsure, say N.
 
 config HDLC_X25
-	bool "X.25 protocol support"
+	tristate "X.25 protocol support"
 	depends on HDLC && (LAPB=m && HDLC=m || LAPB=y)
 	help
 	  Generic HDLC driver supporting X.25 over WAN connections.
diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile
index 316ca6869d5e..83ec2c87ba3f 100644
--- a/drivers/net/wan/Makefile
+++ b/drivers/net/wan/Makefile
@@ -9,14 +9,13 @@ cyclomx-y                       := cycx_main.o
 cyclomx-$(CONFIG_CYCLOMX_X25)	+= cycx_x25.o
 cyclomx-objs			:= $(cyclomx-y)  
 
-hdlc-y				:= hdlc_generic.o
-hdlc-$(CONFIG_HDLC_RAW)		+= hdlc_raw.o
-hdlc-$(CONFIG_HDLC_RAW_ETH)	+= hdlc_raw_eth.o
-hdlc-$(CONFIG_HDLC_CISCO)	+= hdlc_cisco.o
-hdlc-$(CONFIG_HDLC_FR)		+= hdlc_fr.o
-hdlc-$(CONFIG_HDLC_PPP)		+= hdlc_ppp.o
-hdlc-$(CONFIG_HDLC_X25)		+= hdlc_x25.o
-hdlc-objs			:= $(hdlc-y)
+obj-$(CONFIG_HDLC)		+= hdlc.o
+obj-$(CONFIG_HDLC_RAW)		+= hdlc_raw.o
+obj-$(CONFIG_HDLC_RAW_ETH)	+= hdlc_raw_eth.o
+obj-$(CONFIG_HDLC_CISCO)	+= hdlc_cisco.o
+obj-$(CONFIG_HDLC_FR)		+= hdlc_fr.o
+obj-$(CONFIG_HDLC_PPP)		+= hdlc_ppp.o	syncppp.o
+obj-$(CONFIG_HDLC_X25)		+= hdlc_x25.o
 
 pc300-y				:= pc300_drv.o
 pc300-$(CONFIG_PC300_MLPPP)	+= pc300_tty.o
@@ -38,10 +37,6 @@ obj-$(CONFIG_CYCLADES_SYNC)	+= cycx_drv.o cyclomx.o
 obj-$(CONFIG_LAPBETHER)		+= lapbether.o
 obj-$(CONFIG_SBNI)		+= sbni.o
 obj-$(CONFIG_PC300)		+= pc300.o
-obj-$(CONFIG_HDLC)		+= hdlc.o
-ifeq ($(CONFIG_HDLC_PPP),y)
-  obj-$(CONFIG_HDLC)		+= syncppp.o
-endif
 obj-$(CONFIG_N2)		+= n2.o
 obj-$(CONFIG_C101)		+= c101.o
 obj-$(CONFIG_WANXL)		+= wanxl.o
diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
new file mode 100644
index 000000000000..db354e0edbe5
--- /dev/null
+++ b/drivers/net/wan/hdlc.c
@@ -0,0 +1,368 @@
+/*
+ * Generic HDLC support routines for Linux
+ *
+ * Copyright (C) 1999 - 2006 Krzysztof Halasa <khc@pm.waw.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * Currently supported:
+ *	* raw IP-in-HDLC
+ *	* Cisco HDLC
+ *	* Frame Relay with ANSI or CCITT LMI (both user and network side)
+ *	* PPP
+ *	* X.25
+ *
+ * Use sethdlc utility to set line parameters, protocol and PVCs
+ *
+ * How does it work:
+ * - proto->open(), close(), start(), stop() calls are serialized.
+ *   The order is: open, [ start, stop ... ] close ...
+ * - proto->start() and stop() are called with spin_lock_irq held.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/errno.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/pkt_sched.h>
+#include <linux/inetdevice.h>
+#include <linux/lapb.h>
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/hdlc.h>
+
+
+static const char* version = "HDLC support module revision 1.20";
+
+#undef DEBUG_LINK
+
+static struct hdlc_proto *first_proto = NULL;
+
+
+static int hdlc_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if ((new_mtu < 68) || (new_mtu > HDLC_MAX_MTU))
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+
+
+static struct net_device_stats *hdlc_get_stats(struct net_device *dev)
+{
+	return hdlc_stats(dev);
+}
+
+
+
+static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev,
+		    struct packet_type *p, struct net_device *orig_dev)
+{
+	struct hdlc_device_desc *desc = dev_to_desc(dev);
+	if (desc->netif_rx)
+		return desc->netif_rx(skb);
+
+	desc->stats.rx_dropped++; /* Shouldn't happen */
+	dev_kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+
+
+static inline void hdlc_proto_start(struct net_device *dev)
+{
+	hdlc_device *hdlc = dev_to_hdlc(dev);
+	if (hdlc->proto->start)
+		return hdlc->proto->start(dev);
+}
+
+
+
+static inline void hdlc_proto_stop(struct net_device *dev)
+{
+	hdlc_device *hdlc = dev_to_hdlc(dev);
+	if (hdlc->proto->stop)
+		return hdlc->proto->stop(dev);
+}
+
+
+
+static int hdlc_device_event(struct notifier_block *this, unsigned long event,
+			     void *ptr)
+{
+	struct net_device *dev = ptr;
+	hdlc_device *hdlc;
+	unsigned long flags;
+	int on;
+ 
+	if (dev->get_stats != hdlc_get_stats)
+		return NOTIFY_DONE; /* not an HDLC device */
+ 
+	if (event != NETDEV_CHANGE)
+		return NOTIFY_DONE; /* Only interrested in carrier changes */
+
+	on = netif_carrier_ok(dev);
+
+#ifdef DEBUG_LINK
+	printk(KERN_DEBUG "%s: hdlc_device_event NETDEV_CHANGE, carrier %i\n",
+	       dev->name, on);
+#endif
+
+	hdlc = dev_to_hdlc(dev);
+	spin_lock_irqsave(&hdlc->state_lock, flags);
+
+	if (hdlc->carrier == on)
+		goto carrier_exit; /* no change in DCD line level */
+
+	hdlc->carrier = on;
+
+	if (!hdlc->open)
+		goto carrier_exit;
+
+	if (hdlc->carrier) {
+		printk(KERN_INFO "%s: Carrier detected\n", dev->name);
+		hdlc_proto_start(dev);
+	} else {
+		printk(KERN_INFO "%s: Carrier lost\n", dev->name);
+		hdlc_proto_stop(dev);
+	}
+
+carrier_exit:
+	spin_unlock_irqrestore(&hdlc->state_lock, flags);
+	return NOTIFY_DONE;
+}
+
+
+
+/* Must be called by hardware driver when HDLC device is being opened */
+int hdlc_open(struct net_device *dev)
+{
+	hdlc_device *hdlc = dev_to_hdlc(dev);
+#ifdef DEBUG_LINK
+	printk(KERN_DEBUG "%s: hdlc_open() carrier %i open %i\n", dev->name,
+	       hdlc->carrier, hdlc->open);
+#endif
+
+	if (hdlc->proto == NULL)
+		return -ENOSYS;	/* no protocol attached */
+
+	if (hdlc->proto->open) {
+		int result = hdlc->proto->open(dev);
+		if (result)
+			return result;
+	}
+
+	spin_lock_irq(&hdlc->state_lock);
+
+	if (hdlc->carrier) {
+		printk(KERN_INFO "%s: Carrier detected\n", dev->name);
+		hdlc_proto_start(dev);
+	} else
+		printk(KERN_INFO "%s: No carrier\n", dev->name);
+
+	hdlc->open = 1;
+
+	spin_unlock_irq(&hdlc->state_lock);
+	return 0;
+}
+
+
+
+/* Must be called by hardware driver when HDLC device is being closed */
+void hdlc_close(struct net_device *dev)
+{
+	hdlc_device *hdlc = dev_to_hdlc(dev);
+#ifdef DEBUG_LINK
+	printk(KERN_DEBUG "%s: hdlc_close() carrier %i open %i\n", dev->name,
+	       hdlc->carrier, hdlc->open);
+#endif
+
+	spin_lock_irq(&hdlc->state_lock);
+
+	hdlc->open = 0;
+	if (hdlc->carrier)
+		hdlc_proto_stop(dev);
+
+	spin_unlock_irq(&hdlc->state_lock);
+
+	if (hdlc->proto->close)
+		hdlc->proto->close(dev);
+}
+
+
+
+int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct hdlc_proto *proto = first_proto;
+	int result;
+
+	if (cmd != SIOCWANDEV)
+		return -EINVAL;
+
+	if (dev_to_hdlc(dev)->proto) {
+		result = dev_to_hdlc(dev)->proto->ioctl(dev, ifr);
+		if (result != -EINVAL)
+			return result;
+	}
+
+	/* Not handled by currently attached protocol (if any) */
+
+	while (proto) {
+		if ((result = proto->ioctl(dev, ifr)) != -EINVAL)
+			return result;
+		proto = proto->next;
+	}
+	return -EINVAL;
+}
+
+void hdlc_setup(struct net_device *dev)
+{
+	hdlc_device *hdlc = dev_to_hdlc(dev);
+
+	dev->get_stats = hdlc_get_stats;
+	dev->change_mtu = hdlc_change_mtu;
+	dev->mtu = HDLC_MAX_MTU;
+
+	dev->type = ARPHRD_RAWHDLC;
+	dev->hard_header_len = 16;
+
+	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+
+	hdlc->carrier = 1;
+	hdlc->open = 0;
+	spin_lock_init(&hdlc->state_lock);
+}
+
+struct net_device *alloc_hdlcdev(void *priv)
+{
+	struct net_device *dev;
+	dev = alloc_netdev(sizeof(struct hdlc_device_desc) +
+			   sizeof(hdlc_device), "hdlc%d", hdlc_setup);
+	if (dev)
+		dev_to_hdlc(dev)->priv = priv;
+	return dev;
+}
+
+void unregister_hdlc_device(struct net_device *dev)
+{
+	rtnl_lock();
+	unregister_netdevice(dev);
+	detach_hdlc_protocol(dev);
+	rtnl_unlock();
+}
+
+
+
+int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
+			 int (*rx)(struct sk_buff *skb), size_t size)
+{
+	detach_hdlc_protocol(dev);
+
+	if (!try_module_get(proto->module))
+		return -ENOSYS;
+
+	if (size)
+		if ((dev_to_hdlc(dev)->state = kmalloc(size,
+						       GFP_KERNEL)) == NULL) {
+			printk(KERN_WARNING "Memory squeeze on"
+			       " hdlc_proto_attach()\n");
+			module_put(proto->module);
+			return -ENOBUFS;
+		}
+	dev_to_hdlc(dev)->proto = proto;
+	dev_to_desc(dev)->netif_rx = rx;
+	return 0;
+}
+
+
+void detach_hdlc_protocol(struct net_device *dev)
+{
+	hdlc_device *hdlc = dev_to_hdlc(dev);
+
+	if (hdlc->proto) {
+		if (hdlc->proto->detach)
+			hdlc->proto->detach(dev);
+		module_put(hdlc->proto->module);
+		hdlc->proto = NULL;
+	}
+	kfree(hdlc->state);
+	hdlc->state = NULL;
+}
+
+
+void register_hdlc_protocol(struct hdlc_proto *proto)
+{
+	proto->next = first_proto;
+	first_proto = proto;
+}
+
+
+void unregister_hdlc_protocol(struct hdlc_proto *proto)
+{
+	struct hdlc_proto **p = &first_proto;
+	while (*p) {
+		if (*p == proto) {
+			*p = proto->next;
+			return;
+		}
+		p = &((*p)->next);
+	}
+}
+
+
+
+MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
+MODULE_DESCRIPTION("HDLC support module");
+MODULE_LICENSE("GPL v2");
+
+EXPORT_SYMBOL(hdlc_open);
+EXPORT_SYMBOL(hdlc_close);
+EXPORT_SYMBOL(hdlc_ioctl);
+EXPORT_SYMBOL(hdlc_setup);
+EXPORT_SYMBOL(alloc_hdlcdev);
+EXPORT_SYMBOL(unregister_hdlc_device);
+EXPORT_SYMBOL(register_hdlc_protocol);
+EXPORT_SYMBOL(unregister_hdlc_protocol);
+EXPORT_SYMBOL(attach_hdlc_protocol);
+EXPORT_SYMBOL(detach_hdlc_protocol);
+
+static struct packet_type hdlc_packet_type = {
+	.type = __constant_htons(ETH_P_HDLC),
+	.func = hdlc_rcv,
+};
+
+
+static struct notifier_block hdlc_notifier = {
+        .notifier_call = hdlc_device_event,
+};
+
+
+static int __init hdlc_module_init(void)
+{
+	int result;
+
+	printk(KERN_INFO "%s\n", version);
+	if ((result = register_netdevice_notifier(&hdlc_notifier)) != 0)
+                return result;
+        dev_add_pack(&hdlc_packet_type);
+	return 0;
+}
+
+
+
+static void __exit hdlc_module_exit(void)
+{
+	dev_remove_pack(&hdlc_packet_type);
+	unregister_netdevice_notifier(&hdlc_notifier);
+}
+
+
+module_init(hdlc_module_init);
+module_exit(hdlc_module_exit);
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index f289daba0c7b..7ec2b2f9b7ee 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -2,7 +2,7 @@
  * Generic HDLC support routines for Linux
  * Cisco HDLC support
  *
- * Copyright (C) 2000 - 2003 Krzysztof Halasa <khc@pm.waw.pl>
+ * Copyright (C) 2000 - 2006 Krzysztof Halasa <khc@pm.waw.pl>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -34,17 +34,56 @@
 #define CISCO_KEEPALIVE_REQ	2	/* Cisco keepalive request */
 
 
+struct hdlc_header {
+	u8 address;
+	u8 control;
+	u16 protocol;
+}__attribute__ ((packed));
+
+
+struct cisco_packet {
+	u32 type;		/* code */
+	u32 par1;
+	u32 par2;
+	u16 rel;		/* reliability */
+	u32 time;
+}__attribute__ ((packed));
+#define	CISCO_PACKET_LEN	18
+#define	CISCO_BIG_PACKET_LEN	20
+
+
+struct cisco_state {
+	cisco_proto settings;
+
+	struct timer_list timer;
+	unsigned long last_poll;
+	int up;
+	int request_sent;
+	u32 txseq; /* TX sequence number */
+	u32 rxseq; /* RX sequence number */
+};
+
+
+static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr);
+
+
+static inline struct cisco_state * state(hdlc_device *hdlc)
+{
+	return(struct cisco_state *)(hdlc->state);
+}
+
+
 static int cisco_hard_header(struct sk_buff *skb, struct net_device *dev,
 			     u16 type, void *daddr, void *saddr,
 			     unsigned int len)
 {
-	hdlc_header *data;
+	struct hdlc_header *data;
 #ifdef DEBUG_HARD_HEADER
 	printk(KERN_DEBUG "%s: cisco_hard_header called\n", dev->name);
 #endif
 
-	skb_push(skb, sizeof(hdlc_header));
-	data = (hdlc_header*)skb->data;
+	skb_push(skb, sizeof(struct hdlc_header));
+	data = (struct hdlc_header*)skb->data;
 	if (type == CISCO_KEEPALIVE)
 		data->address = CISCO_MULTICAST;
 	else
@@ -52,7 +91,7 @@ static int cisco_hard_header(struct sk_buff *skb, struct net_device *dev,
 	data->control = 0;
 	data->protocol = htons(type);
 
-	return sizeof(hdlc_header);
+	return sizeof(struct hdlc_header);
 }
 
 
@@ -61,9 +100,10 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 				 u32 par1, u32 par2)
 {
 	struct sk_buff *skb;
-	cisco_packet *data;
+	struct cisco_packet *data;
 
-	skb = dev_alloc_skb(sizeof(hdlc_header) + sizeof(cisco_packet));
+	skb = dev_alloc_skb(sizeof(struct hdlc_header) +
+			    sizeof(struct cisco_packet));
 	if (!skb) {
 		printk(KERN_WARNING
 		       "%s: Memory squeeze on cisco_keepalive_send()\n",
@@ -72,7 +112,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 	}
 	skb_reserve(skb, 4);
 	cisco_hard_header(skb, dev, CISCO_KEEPALIVE, NULL, NULL, 0);
-	data = (cisco_packet*)(skb->data + 4);
+	data = (struct cisco_packet*)(skb->data + 4);
 
 	data->type = htonl(type);
 	data->par1 = htonl(par1);
@@ -81,7 +121,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 	/* we will need do_div here if 1000 % HZ != 0 */
 	data->time = htonl((jiffies - INITIAL_JIFFIES) * (1000 / HZ));
 
-	skb_put(skb, sizeof(cisco_packet));
+	skb_put(skb, sizeof(struct cisco_packet));
 	skb->priority = TC_PRIO_CONTROL;
 	skb->dev = dev;
 	skb->nh.raw = skb->data;
@@ -93,9 +133,9 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 
 static __be16 cisco_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
-	hdlc_header *data = (hdlc_header*)skb->data;
+	struct hdlc_header *data = (struct hdlc_header*)skb->data;
 
-	if (skb->len < sizeof(hdlc_header))
+	if (skb->len < sizeof(struct hdlc_header))
 		return __constant_htons(ETH_P_HDLC);
 
 	if (data->address != CISCO_MULTICAST &&
@@ -106,7 +146,7 @@ static __be16 cisco_type_trans(struct sk_buff *skb, struct net_device *dev)
 	case __constant_htons(ETH_P_IP):
 	case __constant_htons(ETH_P_IPX):
 	case __constant_htons(ETH_P_IPV6):
-		skb_pull(skb, sizeof(hdlc_header));
+		skb_pull(skb, sizeof(struct hdlc_header));
 		return data->protocol;
 	default:
 		return __constant_htons(ETH_P_HDLC);
@@ -118,12 +158,12 @@ static int cisco_rx(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	hdlc_header *data = (hdlc_header*)skb->data;
-	cisco_packet *cisco_data;
+	struct hdlc_header *data = (struct hdlc_header*)skb->data;
+	struct cisco_packet *cisco_data;
 	struct in_device *in_dev;
 	u32 addr, mask;
 
-	if (skb->len < sizeof(hdlc_header))
+	if (skb->len < sizeof(struct hdlc_header))
 		goto rx_error;
 
 	if (data->address != CISCO_MULTICAST &&
@@ -137,15 +177,17 @@ static int cisco_rx(struct sk_buff *skb)
 		return NET_RX_SUCCESS;
 
 	case CISCO_KEEPALIVE:
-		if (skb->len != sizeof(hdlc_header) + CISCO_PACKET_LEN &&
-		    skb->len != sizeof(hdlc_header) + CISCO_BIG_PACKET_LEN) {
-			printk(KERN_INFO "%s: Invalid length of Cisco "
-			       "control packet (%d bytes)\n",
-			       dev->name, skb->len);
+		if ((skb->len != sizeof(struct hdlc_header) +
+		     CISCO_PACKET_LEN) &&
+		    (skb->len != sizeof(struct hdlc_header) +
+		     CISCO_BIG_PACKET_LEN)) {
+			printk(KERN_INFO "%s: Invalid length of Cisco control"
+			       " packet (%d bytes)\n", dev->name, skb->len);
 			goto rx_error;
 		}
 
-		cisco_data = (cisco_packet*)(skb->data + sizeof(hdlc_header));
+		cisco_data = (struct cisco_packet*)(skb->data + sizeof
+						    (struct hdlc_header));
 
 		switch(ntohl (cisco_data->type)) {
 		case CISCO_ADDR_REQ: /* Stolen from syncppp.c :-) */
@@ -178,11 +220,11 @@ static int cisco_rx(struct sk_buff *skb)
 			goto rx_error;
 
 		case CISCO_KEEPALIVE_REQ:
-			hdlc->state.cisco.rxseq = ntohl(cisco_data->par1);
-			if (hdlc->state.cisco.request_sent &&
-			    ntohl(cisco_data->par2)==hdlc->state.cisco.txseq) {
-				hdlc->state.cisco.last_poll = jiffies;
-				if (!hdlc->state.cisco.up) {
+			state(hdlc)->rxseq = ntohl(cisco_data->par1);
+			if (state(hdlc)->request_sent &&
+			    ntohl(cisco_data->par2) == state(hdlc)->txseq) {
+				state(hdlc)->last_poll = jiffies;
+				if (!state(hdlc)->up) {
 					u32 sec, min, hrs, days;
 					sec = ntohl(cisco_data->time) / 1000;
 					min = sec / 60; sec -= min * 60;
@@ -193,7 +235,7 @@ static int cisco_rx(struct sk_buff *skb)
 					       dev->name, days, hrs,
 					       min, sec);
 					netif_dormant_off(dev);
-					hdlc->state.cisco.up = 1;
+					state(hdlc)->up = 1;
 				}
 			}
 
@@ -208,7 +250,7 @@ static int cisco_rx(struct sk_buff *skb)
 	return NET_RX_DROP;
 
  rx_error:
-	hdlc->stats.rx_errors++; /* Mark error */
+	dev_to_desc(dev)->stats.rx_errors++; /* Mark error */
 	dev_kfree_skb_any(skb);
 	return NET_RX_DROP;
 }
@@ -220,23 +262,22 @@ static void cisco_timer(unsigned long arg)
 	struct net_device *dev = (struct net_device *)arg;
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 
-	if (hdlc->state.cisco.up &&
-	    time_after(jiffies, hdlc->state.cisco.last_poll +
-		       hdlc->state.cisco.settings.timeout * HZ)) {
-		hdlc->state.cisco.up = 0;
+	if (state(hdlc)->up &&
+	    time_after(jiffies, state(hdlc)->last_poll +
+		       state(hdlc)->settings.timeout * HZ)) {
+		state(hdlc)->up = 0;
 		printk(KERN_INFO "%s: Link down\n", dev->name);
 		netif_dormant_on(dev);
 	}
 
-	cisco_keepalive_send(dev, CISCO_KEEPALIVE_REQ,
-			     ++hdlc->state.cisco.txseq,
-			     hdlc->state.cisco.rxseq);
-	hdlc->state.cisco.request_sent = 1;
-	hdlc->state.cisco.timer.expires = jiffies +
-		hdlc->state.cisco.settings.interval * HZ;
-	hdlc->state.cisco.timer.function = cisco_timer;
-	hdlc->state.cisco.timer.data = arg;
-	add_timer(&hdlc->state.cisco.timer);
+	cisco_keepalive_send(dev, CISCO_KEEPALIVE_REQ, ++state(hdlc)->txseq,
+			     state(hdlc)->rxseq);
+	state(hdlc)->request_sent = 1;
+	state(hdlc)->timer.expires = jiffies +
+		state(hdlc)->settings.interval * HZ;
+	state(hdlc)->timer.function = cisco_timer;
+	state(hdlc)->timer.data = arg;
+	add_timer(&state(hdlc)->timer);
 }
 
 
@@ -244,15 +285,15 @@ static void cisco_timer(unsigned long arg)
 static void cisco_start(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	hdlc->state.cisco.up = 0;
-	hdlc->state.cisco.request_sent = 0;
-	hdlc->state.cisco.txseq = hdlc->state.cisco.rxseq = 0;
-
-	init_timer(&hdlc->state.cisco.timer);
-	hdlc->state.cisco.timer.expires = jiffies + HZ; /*First poll after 1s*/
-	hdlc->state.cisco.timer.function = cisco_timer;
-	hdlc->state.cisco.timer.data = (unsigned long)dev;
-	add_timer(&hdlc->state.cisco.timer);
+	state(hdlc)->up = 0;
+	state(hdlc)->request_sent = 0;
+	state(hdlc)->txseq = state(hdlc)->rxseq = 0;
+
+	init_timer(&state(hdlc)->timer);
+	state(hdlc)->timer.expires = jiffies + HZ; /*First poll after 1s*/
+	state(hdlc)->timer.function = cisco_timer;
+	state(hdlc)->timer.data = (unsigned long)dev;
+	add_timer(&state(hdlc)->timer);
 }
 
 
@@ -260,15 +301,24 @@ static void cisco_start(struct net_device *dev)
 static void cisco_stop(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	del_timer_sync(&hdlc->state.cisco.timer);
+	del_timer_sync(&state(hdlc)->timer);
 	netif_dormant_on(dev);
-	hdlc->state.cisco.up = 0;
-	hdlc->state.cisco.request_sent = 0;
+	state(hdlc)->up = 0;
+	state(hdlc)->request_sent = 0;
 }
 
 
-int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
+static struct hdlc_proto proto = {
+	.start		= cisco_start,
+	.stop		= cisco_stop,
+	.type_trans	= cisco_type_trans,
+	.ioctl		= cisco_ioctl,
+	.module		= THIS_MODULE,
+};
+ 
+ 
+static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
 {
 	cisco_proto __user *cisco_s = ifr->ifr_settings.ifs_ifsu.cisco;
 	const size_t size = sizeof(cisco_proto);
@@ -278,12 +328,14 @@ int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	switch (ifr->ifr_settings.type) {
 	case IF_GET_PROTO:
+		if (dev_to_hdlc(dev)->proto != &proto)
+			return -EINVAL;
 		ifr->ifr_settings.type = IF_PROTO_CISCO;
 		if (ifr->ifr_settings.size < size) {
 			ifr->ifr_settings.size = size; /* data size wanted */
 			return -ENOBUFS;
 		}
-		if (copy_to_user(cisco_s, &hdlc->state.cisco.settings, size))
+		if (copy_to_user(cisco_s, &state(hdlc)->settings, size))
 			return -EFAULT;
 		return 0;
 
@@ -302,19 +354,15 @@ int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
 			return -EINVAL;
 
 		result=hdlc->attach(dev, ENCODING_NRZ,PARITY_CRC16_PR1_CCITT);
-
 		if (result)
 			return result;
 
-		hdlc_proto_detach(hdlc);
-		memcpy(&hdlc->state.cisco.settings, &new_settings, size);
-		memset(&hdlc->proto, 0, sizeof(hdlc->proto));
+		result = attach_hdlc_protocol(dev, &proto, cisco_rx,
+					      sizeof(struct cisco_state));
+		if (result)
+			return result;
 
-		hdlc->proto.start = cisco_start;
-		hdlc->proto.stop = cisco_stop;
-		hdlc->proto.netif_rx = cisco_rx;
-		hdlc->proto.type_trans = cisco_type_trans;
-		hdlc->proto.id = IF_PROTO_CISCO;
+		memcpy(&state(hdlc)->settings, &new_settings, size);
 		dev->hard_start_xmit = hdlc->xmit;
 		dev->hard_header = cisco_hard_header;
 		dev->hard_header_cache = NULL;
@@ -327,3 +375,25 @@ int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	return -EINVAL;
 }
+
+
+static int __init mod_init(void)
+{
+	register_hdlc_protocol(&proto);
+	return 0;
+}
+
+
+
+static void __exit mod_exit(void)
+{
+	unregister_hdlc_protocol(&proto);
+}
+
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
+MODULE_DESCRIPTION("Cisco HDLC protocol support for generic HDLC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 7bb737bbdeb9..b45ab680d2d6 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -2,7 +2,7 @@
  * Generic HDLC support routines for Linux
  * Frame Relay support
  *
- * Copyright (C) 1999 - 2005 Krzysztof Halasa <khc@pm.waw.pl>
+ * Copyright (C) 1999 - 2006 Krzysztof Halasa <khc@pm.waw.pl>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -52,6 +52,8 @@
 #undef DEBUG_PKT
 #undef DEBUG_ECN
 #undef DEBUG_LINK
+#undef DEBUG_PROTO
+#undef DEBUG_PVC
 
 #define FR_UI			0x03
 #define FR_PAD			0x00
@@ -115,13 +117,53 @@ typedef struct {
 }__attribute__ ((packed)) fr_hdr;
 
 
+typedef struct pvc_device_struct {
+	struct net_device *frad;
+	struct net_device *main;
+	struct net_device *ether;	/* bridged Ethernet interface	*/
+	struct pvc_device_struct *next;	/* Sorted in ascending DLCI order */
+	int dlci;
+	int open_count;
+
+	struct {
+		unsigned int new: 1;
+		unsigned int active: 1;
+		unsigned int exist: 1;
+		unsigned int deleted: 1;
+		unsigned int fecn: 1;
+		unsigned int becn: 1;
+		unsigned int bandwidth;	/* Cisco LMI reporting only */
+	}state;
+}pvc_device;
+
+
+struct frad_state {
+	fr_proto settings;
+	pvc_device *first_pvc;
+	int dce_pvc_count;
+
+	struct timer_list timer;
+	unsigned long last_poll;
+	int reliable;
+	int dce_changed;
+	int request;
+	int fullrep_sent;
+	u32 last_errors; /* last errors bit list */
+	u8 n391cnt;
+	u8 txseq; /* TX sequence number */
+	u8 rxseq; /* RX sequence number */
+};
+
+
+static int fr_ioctl(struct net_device *dev, struct ifreq *ifr);
+
+
 static inline u16 q922_to_dlci(u8 *hdr)
 {
 	return ((hdr[0] & 0xFC) << 2) | ((hdr[1] & 0xF0) >> 4);
 }
 
 
-
 static inline void dlci_to_q922(u8 *hdr, u16 dlci)
 {
 	hdr[0] = (dlci >> 2) & 0xFC;
@@ -129,10 +171,21 @@ static inline void dlci_to_q922(u8 *hdr, u16 dlci)
 }
 
 
+static inline struct frad_state * state(hdlc_device *hdlc)
+{
+	return(struct frad_state *)(hdlc->state);
+}
+
+
+static __inline__ pvc_device* dev_to_pvc(struct net_device *dev)
+{
+	return dev->priv;
+}
+
 
 static inline pvc_device* find_pvc(hdlc_device *hdlc, u16 dlci)
 {
-	pvc_device *pvc = hdlc->state.fr.first_pvc;
+	pvc_device *pvc = state(hdlc)->first_pvc;
 
 	while (pvc) {
 		if (pvc->dlci == dlci)
@@ -146,10 +199,10 @@ static inline pvc_device* find_pvc(hdlc_device *hdlc, u16 dlci)
 }
 
 
-static inline pvc_device* add_pvc(struct net_device *dev, u16 dlci)
+static pvc_device* add_pvc(struct net_device *dev, u16 dlci)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	pvc_device *pvc, **pvc_p = &hdlc->state.fr.first_pvc;
+	pvc_device *pvc, **pvc_p = &state(hdlc)->first_pvc;
 
 	while (*pvc_p) {
 		if ((*pvc_p)->dlci == dlci)
@@ -160,12 +213,15 @@ static inline pvc_device* add_pvc(struct net_device *dev, u16 dlci)
 	}
 
 	pvc = kmalloc(sizeof(pvc_device), GFP_ATOMIC);
+#ifdef DEBUG_PVC
+	printk(KERN_DEBUG "add_pvc: allocated pvc %p, frad %p\n", pvc, dev);
+#endif
 	if (!pvc)
 		return NULL;
 
 	memset(pvc, 0, sizeof(pvc_device));
 	pvc->dlci = dlci;
-	pvc->master = dev;
+	pvc->frad = dev;
 	pvc->next = *pvc_p;	/* Put it in the chain */
 	*pvc_p = pvc;
 	return pvc;
@@ -174,7 +230,7 @@ static inline pvc_device* add_pvc(struct net_device *dev, u16 dlci)
 
 static inline int pvc_is_used(pvc_device *pvc)
 {
-	return pvc->main != NULL || pvc->ether != NULL;
+	return pvc->main || pvc->ether;
 }
 
 
@@ -200,11 +256,14 @@ static inline void pvc_carrier(int on, pvc_device *pvc)
 
 static inline void delete_unused_pvcs(hdlc_device *hdlc)
 {
-	pvc_device **pvc_p = &hdlc->state.fr.first_pvc;
+	pvc_device **pvc_p = &state(hdlc)->first_pvc;
 
 	while (*pvc_p) {
 		if (!pvc_is_used(*pvc_p)) {
 			pvc_device *pvc = *pvc_p;
+#ifdef DEBUG_PVC
+			printk(KERN_DEBUG "freeing unused pvc: %p\n", pvc);
+#endif
 			*pvc_p = pvc->next;
 			kfree(pvc);
 			continue;
@@ -295,16 +354,16 @@ static int pvc_open(struct net_device *dev)
 {
 	pvc_device *pvc = dev_to_pvc(dev);
 
-	if ((pvc->master->flags & IFF_UP) == 0)
-		return -EIO;  /* Master must be UP in order to activate PVC */
+	if ((pvc->frad->flags & IFF_UP) == 0)
+		return -EIO;  /* Frad must be UP in order to activate PVC */
 
 	if (pvc->open_count++ == 0) {
-		hdlc_device *hdlc = dev_to_hdlc(pvc->master);
-		if (hdlc->state.fr.settings.lmi == LMI_NONE)
-			pvc->state.active = netif_carrier_ok(pvc->master);
+		hdlc_device *hdlc = dev_to_hdlc(pvc->frad);
+		if (state(hdlc)->settings.lmi == LMI_NONE)
+			pvc->state.active = netif_carrier_ok(pvc->frad);
 
 		pvc_carrier(pvc->state.active, pvc);
-		hdlc->state.fr.dce_changed = 1;
+		state(hdlc)->dce_changed = 1;
 	}
 	return 0;
 }
@@ -316,12 +375,12 @@ static int pvc_close(struct net_device *dev)
 	pvc_device *pvc = dev_to_pvc(dev);
 
 	if (--pvc->open_count == 0) {
-		hdlc_device *hdlc = dev_to_hdlc(pvc->master);
-		if (hdlc->state.fr.settings.lmi == LMI_NONE)
+		hdlc_device *hdlc = dev_to_hdlc(pvc->frad);
+		if (state(hdlc)->settings.lmi == LMI_NONE)
 			pvc->state.active = 0;
 
-		if (hdlc->state.fr.settings.dce) {
-			hdlc->state.fr.dce_changed = 1;
+		if (state(hdlc)->settings.dce) {
+			state(hdlc)->dce_changed = 1;
 			pvc->state.active = 0;
 		}
 	}
@@ -348,7 +407,7 @@ static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		}
 
 		info.dlci = pvc->dlci;
-		memcpy(info.master, pvc->master->name, IFNAMSIZ);
+		memcpy(info.master, pvc->frad->name, IFNAMSIZ);
 		if (copy_to_user(ifr->ifr_settings.ifs_ifsu.fr_pvc_info,
 				 &info, sizeof(info)))
 			return -EFAULT;
@@ -361,7 +420,7 @@ static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static inline struct net_device_stats *pvc_get_stats(struct net_device *dev)
 {
-	return netdev_priv(dev);
+	return &dev_to_desc(dev)->stats;
 }
 
 
@@ -393,7 +452,7 @@ static int pvc_xmit(struct sk_buff *skb, struct net_device *dev)
 			stats->tx_packets++;
 			if (pvc->state.fecn) /* TX Congestion counter */
 				stats->tx_compressed++;
-			skb->dev = pvc->master;
+			skb->dev = pvc->frad;
 			dev_queue_xmit(skb);
 			return 0;
 		}
@@ -419,7 +478,7 @@ static int pvc_change_mtu(struct net_device *dev, int new_mtu)
 static inline void fr_log_dlci_active(pvc_device *pvc)
 {
 	printk(KERN_INFO "%s: DLCI %d [%s%s%s]%s %s\n",
-	       pvc->master->name,
+	       pvc->frad->name,
 	       pvc->dlci,
 	       pvc->main ? pvc->main->name : "",
 	       pvc->main && pvc->ether ? " " : "",
@@ -438,21 +497,20 @@ static inline u8 fr_lmi_nextseq(u8 x)
 }
 
 
-
 static void fr_lmi_send(struct net_device *dev, int fullrep)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	struct sk_buff *skb;
-	pvc_device *pvc = hdlc->state.fr.first_pvc;
-	int lmi = hdlc->state.fr.settings.lmi;
-	int dce = hdlc->state.fr.settings.dce;
+	pvc_device *pvc = state(hdlc)->first_pvc;
+	int lmi = state(hdlc)->settings.lmi;
+	int dce = state(hdlc)->settings.dce;
 	int len = lmi == LMI_ANSI ? LMI_ANSI_LENGTH : LMI_CCITT_CISCO_LENGTH;
 	int stat_len = (lmi == LMI_CISCO) ? 6 : 3;
 	u8 *data;
 	int i = 0;
 
 	if (dce && fullrep) {
-		len += hdlc->state.fr.dce_pvc_count * (2 + stat_len);
+		len += state(hdlc)->dce_pvc_count * (2 + stat_len);
 		if (len > HDLC_MAX_MRU) {
 			printk(KERN_WARNING "%s: Too many PVCs while sending "
 			       "LMI full report\n", dev->name);
@@ -486,8 +544,9 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 	data[i++] = fullrep ? LMI_FULLREP : LMI_INTEGRITY;
 	data[i++] = lmi == LMI_CCITT ? LMI_CCITT_ALIVE : LMI_ANSI_CISCO_ALIVE;
 	data[i++] = LMI_INTEG_LEN;
-	data[i++] = hdlc->state.fr.txseq =fr_lmi_nextseq(hdlc->state.fr.txseq);
-	data[i++] = hdlc->state.fr.rxseq;
+	data[i++] = state(hdlc)->txseq =
+		fr_lmi_nextseq(state(hdlc)->txseq);
+	data[i++] = state(hdlc)->rxseq;
 
 	if (dce && fullrep) {
 		while (pvc) {
@@ -496,7 +555,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 			data[i++] = stat_len;
 
 			/* LMI start/restart */
-			if (hdlc->state.fr.reliable && !pvc->state.exist) {
+			if (state(hdlc)->reliable && !pvc->state.exist) {
 				pvc->state.exist = pvc->state.new = 1;
 				fr_log_dlci_active(pvc);
 			}
@@ -541,15 +600,15 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 static void fr_set_link_state(int reliable, struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	pvc_device *pvc = hdlc->state.fr.first_pvc;
+	pvc_device *pvc = state(hdlc)->first_pvc;
 
-	hdlc->state.fr.reliable = reliable;
+	state(hdlc)->reliable = reliable;
 	if (reliable) {
 		netif_dormant_off(dev);
-		hdlc->state.fr.n391cnt = 0; /* Request full status */
-		hdlc->state.fr.dce_changed = 1;
+		state(hdlc)->n391cnt = 0; /* Request full status */
+		state(hdlc)->dce_changed = 1;
 
-		if (hdlc->state.fr.settings.lmi == LMI_NONE) {
+		if (state(hdlc)->settings.lmi == LMI_NONE) {
 			while (pvc) {	/* Activate all PVCs */
 				pvc_carrier(1, pvc);
 				pvc->state.exist = pvc->state.active = 1;
@@ -563,7 +622,7 @@ static void fr_set_link_state(int reliable, struct net_device *dev)
 			pvc_carrier(0, pvc);
 			pvc->state.exist = pvc->state.active = 0;
 			pvc->state.new = 0;
-			if (!hdlc->state.fr.settings.dce)
+			if (!state(hdlc)->settings.dce)
 				pvc->state.bandwidth = 0;
 			pvc = pvc->next;
 		}
@@ -571,7 +630,6 @@ static void fr_set_link_state(int reliable, struct net_device *dev)
 }
 
 
-
 static void fr_timer(unsigned long arg)
 {
 	struct net_device *dev = (struct net_device *)arg;
@@ -579,62 +637,61 @@ static void fr_timer(unsigned long arg)
 	int i, cnt = 0, reliable;
 	u32 list;
 
-	if (hdlc->state.fr.settings.dce) {
-		reliable = hdlc->state.fr.request &&
-			time_before(jiffies, hdlc->state.fr.last_poll +
-				    hdlc->state.fr.settings.t392 * HZ);
-		hdlc->state.fr.request = 0;
+	if (state(hdlc)->settings.dce) {
+		reliable = state(hdlc)->request &&
+			time_before(jiffies, state(hdlc)->last_poll +
+				    state(hdlc)->settings.t392 * HZ);
+		state(hdlc)->request = 0;
 	} else {
-		hdlc->state.fr.last_errors <<= 1; /* Shift the list */
-		if (hdlc->state.fr.request) {
-			if (hdlc->state.fr.reliable)
+		state(hdlc)->last_errors <<= 1; /* Shift the list */
+		if (state(hdlc)->request) {
+			if (state(hdlc)->reliable)
 				printk(KERN_INFO "%s: No LMI status reply "
 				       "received\n", dev->name);
-			hdlc->state.fr.last_errors |= 1;
+			state(hdlc)->last_errors |= 1;
 		}
 
-		list = hdlc->state.fr.last_errors;
-		for (i = 0; i < hdlc->state.fr.settings.n393; i++, list >>= 1)
+		list = state(hdlc)->last_errors;
+		for (i = 0; i < state(hdlc)->settings.n393; i++, list >>= 1)
 			cnt += (list & 1);	/* errors count */
 
-		reliable = (cnt < hdlc->state.fr.settings.n392);
+		reliable = (cnt < state(hdlc)->settings.n392);
 	}
 
-	if (hdlc->state.fr.reliable != reliable) {
+	if (state(hdlc)->reliable != reliable) {
 		printk(KERN_INFO "%s: Link %sreliable\n", dev->name,
 		       reliable ? "" : "un");
 		fr_set_link_state(reliable, dev);
 	}
 
-	if (hdlc->state.fr.settings.dce)
-		hdlc->state.fr.timer.expires = jiffies +
-			hdlc->state.fr.settings.t392 * HZ;
+	if (state(hdlc)->settings.dce)
+		state(hdlc)->timer.expires = jiffies +
+			state(hdlc)->settings.t392 * HZ;
 	else {
-		if (hdlc->state.fr.n391cnt)
-			hdlc->state.fr.n391cnt--;
+		if (state(hdlc)->n391cnt)
+			state(hdlc)->n391cnt--;
 
-		fr_lmi_send(dev, hdlc->state.fr.n391cnt == 0);
+		fr_lmi_send(dev, state(hdlc)->n391cnt == 0);
 
-		hdlc->state.fr.last_poll = jiffies;
-		hdlc->state.fr.request = 1;
-		hdlc->state.fr.timer.expires = jiffies +
-			hdlc->state.fr.settings.t391 * HZ;
+		state(hdlc)->last_poll = jiffies;
+		state(hdlc)->request = 1;
+		state(hdlc)->timer.expires = jiffies +
+			state(hdlc)->settings.t391 * HZ;
 	}
 
-	hdlc->state.fr.timer.function = fr_timer;
-	hdlc->state.fr.timer.data = arg;
-	add_timer(&hdlc->state.fr.timer);
+	state(hdlc)->timer.function = fr_timer;
+	state(hdlc)->timer.data = arg;
+	add_timer(&state(hdlc)->timer);
 }
 
 
-
 static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	pvc_device *pvc;
 	u8 rxseq, txseq;
-	int lmi = hdlc->state.fr.settings.lmi;
-	int dce = hdlc->state.fr.settings.dce;
+	int lmi = state(hdlc)->settings.lmi;
+	int dce = state(hdlc)->settings.dce;
 	int stat_len = (lmi == LMI_CISCO) ? 6 : 3, reptype, error, no_ram, i;
 
 	if (skb->len < (lmi == LMI_ANSI ? LMI_ANSI_LENGTH :
@@ -645,8 +702,8 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 
 	if (skb->data[3] != (lmi == LMI_CISCO ? NLPID_CISCO_LMI :
 			     NLPID_CCITT_ANSI_LMI)) {
-		printk(KERN_INFO "%s: Received non-LMI frame with LMI"
-		       " DLCI\n", dev->name);
+		printk(KERN_INFO "%s: Received non-LMI frame with LMI DLCI\n",
+		       dev->name);
 		return 1;
 	}
 
@@ -706,53 +763,53 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 	}
 	i++;
 
-	hdlc->state.fr.rxseq = skb->data[i++]; /* TX sequence from peer */
+	state(hdlc)->rxseq = skb->data[i++]; /* TX sequence from peer */
 	rxseq = skb->data[i++];	/* Should confirm our sequence */
 
-	txseq = hdlc->state.fr.txseq;
+	txseq = state(hdlc)->txseq;
 
 	if (dce)
-		hdlc->state.fr.last_poll = jiffies;
+		state(hdlc)->last_poll = jiffies;
 
 	error = 0;
-	if (!hdlc->state.fr.reliable)
+	if (!state(hdlc)->reliable)
 		error = 1;
 
-	if (rxseq == 0 || rxseq != txseq) {
-		hdlc->state.fr.n391cnt = 0; /* Ask for full report next time */
+	if (rxseq == 0 || rxseq != txseq) { /* Ask for full report next time */
+		state(hdlc)->n391cnt = 0;
 		error = 1;
 	}
 
 	if (dce) {
-		if (hdlc->state.fr.fullrep_sent && !error) {
+		if (state(hdlc)->fullrep_sent && !error) {
 /* Stop sending full report - the last one has been confirmed by DTE */
-			hdlc->state.fr.fullrep_sent = 0;
-			pvc = hdlc->state.fr.first_pvc;
+			state(hdlc)->fullrep_sent = 0;
+			pvc = state(hdlc)->first_pvc;
 			while (pvc) {
 				if (pvc->state.new) {
 					pvc->state.new = 0;
 
 /* Tell DTE that new PVC is now active */
-					hdlc->state.fr.dce_changed = 1;
+					state(hdlc)->dce_changed = 1;
 				}
 				pvc = pvc->next;
 			}
 		}
 
-		if (hdlc->state.fr.dce_changed) {
+		if (state(hdlc)->dce_changed) {
 			reptype = LMI_FULLREP;
-			hdlc->state.fr.fullrep_sent = 1;
-			hdlc->state.fr.dce_changed = 0;
+			state(hdlc)->fullrep_sent = 1;
+			state(hdlc)->dce_changed = 0;
 		}
 
-		hdlc->state.fr.request = 1; /* got request */
+		state(hdlc)->request = 1; /* got request */
 		fr_lmi_send(dev, reptype == LMI_FULLREP ? 1 : 0);
 		return 0;
 	}
 
 	/* DTE */
 
-	hdlc->state.fr.request = 0; /* got response, no request pending */
+	state(hdlc)->request = 0; /* got response, no request pending */
 
 	if (error)
 		return 0;
@@ -760,7 +817,7 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 	if (reptype != LMI_FULLREP)
 		return 0;
 
-	pvc = hdlc->state.fr.first_pvc;
+	pvc = state(hdlc)->first_pvc;
 
 	while (pvc) {
 		pvc->state.deleted = 1;
@@ -827,7 +884,7 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 		i += stat_len;
 	}
 
-	pvc = hdlc->state.fr.first_pvc;
+	pvc = state(hdlc)->first_pvc;
 
 	while (pvc) {
 		if (pvc->state.deleted && pvc->state.exist) {
@@ -841,17 +898,16 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 	}
 
 	/* Next full report after N391 polls */
-	hdlc->state.fr.n391cnt = hdlc->state.fr.settings.n391;
+	state(hdlc)->n391cnt = state(hdlc)->settings.n391;
 
 	return 0;
 }
 
 
-
 static int fr_rx(struct sk_buff *skb)
 {
-	struct net_device *ndev = skb->dev;
-	hdlc_device *hdlc = dev_to_hdlc(ndev);
+	struct net_device *frad = skb->dev;
+	hdlc_device *hdlc = dev_to_hdlc(frad);
 	fr_hdr *fh = (fr_hdr*)skb->data;
 	u8 *data = skb->data;
 	u16 dlci;
@@ -864,11 +920,11 @@ static int fr_rx(struct sk_buff *skb)
 	dlci = q922_to_dlci(skb->data);
 
 	if ((dlci == LMI_CCITT_ANSI_DLCI &&
-	     (hdlc->state.fr.settings.lmi == LMI_ANSI ||
-	      hdlc->state.fr.settings.lmi == LMI_CCITT)) ||
+	     (state(hdlc)->settings.lmi == LMI_ANSI ||
+	      state(hdlc)->settings.lmi == LMI_CCITT)) ||
 	    (dlci == LMI_CISCO_DLCI &&
-	     hdlc->state.fr.settings.lmi == LMI_CISCO)) {
-		if (fr_lmi_recv(ndev, skb))
+	     state(hdlc)->settings.lmi == LMI_CISCO)) {
+		if (fr_lmi_recv(frad, skb))
 			goto rx_error;
 		dev_kfree_skb_any(skb);
 		return NET_RX_SUCCESS;
@@ -878,7 +934,7 @@ static int fr_rx(struct sk_buff *skb)
 	if (!pvc) {
 #ifdef DEBUG_PKT
 		printk(KERN_INFO "%s: No PVC for received frame's DLCI %d\n",
-		       ndev->name, dlci);
+		       frad->name, dlci);
 #endif
 		dev_kfree_skb_any(skb);
 		return NET_RX_DROP;
@@ -886,7 +942,7 @@ static int fr_rx(struct sk_buff *skb)
 
 	if (pvc->state.fecn != fh->fecn) {
 #ifdef DEBUG_ECN
-		printk(KERN_DEBUG "%s: DLCI %d FECN O%s\n", ndev->name,
+		printk(KERN_DEBUG "%s: DLCI %d FECN O%s\n", frad->name,
 		       dlci, fh->fecn ? "N" : "FF");
 #endif
 		pvc->state.fecn ^= 1;
@@ -894,7 +950,7 @@ static int fr_rx(struct sk_buff *skb)
 
 	if (pvc->state.becn != fh->becn) {
 #ifdef DEBUG_ECN
-		printk(KERN_DEBUG "%s: DLCI %d BECN O%s\n", ndev->name,
+		printk(KERN_DEBUG "%s: DLCI %d BECN O%s\n", frad->name,
 		       dlci, fh->becn ? "N" : "FF");
 #endif
 		pvc->state.becn ^= 1;
@@ -902,7 +958,7 @@ static int fr_rx(struct sk_buff *skb)
 
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
-		hdlc->stats.rx_dropped++;
+		dev_to_desc(frad)->stats.rx_dropped++;
 		return NET_RX_DROP;
 	}
 
@@ -938,13 +994,13 @@ static int fr_rx(struct sk_buff *skb)
 
 		default:
 			printk(KERN_INFO "%s: Unsupported protocol, OUI=%x "
-			       "PID=%x\n", ndev->name, oui, pid);
+			       "PID=%x\n", frad->name, oui, pid);
 			dev_kfree_skb_any(skb);
 			return NET_RX_DROP;
 		}
 	} else {
 		printk(KERN_INFO "%s: Unsupported protocol, NLPID=%x "
-		       "length = %i\n", ndev->name, data[3], skb->len);
+		       "length = %i\n", frad->name, data[3], skb->len);
 		dev_kfree_skb_any(skb);
 		return NET_RX_DROP;
 	}
@@ -964,7 +1020,7 @@ static int fr_rx(struct sk_buff *skb)
 	}
 
  rx_error:
-	hdlc->stats.rx_errors++; /* Mark error */
+	dev_to_desc(frad)->stats.rx_errors++; /* Mark error */
 	dev_kfree_skb_any(skb);
 	return NET_RX_DROP;
 }
@@ -977,44 +1033,42 @@ static void fr_start(struct net_device *dev)
 #ifdef DEBUG_LINK
 	printk(KERN_DEBUG "fr_start\n");
 #endif
-	if (hdlc->state.fr.settings.lmi != LMI_NONE) {
-		hdlc->state.fr.reliable = 0;
-		hdlc->state.fr.dce_changed = 1;
-		hdlc->state.fr.request = 0;
-		hdlc->state.fr.fullrep_sent = 0;
-		hdlc->state.fr.last_errors = 0xFFFFFFFF;
-		hdlc->state.fr.n391cnt = 0;
-		hdlc->state.fr.txseq = hdlc->state.fr.rxseq = 0;
-
-		init_timer(&hdlc->state.fr.timer);
+	if (state(hdlc)->settings.lmi != LMI_NONE) {
+		state(hdlc)->reliable = 0;
+		state(hdlc)->dce_changed = 1;
+		state(hdlc)->request = 0;
+		state(hdlc)->fullrep_sent = 0;
+		state(hdlc)->last_errors = 0xFFFFFFFF;
+		state(hdlc)->n391cnt = 0;
+		state(hdlc)->txseq = state(hdlc)->rxseq = 0;
+
+		init_timer(&state(hdlc)->timer);
 		/* First poll after 1 s */
-		hdlc->state.fr.timer.expires = jiffies + HZ;
-		hdlc->state.fr.timer.function = fr_timer;
-		hdlc->state.fr.timer.data = (unsigned long)dev;
-		add_timer(&hdlc->state.fr.timer);
+		state(hdlc)->timer.expires = jiffies + HZ;
+		state(hdlc)->timer.function = fr_timer;
+		state(hdlc)->timer.data = (unsigned long)dev;
+		add_timer(&state(hdlc)->timer);
 	} else
 		fr_set_link_state(1, dev);
 }
 
 
-
 static void fr_stop(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 #ifdef DEBUG_LINK
 	printk(KERN_DEBUG "fr_stop\n");
 #endif
-	if (hdlc->state.fr.settings.lmi != LMI_NONE)
-		del_timer_sync(&hdlc->state.fr.timer);
+	if (state(hdlc)->settings.lmi != LMI_NONE)
+		del_timer_sync(&state(hdlc)->timer);
 	fr_set_link_state(0, dev);
 }
 
 
-
 static void fr_close(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	pvc_device *pvc = hdlc->state.fr.first_pvc;
+	pvc_device *pvc = state(hdlc)->first_pvc;
 
 	while (pvc) {		/* Shutdown all PVCs for this FRAD */
 		if (pvc->main)
@@ -1025,7 +1079,8 @@ static void fr_close(struct net_device *dev)
 	}
 }
 
-static void dlci_setup(struct net_device *dev)
+
+static void pvc_setup(struct net_device *dev)
 {
 	dev->type = ARPHRD_DLCI;
 	dev->flags = IFF_POINTOPOINT;
@@ -1033,9 +1088,9 @@ static void dlci_setup(struct net_device *dev)
 	dev->addr_len = 2;
 }
 
-static int fr_add_pvc(struct net_device *master, unsigned int dlci, int type)
+static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 {
-	hdlc_device *hdlc = dev_to_hdlc(master);
+	hdlc_device *hdlc = dev_to_hdlc(frad);
 	pvc_device *pvc = NULL;
 	struct net_device *dev;
 	int result, used;
@@ -1044,9 +1099,9 @@ static int fr_add_pvc(struct net_device *master, unsigned int dlci, int type)
 	if (type == ARPHRD_ETHER)
 		prefix = "pvceth%d";
 
-	if ((pvc = add_pvc(master, dlci)) == NULL) {
+	if ((pvc = add_pvc(frad, dlci)) == NULL) {
 		printk(KERN_WARNING "%s: Memory squeeze on fr_add_pvc()\n",
-		       master->name);
+		       frad->name);
 		return -ENOBUFS;
 	}
 
@@ -1060,11 +1115,11 @@ static int fr_add_pvc(struct net_device *master, unsigned int dlci, int type)
 				   "pvceth%d", ether_setup);
 	else
 		dev = alloc_netdev(sizeof(struct net_device_stats),
-				   "pvc%d", dlci_setup);
+				   "pvc%d", pvc_setup);
 
 	if (!dev) {
 		printk(KERN_WARNING "%s: Memory squeeze on fr_pvc()\n",
-		       master->name);
+		       frad->name);
 		delete_unused_pvcs(hdlc);
 		return -ENOBUFS;
 	}
@@ -1102,8 +1157,8 @@ static int fr_add_pvc(struct net_device *master, unsigned int dlci, int type)
 	dev->destructor = free_netdev;
 	*get_dev_p(pvc, type) = dev;
 	if (!used) {
-		hdlc->state.fr.dce_changed = 1;
-		hdlc->state.fr.dce_pvc_count++;
+		state(hdlc)->dce_changed = 1;
+		state(hdlc)->dce_pvc_count++;
 	}
 	return 0;
 }
@@ -1128,8 +1183,8 @@ static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type)
 	*get_dev_p(pvc, type) = NULL;
 
 	if (!pvc_is_used(pvc)) {
-		hdlc->state.fr.dce_pvc_count--;
-		hdlc->state.fr.dce_changed = 1;
+		state(hdlc)->dce_pvc_count--;
+		state(hdlc)->dce_changed = 1;
 	}
 	delete_unused_pvcs(hdlc);
 	return 0;
@@ -1137,14 +1192,13 @@ static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type)
 
 
-static void fr_destroy(hdlc_device *hdlc)
+static void fr_destroy(struct net_device *frad)
 {
-	pvc_device *pvc;
-
-	pvc = hdlc->state.fr.first_pvc;
-	hdlc->state.fr.first_pvc = NULL; /* All PVCs destroyed */
-	hdlc->state.fr.dce_pvc_count = 0;
-	hdlc->state.fr.dce_changed = 1;
+	hdlc_device *hdlc = dev_to_hdlc(frad);
+	pvc_device *pvc = state(hdlc)->first_pvc;
+	state(hdlc)->first_pvc = NULL; /* All PVCs destroyed */
+	state(hdlc)->dce_pvc_count = 0;
+	state(hdlc)->dce_changed = 1;
 
 	while (pvc) {
 		pvc_device *next = pvc->next;
@@ -1161,8 +1215,17 @@ static void fr_destroy(hdlc_device *hdlc)
 }
 
 
+static struct hdlc_proto proto = {
+	.close		= fr_close,
+	.start		= fr_start,
+	.stop		= fr_stop,
+	.detach		= fr_destroy,
+	.ioctl		= fr_ioctl,
+	.module		= THIS_MODULE,
+};
+
 
-int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 {
 	fr_proto __user *fr_s = ifr->ifr_settings.ifs_ifsu.fr;
 	const size_t size = sizeof(fr_proto);
@@ -1173,12 +1236,14 @@ int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	switch (ifr->ifr_settings.type) {
 	case IF_GET_PROTO:
+		if (dev_to_hdlc(dev)->proto != &proto) /* Different proto */
+			return -EINVAL;
 		ifr->ifr_settings.type = IF_PROTO_FR;
 		if (ifr->ifr_settings.size < size) {
 			ifr->ifr_settings.size = size; /* data size wanted */
 			return -ENOBUFS;
 		}
-		if (copy_to_user(fr_s, &hdlc->state.fr.settings, size))
+		if (copy_to_user(fr_s, &state(hdlc)->settings, size))
 			return -EFAULT;
 		return 0;
 
@@ -1213,20 +1278,16 @@ int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		if (hdlc->proto.id != IF_PROTO_FR) {
-			hdlc_proto_detach(hdlc);
-			hdlc->state.fr.first_pvc = NULL;
-			hdlc->state.fr.dce_pvc_count = 0;
+		if (dev_to_hdlc(dev)->proto != &proto) { /* Different proto */
+			result = attach_hdlc_protocol(dev, &proto, fr_rx,
+						      sizeof(struct frad_state));
+			if (result)
+				return result;
+			state(hdlc)->first_pvc = NULL;
+			state(hdlc)->dce_pvc_count = 0;
 		}
-		memcpy(&hdlc->state.fr.settings, &new_settings, size);
-		memset(&hdlc->proto, 0, sizeof(hdlc->proto));
-
-		hdlc->proto.close = fr_close;
-		hdlc->proto.start = fr_start;
-		hdlc->proto.stop = fr_stop;
-		hdlc->proto.detach = fr_destroy;
-		hdlc->proto.netif_rx = fr_rx;
-		hdlc->proto.id = IF_PROTO_FR;
+		memcpy(&state(hdlc)->settings, &new_settings, size);
+
 		dev->hard_start_xmit = hdlc->xmit;
 		dev->hard_header = NULL;
 		dev->type = ARPHRD_FRAD;
@@ -1238,6 +1299,9 @@ int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 	case IF_PROTO_FR_DEL_PVC:
 	case IF_PROTO_FR_ADD_ETH_PVC:
 	case IF_PROTO_FR_DEL_ETH_PVC:
+		if (dev_to_hdlc(dev)->proto != &proto) /* Different proto */
+			return -EINVAL;
+
 		if(!capable(CAP_NET_ADMIN))
 			return -EPERM;
 
@@ -1263,3 +1327,24 @@ int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	return -EINVAL;
 }
+
+
+static int __init mod_init(void)
+{
+	register_hdlc_protocol(&proto);
+	return 0;
+}
+
+
+static void __exit mod_exit(void)
+{
+	unregister_hdlc_protocol(&proto);
+}
+
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
+MODULE_DESCRIPTION("Frame-Relay protocol support for generic HDLC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c
deleted file mode 100644
index 04ca1f7b6424..000000000000
--- a/drivers/net/wan/hdlc_generic.c
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Generic HDLC support routines for Linux
- *
- * Copyright (C) 1999 - 2005 Krzysztof Halasa <khc@pm.waw.pl>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * Currently supported:
- *	* raw IP-in-HDLC
- *	* Cisco HDLC
- *	* Frame Relay with ANSI or CCITT LMI (both user and network side)
- *	* PPP
- *	* X.25
- *
- * Use sethdlc utility to set line parameters, protocol and PVCs
- *
- * How does it work:
- * - proto.open(), close(), start(), stop() calls are serialized.
- *   The order is: open, [ start, stop ... ] close ...
- * - proto.start() and stop() are called with spin_lock_irq held.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/poll.h>
-#include <linux/errno.h>
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/pkt_sched.h>
-#include <linux/inetdevice.h>
-#include <linux/lapb.h>
-#include <linux/rtnetlink.h>
-#include <linux/notifier.h>
-#include <linux/hdlc.h>
-
-
-static const char* version = "HDLC support module revision 1.19";
-
-#undef DEBUG_LINK
-
-
-static int hdlc_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < 68) || (new_mtu > HDLC_MAX_MTU))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
-
-
-static struct net_device_stats *hdlc_get_stats(struct net_device *dev)
-{
-	return hdlc_stats(dev);
-}
-
-
-
-static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev,
-		    struct packet_type *p, struct net_device *orig_dev)
-{
-	hdlc_device *hdlc = dev_to_hdlc(dev);
-	if (hdlc->proto.netif_rx)
-		return hdlc->proto.netif_rx(skb);
-
-	hdlc->stats.rx_dropped++; /* Shouldn't happen */
-	dev_kfree_skb(skb);
-	return NET_RX_DROP;
-}
-
-
-
-static inline void hdlc_proto_start(struct net_device *dev)
-{
-	hdlc_device *hdlc = dev_to_hdlc(dev);
-	if (hdlc->proto.start)
-		return hdlc->proto.start(dev);
-}
-
-
-
-static inline void hdlc_proto_stop(struct net_device *dev)
-{
-	hdlc_device *hdlc = dev_to_hdlc(dev);
-	if (hdlc->proto.stop)
-		return hdlc->proto.stop(dev);
-}
-
-
-
-static int hdlc_device_event(struct notifier_block *this, unsigned long event,
-			     void *ptr)
-{
-	struct net_device *dev = ptr;
-	hdlc_device *hdlc;
-	unsigned long flags;
-	int on;
- 
-	if (dev->get_stats != hdlc_get_stats)
-		return NOTIFY_DONE; /* not an HDLC device */
- 
-	if (event != NETDEV_CHANGE)
-		return NOTIFY_DONE; /* Only interrested in carrier changes */
-
-	on = netif_carrier_ok(dev);
-
-#ifdef DEBUG_LINK
-	printk(KERN_DEBUG "%s: hdlc_device_event NETDEV_CHANGE, carrier %i\n",
-	       dev->name, on);
-#endif
-
-	hdlc = dev_to_hdlc(dev);
-	spin_lock_irqsave(&hdlc->state_lock, flags);
-
-	if (hdlc->carrier == on)
-		goto carrier_exit; /* no change in DCD line level */
-
-	hdlc->carrier = on;
-
-	if (!hdlc->open)
-		goto carrier_exit;
-
-	if (hdlc->carrier) {
-		printk(KERN_INFO "%s: Carrier detected\n", dev->name);
-		hdlc_proto_start(dev);
-	} else {
-		printk(KERN_INFO "%s: Carrier lost\n", dev->name);
-		hdlc_proto_stop(dev);
-	}
-
-carrier_exit:
-	spin_unlock_irqrestore(&hdlc->state_lock, flags);
-	return NOTIFY_DONE;
-}
-
-
-
-/* Must be called by hardware driver when HDLC device is being opened */
-int hdlc_open(struct net_device *dev)
-{
-	hdlc_device *hdlc = dev_to_hdlc(dev);
-#ifdef DEBUG_LINK
-	printk(KERN_DEBUG "hdlc_open() carrier %i open %i\n",
-	       hdlc->carrier, hdlc->open);
-#endif
-
-	if (hdlc->proto.id == -1)
-		return -ENOSYS;	/* no protocol attached */
-
-	if (hdlc->proto.open) {
-		int result = hdlc->proto.open(dev);
-		if (result)
-			return result;
-	}
-
-	spin_lock_irq(&hdlc->state_lock);
-
-	if (hdlc->carrier) {
-		printk(KERN_INFO "%s: Carrier detected\n", dev->name);
-		hdlc_proto_start(dev);
-	} else
-		printk(KERN_INFO "%s: No carrier\n", dev->name);
-
-	hdlc->open = 1;
-
-	spin_unlock_irq(&hdlc->state_lock);
-	return 0;
-}
-
-
-
-/* Must be called by hardware driver when HDLC device is being closed */
-void hdlc_close(struct net_device *dev)
-{
-	hdlc_device *hdlc = dev_to_hdlc(dev);
-#ifdef DEBUG_LINK
-	printk(KERN_DEBUG "hdlc_close() carrier %i open %i\n",
-	       hdlc->carrier, hdlc->open);
-#endif
-
-	spin_lock_irq(&hdlc->state_lock);
-
-	hdlc->open = 0;
-	if (hdlc->carrier)
-		hdlc_proto_stop(dev);
-
-	spin_unlock_irq(&hdlc->state_lock);
-
-	if (hdlc->proto.close)
-		hdlc->proto.close(dev);
-}
-
-
-
-#ifndef CONFIG_HDLC_RAW
-#define hdlc_raw_ioctl(dev, ifr)	-ENOSYS
-#endif
-
-#ifndef CONFIG_HDLC_RAW_ETH
-#define hdlc_raw_eth_ioctl(dev, ifr)	-ENOSYS
-#endif
-
-#ifndef CONFIG_HDLC_PPP
-#define hdlc_ppp_ioctl(dev, ifr)	-ENOSYS
-#endif
-
-#ifndef CONFIG_HDLC_CISCO
-#define hdlc_cisco_ioctl(dev, ifr)	-ENOSYS
-#endif
-
-#ifndef CONFIG_HDLC_FR
-#define hdlc_fr_ioctl(dev, ifr)		-ENOSYS
-#endif
-
-#ifndef CONFIG_HDLC_X25
-#define hdlc_x25_ioctl(dev, ifr)	-ENOSYS
-#endif
-
-
-int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-	hdlc_device *hdlc = dev_to_hdlc(dev);
-	unsigned int proto;
-
-	if (cmd != SIOCWANDEV)
-		return -EINVAL;
-
-	switch(ifr->ifr_settings.type) {
-	case IF_PROTO_HDLC:
-	case IF_PROTO_HDLC_ETH:
-	case IF_PROTO_PPP:
-	case IF_PROTO_CISCO:
-	case IF_PROTO_FR:
-	case IF_PROTO_X25:
-		proto = ifr->ifr_settings.type;
-		break;
-
-	default:
-		proto = hdlc->proto.id;
-	}
-
-	switch(proto) {
-	case IF_PROTO_HDLC:	return hdlc_raw_ioctl(dev, ifr);
-	case IF_PROTO_HDLC_ETH:	return hdlc_raw_eth_ioctl(dev, ifr);
-	case IF_PROTO_PPP:	return hdlc_ppp_ioctl(dev, ifr);
-	case IF_PROTO_CISCO:	return hdlc_cisco_ioctl(dev, ifr);
-	case IF_PROTO_FR:	return hdlc_fr_ioctl(dev, ifr);
-	case IF_PROTO_X25:	return hdlc_x25_ioctl(dev, ifr);
-	default:		return -EINVAL;
-	}
-}
-
-void hdlc_setup(struct net_device *dev)
-{
-	hdlc_device *hdlc = dev_to_hdlc(dev);
-
-	dev->get_stats = hdlc_get_stats;
-	dev->change_mtu = hdlc_change_mtu;
-	dev->mtu = HDLC_MAX_MTU;
-
-	dev->type = ARPHRD_RAWHDLC;
-	dev->hard_header_len = 16;
-
-	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
-
-	hdlc->proto.id = -1;
-	hdlc->proto.detach = NULL;
-	hdlc->carrier = 1;
-	hdlc->open = 0;
-	spin_lock_init(&hdlc->state_lock);
-}
-
-struct net_device *alloc_hdlcdev(void *priv)
-{
-	struct net_device *dev;
-	dev = alloc_netdev(sizeof(hdlc_device), "hdlc%d", hdlc_setup);
-	if (dev)
-		dev_to_hdlc(dev)->priv = priv;
-	return dev;
-}
-
-void unregister_hdlc_device(struct net_device *dev)
-{
-	rtnl_lock();
-	hdlc_proto_detach(dev_to_hdlc(dev));
-	unregister_netdevice(dev);
-	rtnl_unlock();
-}
-
-
-
-MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
-MODULE_DESCRIPTION("HDLC support module");
-MODULE_LICENSE("GPL v2");
-
-EXPORT_SYMBOL(hdlc_open);
-EXPORT_SYMBOL(hdlc_close);
-EXPORT_SYMBOL(hdlc_ioctl);
-EXPORT_SYMBOL(hdlc_setup);
-EXPORT_SYMBOL(alloc_hdlcdev);
-EXPORT_SYMBOL(unregister_hdlc_device);
-
-static struct packet_type hdlc_packet_type = {
-	.type = __constant_htons(ETH_P_HDLC),
-	.func = hdlc_rcv,
-};
-
-
-static struct notifier_block hdlc_notifier = {
-        .notifier_call = hdlc_device_event,
-};
-
-
-static int __init hdlc_module_init(void)
-{
-	int result;
-
-	printk(KERN_INFO "%s\n", version);
-	if ((result = register_netdevice_notifier(&hdlc_notifier)) != 0)
-                return result;
-        dev_add_pack(&hdlc_packet_type);
-	return 0;
-}
-
-
-
-static void __exit hdlc_module_exit(void)
-{
-	dev_remove_pack(&hdlc_packet_type);
-	unregister_netdevice_notifier(&hdlc_notifier);
-}
-
-
-module_init(hdlc_module_init);
-module_exit(hdlc_module_exit);
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index fbaab5bf71eb..e9f717070fde 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -2,7 +2,7 @@
  * Generic HDLC support routines for Linux
  * Point-to-point protocol support
  *
- * Copyright (C) 1999 - 2003 Krzysztof Halasa <khc@pm.waw.pl>
+ * Copyright (C) 1999 - 2006 Krzysztof Halasa <khc@pm.waw.pl>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -22,6 +22,21 @@
 #include <linux/lapb.h>
 #include <linux/rtnetlink.h>
 #include <linux/hdlc.h>
+#include <net/syncppp.h>
+
+struct ppp_state {
+	struct ppp_device pppdev;
+	struct ppp_device *syncppp_ptr;
+	int (*old_change_mtu)(struct net_device *dev, int new_mtu);
+};
+
+static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr);
+
+
+static inline struct ppp_state* state(hdlc_device *hdlc)
+{
+	return(struct ppp_state *)(hdlc->state);
+}
 
 
 static int ppp_open(struct net_device *dev)
@@ -30,16 +45,16 @@ static int ppp_open(struct net_device *dev)
 	void *old_ioctl;
 	int result;
 
-	dev->priv = &hdlc->state.ppp.syncppp_ptr;
-	hdlc->state.ppp.syncppp_ptr = &hdlc->state.ppp.pppdev;
-	hdlc->state.ppp.pppdev.dev = dev;
+	dev->priv = &state(hdlc)->syncppp_ptr;
+	state(hdlc)->syncppp_ptr = &state(hdlc)->pppdev;
+	state(hdlc)->pppdev.dev = dev;
 
 	old_ioctl = dev->do_ioctl;
-	hdlc->state.ppp.old_change_mtu = dev->change_mtu;
-	sppp_attach(&hdlc->state.ppp.pppdev);
+	state(hdlc)->old_change_mtu = dev->change_mtu;
+	sppp_attach(&state(hdlc)->pppdev);
 	/* sppp_attach nukes them. We don't need syncppp's ioctl */
 	dev->do_ioctl = old_ioctl;
-	hdlc->state.ppp.pppdev.sppp.pp_flags &= ~PP_CISCO;
+	state(hdlc)->pppdev.sppp.pp_flags &= ~PP_CISCO;
 	dev->type = ARPHRD_PPP;
 	result = sppp_open(dev);
 	if (result) {
@@ -59,7 +74,7 @@ static void ppp_close(struct net_device *dev)
 	sppp_close(dev);
 	sppp_detach(dev);
 	dev->rebuild_header = NULL;
-	dev->change_mtu = hdlc->state.ppp.old_change_mtu;
+	dev->change_mtu = state(hdlc)->old_change_mtu;
 	dev->mtu = HDLC_MAX_MTU;
 	dev->hard_header_len = 16;
 }
@@ -73,13 +88,24 @@ static __be16 ppp_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 
-int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
+static struct hdlc_proto proto = {
+	.open		= ppp_open,
+	.close		= ppp_close,
+	.type_trans	= ppp_type_trans,
+	.ioctl		= ppp_ioctl,
+	.module		= THIS_MODULE,
+};
+
+
+static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	int result;
 
 	switch (ifr->ifr_settings.type) {
 	case IF_GET_PROTO:
+		if (dev_to_hdlc(dev)->proto != &proto)
+			return -EINVAL;
 		ifr->ifr_settings.type = IF_PROTO_PPP;
 		return 0; /* return protocol only, no settable parameters */
 
@@ -96,13 +122,10 @@ int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		hdlc_proto_detach(hdlc);
-		memset(&hdlc->proto, 0, sizeof(hdlc->proto));
-
-		hdlc->proto.open = ppp_open;
-		hdlc->proto.close = ppp_close;
-		hdlc->proto.type_trans = ppp_type_trans;
-		hdlc->proto.id = IF_PROTO_PPP;
+		result = attach_hdlc_protocol(dev, &proto, NULL,
+					      sizeof(struct ppp_state));
+		if (result)
+			return result;
 		dev->hard_start_xmit = hdlc->xmit;
 		dev->hard_header = NULL;
 		dev->type = ARPHRD_PPP;
@@ -113,3 +136,25 @@ int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	return -EINVAL;
 }
+
+
+static int __init mod_init(void)
+{
+	register_hdlc_protocol(&proto);
+	return 0;
+}
+
+
+
+static void __exit mod_exit(void)
+{
+	unregister_hdlc_protocol(&proto);
+}
+
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
+MODULE_DESCRIPTION("PPP protocol support for generic HDLC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c
index f15aa6ba77f1..fe3cae5c6b9d 100644
--- a/drivers/net/wan/hdlc_raw.c
+++ b/drivers/net/wan/hdlc_raw.c
@@ -2,7 +2,7 @@
  * Generic HDLC support routines for Linux
  * HDLC support
  *
- * Copyright (C) 1999 - 2003 Krzysztof Halasa <khc@pm.waw.pl>
+ * Copyright (C) 1999 - 2006 Krzysztof Halasa <khc@pm.waw.pl>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -24,6 +24,8 @@
 #include <linux/hdlc.h>
 
 
+static int raw_ioctl(struct net_device *dev, struct ifreq *ifr);
+
 static __be16 raw_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	return __constant_htons(ETH_P_IP);
@@ -31,7 +33,14 @@ static __be16 raw_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 
-int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr)
+static struct hdlc_proto proto = {
+	.type_trans	= raw_type_trans,
+	.ioctl		= raw_ioctl,
+	.module		= THIS_MODULE,
+};
+
+
+static int raw_ioctl(struct net_device *dev, struct ifreq *ifr)
 {
 	raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc;
 	const size_t size = sizeof(raw_hdlc_proto);
@@ -41,12 +50,14 @@ int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	switch (ifr->ifr_settings.type) {
 	case IF_GET_PROTO:
+		if (dev_to_hdlc(dev)->proto != &proto)
+			return -EINVAL;
 		ifr->ifr_settings.type = IF_PROTO_HDLC;
 		if (ifr->ifr_settings.size < size) {
 			ifr->ifr_settings.size = size; /* data size wanted */
 			return -ENOBUFS;
 		}
-		if (copy_to_user(raw_s, &hdlc->state.raw_hdlc.settings, size))
+		if (copy_to_user(raw_s, hdlc->state, size))
 			return -EFAULT;
 		return 0;
 
@@ -71,12 +82,11 @@ int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		hdlc_proto_detach(hdlc);
-		memcpy(&hdlc->state.raw_hdlc.settings, &new_settings, size);
-		memset(&hdlc->proto, 0, sizeof(hdlc->proto));
-
-		hdlc->proto.type_trans = raw_type_trans;
-		hdlc->proto.id = IF_PROTO_HDLC;
+		result = attach_hdlc_protocol(dev, &proto, NULL,
+					      sizeof(raw_hdlc_proto));
+		if (result)
+			return result;
+		memcpy(hdlc->state, &new_settings, size);
 		dev->hard_start_xmit = hdlc->xmit;
 		dev->hard_header = NULL;
 		dev->type = ARPHRD_RAWHDLC;
@@ -88,3 +98,25 @@ int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	return -EINVAL;
 }
+
+
+static int __init mod_init(void)
+{
+	register_hdlc_protocol(&proto);
+	return 0;
+}
+
+
+
+static void __exit mod_exit(void)
+{
+	unregister_hdlc_protocol(&proto);
+}
+
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
+MODULE_DESCRIPTION("Raw HDLC protocol support for generic HDLC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c
index d1884987f94e..1a69a9aaa9b9 100644
--- a/drivers/net/wan/hdlc_raw_eth.c
+++ b/drivers/net/wan/hdlc_raw_eth.c
@@ -2,7 +2,7 @@
  * Generic HDLC support routines for Linux
  * HDLC Ethernet emulation support
  *
- * Copyright (C) 2002-2003 Krzysztof Halasa <khc@pm.waw.pl>
+ * Copyright (C) 2002-2006 Krzysztof Halasa <khc@pm.waw.pl>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -25,6 +25,7 @@
 #include <linux/etherdevice.h>
 #include <linux/hdlc.h>
 
+static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr);
 
 static int eth_tx(struct sk_buff *skb, struct net_device *dev)
 {
@@ -44,7 +45,14 @@ static int eth_tx(struct sk_buff *skb, struct net_device *dev)
 }
 
 
-int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
+static struct hdlc_proto proto = {
+	.type_trans	= eth_type_trans,
+	.ioctl		= raw_eth_ioctl,
+	.module		= THIS_MODULE,
+};
+
+
+static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
 {
 	raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc;
 	const size_t size = sizeof(raw_hdlc_proto);
@@ -56,12 +64,14 @@ int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	switch (ifr->ifr_settings.type) {
 	case IF_GET_PROTO:
+		if (dev_to_hdlc(dev)->proto != &proto)
+			return -EINVAL;
 		ifr->ifr_settings.type = IF_PROTO_HDLC_ETH;
 		if (ifr->ifr_settings.size < size) {
 			ifr->ifr_settings.size = size; /* data size wanted */
 			return -ENOBUFS;
 		}
-		if (copy_to_user(raw_s, &hdlc->state.raw_hdlc.settings, size))
+		if (copy_to_user(raw_s, hdlc->state, size))
 			return -EFAULT;
 		return 0;
 
@@ -86,12 +96,11 @@ int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		hdlc_proto_detach(hdlc);
-		memcpy(&hdlc->state.raw_hdlc.settings, &new_settings, size);
-		memset(&hdlc->proto, 0, sizeof(hdlc->proto));
-
-		hdlc->proto.type_trans = eth_type_trans;
-		hdlc->proto.id = IF_PROTO_HDLC_ETH;
+		result = attach_hdlc_protocol(dev, &proto, NULL,
+					      sizeof(raw_hdlc_proto));
+		if (result)
+			return result;
+		memcpy(hdlc->state, &new_settings, size);
 		dev->hard_start_xmit = eth_tx;
 		old_ch_mtu = dev->change_mtu;
 		old_qlen = dev->tx_queue_len;
@@ -106,3 +115,25 @@ int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	return -EINVAL;
 }
+
+
+static int __init mod_init(void)
+{
+	register_hdlc_protocol(&proto);
+	return 0;
+}
+
+
+
+static void __exit mod_exit(void)
+{
+	unregister_hdlc_protocol(&proto);
+}
+
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
+MODULE_DESCRIPTION("Ethernet encapsulation support for generic HDLC");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index a867fb411f89..e4bb9f8ad433 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -2,7 +2,7 @@
  * Generic HDLC support routines for Linux
  * X.25 support
  *
- * Copyright (C) 1999 - 2003 Krzysztof Halasa <khc@pm.waw.pl>
+ * Copyright (C) 1999 - 2006 Krzysztof Halasa <khc@pm.waw.pl>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -25,6 +25,8 @@
 
 #include <net/x25device.h>
 
+static int x25_ioctl(struct net_device *dev, struct ifreq *ifr);
+
 /* These functions are callbacks called by LAPB layer */
 
 static void x25_connect_disconnect(struct net_device *dev, int reason, int code)
@@ -162,30 +164,39 @@ static void x25_close(struct net_device *dev)
 
 static int x25_rx(struct sk_buff *skb)
 {
-	hdlc_device *hdlc = dev_to_hdlc(skb->dev);
+	struct hdlc_device_desc *desc = dev_to_desc(skb->dev);
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
-		hdlc->stats.rx_dropped++;
+		desc->stats.rx_dropped++;
 		return NET_RX_DROP;
 	}
 
 	if (lapb_data_received(skb->dev, skb) == LAPB_OK)
 		return NET_RX_SUCCESS;
 
-	hdlc->stats.rx_errors++;
+	desc->stats.rx_errors++;
 	dev_kfree_skb_any(skb);
 	return NET_RX_DROP;
 }
 
 
+static struct hdlc_proto proto = {
+	.open		= x25_open,
+	.close		= x25_close,
+	.ioctl		= x25_ioctl,
+	.module		= THIS_MODULE,
+};
+
 
-int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	int result;
 
 	switch (ifr->ifr_settings.type) {
 	case IF_GET_PROTO:
+		if (dev_to_hdlc(dev)->proto != &proto)
+			return -EINVAL;
 		ifr->ifr_settings.type = IF_PROTO_X25;
 		return 0; /* return protocol only, no settable parameters */
 
@@ -200,14 +211,9 @@ int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		hdlc_proto_detach(hdlc);
-		memset(&hdlc->proto, 0, sizeof(hdlc->proto));
-
-		hdlc->proto.open = x25_open;
-		hdlc->proto.close = x25_close;
-		hdlc->proto.netif_rx = x25_rx;
-		hdlc->proto.type_trans = NULL;
-		hdlc->proto.id = IF_PROTO_X25;
+		if ((result = attach_hdlc_protocol(dev, &proto,
+						   x25_rx, 0)) != 0)
+			return result;
 		dev->hard_start_xmit = x25_xmit;
 		dev->hard_header = NULL;
 		dev->type = ARPHRD_X25;
@@ -218,3 +224,25 @@ int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 	return -EINVAL;
 }
+
+
+static int __init mod_init(void)
+{
+	register_hdlc_protocol(&proto);
+	return 0;
+}
+
+
+
+static void __exit mod_exit(void)
+{
+	unregister_hdlc_protocol(&proto);
+}
+
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
+MODULE_DESCRIPTION("X.25 protocol support for generic HDLC");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index d5ebbb29aeae..d4b333938f73 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -11,95 +11,46 @@
 #ifndef __HDLC_H
 #define __HDLC_H
 
-#define GENERIC_HDLC_VERSION 4	/* For synchronization with sethdlc utility */
-
-#define CLOCK_DEFAULT   0	/* Default setting */
-#define CLOCK_EXT	1	/* External TX and RX clock - DTE */
-#define CLOCK_INT	2	/* Internal TX and RX clock - DCE */
-#define CLOCK_TXINT	3	/* Internal TX and external RX clock */
-#define CLOCK_TXFROMRX	4	/* TX clock derived from external RX clock */
-
-
-#define ENCODING_DEFAULT	0 /* Default setting */
-#define ENCODING_NRZ		1
-#define ENCODING_NRZI		2
-#define ENCODING_FM_MARK	3
-#define ENCODING_FM_SPACE	4
-#define ENCODING_MANCHESTER	5
-
-
-#define PARITY_DEFAULT		0 /* Default setting */
-#define PARITY_NONE		1 /* No parity */
-#define PARITY_CRC16_PR0	2 /* CRC16, initial value 0x0000 */
-#define PARITY_CRC16_PR1	3 /* CRC16, initial value 0xFFFF */
-#define PARITY_CRC16_PR0_CCITT	4 /* CRC16, initial 0x0000, ITU-T version */
-#define PARITY_CRC16_PR1_CCITT	5 /* CRC16, initial 0xFFFF, ITU-T version */
-#define PARITY_CRC32_PR0_CCITT	6 /* CRC32, initial value 0x00000000 */
-#define PARITY_CRC32_PR1_CCITT	7 /* CRC32, initial value 0xFFFFFFFF */
-
-#define LMI_DEFAULT		0 /* Default setting */
-#define LMI_NONE		1 /* No LMI, all PVCs are static */
-#define LMI_ANSI		2 /* ANSI Annex D */
-#define LMI_CCITT		3 /* ITU-T Annex A */
-#define LMI_CISCO		4 /* The "original" LMI, aka Gang of Four */
 
 #define HDLC_MAX_MTU 1500	/* Ethernet 1500 bytes */
+#if 0
 #define HDLC_MAX_MRU (HDLC_MAX_MTU + 10 + 14 + 4) /* for ETH+VLAN over FR */
+#else
+#define HDLC_MAX_MRU 1600 /* as required for FR network */
+#endif
 
 
 #ifdef __KERNEL__
 
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
-#include <net/syncppp.h>
 #include <linux/hdlc/ioctl.h>
 
 
-typedef struct {		/* Used in Cisco and PPP mode */
-	u8 address;
-	u8 control;
-	u16 protocol;
-}__attribute__ ((packed)) hdlc_header;
-
-
-
-typedef struct {
-	u32 type;		/* code */
-	u32 par1;
-	u32 par2;
-	u16 rel;		/* reliability */
-	u32 time;
-}__attribute__ ((packed)) cisco_packet;
-#define	CISCO_PACKET_LEN	18
-#define	CISCO_BIG_PACKET_LEN	20
-
-
-
-typedef struct pvc_device_struct {
-	struct net_device *master;
-	struct net_device *main;
-	struct net_device *ether; /* bridged Ethernet interface */
-	struct pvc_device_struct *next;	/* Sorted in ascending DLCI order */
-	int dlci;
-	int open_count;
-
-	struct {
-		unsigned int new: 1;
-		unsigned int active: 1;
-		unsigned int exist: 1;
-		unsigned int deleted: 1;
-		unsigned int fecn: 1;
-		unsigned int becn: 1;
-		unsigned int bandwidth;	/* Cisco LMI reporting only */
-	}state;
-}pvc_device;
-
-
-
-typedef struct hdlc_device_struct {
-	/* To be initialized by hardware driver */
+/* Used by all network devices here, pointed to by netdev_priv(dev) */
+struct hdlc_device_desc {
+	int (*netif_rx)(struct sk_buff *skb);
 	struct net_device_stats stats;
-
+};
+
+/* This structure is a private property of HDLC protocols.
+   Hardware drivers have no interest here */
+
+struct hdlc_proto {
+	int (*open)(struct net_device *dev);
+	void (*close)(struct net_device *dev);
+	void (*start)(struct net_device *dev); /* if open & DCD */
+	void (*stop)(struct net_device *dev); /* if open & !DCD */
+	void (*detach)(struct net_device *dev);
+	int (*ioctl)(struct net_device *dev, struct ifreq *ifr);
+	unsigned short (*type_trans)(struct sk_buff *skb,
+				     struct net_device *dev);
+	struct module *module;
+	struct hdlc_proto *next; /* next protocol in the list */
+};
+
+
+typedef struct hdlc_device {
 	/* used by HDLC layer to take control over HDLC device from hw driver*/
 	int (*attach)(struct net_device *dev,
 		      unsigned short encoding, unsigned short parity);
@@ -107,82 +58,18 @@ typedef struct hdlc_device_struct {
 	/* hardware driver must handle this instead of dev->hard_start_xmit */
 	int (*xmit)(struct sk_buff *skb, struct net_device *dev);
 
-
 	/* Things below are for HDLC layer internal use only */
-	struct {
-		int (*open)(struct net_device *dev);
-		void (*close)(struct net_device *dev);
-
-		/* if open & DCD */
-		void (*start)(struct net_device *dev);
-		/* if open & !DCD */
-		void (*stop)(struct net_device *dev);
-
-		void (*detach)(struct hdlc_device_struct *hdlc);
-		int (*netif_rx)(struct sk_buff *skb);
-		unsigned short (*type_trans)(struct sk_buff *skb,
-					     struct net_device *dev);
-		int id;		/* IF_PROTO_HDLC/CISCO/FR/etc. */
-	}proto;
-
+	const struct hdlc_proto *proto;
 	int carrier;
 	int open;
 	spinlock_t state_lock;
-
-	union {
-		struct {
-			fr_proto settings;
-			pvc_device *first_pvc;
-			int dce_pvc_count;
-
-			struct timer_list timer;
-			unsigned long last_poll;
-			int reliable;
-			int dce_changed;
-			int request;
-			int fullrep_sent;
-			u32 last_errors; /* last errors bit list */
-			u8 n391cnt;
-			u8 txseq; /* TX sequence number */
-			u8 rxseq; /* RX sequence number */
-		}fr;
-
-		struct {
-			cisco_proto settings;
-
-			struct timer_list timer;
-			unsigned long last_poll;
-			int up;
-			int request_sent;
-			u32 txseq; /* TX sequence number */
-			u32 rxseq; /* RX sequence number */
-		}cisco;
-
-		struct {
-			raw_hdlc_proto settings;
-		}raw_hdlc;
-
-		struct {
-			struct ppp_device pppdev;
-			struct ppp_device *syncppp_ptr;
-			int (*old_change_mtu)(struct net_device *dev,
-					      int new_mtu);
-		}ppp;
-	}state;
+	void *state;
 	void *priv;
 }hdlc_device;
 
 
-int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr);
-
-
-/* Exported from hdlc.o */
+/* Exported from hdlc module */
 
 /* Called by hardware driver when a user requests HDLC service */
 int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
@@ -191,17 +78,21 @@ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
 #define register_hdlc_device(dev)	register_netdev(dev)
 void unregister_hdlc_device(struct net_device *dev);
 
+
+void register_hdlc_protocol(struct hdlc_proto *proto);
+void unregister_hdlc_protocol(struct hdlc_proto *proto);
+
 struct net_device *alloc_hdlcdev(void *priv);
 
-static __inline__ hdlc_device* dev_to_hdlc(struct net_device *dev)
+
+static __inline__ struct hdlc_device_desc* dev_to_desc(struct net_device *dev)
 {
 	return netdev_priv(dev);
 }
 
-
-static __inline__ pvc_device* dev_to_pvc(struct net_device *dev)
+static __inline__ hdlc_device* dev_to_hdlc(struct net_device *dev)
 {
-	return (pvc_device*)dev->priv;
+	return netdev_priv(dev) + sizeof(struct hdlc_device_desc);
 }
 
 
@@ -225,18 +116,14 @@ int hdlc_open(struct net_device *dev);
 /* Must be called by hardware driver when HDLC device is being closed */
 void hdlc_close(struct net_device *dev);
 
+int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
+			 int (*rx)(struct sk_buff *skb), size_t size);
 /* May be used by hardware driver to gain control over HDLC device */
-static __inline__ void hdlc_proto_detach(hdlc_device *hdlc)
-{
-	if (hdlc->proto.detach)
-		hdlc->proto.detach(hdlc);
-	hdlc->proto.detach = NULL;
-}
-
+void detach_hdlc_protocol(struct net_device *dev);
 
 static __inline__ struct net_device_stats *hdlc_stats(struct net_device *dev)
 {
-	return &dev_to_hdlc(dev)->stats;
+	return &dev_to_desc(dev)->stats;
 }
 
 
@@ -248,8 +135,8 @@ static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb,
 	skb->mac.raw  = skb->data;
 	skb->dev      = dev;
 
-	if (hdlc->proto.type_trans)
-		return hdlc->proto.type_trans(skb, dev);
+	if (hdlc->proto->type_trans)
+		return hdlc->proto->type_trans(skb, dev);
 	else
 		return htons(ETH_P_HDLC);
 }
diff --git a/include/linux/hdlc/ioctl.h b/include/linux/hdlc/ioctl.h
index 78430ba3ea69..583972364357 100644
--- a/include/linux/hdlc/ioctl.h
+++ b/include/linux/hdlc/ioctl.h
@@ -1,6 +1,39 @@
 #ifndef __HDLC_IOCTL_H__
 #define __HDLC_IOCTL_H__
 
+
+#define GENERIC_HDLC_VERSION 4	/* For synchronization with sethdlc utility */
+
+#define CLOCK_DEFAULT   0	/* Default setting */
+#define CLOCK_EXT	1	/* External TX and RX clock - DTE */
+#define CLOCK_INT	2	/* Internal TX and RX clock - DCE */
+#define CLOCK_TXINT	3	/* Internal TX and external RX clock */
+#define CLOCK_TXFROMRX	4	/* TX clock derived from external RX clock */
+
+
+#define ENCODING_DEFAULT	0 /* Default setting */
+#define ENCODING_NRZ		1
+#define ENCODING_NRZI		2
+#define ENCODING_FM_MARK	3
+#define ENCODING_FM_SPACE	4
+#define ENCODING_MANCHESTER	5
+
+
+#define PARITY_DEFAULT		0 /* Default setting */
+#define PARITY_NONE		1 /* No parity */
+#define PARITY_CRC16_PR0	2 /* CRC16, initial value 0x0000 */
+#define PARITY_CRC16_PR1	3 /* CRC16, initial value 0xFFFF */
+#define PARITY_CRC16_PR0_CCITT	4 /* CRC16, initial 0x0000, ITU-T version */
+#define PARITY_CRC16_PR1_CCITT	5 /* CRC16, initial 0xFFFF, ITU-T version */
+#define PARITY_CRC32_PR0_CCITT	6 /* CRC32, initial value 0x00000000 */
+#define PARITY_CRC32_PR1_CCITT	7 /* CRC32, initial value 0xFFFFFFFF */
+
+#define LMI_DEFAULT		0 /* Default setting */
+#define LMI_NONE		1 /* No LMI, all PVCs are static */
+#define LMI_ANSI		2 /* ANSI Annex D */
+#define LMI_CCITT		3 /* ITU-T Annex A */
+#define LMI_CISCO		4 /* The "original" LMI, aka Gang of Four */
+
 typedef struct { 
 	unsigned int clock_rate; /* bits per second */
 	unsigned int clock_type; /* internal, external, TX-internal etc. */
-- 
cgit v1.2.3


From 51c3711704b66986373408cbc0540abea43d2380 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 13 Aug 2006 23:33:16 +0200
Subject: i2c-algo-sibyte: Merge into i2c-sibyte

i2c-algo-sibyte: Merge into i2c-sibyte

Merge i2c-algo-sibyte into i2c-sibyte, as this is a complete,
hardware-dependent SMBus implementation and not a reusable algorithm.

Perform some basic coding style cleanups while we're here (mainly
space-based indentation replaced by tabulations.)

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/i2c/algos/Kconfig           |   6 --
 drivers/i2c/algos/Makefile          |   1 -
 drivers/i2c/algos/i2c-algo-sibyte.c | 186 ------------------------------------
 drivers/i2c/busses/i2c-sibyte.c     | 158 +++++++++++++++++++++++++++++-
 include/linux/i2c-algo-sibyte.h     |  33 -------
 5 files changed, 155 insertions(+), 229 deletions(-)
 delete mode 100644 drivers/i2c/algos/i2c-algo-sibyte.c
 delete mode 100644 include/linux/i2c-algo-sibyte.h

(limited to 'include/linux')

diff --git a/drivers/i2c/algos/Kconfig b/drivers/i2c/algos/Kconfig
index 30408015d231..c034820615bb 100644
--- a/drivers/i2c/algos/Kconfig
+++ b/drivers/i2c/algos/Kconfig
@@ -53,12 +53,6 @@ config I2C_ALGO8XX
 	tristate "MPC8xx CPM I2C interface"
 	depends on 8xx && I2C
 
-config I2C_ALGO_SIBYTE
-	tristate "SiByte SMBus interface"
-	depends on SIBYTE_SB1xxx_SOC && I2C
-	help
-	  Supports the SiByte SOC on-chip I2C interfaces (2 channels).
-
 config I2C_ALGO_SGI
 	tristate "I2C SGI interfaces"
 	depends on I2C && (SGI_IP22 || SGI_IP32 || X86_VISWS)
diff --git a/drivers/i2c/algos/Makefile b/drivers/i2c/algos/Makefile
index 867fe1f67401..208be04a3dbd 100644
--- a/drivers/i2c/algos/Makefile
+++ b/drivers/i2c/algos/Makefile
@@ -6,7 +6,6 @@ obj-$(CONFIG_I2C_ALGOBIT)	+= i2c-algo-bit.o
 obj-$(CONFIG_I2C_ALGOPCF)	+= i2c-algo-pcf.o
 obj-$(CONFIG_I2C_ALGOPCA)	+= i2c-algo-pca.o
 obj-$(CONFIG_I2C_ALGOITE)	+= i2c-algo-ite.o
-obj-$(CONFIG_I2C_ALGO_SIBYTE)	+= i2c-algo-sibyte.o
 obj-$(CONFIG_I2C_ALGO_SGI)	+= i2c-algo-sgi.o
 
 ifeq ($(CONFIG_I2C_DEBUG_ALGO),y)
diff --git a/drivers/i2c/algos/i2c-algo-sibyte.c b/drivers/i2c/algos/i2c-algo-sibyte.c
deleted file mode 100644
index 16d666fa16f3..000000000000
--- a/drivers/i2c/algos/i2c-algo-sibyte.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/* ------------------------------------------------------------------------- */
-/* i2c-algo-sibyte.c i2c driver algorithms for bit-shift adapters		     */
-/* ------------------------------------------------------------------------- */
-/*   Copyright (C) 2001,2002,2003 Broadcom Corporation
-     Copyright (C) 1995-2000 Simon G. Vogl
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.		     */
-/* ------------------------------------------------------------------------- */
-
-/* With some changes from Ky�sti M�lkki <kmalkki@cc.hut.fi> and even
-   Frodo Looijaard <frodol@dds.nl>.  */
-
-/* Ported for SiByte SOCs by Broadcom Corporation.  */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-#include <asm/sibyte/sb1250_regs.h>
-#include <asm/sibyte/sb1250_smbus.h>
-
-#include <linux/i2c.h>
-#include <linux/i2c-algo-sibyte.h>
-
-/* ----- global defines ----------------------------------------------- */
-#define SMB_CSR(a,r) ((long)(a->reg_base + r))
-
-/* ----- global variables ---------------------------------------------	*/
-
-/* module parameters:
- */
-static int bit_scan;	/* have a look at what's hanging 'round		*/
-
-
-static int smbus_xfer(struct i2c_adapter *i2c_adap, u16 addr, 
-                      unsigned short flags, char read_write,
-                      u8 command, int size, union i2c_smbus_data * data)
-{
-	struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data;
-        int data_bytes = 0;
-        int error;
-
-        while (csr_in32(SMB_CSR(adap, R_SMB_STATUS)) & M_SMB_BUSY)
-                ;
-
-        switch (size) {
-        case I2C_SMBUS_QUICK:
-                csr_out32((V_SMB_ADDR(addr) | (read_write == I2C_SMBUS_READ ? M_SMB_QDATA : 0) |
-			   V_SMB_TT_QUICKCMD), SMB_CSR(adap, R_SMB_START));
-                break;
-        case I2C_SMBUS_BYTE:
-                if (read_write == I2C_SMBUS_READ) {
-                        csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_RD1BYTE),
-				  SMB_CSR(adap, R_SMB_START));
-                        data_bytes = 1;
-                } else {
-                        csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD));
-                        csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR1BYTE),
-				  SMB_CSR(adap, R_SMB_START));
-                }
-                break;
-        case I2C_SMBUS_BYTE_DATA:
-                csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD));
-                if (read_write == I2C_SMBUS_READ) {
-                        csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_CMD_RD1BYTE),
-				  SMB_CSR(adap, R_SMB_START));
-                        data_bytes = 1;
-                } else {
-                        csr_out32(V_SMB_LB(data->byte), SMB_CSR(adap, R_SMB_DATA));
-                        csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR2BYTE),
-				  SMB_CSR(adap, R_SMB_START));
-                }
-                break;
-        case I2C_SMBUS_WORD_DATA:
-                csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD));
-                if (read_write == I2C_SMBUS_READ) {
-                        csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_CMD_RD2BYTE),
-				  SMB_CSR(adap, R_SMB_START));
-                        data_bytes = 2;
-                } else {
-                        csr_out32(V_SMB_LB(data->word & 0xff), SMB_CSR(adap, R_SMB_DATA));
-                        csr_out32(V_SMB_MB(data->word >> 8), SMB_CSR(adap, R_SMB_DATA));
-                        csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR2BYTE),
-				  SMB_CSR(adap, R_SMB_START));
-                }
-                break;
-        default:
-                return -1;      /* XXXKW better error code? */
-        }
-
-        while (csr_in32(SMB_CSR(adap, R_SMB_STATUS)) & M_SMB_BUSY)
-                ;
-
-        error = csr_in32(SMB_CSR(adap, R_SMB_STATUS));
-        if (error & M_SMB_ERROR) {
-                /* Clear error bit by writing a 1 */
-                csr_out32(M_SMB_ERROR, SMB_CSR(adap, R_SMB_STATUS));
-                return -1;      /* XXXKW better error code? */
-        }
-
-        if (data_bytes == 1)
-                data->byte = csr_in32(SMB_CSR(adap, R_SMB_DATA)) & 0xff;
-        if (data_bytes == 2)
-                data->word = csr_in32(SMB_CSR(adap, R_SMB_DATA)) & 0xffff;
-
-        return 0;
-}
-
-static u32 bit_func(struct i2c_adapter *adap)
-{
-	return (I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
-                I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA);
-}
-
-
-/* -----exported algorithm data: -------------------------------------	*/
-
-static struct i2c_algorithm i2c_sibyte_algo = {
-	.smbus_xfer	= smbus_xfer,
-	.functionality	= bit_func,
-};
-
-/* 
- * registering functions to load algorithms at runtime 
- */
-int i2c_sibyte_add_bus(struct i2c_adapter *i2c_adap, int speed)
-{
-	int i;
-	struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data;
-
-	/* register new adapter to i2c module... */
-	i2c_adap->algo = &i2c_sibyte_algo;
-        
-        /* Set the frequency to 100 kHz */
-        csr_out32(speed, SMB_CSR(adap,R_SMB_FREQ));
-        csr_out32(0, SMB_CSR(adap,R_SMB_CONTROL));
-
-	/* scan bus */
-	if (bit_scan) {
-                union i2c_smbus_data data;
-                int rc;
-		printk(KERN_INFO " i2c-algo-sibyte.o: scanning bus %s.\n",
-		       i2c_adap->name);
-		for (i = 0x00; i < 0x7f; i++) {
-                        /* XXXKW is this a realistic probe? */
-                        rc = smbus_xfer(i2c_adap, i, 0, I2C_SMBUS_READ, 0,
-                                        I2C_SMBUS_BYTE_DATA, &data);
-			if (!rc) {
-				printk("(%02x)",i); 
-			} else 
-				printk("."); 
-		}
-		printk("\n");
-	}
-
-	return i2c_add_adapter(i2c_adap);
-}
-
-
-int i2c_sibyte_del_bus(struct i2c_adapter *adap)
-{
-	return i2c_del_adapter(adap);
-}
-
-
-EXPORT_SYMBOL(i2c_sibyte_add_bus);
-EXPORT_SYMBOL(i2c_sibyte_del_bus);
-
-MODULE_AUTHOR("Kip Walker, Broadcom Corp.");
-MODULE_DESCRIPTION("SiByte I2C-Bus algorithm");
-module_param(bit_scan, int, 0);
-MODULE_PARM_DESC(bit_scan, "Scan for active chips on the bus");
-MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c
index fa503ed9f86d..f516eb7a23f7 100644
--- a/drivers/i2c/busses/i2c-sibyte.c
+++ b/drivers/i2c/busses/i2c-sibyte.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2004 Steven J. Hill
  * Copyright (C) 2001,2002,2003 Broadcom Corporation
+ * Copyright (C) 1995-2000 Simon G. Vogl
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -17,11 +18,162 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 
+#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/i2c-algo-sibyte.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <asm/io.h>
 #include <asm/sibyte/sb1250_regs.h>
 #include <asm/sibyte/sb1250_smbus.h>
 
+
+struct i2c_algo_sibyte_data {
+	void *data;		/* private data */
+	int   bus;		/* which bus */
+	void *reg_base;		/* CSR base */
+};
+
+/* ----- global defines ----------------------------------------------- */
+#define SMB_CSR(a,r) ((long)(a->reg_base + r))
+
+/* ----- global variables --------------------------------------------- */
+
+/* module parameters:
+ */
+static int bit_scan;	/* have a look at what's hanging 'round */
+module_param(bit_scan, int, 0);
+MODULE_PARM_DESC(bit_scan, "Scan for active chips on the bus");
+
+
+static int smbus_xfer(struct i2c_adapter *i2c_adap, u16 addr,
+		      unsigned short flags, char read_write,
+		      u8 command, int size, union i2c_smbus_data * data)
+{
+	struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data;
+	int data_bytes = 0;
+	int error;
+
+	while (csr_in32(SMB_CSR(adap, R_SMB_STATUS)) & M_SMB_BUSY)
+		;
+
+	switch (size) {
+	case I2C_SMBUS_QUICK:
+		csr_out32((V_SMB_ADDR(addr) |
+			   (read_write == I2C_SMBUS_READ ? M_SMB_QDATA : 0) |
+			   V_SMB_TT_QUICKCMD), SMB_CSR(adap, R_SMB_START));
+		break;
+	case I2C_SMBUS_BYTE:
+		if (read_write == I2C_SMBUS_READ) {
+			csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_RD1BYTE),
+				  SMB_CSR(adap, R_SMB_START));
+			data_bytes = 1;
+		} else {
+			csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD));
+			csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR1BYTE),
+				  SMB_CSR(adap, R_SMB_START));
+		}
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD));
+		if (read_write == I2C_SMBUS_READ) {
+			csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_CMD_RD1BYTE),
+				  SMB_CSR(adap, R_SMB_START));
+			data_bytes = 1;
+		} else {
+			csr_out32(V_SMB_LB(data->byte),
+				  SMB_CSR(adap, R_SMB_DATA));
+			csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR2BYTE),
+				  SMB_CSR(adap, R_SMB_START));
+		}
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		csr_out32(V_SMB_CMD(command), SMB_CSR(adap, R_SMB_CMD));
+		if (read_write == I2C_SMBUS_READ) {
+			csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_CMD_RD2BYTE),
+				  SMB_CSR(adap, R_SMB_START));
+			data_bytes = 2;
+		} else {
+			csr_out32(V_SMB_LB(data->word & 0xff),
+				  SMB_CSR(adap, R_SMB_DATA));
+			csr_out32(V_SMB_MB(data->word >> 8),
+				  SMB_CSR(adap, R_SMB_DATA));
+			csr_out32((V_SMB_ADDR(addr) | V_SMB_TT_WR2BYTE),
+				  SMB_CSR(adap, R_SMB_START));
+		}
+		break;
+	default:
+		return -1;      /* XXXKW better error code? */
+	}
+
+	while (csr_in32(SMB_CSR(adap, R_SMB_STATUS)) & M_SMB_BUSY)
+		;
+
+	error = csr_in32(SMB_CSR(adap, R_SMB_STATUS));
+	if (error & M_SMB_ERROR) {
+		/* Clear error bit by writing a 1 */
+		csr_out32(M_SMB_ERROR, SMB_CSR(adap, R_SMB_STATUS));
+		return -1;      /* XXXKW better error code? */
+	}
+
+	if (data_bytes == 1)
+		data->byte = csr_in32(SMB_CSR(adap, R_SMB_DATA)) & 0xff;
+	if (data_bytes == 2)
+		data->word = csr_in32(SMB_CSR(adap, R_SMB_DATA)) & 0xffff;
+
+	return 0;
+}
+
+static u32 bit_func(struct i2c_adapter *adap)
+{
+	return (I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+		I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA);
+}
+
+
+/* -----exported algorithm data: -------------------------------------	*/
+
+static struct i2c_algorithm i2c_sibyte_algo = {
+	.smbus_xfer	= smbus_xfer,
+	.functionality	= bit_func,
+};
+
+/*
+ * registering functions to load algorithms at runtime
+ */
+int i2c_sibyte_add_bus(struct i2c_adapter *i2c_adap, int speed)
+{
+	int i;
+	struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data;
+
+	/* register new adapter to i2c module... */
+	i2c_adap->algo = &i2c_sibyte_algo;
+
+	/* Set the frequency to 100 kHz */
+	csr_out32(speed, SMB_CSR(adap,R_SMB_FREQ));
+	csr_out32(0, SMB_CSR(adap,R_SMB_CONTROL));
+
+	/* scan bus */
+	if (bit_scan) {
+		union i2c_smbus_data data;
+		int rc;
+		printk(KERN_INFO " i2c-algo-sibyte.o: scanning bus %s.\n",
+		       i2c_adap->name);
+		for (i = 0x00; i < 0x7f; i++) {
+			/* XXXKW is this a realistic probe? */
+			rc = smbus_xfer(i2c_adap, i, 0, I2C_SMBUS_READ, 0,
+					I2C_SMBUS_BYTE_DATA, &data);
+			if (!rc) {
+				printk("(%02x)",i);
+			} else
+				printk(".");
+		}
+		printk("\n");
+	}
+
+	return i2c_add_adapter(i2c_adap);
+}
+
+
 static struct i2c_algo_sibyte_data sibyte_board_data[2] = {
 	{ NULL, 0, (void *) (CKSEG1+A_SMB_BASE(0)) },
 	{ NULL, 1, (void *) (CKSEG1+A_SMB_BASE(1)) }
@@ -58,8 +210,8 @@ static int __init i2c_sibyte_init(void)
 
 static void __exit i2c_sibyte_exit(void)
 {
-	i2c_sibyte_del_bus(&sibyte_board_adapter[0]);
-	i2c_sibyte_del_bus(&sibyte_board_adapter[1]);
+	i2c_del_bus(&sibyte_board_adapter[0]);
+	i2c_del_bus(&sibyte_board_adapter[1]);
 }
 
 module_init(i2c_sibyte_init);
diff --git a/include/linux/i2c-algo-sibyte.h b/include/linux/i2c-algo-sibyte.h
deleted file mode 100644
index 03914ded8614..000000000000
--- a/include/linux/i2c-algo-sibyte.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2001,2002,2003 Broadcom Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- */
-
-#ifndef I2C_ALGO_SIBYTE_H
-#define I2C_ALGO_SIBYTE_H 1
-
-#include <linux/i2c.h>
-
-struct i2c_algo_sibyte_data {
-	void *data;		/* private data */
-        int   bus;		/* which bus */
-        void *reg_base;		/* CSR base */
-};
-
-int i2c_sibyte_add_bus(struct i2c_adapter *, int speed);
-int i2c_sibyte_del_bus(struct i2c_adapter *);
-
-#endif /* I2C_ALGO_SIBYTE_H */
-- 
cgit v1.2.3


From a0d9c63d3640bd4fc90a408e8334754ef44bcf48 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 27 Aug 2006 11:46:49 +0200
Subject: i2c-algo-bit: Discard the mdelay data struct member

i2c-algo-bit: Discard the mdelay data struct member

The i2c_algo_bit_data structure has an mdelay member, which is not
used by the algorithm code (the code has always been ifdef'd out.)
Let's discard it to save some code and memory.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Cc: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/acorn/char/i2c.c                   |  1 -
 drivers/i2c/algos/i2c-algo-bit.c           |  4 ----
 drivers/i2c/busses/i2c-hydra.c             |  1 -
 drivers/i2c/busses/i2c-i810.c              |  2 --
 drivers/i2c/busses/i2c-ixp2000.c           |  1 -
 drivers/i2c/busses/i2c-ixp4xx.c            |  1 -
 drivers/i2c/busses/i2c-parport-light.c     |  1 -
 drivers/i2c/busses/i2c-parport.c           |  1 -
 drivers/i2c/busses/i2c-prosavage.c         |  1 -
 drivers/i2c/busses/i2c-savage4.c           |  1 -
 drivers/i2c/busses/i2c-via.c               |  1 -
 drivers/i2c/busses/i2c-voodoo3.c           |  2 --
 drivers/i2c/busses/scx200_i2c.c            | 12 ++++++------
 drivers/ieee1394/pcilynx.c                 |  1 -
 drivers/media/video/bt8xx/bttv-i2c.c       |  1 -
 drivers/media/video/cx88/cx88-i2c.c        |  1 -
 drivers/media/video/cx88/cx88-vp3054-i2c.c |  1 -
 drivers/media/video/zoran_card.c           |  1 -
 drivers/video/i810/i810-i2c.c              |  1 -
 drivers/video/matrox/i2c-matroxfb.c        |  1 -
 drivers/video/savage/savagefb-i2c.c        |  1 -
 include/linux/i2c-algo-bit.h               |  1 -
 22 files changed, 6 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acorn/char/i2c.c b/drivers/acorn/char/i2c.c
index c26c08b36829..bdb9c8b78ed8 100644
--- a/drivers/acorn/char/i2c.c
+++ b/drivers/acorn/char/i2c.c
@@ -308,7 +308,6 @@ static struct i2c_algo_bit_data ioc_data = {
 	.getsda		= ioc_getsda,
 	.getscl		= ioc_getscl,
 	.udelay		= 80,
-	.mdelay		= 80,
 	.timeout	= 100
 };
 
diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index ab230c033f99..761df16838b4 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -354,10 +354,6 @@ static int sendbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msg)
 			return (retval<0)? retval : -EFAULT;
 			        /* got a better one ?? */
 		}
-#if 0
-		/* from asm/delay.h */
-		__delay(adap->mdelay * (loops_per_sec / 1000) );
-#endif
 	}
 	return wrcount;
 }
diff --git a/drivers/i2c/busses/i2c-hydra.c b/drivers/i2c/busses/i2c-hydra.c
index e0cb3b0f92fa..457d48a0ab9d 100644
--- a/drivers/i2c/busses/i2c-hydra.c
+++ b/drivers/i2c/busses/i2c-hydra.c
@@ -99,7 +99,6 @@ static struct i2c_algo_bit_data hydra_bit_data = {
 	.getsda		= hydra_bit_getsda,
 	.getscl		= hydra_bit_getscl,
 	.udelay		= 5,
-	.mdelay		= 5,
 	.timeout	= HZ
 };
 
diff --git a/drivers/i2c/busses/i2c-i810.c b/drivers/i2c/busses/i2c-i810.c
index 748be30f2bae..b66fb6bb1870 100644
--- a/drivers/i2c/busses/i2c-i810.c
+++ b/drivers/i2c/busses/i2c-i810.c
@@ -166,7 +166,6 @@ static struct i2c_algo_bit_data i810_i2c_bit_data = {
 	.getsda		= bit_i810i2c_getsda,
 	.getscl		= bit_i810i2c_getscl,
 	.udelay		= CYCLE_DELAY,
-	.mdelay		= CYCLE_DELAY,
 	.timeout	= TIMEOUT,
 };
 
@@ -182,7 +181,6 @@ static struct i2c_algo_bit_data i810_ddc_bit_data = {
 	.getsda		= bit_i810ddc_getsda,
 	.getscl		= bit_i810ddc_getscl,
 	.udelay		= CYCLE_DELAY,
-	.mdelay		= CYCLE_DELAY,
 	.timeout	= TIMEOUT,
 };
 
diff --git a/drivers/i2c/busses/i2c-ixp2000.c b/drivers/i2c/busses/i2c-ixp2000.c
index cd6f45d186ab..dd3f4cd3aa68 100644
--- a/drivers/i2c/busses/i2c-ixp2000.c
+++ b/drivers/i2c/busses/i2c-ixp2000.c
@@ -114,7 +114,6 @@ static int ixp2000_i2c_probe(struct platform_device *plat_dev)
 	drv_data->algo_data.getsda = ixp2000_bit_getsda;
 	drv_data->algo_data.getscl = ixp2000_bit_getscl;
 	drv_data->algo_data.udelay = 6;
-	drv_data->algo_data.mdelay = 6;
 	drv_data->algo_data.timeout = 100;
 
 	drv_data->adapter.id = I2C_HW_B_IXP2000,
diff --git a/drivers/i2c/busses/i2c-ixp4xx.c b/drivers/i2c/busses/i2c-ixp4xx.c
index 2ed07112d683..ab573254a8aa 100644
--- a/drivers/i2c/busses/i2c-ixp4xx.c
+++ b/drivers/i2c/busses/i2c-ixp4xx.c
@@ -122,7 +122,6 @@ static int ixp4xx_i2c_probe(struct platform_device *plat_dev)
 	drv_data->algo_data.getsda = ixp4xx_bit_getsda;
 	drv_data->algo_data.getscl = ixp4xx_bit_getscl;
 	drv_data->algo_data.udelay = 10;
-	drv_data->algo_data.mdelay = 10;
 	drv_data->algo_data.timeout = 100;
 
 	drv_data->adapter.id = I2C_HW_B_IXP4XX;
diff --git a/drivers/i2c/busses/i2c-parport-light.c b/drivers/i2c/busses/i2c-parport-light.c
index e09ebbb2f9f0..5eb2bd294fd9 100644
--- a/drivers/i2c/busses/i2c-parport-light.c
+++ b/drivers/i2c/busses/i2c-parport-light.c
@@ -103,7 +103,6 @@ static struct i2c_algo_bit_data parport_algo_data = {
 	.getsda		= parport_getsda,
 	.getscl		= parport_getscl,
 	.udelay		= 50,
-	.mdelay		= 50,
 	.timeout	= HZ,
 }; 
 
diff --git a/drivers/i2c/busses/i2c-parport.c b/drivers/i2c/busses/i2c-parport.c
index 934bd55bae15..48a829431c7b 100644
--- a/drivers/i2c/busses/i2c-parport.c
+++ b/drivers/i2c/busses/i2c-parport.c
@@ -138,7 +138,6 @@ static struct i2c_algo_bit_data parport_algo_data = {
 	.getsda		= parport_getsda,
 	.getscl		= parport_getscl,
 	.udelay		= 60,
-	.mdelay		= 60,
 	.timeout	= HZ,
 }; 
 
diff --git a/drivers/i2c/busses/i2c-prosavage.c b/drivers/i2c/busses/i2c-prosavage.c
index 9479525892e3..7745e21874a8 100644
--- a/drivers/i2c/busses/i2c-prosavage.c
+++ b/drivers/i2c/busses/i2c-prosavage.c
@@ -180,7 +180,6 @@ static int i2c_register_bus(struct pci_dev *dev, struct s_i2c_bus *p, void __iom
 	p->algo.getsda	  = bit_s3via_getsda;
 	p->algo.getscl	  = bit_s3via_getscl;
 	p->algo.udelay	  = CYCLE_DELAY;
-	p->algo.mdelay	  = CYCLE_DELAY;
 	p->algo.timeout	  = TIMEOUT;
 	p->algo.data	  = p;
 	p->mmvga	  = mmvga;
diff --git a/drivers/i2c/busses/i2c-savage4.c b/drivers/i2c/busses/i2c-savage4.c
index 0c8518298e4d..209f47ea1750 100644
--- a/drivers/i2c/busses/i2c-savage4.c
+++ b/drivers/i2c/busses/i2c-savage4.c
@@ -140,7 +140,6 @@ static struct i2c_algo_bit_data sav_i2c_bit_data = {
 	.getsda		= bit_savi2c_getsda,
 	.getscl		= bit_savi2c_getscl,
 	.udelay		= CYCLE_DELAY,
-	.mdelay		= CYCLE_DELAY,
 	.timeout	= TIMEOUT
 };
 
diff --git a/drivers/i2c/busses/i2c-via.c b/drivers/i2c/busses/i2c-via.c
index 484bbacfce6b..910e200ad500 100644
--- a/drivers/i2c/busses/i2c-via.c
+++ b/drivers/i2c/busses/i2c-via.c
@@ -81,7 +81,6 @@ static struct i2c_algo_bit_data bit_data = {
 	.getsda		= bit_via_getsda,
 	.getscl		= bit_via_getscl,
 	.udelay		= 5,
-	.mdelay		= 5,
 	.timeout	= HZ
 };
 
diff --git a/drivers/i2c/busses/i2c-voodoo3.c b/drivers/i2c/busses/i2c-voodoo3.c
index b675773b0cc1..6c8d25183382 100644
--- a/drivers/i2c/busses/i2c-voodoo3.c
+++ b/drivers/i2c/busses/i2c-voodoo3.c
@@ -160,7 +160,6 @@ static struct i2c_algo_bit_data voo_i2c_bit_data = {
 	.getsda		= bit_vooi2c_getsda,
 	.getscl		= bit_vooi2c_getscl,
 	.udelay		= CYCLE_DELAY,
-	.mdelay		= CYCLE_DELAY,
 	.timeout	= TIMEOUT
 };
 
@@ -177,7 +176,6 @@ static struct i2c_algo_bit_data voo_ddc_bit_data = {
 	.getsda		= bit_vooddc_getsda,
 	.getscl		= bit_vooddc_getscl,
 	.udelay		= CYCLE_DELAY,
-	.mdelay		= CYCLE_DELAY,
 	.timeout	= TIMEOUT
 };
 
diff --git a/drivers/i2c/busses/scx200_i2c.c b/drivers/i2c/busses/scx200_i2c.c
index cb3ef5ac99fd..8b65a5cf8251 100644
--- a/drivers/i2c/busses/scx200_i2c.c
+++ b/drivers/i2c/busses/scx200_i2c.c
@@ -71,12 +71,12 @@ static int scx200_i2c_getsda(void *data)
  */
 
 static struct i2c_algo_bit_data scx200_i2c_data = {
-	NULL,
-	scx200_i2c_setsda,
-	scx200_i2c_setscl,
-	scx200_i2c_getsda,
-	scx200_i2c_getscl,
-	10, 10, 100,		/* waits, timeout */
+	.setsda		= scx200_i2c_setsda,
+	.setscl		= scx200_i2c_setscl,
+	.getsda		= scx200_i2c_getsda,
+	.getscl		= scx200_i2c_getscl,
+	.udelay		= 10,
+	.timeout	= 100,
 };
 
 static struct i2c_adapter scx200_i2c_ops = {
diff --git a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c
index e6f41238f5e8..b4f146f2c951 100644
--- a/drivers/ieee1394/pcilynx.c
+++ b/drivers/ieee1394/pcilynx.c
@@ -137,7 +137,6 @@ static struct i2c_algo_bit_data bit_data = {
 	.getsda			= bit_getsda,
 	.getscl			= bit_getscl,
 	.udelay			= 5,
-	.mdelay			= 5,
 	.timeout		= 100,
 };
 
diff --git a/drivers/media/video/bt8xx/bttv-i2c.c b/drivers/media/video/bt8xx/bttv-i2c.c
index 4b562b386fcf..0dfbcc85ebb9 100644
--- a/drivers/media/video/bt8xx/bttv-i2c.c
+++ b/drivers/media/video/bt8xx/bttv-i2c.c
@@ -100,7 +100,6 @@ static struct i2c_algo_bit_data bttv_i2c_algo_bit_template = {
 	.getsda  = bttv_bit_getsda,
 	.getscl  = bttv_bit_getscl,
 	.udelay  = 16,
-	.mdelay  = 10,
 	.timeout = 200,
 };
 
diff --git a/drivers/media/video/cx88/cx88-i2c.c b/drivers/media/video/cx88/cx88-i2c.c
index 70663805cc30..7bea34714861 100644
--- a/drivers/media/video/cx88/cx88-i2c.c
+++ b/drivers/media/video/cx88/cx88-i2c.c
@@ -155,7 +155,6 @@ static struct i2c_algo_bit_data cx8800_i2c_algo_template = {
 	.getsda  = cx8800_bit_getsda,
 	.getscl  = cx8800_bit_getscl,
 	.udelay  = 16,
-	.mdelay  = 10,
 	.timeout = 200,
 };
 
diff --git a/drivers/media/video/cx88/cx88-vp3054-i2c.c b/drivers/media/video/cx88/cx88-vp3054-i2c.c
index 751a754a45e9..2b4f1970c7df 100644
--- a/drivers/media/video/cx88/cx88-vp3054-i2c.c
+++ b/drivers/media/video/cx88/cx88-vp3054-i2c.c
@@ -100,7 +100,6 @@ static struct i2c_algo_bit_data vp3054_i2c_algo_template = {
 	.getsda  = vp3054_bit_getsda,
 	.getscl  = vp3054_bit_getscl,
 	.udelay  = 16,
-	.mdelay  = 10,
 	.timeout = 200,
 };
 
diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c
index f2249ed25273..29f59c36f001 100644
--- a/drivers/media/video/zoran_card.c
+++ b/drivers/media/video/zoran_card.c
@@ -820,7 +820,6 @@ static struct i2c_algo_bit_data zoran_i2c_bit_data_template = {
 	.getsda = zoran_i2c_getsda,
 	.getscl = zoran_i2c_getscl,
 	.udelay = 10,
-	.mdelay = 0,
 	.timeout = 100,
 };
 
diff --git a/drivers/video/i810/i810-i2c.c b/drivers/video/i810/i810-i2c.c
index c1f7b49975dd..7d06b38e80a0 100644
--- a/drivers/video/i810/i810-i2c.c
+++ b/drivers/video/i810/i810-i2c.c
@@ -98,7 +98,6 @@ static int i810_setup_i2c_bus(struct i810fb_i2c_chan *chan, const char *name)
 	chan->algo.getsda               = i810i2c_getsda;
 	chan->algo.getscl               = i810i2c_getscl;
 	chan->algo.udelay               = 10;
-	chan->algo.mdelay               = 10;
         chan->algo.timeout              = (HZ/2);
         chan->algo.data                 = chan;
 
diff --git a/drivers/video/matrox/i2c-matroxfb.c b/drivers/video/matrox/i2c-matroxfb.c
index 9842042d2af5..795c1a99a680 100644
--- a/drivers/video/matrox/i2c-matroxfb.c
+++ b/drivers/video/matrox/i2c-matroxfb.c
@@ -100,7 +100,6 @@ static struct i2c_algo_bit_data matrox_i2c_algo_template =
 	.getsda		= matroxfb_gpio_getsda,
 	.getscl		= matroxfb_gpio_getscl,
 	.udelay		= 10,
-	.mdelay		= 10,
 	.timeout	= 100,
 };
 
diff --git a/drivers/video/savage/savagefb-i2c.c b/drivers/video/savage/savagefb-i2c.c
index e83befd16d63..d7d810dbf0bd 100644
--- a/drivers/video/savage/savagefb-i2c.c
+++ b/drivers/video/savage/savagefb-i2c.c
@@ -148,7 +148,6 @@ static int savage_setup_i2c_bus(struct savagefb_i2c_chan *chan,
 		chan->adapter.algo_data		= &chan->algo;
 		chan->adapter.dev.parent	= &chan->par->pcidev->dev;
 		chan->algo.udelay		= 40;
-		chan->algo.mdelay               = 5;
 		chan->algo.timeout		= 20;
 		chan->algo.data 		= chan;
 
diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h
index c0e7fab28ce3..c8f8df25c7e0 100644
--- a/include/linux/i2c-algo-bit.h
+++ b/include/linux/i2c-algo-bit.h
@@ -40,7 +40,6 @@ struct i2c_algo_bit_data {
 	/* local settings */
 	int udelay;		/* half-clock-cycle time in microsecs */
 				/* i.e. clock is (500 / udelay) KHz */
-	int mdelay;		/* in millisecs, unused */
 	int timeout;		/* in jiffies */
 };
 
-- 
cgit v1.2.3


From 9b4ccb86b4abe644ffd218720da2f942b6a20fc2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sun, 3 Sep 2006 22:22:50 +0200
Subject: i2c-algo-pcf: Discard the mdelay data struct member

i2c-algo-pcf: Discard the mdelay data struct member

Just as i2c-algo-bit, i2c-algo-pcf has an unused mdelay struct member,
which we can get rid of to spare some code and memory.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/i2c/busses/i2c-elektor.c | 1 -
 include/linux/i2c-algo-pcf.h     | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c
index 59f8308c2356..caa8e5c8bfbb 100644
--- a/drivers/i2c/busses/i2c-elektor.c
+++ b/drivers/i2c/busses/i2c-elektor.c
@@ -196,7 +196,6 @@ static struct i2c_algo_pcf_data pcf_isa_data = {
 	.getclock   = pcf_isa_getclock,
 	.waitforpin = pcf_isa_waitforpin,
 	.udelay	    = 10,
-	.mdelay	    = 10,
 	.timeout    = 100,
 };
 
diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h
index 18b0adf57a3d..9908f3fc4839 100644
--- a/include/linux/i2c-algo-pcf.h
+++ b/include/linux/i2c-algo-pcf.h
@@ -35,7 +35,6 @@ struct i2c_algo_pcf_data {
 
 	/* local settings */
 	int udelay;
-	int mdelay;
 	int timeout;
 };
 
-- 
cgit v1.2.3


From af71ff690b92894f66ccede27f731150dc10d80d Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Sun, 3 Sep 2006 22:37:11 +0200
Subject: i2c: Let drivers constify i2c_algorithm data

i2c: Let drivers constify i2c_algorithm data

Let drivers constify I2C algorithm method operations tables,
moving them from ".data" to ".rodata".

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index eb0628a7ecc6..23ad1ee42a4c 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -220,7 +220,7 @@ struct i2c_adapter {
 	struct module *owner;
 	unsigned int id;
 	unsigned int class;
-	struct i2c_algorithm *algo;/* the algorithm to access the bus	*/
+	const struct i2c_algorithm *algo; /* the algorithm to access the bus */
 	void *algo_data;
 
 	/* --- administration stuff. */
-- 
cgit v1.2.3


From 6d3aae9d74221b00e2cbf50a353527e5a71a58ba Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 3 Sep 2006 22:41:08 +0200
Subject: i2c: Drop unimplemented slave functions

i2c: Drop unimplemented slave functions

Drop the function declarations for slave mode support of i2c adapters.
This was never implemented, and by the time it is I bet we will want
something different anyway.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 23ad1ee42a4c..9b5d04768c2c 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -64,14 +64,6 @@ extern int i2c_master_recv(struct i2c_client *,char* ,int);
  */
 extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num);
 
-/*
- * Some adapter types (i.e. PCF 8584 based ones) may support slave behaviuor. 
- * This is not tested/implemented yet and will change in the future.
- */
-extern int i2c_slave_send(struct i2c_client *,char*,int);
-extern int i2c_slave_recv(struct i2c_client *,char*,int);
-
-
 
 /* This is the very generalized SMBus access routine. You probably do not
    want to use this, though; one of the functions below may be much easier,
@@ -201,10 +193,6 @@ struct i2c_algorithm {
 	                   unsigned short flags, char read_write,
 	                   u8 command, int size, union i2c_smbus_data * data);
 
-	/* --- these optional/future use for some adapter types.*/
-	int (*slave_send)(struct i2c_adapter *,char*,int);
-	int (*slave_recv)(struct i2c_adapter *,char*,int);
-
 	/* --- ioctl like call to set div. parameters. */
 	int (*algo_control)(struct i2c_adapter *, unsigned int, unsigned long);
 
-- 
cgit v1.2.3


From 46ff34633ed09f36ebc4b5c40ac37e592172df74 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Thu, 31 Aug 2006 01:55:24 -0400
Subject: MSI: Rename PCI_CAP_ID_HT_IRQCONF into PCI_CAP_ID_HT

0x08 is the HT capability, while PCI_CAP_ID_HT_IRQCONF would be
the subtype 0x80 that mpic_scan_ht_pic() uses.
Rename PCI_CAP_ID_HT_IRQCONF into PCI_CAP_ID_HT.

And by the way, use it in the ipath driver instead of defining its
own HT_CAPABILITY_ID.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/powerpc/sysdev/mpic.c                  | 2 +-
 drivers/infiniband/hw/ipath/ipath_iba6110.c | 5 ++---
 include/linux/pci_regs.h                    | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b604926401f5..723972bb5bd9 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -339,7 +339,7 @@ static void __init mpic_scan_ht_pic(struct mpic *mpic, u8 __iomem *devbase,
 	for (pos = readb(devbase + PCI_CAPABILITY_LIST); pos != 0;
 	     pos = readb(devbase + pos + PCI_CAP_LIST_NEXT)) {
 		u8 id = readb(devbase + pos + PCI_CAP_LIST_ID);
-		if (id == PCI_CAP_ID_HT_IRQCONF) {
+		if (id == PCI_CAP_ID_HT) {
 			id = readb(devbase + pos + 3);
 			if (id == 0x80)
 				break;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index bf2455a6d562..5c9b509e40e4 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -742,7 +742,6 @@ static int ipath_setup_ht_reset(struct ipath_devdata *dd)
 	return 0;
 }
 
-#define HT_CAPABILITY_ID   0x08	/* HT capabilities not defined in kernel */
 #define HT_INTR_DISC_CONFIG  0x80	/* HT interrupt and discovery cap */
 #define HT_INTR_REG_INDEX    2	/* intconfig requires indirect accesses */
 
@@ -973,7 +972,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
 	 * do this early, before we ever enable errors or hardware errors,
 	 * mostly to avoid causing the chip to enter freeze mode.
 	 */
-	pos = pci_find_capability(pdev, HT_CAPABILITY_ID);
+	pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
 	if (!pos) {
 		ipath_dev_err(dd, "Couldn't find HyperTransport "
 			      "capability; no interrupts\n");
@@ -996,7 +995,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
 		else if (cap_type == HT_INTR_DISC_CONFIG)
 			ihandler = set_int_handler(dd, pdev, pos);
 	} while ((pos = pci_find_next_capability(pdev, pos,
-						 HT_CAPABILITY_ID)));
+						 PCI_CAP_ID_HT)));
 
 	if (!ihandler) {
 		ipath_dev_err(dd, "Couldn't find interrupt handler in "
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 96930cb5927c..7d0e26cba420 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -196,7 +196,7 @@
 #define  PCI_CAP_ID_MSI		0x05	/* Message Signalled Interrupts */
 #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
 #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
-#define  PCI_CAP_ID_HT_IRQCONF	0x08	/* HyperTransport IRQ Configuration */
+#define  PCI_CAP_ID_HT		0x08	/* HyperTransport */
 #define  PCI_CAP_ID_VNDR	0x09	/* Vendor specific capability */
 #define  PCI_CAP_ID_SHPC 	0x0C	/* PCI Standard Hot-Plug Controller */
 #define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
-- 
cgit v1.2.3


From 6397c75cbc4d7dbc3d07278b57c82a47dafb21b5 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Thu, 31 Aug 2006 01:55:32 -0400
Subject: MSI: Blacklist PCI-E chipsets depending on Hypertransport MSI
 capability

Introduce msi_ht_cap_enabled() to check the MSI capability in the
Hypertransport configuration space.
It is used in a generic quirk quirk_msi_ht_cap() to check whether
MSI is enabled on hypertransport chipset, and a nVidia specific quirk
quirk_nvidia_ck804_msi_ht_cap() where two 2 HT MSI mappings have to
be checked.
Both quirks set the PCI_BUS_FLAGS_NO_MSI bus flag when MSI is disabled.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/quirks.c    | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h |  1 +
 2 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8385b815ecb1..08cd86a6dd66 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1703,6 +1703,65 @@ static void __devinit quirk_disable_msi(struct pci_dev *dev)
 	}
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_msi);
+
+/* Go through the list of Hypertransport capabilities and
+ * return 1 if a HT MSI capability is found and enabled */
+static int __devinit msi_ht_cap_enabled(struct pci_dev *dev)
+{
+	u8 pos;
+	int ttl;
+	for (pos = pci_find_capability(dev, PCI_CAP_ID_HT), ttl = 48;
+	     pos && ttl;
+	     pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_HT), ttl--) {
+		u32 cap_hdr;
+		/* MSI mapping section according to Hypertransport spec */
+		if (pci_read_config_dword(dev, pos, &cap_hdr) == 0
+		    && (cap_hdr & 0xf8000000) == 0xa8000000 /* MSI mapping */) {
+			printk(KERN_INFO "PCI: Found HT MSI mapping on %s with capability %s\n",
+			       pci_name(dev), cap_hdr & 0x10000 ? "enabled" : "disabled");
+			return (cap_hdr & 0x10000) != 0; /* MSI mapping cap enabled */
+		}
+	}
+	return 0;
+}
+
+/* Check the hypertransport MSI mapping to know whether MSI is enabled or not */
+static void __devinit quirk_msi_ht_cap(struct pci_dev *dev)
+{
+	if (dev->subordinate && !msi_ht_cap_enabled(dev)) {
+		printk(KERN_WARNING "PCI: MSI quirk detected. "
+		       "MSI disabled on chipset %s.\n",
+		       pci_name(dev));
+		dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT2000_PCIE,
+			quirk_msi_ht_cap);
+
+/* The nVidia CK804 chipset may have 2 HT MSI mappings.
+ * MSI are supported if the MSI capability set in any of these mappings.
+ */
+static void __devinit quirk_nvidia_ck804_msi_ht_cap(struct pci_dev *dev)
+{
+	struct pci_dev *pdev;
+
+	if (!dev->subordinate)
+		return;
+
+	/* check HT MSI cap on this chipset and the root one.
+	 * a single one having MSI is enough to be sure that MSI are supported.
+	 */
+	pdev = pci_find_slot(dev->bus->number, 0);
+	if (dev->subordinate && !msi_ht_cap_enabled(dev)
+	    && !msi_ht_cap_enabled(pdev)) {
+		printk(KERN_WARNING "PCI: MSI quirk detected. "
+		       "MSI disabled on chipset %s.\n",
+		       pci_name(dev));
+		dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
+			quirk_nvidia_ck804_msi_ht_cap);
 #endif /* CONFIG_PCI_MSI */
 
 EXPORT_SYMBOL(pcie_mch_quirk);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6a1e09834559..b9e263adebab 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1411,6 +1411,7 @@
 #define PCI_DEVICE_ID_SERVERWORKS_LE	  0x0009
 #define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017
 #define PCI_DEVICE_ID_SERVERWORKS_EPB	  0x0103
+#define PCI_DEVICE_ID_SERVERWORKS_HT2000_PCIE	0x0132
 #define PCI_DEVICE_ID_SERVERWORKS_OSB4	  0x0200
 #define PCI_DEVICE_ID_SERVERWORKS_CSB5	  0x0201
 #define PCI_DEVICE_ID_SERVERWORKS_CSB6    0x0203
-- 
cgit v1.2.3


From 6c2b374d74857e892080ee726184ec1d15e7d4e4 Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin.zhang@intel.com>
Date: Mon, 31 Jul 2006 15:21:33 +0800
Subject: PCI-Express AER implemetation: AER core and aerdriver

Patch 3 implements the core part of PCI-Express AER and aerdrv
port service driver.

When a root port service device is probed, the aerdrv will call
request_irq to register irq handler for AER error interrupt.

When a device sends an PCI-Express error message to the root port,
the root port will trigger an interrupt, by either MSI or IO-APIC,
then kernel would run the irq handler. The handler collects root
error status register and schedules a work. The work will call
the core part to process the error based on its type
(Correctable/non-fatal/fatal).

As for Correctable errors, the patch chooses to just clear the correctable
error status register of the device.

As for the non-fatal error, the patch follows generic PCI error handler
rules to call the error callback functions of the endpoint's driver. If
the device is a bridge, the patch chooses to broadcast the error to
downstream devices.

As for the fatal error, the patch resets the pci-express link and
follows generic PCI error handler rules to call the error callback
functions of the endpoint's driver. If the device is a bridge, the patch
chooses to broadcast the error to downstream devices.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/pcie/Kconfig               |   1 +
 drivers/pci/pcie/Makefile              |   3 +
 drivers/pci/pcie/aer/Kconfig           |  12 +
 drivers/pci/pcie/aer/Makefile          |   8 +
 drivers/pci/pcie/aer/aerdrv.c          | 346 +++++++++++++++
 drivers/pci/pcie/aer/aerdrv.h          | 125 ++++++
 drivers/pci/pcie/aer/aerdrv_acpi.c     |  68 +++
 drivers/pci/pcie/aer/aerdrv_core.c     | 757 +++++++++++++++++++++++++++++++++
 drivers/pci/pcie/aer/aerdrv_errprint.c | 248 +++++++++++
 include/linux/aer.h                    |  24 ++
 include/linux/pcieport_if.h            |   6 +
 11 files changed, 1598 insertions(+)
 create mode 100644 drivers/pci/pcie/aer/Kconfig
 create mode 100644 drivers/pci/pcie/aer/Makefile
 create mode 100644 drivers/pci/pcie/aer/aerdrv.c
 create mode 100644 drivers/pci/pcie/aer/aerdrv.h
 create mode 100644 drivers/pci/pcie/aer/aerdrv_acpi.c
 create mode 100644 drivers/pci/pcie/aer/aerdrv_core.c
 create mode 100644 drivers/pci/pcie/aer/aerdrv_errprint.c
 create mode 100644 include/linux/aer.h

(limited to 'include/linux')

diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 1012db8b8b2c..0ad92a8ad8b1 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -34,3 +34,4 @@ config HOTPLUG_PCI_PCIE_POLL_EVENT_MODE
 	   
 	  When in doubt, say N.
 
+source "drivers/pci/pcie/aer/Kconfig"
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 984fa87283e3..e00fb99acf44 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -5,3 +5,6 @@
 pcieportdrv-y			:= portdrv_core.o portdrv_pci.o portdrv_bus.o
 
 obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o
+
+# Build PCI Express AER if needed
+obj-$(CONFIG_PCIEAER)		+= aer/
diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig
new file mode 100644
index 000000000000..3f37a60a6438
--- /dev/null
+++ b/drivers/pci/pcie/aer/Kconfig
@@ -0,0 +1,12 @@
+#
+# PCI Express Root Port Device AER Configuration
+#
+
+config PCIEAER
+	boolean "Root Port Advanced Error Reporting support"
+	depends on PCIEPORTBUS && ACPI
+	default y
+	help
+	  This enables PCI Express Root Port Advanced Error Reporting
+	  (AER) driver support. Error reporting messages sent to Root
+	  Port will be handled by PCI Express AER driver.
diff --git a/drivers/pci/pcie/aer/Makefile b/drivers/pci/pcie/aer/Makefile
new file mode 100644
index 000000000000..15a4f40d520b
--- /dev/null
+++ b/drivers/pci/pcie/aer/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for PCI-Express Root Port Advanced Error Reporting Driver
+#
+
+obj-$(CONFIG_PCIEAER) += aerdriver.o
+
+aerdriver-objs := aerdrv_errprint.o aerdrv_core.o aerdrv.o aerdrv_acpi.o
+
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
new file mode 100644
index 000000000000..0d4ac027d53e
--- /dev/null
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -0,0 +1,346 @@
+/*
+ * drivers/pci/pcie/aer/aerdrv.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file implements the AER root port service driver. The driver will
+ * register an irq handler. When root port triggers an AER interrupt, the irq
+ * handler will collect root port status and schedule a work.
+ *
+ * Copyright (C) 2006 Intel Corp.
+ *	Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *	Zhang Yanmin (yanmin.zhang@intel.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/pcieport_if.h>
+
+#include "aerdrv.h"
+
+/*
+ * Version Information
+ */
+#define DRIVER_VERSION "v1.0"
+#define DRIVER_AUTHOR "tom.l.nguyen@intel.com"
+#define DRIVER_DESC "Root Port Advanced Error Reporting Driver"
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+static int __devinit aer_probe (struct pcie_device *dev,
+	const struct pcie_port_service_id *id );
+static void aer_remove(struct pcie_device *dev);
+static int aer_suspend(struct pcie_device *dev, pm_message_t state)
+{return 0;}
+static int aer_resume(struct pcie_device *dev) {return 0;}
+static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
+	enum pci_channel_state error);
+static void aer_error_resume(struct pci_dev *dev);
+static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
+
+/*
+ * PCI Express bus's AER Root service driver data structure
+ */
+static struct pcie_port_service_id aer_id[] = {
+	{
+	.vendor 	= PCI_ANY_ID,
+	.device 	= PCI_ANY_ID,
+	.port_type 	= PCIE_RC_PORT,
+	.service_type 	= PCIE_PORT_SERVICE_AER,
+	},
+	{ /* end: all zeroes */ }
+};
+
+static struct pci_error_handlers aer_error_handlers = {
+	.error_detected = aer_error_detected,
+	.resume = aer_error_resume,
+};
+
+static struct pcie_port_service_driver aerdrv = {
+	.name		= "aer",
+	.id_table	= &aer_id[0],
+
+	.probe		= aer_probe,
+	.remove		= aer_remove,
+
+	.suspend	= aer_suspend,
+	.resume		= aer_resume,
+
+	.err_handler	= &aer_error_handlers,
+
+	.reset_link	= aer_root_reset,
+};
+
+/**
+ * aer_irq - Root Port's ISR
+ * @irq: IRQ assigned to Root Port
+ * @context: pointer to Root Port data structure
+ * @r: pointer struct pt_regs
+ *
+ * Invoked when Root Port detects AER messages.
+ **/
+static irqreturn_t aer_irq(int irq, void *context, struct pt_regs * r)
+{
+	unsigned int status, id;
+	struct pcie_device *pdev = (struct pcie_device *)context;
+	struct aer_rpc *rpc = get_service_data(pdev);
+	int next_prod_idx;
+	unsigned long flags;
+	int pos;
+
+	pos = pci_find_aer_capability(pdev->port);
+	/*
+	 * Must lock access to Root Error Status Reg, Root Error ID Reg,
+	 * and Root error producer/consumer index
+	 */
+	spin_lock_irqsave(&rpc->e_lock, flags);
+
+	/* Read error status */
+	pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status);
+	if (!(status & ROOT_ERR_STATUS_MASKS)) {
+		spin_unlock_irqrestore(&rpc->e_lock, flags);
+		return IRQ_NONE;
+	}
+
+	/* Read error source and clear error status */
+	pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_COR_SRC, &id);
+	pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status);
+
+	/* Store error source for later DPC handler */
+	next_prod_idx = rpc->prod_idx + 1;
+	if (next_prod_idx == AER_ERROR_SOURCES_MAX)
+		next_prod_idx = 0;
+	if (next_prod_idx == rpc->cons_idx) {
+		/*
+		 * Error Storm Condition - possibly the same error occurred.
+		 * Drop the error.
+		 */
+		spin_unlock_irqrestore(&rpc->e_lock, flags);
+		return IRQ_HANDLED;
+	}
+	rpc->e_sources[rpc->prod_idx].status =  status;
+	rpc->e_sources[rpc->prod_idx].id = id;
+	rpc->prod_idx = next_prod_idx;
+	spin_unlock_irqrestore(&rpc->e_lock, flags);
+
+	/*  Invoke DPC handler */
+	schedule_work(&rpc->dpc_handler);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * aer_alloc_rpc - allocate Root Port data structure
+ * @dev: pointer to the pcie_dev data structure
+ *
+ * Invoked when Root Port's AER service is loaded.
+ **/
+static struct aer_rpc* aer_alloc_rpc(struct pcie_device *dev)
+{
+	struct aer_rpc *rpc;
+
+	if (!(rpc = (struct aer_rpc *)kmalloc(sizeof(struct aer_rpc),
+		GFP_KERNEL)))
+		return NULL;
+
+	memset(rpc, 0, sizeof(struct aer_rpc));
+	/*
+	 * Initialize Root lock access, e_lock, to Root Error Status Reg,
+	 * Root Error ID Reg, and Root error producer/consumer index.
+	 */
+	rpc->e_lock = SPIN_LOCK_UNLOCKED;
+
+	rpc->rpd = dev;
+	INIT_WORK(&rpc->dpc_handler, aer_isr, (void *)dev);
+	rpc->prod_idx = rpc->cons_idx = 0;
+	mutex_init(&rpc->rpc_mutex);
+	init_waitqueue_head(&rpc->wait_release);
+
+	/* Use PCIE bus function to store rpc into PCIE device */
+	set_service_data(dev, rpc);
+
+	return rpc;
+}
+
+/**
+ * aer_remove - clean up resources
+ * @dev: pointer to the pcie_dev data structure
+ *
+ * Invoked when PCI Express bus unloads or AER probe fails.
+ **/
+static void aer_remove(struct pcie_device *dev)
+{
+	struct aer_rpc *rpc = get_service_data(dev);
+
+	if (rpc) {
+		/* If register interrupt service, it must be free. */
+		if (rpc->isr)
+			free_irq(dev->irq, dev);
+
+		wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx);
+
+		aer_delete_rootport(rpc);
+		set_service_data(dev, NULL);
+	}
+}
+
+/**
+ * aer_probe - initialize resources
+ * @dev: pointer to the pcie_dev data structure
+ * @id: pointer to the service id data structure
+ *
+ * Invoked when PCI Express bus loads AER service driver.
+ **/
+static int __devinit aer_probe (struct pcie_device *dev,
+				const struct pcie_port_service_id *id )
+{
+	int status;
+	struct aer_rpc *rpc;
+	struct device *device = &dev->device;
+
+	/* Init */
+	if ((status = aer_init(dev)))
+		return status;
+
+	/* Alloc rpc data structure */
+	if (!(rpc = aer_alloc_rpc(dev))) {
+		printk(KERN_DEBUG "%s: Alloc rpc fails on PCIE device[%s]\n",
+			__FUNCTION__, device->bus_id);
+		aer_remove(dev);
+		return -ENOMEM;
+	}
+
+	/* Request IRQ ISR */
+	if ((status = request_irq(dev->irq, aer_irq, SA_SHIRQ, "aerdrv",
+				dev))) {
+		printk(KERN_DEBUG "%s: Request ISR fails on PCIE device[%s]\n",
+			__FUNCTION__, device->bus_id);
+		aer_remove(dev);
+		return status;
+	}
+
+	rpc->isr = 1;
+
+	aer_enable_rootport(rpc);
+
+	return status;
+}
+
+/**
+ * aer_root_reset - reset link on Root Port
+ * @dev: pointer to Root Port's pci_dev data structure
+ *
+ * Invoked by Port Bus driver when performing link reset at Root Port.
+ **/
+static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
+{
+	u16 p2p_ctrl;
+	u32 status;
+	int pos;
+
+	pos = pci_find_aer_capability(dev);
+
+	/* Disable Root's interrupt in response to error messages */
+	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0);
+
+	/* Assert Secondary Bus Reset */
+	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &p2p_ctrl);
+	p2p_ctrl |= PCI_CB_BRIDGE_CTL_CB_RESET;
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl);
+
+	/* De-assert Secondary Bus Reset */
+	p2p_ctrl &= ~PCI_CB_BRIDGE_CTL_CB_RESET;
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl);
+
+	/*
+	 * System software must wait for at least 100ms from the end
+	 * of a reset of one or more device before it is permitted
+	 * to issue Configuration Requests to those devices.
+	 */
+	msleep(200);
+	printk(KERN_DEBUG "Complete link reset at Root[%s]\n", dev->dev.bus_id);
+
+	/* Enable Root Port's interrupt in response to error messages */
+	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
+	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status);
+	pci_write_config_dword(dev,
+		pos + PCI_ERR_ROOT_COMMAND,
+		ROOT_PORT_INTR_ON_MESG_MASK);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * aer_error_detected - update severity status
+ * @dev: pointer to Root Port's pci_dev data structure
+ * @error: error severity being notified by port bus
+ *
+ * Invoked by Port Bus driver during error recovery.
+ **/
+static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
+			enum pci_channel_state error)
+{
+	/* Root Port has no impact. Always recovers. */
+	return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+/**
+ * aer_error_resume - clean up corresponding error status bits
+ * @dev: pointer to Root Port's pci_dev data structure
+ *
+ * Invoked by Port Bus driver during nonfatal recovery.
+ **/
+static void aer_error_resume(struct pci_dev *dev)
+{
+	int pos;
+	u32 status, mask;
+	u16 reg16;
+
+	/* Clean up Root device status */
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, &reg16);
+	pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16);
+
+	/* Clean AER Root Error Status */
+	pos = pci_find_aer_capability(dev);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
+	if (dev->error_state == pci_channel_io_normal)
+		status &= ~mask; /* Clear corresponding nonfatal bits */
+	else
+		status &= mask; /* Clear corresponding fatal bits */
+	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
+}
+
+/**
+ * aer_service_init - register AER root service driver
+ *
+ * Invoked when AER root service driver is loaded.
+ **/
+static int __init aer_service_init(void)
+{
+	return pcie_port_service_register(&aerdrv);
+}
+
+/**
+ * aer_service_exit - unregister AER root service driver
+ *
+ * Invoked when AER root service driver is unloaded.
+ **/
+static void __exit aer_service_exit(void)
+{
+	pcie_port_service_unregister(&aerdrv);
+}
+
+module_init(aer_service_init);
+module_exit(aer_service_exit);
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
new file mode 100644
index 000000000000..daf0cad88fc8
--- /dev/null
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2006 Intel Corp.
+ *	Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *	Zhang Yanmin (yanmin.zhang@intel.com)
+ *
+ */
+
+#ifndef _AERDRV_H_
+#define _AERDRV_H_
+
+#include <linux/pcieport_if.h>
+#include <linux/aer.h>
+
+#define AER_NONFATAL			0
+#define AER_FATAL			1
+#define AER_CORRECTABLE			2
+#define AER_UNCORRECTABLE		4
+#define AER_ERROR_MASK			0x001fffff
+#define AER_ERROR(d)			(d & AER_ERROR_MASK)
+
+#define OSC_METHOD_RUN_SUCCESS		0
+#define OSC_METHOD_NOT_SUPPORTED	1
+#define OSC_METHOD_RUN_FAILURE		2
+
+/* Root Error Status Register Bits */
+#define ROOT_ERR_STATUS_MASKS			0x0f
+
+#define SYSTEM_ERROR_INTR_ON_MESG_MASK	(PCI_EXP_RTCTL_SECEE|	\
+					PCI_EXP_RTCTL_SENFEE|	\
+					PCI_EXP_RTCTL_SEFEE)
+#define ROOT_PORT_INTR_ON_MESG_MASK	(PCI_ERR_ROOT_CMD_COR_EN|	\
+					PCI_ERR_ROOT_CMD_NONFATAL_EN|	\
+					PCI_ERR_ROOT_CMD_FATAL_EN)
+#define ERR_COR_ID(d)			(d & 0xffff)
+#define ERR_UNCOR_ID(d)			(d >> 16)
+
+#define AER_SUCCESS			0
+#define AER_UNSUCCESS			1
+#define AER_ERROR_SOURCES_MAX		100
+
+#define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
+					PCI_ERR_UNC_ECRC|		\
+					PCI_ERR_UNC_UNSUP|		\
+					PCI_ERR_UNC_COMP_ABORT|		\
+					PCI_ERR_UNC_UNX_COMP|		\
+					PCI_ERR_UNC_MALF_TLP)
+
+/* AER Error Info Flags */
+#define AER_TLP_HEADER_VALID_FLAG	0x00000001
+#define AER_MULTI_ERROR_VALID_FLAG	0x00000002
+
+#define ERR_CORRECTABLE_ERROR_MASK	0x000031c1
+#define ERR_UNCORRECTABLE_ERROR_MASK	0x001ff010
+
+struct header_log_regs {
+	unsigned int dw0;
+	unsigned int dw1;
+	unsigned int dw2;
+	unsigned int dw3;
+};
+
+struct aer_err_info {
+	int severity;			/* 0:NONFATAL | 1:FATAL | 2:COR */
+	int flags;
+	unsigned int status;		/* COR/UNCOR Error Status */
+	struct header_log_regs tlp; 	/* TLP Header */
+};
+
+struct aer_err_source {
+	unsigned int status;
+	unsigned int id;
+};
+
+struct aer_rpc {
+	struct pcie_device *rpd;	/* Root Port device */
+	struct work_struct dpc_handler;
+	struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
+	unsigned short prod_idx;	/* Error Producer Index */
+	unsigned short cons_idx;	/* Error Consumer Index */
+	int isr;
+	spinlock_t e_lock;		/*
+					 * Lock access to Error Status/ID Regs
+					 * and error producer/consumer index
+					 */
+	struct mutex rpc_mutex;		/*
+					 * only one thread could do
+					 * recovery on the same
+					 * root port hierachy
+					 */
+	wait_queue_head_t wait_release;
+};
+
+struct aer_broadcast_data {
+	enum pci_channel_state state;
+	enum pci_ers_result result;
+};
+
+static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
+		enum pci_ers_result new)
+{
+	switch (orig) {
+	case PCI_ERS_RESULT_CAN_RECOVER:
+	case PCI_ERS_RESULT_RECOVERED:
+		orig = new;
+		break;
+	case PCI_ERS_RESULT_DISCONNECT:
+		if (new == PCI_ERS_RESULT_NEED_RESET)
+			orig = new;
+		break;
+	default:
+		break;
+	}
+
+	return orig;
+}
+
+extern struct bus_type pcie_port_bus_type;
+extern void aer_enable_rootport(struct aer_rpc *rpc);
+extern void aer_delete_rootport(struct aer_rpc *rpc);
+extern int aer_init(struct pcie_device *dev);
+extern void aer_isr(void *context);
+extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
+extern int aer_osc_setup(struct pci_dev *dev);
+
+#endif //_AERDRV_H_
diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c
new file mode 100644
index 000000000000..fa68e89ebec9
--- /dev/null
+++ b/drivers/pci/pcie/aer/aerdrv_acpi.c
@@ -0,0 +1,68 @@
+/*
+ * Access ACPI _OSC method
+ *
+ * Copyright (C) 2006 Intel Corp.
+ *	Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *	Zhang Yanmin (yanmin.zhang@intel.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/suspend.h>
+#include <linux/acpi.h>
+#include <linux/pci-acpi.h>
+#include <linux/delay.h>
+#include "aerdrv.h"
+
+/**
+ * aer_osc_setup - run ACPI _OSC method
+ *
+ * Return:
+ *	Zero if success. Nonzero for otherwise.
+ *
+ * Invoked when PCIE bus loads AER service driver. To avoid conflict with
+ * BIOS AER support requires BIOS to yield AER control to OS native driver.
+ **/
+int aer_osc_setup(struct pci_dev *dev)
+{
+	int retval = OSC_METHOD_RUN_SUCCESS;
+	acpi_status status;
+	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	struct pci_dev *pdev = dev;
+	struct pci_bus *parent;
+
+	while (!handle) {
+		if (!pdev || !pdev->bus->parent)
+			break;
+		parent = pdev->bus->parent;
+		if (!parent->self)
+			/* Parent must be a host bridge */
+			handle = acpi_get_pci_rootbridge_handle(
+					pci_domain_nr(parent),
+					parent->number);
+		else
+			handle = DEVICE_ACPI_HANDLE(
+					&(parent->self->dev));
+		pdev = parent->self;
+	}
+
+	if (!handle)
+		return OSC_METHOD_NOT_SUPPORTED;
+
+	pci_osc_support_set(OSC_EXT_PCI_CONFIG_SUPPORT);
+	status = pci_osc_control_set(handle, OSC_PCI_EXPRESS_AER_CONTROL |
+		OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+	if (ACPI_FAILURE(status)) {
+		if (status == AE_SUPPORT)
+			retval = OSC_METHOD_NOT_SUPPORTED;
+	 	else
+			retval = OSC_METHOD_RUN_FAILURE;
+	}
+
+	return retval;
+}
+
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
new file mode 100644
index 000000000000..5591043acea7
--- /dev/null
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -0,0 +1,757 @@
+/*
+ * drivers/pci/pcie/aer/aerdrv_core.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file implements the core part of PCI-Express AER. When an pci-express
+ * error is delivered, an error message will be collected and printed to
+ * console, then, an error recovery procedure will be executed by following
+ * the pci error recovery rules.
+ *
+ * Copyright (C) 2006 Intel Corp.
+ *	Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *	Zhang Yanmin (yanmin.zhang@intel.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/suspend.h>
+#include <linux/acpi.h>
+#include <linux/pci-acpi.h>
+#include <linux/delay.h>
+#include "aerdrv.h"
+
+static int forceload;
+module_param(forceload, bool, 0);
+
+#define PCI_CFG_SPACE_SIZE	(0x100)
+int pci_find_aer_capability(struct pci_dev *dev)
+{
+	int pos;
+	u32 reg32 = 0;
+
+	/* Check if it's a pci-express device */
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return 0;
+
+	/* Check if it supports pci-express AER */
+	pos = PCI_CFG_SPACE_SIZE;
+	while (pos) {
+		if (pci_read_config_dword(dev, pos, &reg32))
+			return 0;
+
+		/* some broken boards return ~0 */
+		if (reg32 == 0xffffffff)
+			return 0;
+
+		if (PCI_EXT_CAP_ID(reg32) == PCI_EXT_CAP_ID_ERR)
+			break;
+
+		pos = reg32 >> 20;
+	}
+
+	return pos;
+}
+
+int pci_enable_pcie_error_reporting(struct pci_dev *dev)
+{
+	u16 reg16 = 0;
+	int pos;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return -EIO;
+
+	pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
+	reg16 = reg16 |
+		PCI_EXP_DEVCTL_CERE |
+		PCI_EXP_DEVCTL_NFERE |
+		PCI_EXP_DEVCTL_FERE |
+		PCI_EXP_DEVCTL_URRE;
+	pci_write_config_word(dev, pos+PCI_EXP_DEVCTL,
+			reg16);
+	return 0;
+}
+
+int pci_disable_pcie_error_reporting(struct pci_dev *dev)
+{
+	u16 reg16 = 0;
+	int pos;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return -EIO;
+
+	pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
+	reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE |
+			PCI_EXP_DEVCTL_NFERE |
+			PCI_EXP_DEVCTL_FERE |
+			PCI_EXP_DEVCTL_URRE);
+	pci_write_config_word(dev, pos+PCI_EXP_DEVCTL,
+			reg16);
+	return 0;
+}
+
+int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
+{
+	int pos;
+	u32 status, mask;
+
+	pos = pci_find_aer_capability(dev);
+	if (!pos)
+		return -EIO;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
+	if (dev->error_state == pci_channel_io_normal)
+		status &= ~mask; /* Clear corresponding nonfatal bits */
+	else
+		status &= mask; /* Clear corresponding fatal bits */
+	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
+
+	return 0;
+}
+
+static int find_device_iter(struct device *device, void *data)
+{
+	struct pci_dev *dev;
+	u16 id = *(unsigned long *)data;
+	u8 secondary, subordinate, d_bus = id >> 8;
+
+	if (device->bus == &pci_bus_type) {
+		dev = to_pci_dev(device);
+		if (id == ((dev->bus->number << 8) | dev->devfn)) {
+			/*
+			 * Device ID match
+			 */
+			*(unsigned long*)data = (unsigned long)device;
+			return 1;
+		}
+
+		/*
+		 * If device is P2P, check if it is an upstream?
+		 */
+		if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) {
+			pci_read_config_byte(dev, PCI_SECONDARY_BUS,
+				&secondary);
+			pci_read_config_byte(dev, PCI_SUBORDINATE_BUS,
+				&subordinate);
+			if (d_bus >= secondary && d_bus <= subordinate) {
+				*(unsigned long*)data = (unsigned long)device;
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * find_source_device - search through device hierarchy for source device
+ * @p_dev: pointer to Root Port pci_dev data structure
+ * @id: device ID of agent who sends an error message to this Root Port
+ *
+ * Invoked when error is detected at the Root Port.
+ **/
+static struct device* find_source_device(struct pci_dev *parent, u16 id)
+{
+	struct pci_dev *dev = parent;
+	struct device *device;
+	unsigned long device_addr;
+	int status;
+
+	/* Is Root Port an agent that sends error message? */
+	if (id == ((dev->bus->number << 8) | dev->devfn))
+		return &dev->dev;
+
+	do {
+		device_addr = id;
+ 		if ((status = device_for_each_child(&dev->dev,
+			&device_addr, find_device_iter))) {
+			device = (struct device*)device_addr;
+			dev = to_pci_dev(device);
+			if (id == ((dev->bus->number << 8) | dev->devfn))
+				return device;
+		}
+ 	}while (status);
+
+	return NULL;
+}
+
+static void report_error_detected(struct pci_dev *dev, void *data)
+{
+	pci_ers_result_t vote;
+	struct pci_error_handlers *err_handler;
+	struct aer_broadcast_data *result_data;
+	result_data = (struct aer_broadcast_data *) data;
+
+	dev->error_state = result_data->state;
+
+	if (!dev->driver ||
+		!dev->driver->err_handler ||
+		!dev->driver->err_handler->error_detected) {
+		if (result_data->state == pci_channel_io_frozen &&
+			!(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) {
+			/*
+			 * In case of fatal recovery, if one of down-
+			 * stream device has no driver. We might be
+			 * unable to recover because a later insmod
+			 * of a driver for this device is unaware of
+			 * its hw state.
+			 */
+			printk(KERN_DEBUG "Device ID[%s] has %s\n",
+					dev->dev.bus_id, (dev->driver) ?
+					"no AER-aware driver" : "no driver");
+		}
+		return;
+	}
+
+	err_handler = dev->driver->err_handler;
+	vote = err_handler->error_detected(dev, result_data->state);
+	result_data->result = merge_result(result_data->result, vote);
+	return;
+}
+
+static void report_mmio_enabled(struct pci_dev *dev, void *data)
+{
+	pci_ers_result_t vote;
+	struct pci_error_handlers *err_handler;
+	struct aer_broadcast_data *result_data;
+	result_data = (struct aer_broadcast_data *) data;
+
+	if (!dev->driver ||
+		!dev->driver->err_handler ||
+		!dev->driver->err_handler->mmio_enabled)
+		return;
+
+	err_handler = dev->driver->err_handler;
+	vote = err_handler->mmio_enabled(dev);
+	result_data->result = merge_result(result_data->result, vote);
+	return;
+}
+
+static void report_slot_reset(struct pci_dev *dev, void *data)
+{
+	pci_ers_result_t vote;
+	struct pci_error_handlers *err_handler;
+	struct aer_broadcast_data *result_data;
+	result_data = (struct aer_broadcast_data *) data;
+
+	if (!dev->driver ||
+		!dev->driver->err_handler ||
+		!dev->driver->err_handler->slot_reset)
+		return;
+
+	err_handler = dev->driver->err_handler;
+	vote = err_handler->slot_reset(dev);
+	result_data->result = merge_result(result_data->result, vote);
+	return;
+}
+
+static void report_resume(struct pci_dev *dev, void *data)
+{
+	struct pci_error_handlers *err_handler;
+
+	dev->error_state = pci_channel_io_normal;
+
+	if (!dev->driver ||
+		!dev->driver->err_handler ||
+		!dev->driver->err_handler->slot_reset)
+		return;
+
+	err_handler = dev->driver->err_handler;
+	err_handler->resume(dev);
+	return;
+}
+
+/**
+ * broadcast_error_message - handle message broadcast to downstream drivers
+ * @device: pointer to from where in a hierarchy message is broadcasted down
+ * @api: callback to be broadcasted
+ * @state: error state
+ *
+ * Invoked during error recovery process. Once being invoked, the content
+ * of error severity will be broadcasted to all downstream drivers in a
+ * hierarchy in question.
+ **/
+static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
+	enum pci_channel_state state,
+	char *error_mesg,
+	void (*cb)(struct pci_dev *, void *))
+{
+	struct aer_broadcast_data result_data;
+
+	printk(KERN_DEBUG "Broadcast %s message\n", error_mesg);
+	result_data.state = state;
+	if (cb == report_error_detected)
+		result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
+	else
+		result_data.result = PCI_ERS_RESULT_RECOVERED;
+
+	if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) {
+		/*
+		 * If the error is reported by a bridge, we think this error
+		 * is related to the downstream link of the bridge, so we
+		 * do error recovery on all subordinates of the bridge instead
+		 * of the bridge and clear the error status of the bridge.
+		 */
+		if (cb == report_error_detected)
+			dev->error_state = state;
+		pci_walk_bus(dev->subordinate, cb, &result_data);
+		if (cb == report_resume) {
+			pci_cleanup_aer_uncorrect_error_status(dev);
+			dev->error_state = pci_channel_io_normal;
+		}
+	}
+	else {
+		/*
+		 * If the error is reported by an end point, we think this
+		 * error is related to the upstream link of the end point.
+		 */
+		pci_walk_bus(dev->bus, cb, &result_data);
+	}
+
+	return result_data.result;
+}
+
+struct find_aer_service_data {
+	struct pcie_port_service_driver *aer_driver;
+	int is_downstream;
+};
+
+static int find_aer_service_iter(struct device *device, void *data)
+{
+	struct device_driver *driver;
+	struct pcie_port_service_driver *service_driver;
+	struct pcie_device *pcie_dev;
+	struct find_aer_service_data *result;
+
+	result = (struct find_aer_service_data *) data;
+
+	if (device->bus == &pcie_port_bus_type) {
+		pcie_dev = to_pcie_device(device);
+		if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT)
+			result->is_downstream = 1;
+
+		driver = device->driver;
+		if (driver) {
+			service_driver = to_service_driver(driver);
+			if (service_driver->id_table->service_type ==
+					PCIE_PORT_SERVICE_AER) {
+				result->aer_driver = service_driver;
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void find_aer_service(struct pci_dev *dev,
+		struct find_aer_service_data *data)
+{
+	device_for_each_child(&dev->dev, data, find_aer_service_iter);
+}
+
+static pci_ers_result_t reset_link(struct pcie_device *aerdev,
+		struct pci_dev *dev)
+{
+	struct pci_dev *udev;
+	pci_ers_result_t status;
+	struct find_aer_service_data data;
+
+	if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)
+		udev = dev;
+	else
+		udev= dev->bus->self;
+
+	data.is_downstream = 0;
+	data.aer_driver = NULL;
+	find_aer_service(udev, &data);
+
+	/*
+	 * Use the aer driver of the error agent firstly.
+	 * If it hasn't the aer driver, use the root port's
+	 */
+	if (!data.aer_driver || !data.aer_driver->reset_link) {
+		if (data.is_downstream &&
+			aerdev->device.driver &&
+			to_service_driver(aerdev->device.driver)->reset_link) {
+			data.aer_driver =
+				to_service_driver(aerdev->device.driver);
+		} else {
+			printk(KERN_DEBUG "No link-reset support to Device ID"
+				"[%s]\n",
+				dev->dev.bus_id);
+			return PCI_ERS_RESULT_DISCONNECT;
+		}
+	}
+
+	status = data.aer_driver->reset_link(udev);
+	if (status != PCI_ERS_RESULT_RECOVERED) {
+		printk(KERN_DEBUG "Link reset at upstream Device ID"
+			"[%s] failed\n",
+			udev->dev.bus_id);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return status;
+}
+
+/**
+ * do_recovery - handle nonfatal/fatal error recovery process
+ * @aerdev: pointer to a pcie_device data structure of root port
+ * @dev: pointer to a pci_dev data structure of agent detecting an error
+ * @severity: error severity type
+ *
+ * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
+ * error detected message to all downstream drivers within a hierarchy in
+ * question and return the returned code.
+ **/
+static pci_ers_result_t do_recovery(struct pcie_device *aerdev,
+		struct pci_dev *dev,
+		int severity)
+{
+	pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
+	enum pci_channel_state state;
+
+	if (severity == AER_FATAL)
+		state = pci_channel_io_frozen;
+	else
+		state = pci_channel_io_normal;
+
+	status = broadcast_error_message(dev,
+			state,
+			"error_detected",
+			report_error_detected);
+
+	if (severity == AER_FATAL) {
+		result = reset_link(aerdev, dev);
+		if (result != PCI_ERS_RESULT_RECOVERED) {
+			/* TODO: Should panic here? */
+			return result;
+		}
+	}
+
+	if (status == PCI_ERS_RESULT_CAN_RECOVER)
+		status = broadcast_error_message(dev,
+				state,
+				"mmio_enabled",
+				report_mmio_enabled);
+
+	if (status == PCI_ERS_RESULT_NEED_RESET) {
+		/*
+		 * TODO: Should call platform-specific
+		 * functions to reset slot before calling
+		 * drivers' slot_reset callbacks?
+		 */
+		status = broadcast_error_message(dev,
+				state,
+				"slot_reset",
+				report_slot_reset);
+	}
+
+	if (status == PCI_ERS_RESULT_RECOVERED)
+		broadcast_error_message(dev,
+				state,
+				"resume",
+				report_resume);
+
+	return status;
+}
+
+/**
+ * handle_error_source - handle logging error into an event log
+ * @aerdev: pointer to pcie_device data structure of the root port
+ * @dev: pointer to pci_dev data structure of error source device
+ * @info: comprehensive error information
+ *
+ * Invoked when an error being detected by Root Port.
+ **/
+static void handle_error_source(struct pcie_device * aerdev,
+	struct pci_dev *dev,
+	struct aer_err_info info)
+{
+	pci_ers_result_t status = 0;
+	int pos;
+
+	if (info.severity == AER_CORRECTABLE) {
+		/*
+		 * Correctable error does not need software intevention.
+		 * No need to go through error recovery process.
+		 */
+		pos = pci_find_aer_capability(dev);
+		if (pos)
+			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
+					info.status);
+	} else {
+		status = do_recovery(aerdev, dev, info.severity);
+		if (status == PCI_ERS_RESULT_RECOVERED) {
+			printk(KERN_DEBUG "AER driver successfully recovered\n");
+		} else {
+			/* TODO: Should kernel panic here? */
+			printk(KERN_DEBUG "AER driver didn't recover\n");
+		}
+	}
+}
+
+/**
+ * aer_enable_rootport - enable Root Port's interrupts when receiving messages
+ * @rpc: pointer to a Root Port data structure
+ *
+ * Invoked when PCIE bus loads AER service driver.
+ **/
+void aer_enable_rootport(struct aer_rpc *rpc)
+{
+	struct pci_dev *pdev = rpc->rpd->port;
+	int pos, aer_pos;
+	u16 reg16;
+	u32 reg32;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+	/* Clear PCIE Capability's Device Status */
+	pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, &reg16);
+	pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16);
+
+	/* Disable system error generation in response to error messages */
+	pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, &reg16);
+	reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
+	pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);
+
+	aer_pos = pci_find_aer_capability(pdev);
+	/* Clear error status */
+	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
+	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
+	pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
+	pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
+	pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
+	pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);
+
+	/* Enable Root Port device reporting error itself */
+	pci_read_config_word(pdev, pos+PCI_EXP_DEVCTL, &reg16);
+	reg16 = reg16 |
+		PCI_EXP_DEVCTL_CERE |
+		PCI_EXP_DEVCTL_NFERE |
+		PCI_EXP_DEVCTL_FERE |
+		PCI_EXP_DEVCTL_URRE;
+	pci_write_config_word(pdev, pos+PCI_EXP_DEVCTL,
+		reg16);
+
+	/* Enable Root Port's interrupt in response to error messages */
+	pci_write_config_dword(pdev,
+		aer_pos + PCI_ERR_ROOT_COMMAND,
+		ROOT_PORT_INTR_ON_MESG_MASK);
+}
+
+/**
+ * disable_root_aer - disable Root Port's interrupts when receiving messages
+ * @rpc: pointer to a Root Port data structure
+ *
+ * Invoked when PCIE bus unloads AER service driver.
+ **/
+static void disable_root_aer(struct aer_rpc *rpc)
+{
+	struct pci_dev *pdev = rpc->rpd->port;
+	u32 reg32;
+	int pos;
+
+	pos = pci_find_aer_capability(pdev);
+	/* Disable Root's interrupt in response to error messages */
+	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);
+
+	/* Clear Root's error status reg */
+	pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32);
+	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
+}
+
+/**
+ * get_e_source - retrieve an error source
+ * @rpc: pointer to the root port which holds an error
+ *
+ * Invoked by DPC handler to consume an error.
+ **/
+static struct aer_err_source* get_e_source(struct aer_rpc *rpc)
+{
+	struct aer_err_source *e_source;
+	unsigned long flags;
+
+	/* Lock access to Root error producer/consumer index */
+	spin_lock_irqsave(&rpc->e_lock, flags);
+	if (rpc->prod_idx == rpc->cons_idx) {
+		spin_unlock_irqrestore(&rpc->e_lock, flags);
+		return NULL;
+	}
+	e_source = &rpc->e_sources[rpc->cons_idx];
+	rpc->cons_idx++;
+	if (rpc->cons_idx == AER_ERROR_SOURCES_MAX)
+		rpc->cons_idx = 0;
+	spin_unlock_irqrestore(&rpc->e_lock, flags);
+
+	return e_source;
+}
+
+static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
+{
+	int pos;
+
+	pos = pci_find_aer_capability(dev);
+
+	/* The device might not support AER */
+	if (!pos)
+		return AER_SUCCESS;
+
+	if (info->severity == AER_CORRECTABLE) {
+		pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
+			&info->status);
+		if (!(info->status & ERR_CORRECTABLE_ERROR_MASK))
+			return AER_UNSUCCESS;
+	} else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE ||
+		info->severity == AER_NONFATAL) {
+
+		/* Link is still healthy for IO reads */
+		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
+			&info->status);
+		if (!(info->status & ERR_UNCORRECTABLE_ERROR_MASK))
+			return AER_UNSUCCESS;
+
+		if (info->status & AER_LOG_TLP_MASKS) {
+			info->flags |= AER_TLP_HEADER_VALID_FLAG;
+			pci_read_config_dword(dev,
+				pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
+			pci_read_config_dword(dev,
+				pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
+			pci_read_config_dword(dev,
+				pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
+			pci_read_config_dword(dev,
+				pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
+		}
+	}
+
+	return AER_SUCCESS;
+}
+
+/**
+ * aer_isr_one_error - consume an error detected by root port
+ * @p_device: pointer to error root port service device
+ * @e_src: pointer to an error source
+ **/
+static void aer_isr_one_error(struct pcie_device *p_device,
+		struct aer_err_source *e_src)
+{
+	struct device *s_device;
+	struct aer_err_info e_info = {0, 0, 0,};
+	int i;
+	u16 id;
+
+	/*
+	 * There is a possibility that both correctable error and
+	 * uncorrectable error being logged. Report correctable error first.
+	 */
+	for (i = 1; i & ROOT_ERR_STATUS_MASKS ; i <<= 2) {
+		if (i > 4)
+			break;
+		if (!(e_src->status & i))
+			continue;
+
+		/* Init comprehensive error information */
+		if (i & PCI_ERR_ROOT_COR_RCV) {
+			id = ERR_COR_ID(e_src->id);
+			e_info.severity = AER_CORRECTABLE;
+		} else {
+			id = ERR_UNCOR_ID(e_src->id);
+			e_info.severity = ((e_src->status >> 6) & 1);
+		}
+		if (e_src->status &
+			(PCI_ERR_ROOT_MULTI_COR_RCV |
+			 PCI_ERR_ROOT_MULTI_UNCOR_RCV))
+			e_info.flags |= AER_MULTI_ERROR_VALID_FLAG;
+		if (!(s_device = find_source_device(p_device->port, id))) {
+			printk(KERN_DEBUG "%s->can't find device of ID%04x\n",
+				__FUNCTION__, id);
+			continue;
+		}
+		if (get_device_error_info(to_pci_dev(s_device), &e_info) ==
+				AER_SUCCESS) {
+			aer_print_error(to_pci_dev(s_device), &e_info);
+			handle_error_source(p_device,
+				to_pci_dev(s_device),
+				e_info);
+		}
+	}
+}
+
+/**
+ * aer_isr - consume errors detected by root port
+ * @context: pointer to a private data of pcie device
+ *
+ * Invoked, as DPC, when root port records new detected error
+ **/
+void aer_isr(void *context)
+{
+	struct pcie_device *p_device = (struct pcie_device *) context;
+	struct aer_rpc *rpc = get_service_data(p_device);
+	struct aer_err_source *e_src;
+
+	mutex_lock(&rpc->rpc_mutex);
+	e_src = get_e_source(rpc);
+	while (e_src) {
+		aer_isr_one_error(p_device, e_src);
+		e_src = get_e_source(rpc);
+	}
+	mutex_unlock(&rpc->rpc_mutex);
+
+	wake_up(&rpc->wait_release);
+}
+
+/**
+ * aer_delete_rootport - disable root port aer and delete service data
+ * @rpc: pointer to a root port device being deleted
+ *
+ * Invoked when AER service unloaded on a specific Root Port
+ **/
+void aer_delete_rootport(struct aer_rpc *rpc)
+{
+	/* Disable root port AER itself */
+	disable_root_aer(rpc);
+
+	kfree(rpc);
+}
+
+/**
+ * aer_init - provide AER initialization
+ * @dev: pointer to AER pcie device
+ *
+ * Invoked when AER service driver is loaded.
+ **/
+int aer_init(struct pcie_device *dev)
+{
+	int status;
+
+	/* Run _OSC Method */
+	status = aer_osc_setup(dev->port);
+
+	if(status != OSC_METHOD_RUN_SUCCESS) {
+		printk(KERN_DEBUG "%s: AER service init fails - %s\n",
+		__FUNCTION__,
+		(status == OSC_METHOD_NOT_SUPPORTED) ?
+			"No ACPI _OSC support" : "Run ACPI _OSC fails");
+
+		if (!forceload)
+			return status;
+	}
+
+	return AER_SUCCESS;
+}
+
+EXPORT_SYMBOL_GPL(pci_find_aer_capability);
+EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
+EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
+EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
+
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
new file mode 100644
index 000000000000..3933d4f30e8c
--- /dev/null
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -0,0 +1,248 @@
+/*
+ * drivers/pci/pcie/aer/aerdrv_errprint.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Format error messages and print them to console.
+ *
+ * Copyright (C) 2006 Intel Corp.
+ *	Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *	Zhang Yanmin (yanmin.zhang@intel.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/pm.h>
+#include <linux/suspend.h>
+
+#include "aerdrv.h"
+
+#define AER_AGENT_RECEIVER		0
+#define AER_AGENT_REQUESTER		1
+#define AER_AGENT_COMPLETER		2
+#define AER_AGENT_TRANSMITTER		3
+
+#define AER_AGENT_REQUESTER_MASK	(PCI_ERR_UNC_COMP_TIME|	\
+					PCI_ERR_UNC_UNSUP)
+
+#define AER_AGENT_COMPLETER_MASK	PCI_ERR_UNC_COMP_ABORT
+
+#define AER_AGENT_TRANSMITTER_MASK(t, e) (e & (PCI_ERR_COR_REP_ROLL| \
+	((t == AER_CORRECTABLE) ? PCI_ERR_COR_REP_TIMER: 0)))
+
+#define AER_GET_AGENT(t, e)						\
+	((e & AER_AGENT_COMPLETER_MASK) ? AER_AGENT_COMPLETER :		\
+	(e & AER_AGENT_REQUESTER_MASK) ? AER_AGENT_REQUESTER :		\
+	(AER_AGENT_TRANSMITTER_MASK(t, e)) ? AER_AGENT_TRANSMITTER :	\
+	AER_AGENT_RECEIVER)
+
+#define AER_PHYSICAL_LAYER_ERROR_MASK	PCI_ERR_COR_RCVR
+#define AER_DATA_LINK_LAYER_ERROR_MASK(t, e)	\
+		(PCI_ERR_UNC_DLP|		\
+		PCI_ERR_COR_BAD_TLP| 		\
+		PCI_ERR_COR_BAD_DLLP|		\
+		PCI_ERR_COR_REP_ROLL| 		\
+		((t == AER_CORRECTABLE) ?	\
+		PCI_ERR_COR_REP_TIMER: 0))
+
+#define AER_PHYSICAL_LAYER_ERROR	0
+#define AER_DATA_LINK_LAYER_ERROR	1
+#define AER_TRANSACTION_LAYER_ERROR	2
+
+#define AER_GET_LAYER_ERROR(t, e)				\
+	((e & AER_PHYSICAL_LAYER_ERROR_MASK) ?			\
+	AER_PHYSICAL_LAYER_ERROR :				\
+	(e & AER_DATA_LINK_LAYER_ERROR_MASK(t, e)) ?		\
+		AER_DATA_LINK_LAYER_ERROR : 			\
+		AER_TRANSACTION_LAYER_ERROR)
+
+/*
+ * AER error strings
+ */
+static char* aer_error_severity_string[] = {
+	"Uncorrected (Non-Fatal)",
+	"Uncorrected (Fatal)",
+	"Corrected"
+};
+
+static char* aer_error_layer[] = {
+	"Physical Layer",
+	"Data Link Layer",
+	"Transaction Layer"
+};
+static char* aer_correctable_error_string[] = {
+	"Receiver Error        ",	/* Bit Position 0 	*/
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"Bad TLP               ",	/* Bit Position 6 	*/
+	"Bad DLLP              ",	/* Bit Position 7 	*/
+	"RELAY_NUM Rollover    ",	/* Bit Position 8 	*/
+	NULL,
+	NULL,
+	NULL,
+	"Replay Timer Timeout  ",	/* Bit Position 12 	*/
+	"Advisory Non-Fatal    ", 	/* Bit Position 13	*/
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+};
+
+static char* aer_uncorrectable_error_string[] = {
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"Data Link Protocol    ",	/* Bit Position 4	*/
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	"Poisoned TLP          ",	/* Bit Position 12 	*/
+	"Flow Control Protocol ",	/* Bit Position 13	*/
+	"Completion Timeout    ",	/* Bit Position 14 	*/
+	"Completer Abort       ",	/* Bit Position 15 	*/
+	"Unexpected Completion ",	/* Bit Position 16	*/
+	"Receiver Overflow     ",	/* Bit Position 17	*/
+	"Malformed TLP         ",	/* Bit Position 18	*/
+	"ECRC                  ",	/* Bit Position 19	*/
+	"Unsupported Request   ",	/* Bit Position 20	*/
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+};
+
+static char* aer_agent_string[] = {
+	"Receiver ID",
+	"Requester ID",
+	"Completer ID",
+	"Transmitter ID"
+};
+
+static char * aer_get_error_source_name(int severity,
+			unsigned int status,
+			char errmsg_buff[])
+{
+	int i;
+	char * errmsg = NULL;
+
+	for (i = 0; i < 32; i++) {
+		if (!(status & (1 << i)))
+			continue;
+
+		if (severity == AER_CORRECTABLE)
+			errmsg = aer_correctable_error_string[i];
+		else
+			errmsg = aer_uncorrectable_error_string[i];
+
+		if (!errmsg) {
+			sprintf(errmsg_buff, "Unknown Error Bit %2d  ", i);
+			errmsg = errmsg_buff;
+		}
+
+		break;
+	}
+
+	return errmsg;
+}
+
+static DEFINE_SPINLOCK(logbuf_lock);
+static char errmsg_buff[100];
+void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
+{
+	char * errmsg;
+	int err_layer, agent;
+	char * loglevel;
+
+	if (info->severity == AER_CORRECTABLE)
+		loglevel = KERN_WARNING;
+	else
+		loglevel = KERN_ERR;
+
+	printk("%s+------ PCI-Express Device Error ------+\n", loglevel);
+	printk("%sError Severity\t\t: %s\n", loglevel,
+		aer_error_severity_string[info->severity]);
+
+	if ( info->status == 0) {
+		printk("%sPCIE Bus Error type\t: (Unaccessible)\n", loglevel);
+		printk("%sUnaccessible Received\t: %s\n", loglevel,
+			info->flags & AER_MULTI_ERROR_VALID_FLAG ?
+				"Multiple" : "First");
+		printk("%sUnregistered Agent ID\t: %04x\n", loglevel,
+			(dev->bus->number << 8) | dev->devfn);
+	} else {
+		err_layer = AER_GET_LAYER_ERROR(info->severity, info->status);
+		printk("%sPCIE Bus Error type\t: %s\n", loglevel,
+			aer_error_layer[err_layer]);
+
+		spin_lock(&logbuf_lock);
+		errmsg = aer_get_error_source_name(info->severity,
+				info->status,
+				errmsg_buff);
+		printk("%s%s\t: %s\n", loglevel, errmsg,
+			info->flags & AER_MULTI_ERROR_VALID_FLAG ?
+				"Multiple" : "First");
+		spin_unlock(&logbuf_lock);
+
+		agent = AER_GET_AGENT(info->severity, info->status);
+		printk("%s%s\t\t: %04x\n", loglevel,
+			aer_agent_string[agent],
+			(dev->bus->number << 8) | dev->devfn);
+
+		printk("%sVendorID=%04xh, DeviceID=%04xh,"
+			" Bus=%02xh, Device=%02xh, Function=%02xh\n",
+			loglevel,
+			dev->vendor,
+			dev->device,
+			dev->bus->number,
+			PCI_SLOT(dev->devfn),
+			PCI_FUNC(dev->devfn));
+
+		if (info->flags & AER_TLP_HEADER_VALID_FLAG) {
+			unsigned char *tlp = (unsigned char *) &info->tlp;
+			printk("%sTLB Header:\n", loglevel);
+			printk("%s%02x%02x%02x%02x %02x%02x%02x%02x"
+				" %02x%02x%02x%02x %02x%02x%02x%02x\n",
+				loglevel,
+				*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
+				*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
+				*(tlp + 11), *(tlp + 10), *(tlp + 9),
+				*(tlp + 8), *(tlp + 15), *(tlp + 14),
+				*(tlp + 13), *(tlp + 12));
+		}
+	}
+}
+
diff --git a/include/linux/aer.h b/include/linux/aer.h
new file mode 100644
index 000000000000..402e178b38eb
--- /dev/null
+++ b/include/linux/aer.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2006 Intel Corp.
+ *     Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *     Zhang Yanmin (yanmin.zhang@intel.com)
+ */
+
+#ifndef _AER_H_
+#define _AER_H_
+
+#if defined(CONFIG_PCIEAER)
+/* pci-e port driver needs this function to enable aer */
+extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+extern int pci_find_aer_capability(struct pci_dev *dev);
+extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+#else
+#define pci_enable_pcie_error_reporting(dev)		do { } while (0)
+#define pci_find_aer_capability(dev)			do { } while (0)
+#define pci_disable_pcie_error_reporting(dev)		do { } while (0)
+#define pci_cleanup_aer_uncorrect_error_status(dev)	do { } while (0)
+#endif
+
+#endif //_AER_H_
+
diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h
index b44e01a70914..6cd91e3f9820 100644
--- a/include/linux/pcieport_if.h
+++ b/include/linux/pcieport_if.h
@@ -62,6 +62,12 @@ struct pcie_port_service_driver {
 	int (*suspend) (struct pcie_device *dev, pm_message_t state);
 	int (*resume) (struct pcie_device *dev);
 
+	/* Service Error Recovery Handler */
+	struct pci_error_handlers *err_handler;
+
+	/* Link Reset Capability - AER service driver specific */
+	pci_ers_result_t (*reset_link) (struct pci_dev *dev);
+
 	const struct pcie_port_service_id *id_table;
 	struct device_driver driver;
 };
-- 
cgit v1.2.3


From b19441af185559118e8247382ea4f2f76ebffc6d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 28 Aug 2006 11:43:25 -0700
Subject: PCI: fix __must_check warnings

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/bus.c                  |  22 ++++++--
 drivers/pci/hotplug/fakephp.c      |  18 ++++--
 drivers/pci/pci-driver.c           |   5 +-
 drivers/pci/pci-sysfs.c            | 112 ++++++++++++++++++++++++-------------
 drivers/pci/pcie/aer/aerdrv_core.c |   3 +-
 drivers/pci/pcie/portdrv_core.c    |   6 +-
 drivers/pci/pcie/portdrv_pci.c     |  16 ++++--
 drivers/pci/probe.c                |  16 +++++-
 include/linux/pci.h                |   2 +-
 9 files changed, 138 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 5f7db9d2436e..aadaa3c8096b 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -77,9 +77,12 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
  * This adds a single pci device to the global
  * device list and adds sysfs and procfs entries
  */
-void __devinit pci_bus_add_device(struct pci_dev *dev)
+int __devinit pci_bus_add_device(struct pci_dev *dev)
 {
-	device_add(&dev->dev);
+	int retval;
+	retval = device_add(&dev->dev);
+	if (retval)
+		return retval;
 
 	down_write(&pci_bus_sem);
 	list_add_tail(&dev->global_list, &pci_devices);
@@ -87,6 +90,7 @@ void __devinit pci_bus_add_device(struct pci_dev *dev)
 
 	pci_proc_attach_device(dev);
 	pci_create_sysfs_dev_files(dev);
+	return 0;
 }
 
 /**
@@ -104,6 +108,7 @@ void __devinit pci_bus_add_device(struct pci_dev *dev)
 void __devinit pci_bus_add_devices(struct pci_bus *bus)
 {
 	struct pci_dev *dev;
+	int retval;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/*
@@ -112,7 +117,9 @@ void __devinit pci_bus_add_devices(struct pci_bus *bus)
 		 */
 		if (!list_empty(&dev->global_list))
 			continue;
-		pci_bus_add_device(dev);
+		retval = pci_bus_add_device(dev);
+		if (retval)
+			dev_err(&dev->dev, "Error adding device, continuing\n");
 	}
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -129,10 +136,13 @@ void __devinit pci_bus_add_devices(struct pci_bus *bus)
 			       list_add_tail(&dev->subordinate->node,
 					       &dev->bus->children);
 			       up_write(&pci_bus_sem);
-		       }
+			}
 			pci_bus_add_devices(dev->subordinate);
-
-			sysfs_create_link(&dev->subordinate->class_dev.kobj, &dev->dev.kobj, "bridge");
+			retval = sysfs_create_link(&dev->subordinate->class_dev.kobj,
+						   &dev->dev.kobj, "bridge");
+			if (retval)
+				dev_err(&dev->dev, "Error creating sysfs "
+					"bridge symlink, continuing...\n");
 		}
 	}
 }
diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c
index dd2b762777c4..05a4f0f90186 100644
--- a/drivers/pci/hotplug/fakephp.c
+++ b/drivers/pci/hotplug/fakephp.c
@@ -176,7 +176,9 @@ static void pci_rescan_slot(struct pci_dev *temp)
 	struct pci_bus *bus = temp->bus;
 	struct pci_dev *dev;
 	int func;
+	int retval;
 	u8 hdr_type;
+
 	if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) {
 		temp->hdr_type = hdr_type & 0x7f;
 		if (!pci_find_slot(bus->number, temp->devfn)) {
@@ -185,8 +187,12 @@ static void pci_rescan_slot(struct pci_dev *temp)
 				dbg("New device on %s function %x:%x\n",
 					bus->name, temp->devfn >> 3,
 					temp->devfn & 7);
-				pci_bus_add_device(dev);
-				add_slot(dev);
+				retval = pci_bus_add_device(dev);
+				if (retval)
+					dev_err(&dev->dev, "error adding "
+						"device, continuing.\n");
+				else
+					add_slot(dev);
 			}
 		}
 		/* multifunction device? */
@@ -205,8 +211,12 @@ static void pci_rescan_slot(struct pci_dev *temp)
 					dbg("New device on %s function %x:%x\n",
 						bus->name, temp->devfn >> 3,
 						temp->devfn & 7);
-					pci_bus_add_device(dev);
-					add_slot(dev);
+					retval = pci_bus_add_device(dev);
+					if (retval)
+						dev_err(&dev->dev, "error adding "
+							"device, continuing.\n");
+					else
+						add_slot(dev);
 				}
 			}
 		}
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index d8ace1f90dd2..309629e03bae 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -56,6 +56,7 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 		subdevice=PCI_ANY_ID, class=0, class_mask=0;
 	unsigned long driver_data=0;
 	int fields=0;
+	int retval = 0;
 
 	fields = sscanf(buf, "%x %x %x %x %x %x %lux",
 			&vendor, &device, &subvendor, &subdevice,
@@ -82,10 +83,12 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 	spin_unlock(&pdrv->dynids.lock);
 
 	if (get_driver(&pdrv->driver)) {
-		driver_attach(&pdrv->driver);
+		retval = driver_attach(&pdrv->driver);
 		put_driver(&pdrv->driver);
 	}
 
+	if (retval)
+		return retval;
 	return count;
 }
 static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 010e01c4bd43..a1d2e979b17f 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -117,6 +117,7 @@ is_enabled_store(struct device *dev, struct device_attribute *attr,
 		const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	int retval = 0;
 
 	/* this can crash the machine when done on the "wrong" device */
 	if (!capable(CAP_SYS_ADMIN))
@@ -126,8 +127,10 @@ is_enabled_store(struct device *dev, struct device_attribute *attr,
 		pci_disable_device(pdev);
 
 	if (*buf == '1')
-		pci_enable_device(pdev);
+		retval = pci_enable_device(pdev);
 
+	if (retval)
+		return retval;
 	return count;
 }
 
@@ -425,16 +428,39 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 	return pci_mmap_page_range(pdev, vma, mmap_type, 0);
 }
 
+/**
+ * pci_remove_resource_files - cleanup resource files
+ * @dev: dev to cleanup
+ *
+ * If we created resource files for @dev, remove them from sysfs and
+ * free their resources.
+ */
+static void
+pci_remove_resource_files(struct pci_dev *pdev)
+{
+	int i;
+
+	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+		struct bin_attribute *res_attr;
+
+		res_attr = pdev->res_attr[i];
+		if (res_attr) {
+			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+			kfree(res_attr);
+		}
+	}
+}
+
 /**
  * pci_create_resource_files - create resource files in sysfs for @dev
  * @dev: dev in question
  *
  * Walk the resources in @dev creating files for each resource available.
  */
-static void
-pci_create_resource_files(struct pci_dev *pdev)
+static int pci_create_resource_files(struct pci_dev *pdev)
 {
 	int i;
+	int retval;
 
 	/* Expose the PCI resources from this device as files */
 	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
@@ -457,35 +483,19 @@ pci_create_resource_files(struct pci_dev *pdev)
 			res_attr->size = pci_resource_len(pdev, i);
 			res_attr->mmap = pci_mmap_resource;
 			res_attr->private = &pdev->resource[i];
-			sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
-		}
-	}
-}
-
-/**
- * pci_remove_resource_files - cleanup resource files
- * @dev: dev to cleanup
- *
- * If we created resource files for @dev, remove them from sysfs and
- * free their resources.
- */
-static void
-pci_remove_resource_files(struct pci_dev *pdev)
-{
-	int i;
-
-	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
-		struct bin_attribute *res_attr;
-
-		res_attr = pdev->res_attr[i];
-		if (res_attr) {
-			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
-			kfree(res_attr);
+			retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
+			if (retval) {
+				pci_remove_resource_files(pdev);
+				return retval;
+			}
+		} else {
+			return -ENOMEM;
 		}
 	}
+	return 0;
 }
 #else /* !HAVE_PCI_MMAP */
-static inline void pci_create_resource_files(struct pci_dev *dev) { return; }
+static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; }
 static inline void pci_remove_resource_files(struct pci_dev *dev) { return; }
 #endif /* HAVE_PCI_MMAP */
 
@@ -570,22 +580,27 @@ static struct bin_attribute pcie_config_attr = {
 	.write = pci_write_config,
 };
 
-int pci_create_sysfs_dev_files (struct pci_dev *pdev)
+int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
+	struct bin_attribute *rom_attr = NULL;
+	int retval;
+
 	if (!sysfs_initialized)
 		return -EACCES;
 
 	if (pdev->cfg_size < 4096)
-		sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
+		retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
 	else
-		sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
+		retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
+	if (retval)
+		goto err;
 
-	pci_create_resource_files(pdev);
+	retval = pci_create_resource_files(pdev);
+	if (retval)
+		goto err_bin_file;
 
 	/* If the device has a ROM, try to expose it in sysfs. */
 	if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) {
-		struct bin_attribute *rom_attr;
-		
 		rom_attr = kzalloc(sizeof(*rom_attr), GFP_ATOMIC);
 		if (rom_attr) {
 			pdev->rom_attr = rom_attr;
@@ -595,13 +610,28 @@ int pci_create_sysfs_dev_files (struct pci_dev *pdev)
 			rom_attr->attr.owner = THIS_MODULE;
 			rom_attr->read = pci_read_rom;
 			rom_attr->write = pci_write_rom;
-			sysfs_create_bin_file(&pdev->dev.kobj, rom_attr);
+			retval = sysfs_create_bin_file(&pdev->dev.kobj, rom_attr);
+			if (retval)
+				goto err_rom;
+		} else {
+			retval = -ENOMEM;
+			goto err_bin_file;
 		}
 	}
 	/* add platform-specific attributes */
 	pcibios_add_platform_entries(pdev);
-	
+
 	return 0;
+
+err_rom:
+	kfree(rom_attr);
+err_bin_file:
+	if (pdev->cfg_size < 4096)
+		sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
+	else
+		sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
+err:
+	return retval;
 }
 
 /**
@@ -630,10 +660,14 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
 static int __init pci_sysfs_init(void)
 {
 	struct pci_dev *pdev = NULL;
-	
+	int retval;
+
 	sysfs_initialized = 1;
-	for_each_pci_dev(pdev)
-		pci_create_sysfs_dev_files(pdev);
+	for_each_pci_dev(pdev) {
+		retval = pci_create_sysfs_dev_files(pdev);
+		if (retval)
+			return retval;
+	}
 
 	return 0;
 }
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 5591043acea7..1c7e660d6535 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -357,7 +357,8 @@ static int find_aer_service_iter(struct device *device, void *data)
 static void find_aer_service(struct pci_dev *dev,
 		struct find_aer_service_data *data)
 {
-	device_for_each_child(&dev->dev, data, find_aer_service_iter);
+	int retval;
+	retval = device_for_each_child(&dev->dev, data, find_aer_service_iter);
 }
 
 static pci_ers_result_t reset_link(struct pcie_device *aerdev,
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index cf9e810b4bf8..bd6615b4d40e 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -340,8 +340,7 @@ static int suspend_iter(struct device *dev, void *data)
 
 int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state)
 {
-	device_for_each_child(&dev->dev, &state, suspend_iter);
-	return 0;
+	return device_for_each_child(&dev->dev, &state, suspend_iter);
 }
 
 static int resume_iter(struct device *dev, void *data)
@@ -359,8 +358,7 @@ static int resume_iter(struct device *dev, void *data)
 
 int pcie_port_device_resume(struct pci_dev *dev)
 {
-	device_for_each_child(&dev->dev, NULL, resume_iter);
-	return 0;
+	return device_for_each_child(&dev->dev, NULL, resume_iter);
 }
 #endif
 
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index e4a2429986f0..037690e08f5f 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -147,8 +147,10 @@ static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
 {
 	struct aer_broadcast_data result_data =
 			{error, PCI_ERS_RESULT_CAN_RECOVER};
+	int retval;
 
-	device_for_each_child(&dev->dev, &result_data, error_detected_iter);
+	/* can not fail */
+	retval = device_for_each_child(&dev->dev, &result_data, error_detected_iter);
 
 	return result_data.result;
 }
@@ -181,8 +183,10 @@ static int mmio_enabled_iter(struct device *device, void *data)
 static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev)
 {
 	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
+	int retval;
 
-	device_for_each_child(&dev->dev, &status, mmio_enabled_iter);
+	/* get true return value from &status */
+	retval = device_for_each_child(&dev->dev, &status, mmio_enabled_iter);
 	return status;
 }
 
@@ -214,6 +218,7 @@ static int slot_reset_iter(struct device *device, void *data)
 static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
 {
 	pci_ers_result_t status;
+	int retval;
 
 	/* If fatal, restore cfg space for possible link reset at upstream */
 	if (dev->error_state == pci_channel_io_frozen) {
@@ -221,7 +226,8 @@ static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
 		pci_enable_pcie_error_reporting(dev);
 	}
 
-	device_for_each_child(&dev->dev, &status, slot_reset_iter);
+	/* get true return value from &status */
+	retval = device_for_each_child(&dev->dev, &status, slot_reset_iter);
 
 	return status;
 }
@@ -248,7 +254,9 @@ static int resume_iter(struct device *device, void *data)
 
 static void pcie_portdrv_err_resume(struct pci_dev *dev)
 {
-	device_for_each_child(&dev->dev, NULL, resume_iter);
+	int retval;
+	/* nothing to do with error value, if it ever happens */
+	retval = device_for_each_child(&dev->dev, NULL, resume_iter);
 }
 
 /*
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c5a58d1c6c1c..a3b0a5eb5054 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -339,6 +339,7 @@ pci_alloc_child_bus(struct pci_bus *parent, struct pci_dev *bridge, int busnr)
 {
 	struct pci_bus *child;
 	int i;
+	int retval;
 
 	/*
 	 * Allocate a new bus, and inherit stuff from the parent..
@@ -356,8 +357,13 @@ pci_alloc_child_bus(struct pci_bus *parent, struct pci_dev *bridge, int busnr)
 
 	child->class_dev.class = &pcibus_class;
 	sprintf(child->class_dev.class_id, "%04x:%02x", pci_domain_nr(child), busnr);
-	class_device_register(&child->class_dev);
-	class_device_create_file(&child->class_dev, &class_device_attr_cpuaffinity);
+	retval = class_device_register(&child->class_dev);
+	if (retval)
+		goto error_register;
+	retval = class_device_create_file(&child->class_dev,
+					  &class_device_attr_cpuaffinity);
+	if (retval)
+		goto error_file_create;
 
 	/*
 	 * Set up the primary, secondary and subordinate
@@ -375,6 +381,12 @@ pci_alloc_child_bus(struct pci_bus *parent, struct pci_dev *bridge, int busnr)
 	bridge->subordinate = child;
 
 	return child;
+
+error_file_create:
+	class_device_unregister(&child->class_dev);
+error_register:
+	kfree(child);
+	return NULL;
 }
 
 struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3ec72551ac31..c9bb7bee52c7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -431,7 +431,7 @@ int pci_scan_slot(struct pci_bus *bus, int devfn);
 struct pci_dev * pci_scan_single_device(struct pci_bus *bus, int devfn);
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus);
 unsigned int pci_scan_child_bus(struct pci_bus *bus);
-void pci_bus_add_device(struct pci_dev *dev);
+int __must_check pci_bus_add_device(struct pci_dev *dev);
 void pci_read_bridge_bases(struct pci_bus *child);
 struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res);
 int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
-- 
cgit v1.2.3


From 50b0075520a0acba9cabab5203bbce918b966d9a Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Wed, 16 Aug 2006 17:42:18 +0100
Subject: PCI: Multiprobe sanitizer

There are numerous drivers that can use multithreaded probing but having
some kind of global flag as the way to control this makes migration to
threaded probing hard and since it enables it everywhere and is almost
as likely to cause serious pain as holding a clog dance in a minefield.

If we have a pci_driver multithread_probe flag to inherit you can turn
it on for one driver at a time.

From playing so far however I think we need a different model at the
device layer which serializes until the called probe function says "ok
you can start another one now". That would need some kind of flag and
semaphore plus a helper function.

Anyway in the absence of that this is a starting point to usefully play
with this stuff

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/pci-driver.c | 6 +++++-
 include/linux/pci.h      | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 309629e03bae..b1c0c707d96c 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -421,7 +421,11 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner)
 	drv->driver.bus = &pci_bus_type;
 	drv->driver.owner = owner;
 	drv->driver.kobj.ktype = &pci_driver_kobj_type;
-	drv->driver.multithread_probe = pci_multithread_probe;
+
+	if (pci_multithread_probe)
+		drv->driver.multithread_probe = pci_multithread_probe;
+	else
+		drv->driver.multithread_probe = drv->multithread_probe;
 
 	spin_lock_init(&drv->dynids.lock);
 	INIT_LIST_HEAD(&drv->dynids.list);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c9bb7bee52c7..549d8410974b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -356,6 +356,8 @@ struct pci_driver {
 	struct pci_error_handlers *err_handler;
 	struct device_driver	driver;
 	struct pci_dynids dynids;
+
+	int multithread_probe;
 };
 
 #define	to_pci_driver(drv) container_of(drv,struct pci_driver, driver)
-- 
cgit v1.2.3


From 24f8aa9b464b73e0553f092b747770940ee0ea54 Mon Sep 17 00:00:00 2001
From: Satoru Takeuchi <takeuchi_satoru@jp.fujitsu.com>
Date: Tue, 12 Sep 2006 10:16:36 -0700
Subject: PCI: add pci_stop_bus_device

This patch adds pci_stop_bus_device() which stops a PCI device (detach
the driver, remove from the global list and so on) and any children.
This is needed for ACPI based PCI-to-PCI bridge hot-remove, and it will
be also needed for ACPI based PCI root bridge hot-remove.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: MUNEDA Takahiro <muneda.takahiro@jp.fujitsu.com>
Signed-off-by: Satoru Takeuchi <takeuchi_satoru@jp.fujitsu.com>
Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/remove.c | 37 ++++++++++++++++++++++++++++++++++++-
 include/linux/pci.h  |  1 +
 2 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 99ffbd478b29..430281b2e921 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -16,8 +16,11 @@ static void pci_free_resources(struct pci_dev *dev)
 	}
 }
 
-static void pci_destroy_dev(struct pci_dev *dev)
+static void pci_stop_dev(struct pci_dev *dev)
 {
+	if (!dev->global_list.next)
+		return;
+
 	if (!list_empty(&dev->global_list)) {
 		pci_proc_detach_device(dev);
 		pci_remove_sysfs_dev_files(dev);
@@ -27,6 +30,11 @@ static void pci_destroy_dev(struct pci_dev *dev)
 		dev->global_list.next = dev->global_list.prev = NULL;
 		up_write(&pci_bus_sem);
 	}
+}
+
+static void pci_destroy_dev(struct pci_dev *dev)
+{
+	pci_stop_dev(dev);
 
 	/* Remove the device from the device lists, and prevent any further
 	 * list accesses from this device */
@@ -119,5 +127,32 @@ void pci_remove_behind_bridge(struct pci_dev *dev)
 	}
 }
 
+static void pci_stop_bus_devices(struct pci_bus *bus)
+{
+	struct list_head *l, *n;
+
+	list_for_each_safe(l, n, &bus->devices) {
+		struct pci_dev *dev = pci_dev_b(l);
+		pci_stop_bus_device(dev);
+	}
+}
+
+/**
+ * pci_stop_bus_device - stop a PCI device and any children
+ * @dev: the device to stop
+ *
+ * Stop a PCI device (detach the driver, remove from the global list
+ * and so on). This also stop any subordinate buses and children in a
+ * depth-first manner.
+ */
+void pci_stop_bus_device(struct pci_dev *dev)
+{
+	if (dev->subordinate)
+		pci_stop_bus_devices(dev->subordinate);
+
+	pci_stop_dev(dev);
+}
+
 EXPORT_SYMBOL(pci_remove_bus_device);
 EXPORT_SYMBOL(pci_remove_behind_bridge);
+EXPORT_SYMBOL_GPL(pci_stop_bus_device);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 549d8410974b..5c3a4176eb64 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -441,6 +441,7 @@ extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
 extern void pci_dev_put(struct pci_dev *dev);
 extern void pci_remove_bus(struct pci_bus *b);
 extern void pci_remove_bus_device(struct pci_dev *dev);
+extern void pci_stop_bus_device(struct pci_dev *dev);
 void pci_setup_cardbus(struct pci_bus *bus);
 
 /* Generic PCI functions exported to card drivers */
-- 
cgit v1.2.3


From d48f1de2d8170814fb64effa320848410c466f95 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 20 Sep 2006 20:56:02 +0100
Subject: [MIPS] Remove EV96100 as previously announced.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 Documentation/feature-removal-schedule.txt   |   8 -
 Documentation/kernel-parameters.txt          |   2 -
 arch/mips/Kconfig                            |  24 -
 arch/mips/Makefile                           |   7 -
 arch/mips/configs/atlas_defconfig            |   1 -
 arch/mips/configs/bigsur_defconfig           |   1 -
 arch/mips/configs/capcella_defconfig         |   1 -
 arch/mips/configs/cobalt_defconfig           |   1 -
 arch/mips/configs/db1000_defconfig           |   1 -
 arch/mips/configs/db1100_defconfig           |   1 -
 arch/mips/configs/db1200_defconfig           |   1 -
 arch/mips/configs/db1500_defconfig           |   1 -
 arch/mips/configs/db1550_defconfig           |   1 -
 arch/mips/configs/ddb5477_defconfig          |   1 -
 arch/mips/configs/decstation_defconfig       |   1 -
 arch/mips/configs/e55_defconfig              |   1 -
 arch/mips/configs/emma2rh_defconfig          |   1 -
 arch/mips/configs/ev64120_defconfig          |   1 -
 arch/mips/configs/ev96100_defconfig          | 850 ---------------------------
 arch/mips/configs/excite_defconfig           |   1 -
 arch/mips/configs/ip22_defconfig             |   1 -
 arch/mips/configs/ip27_defconfig             |   1 -
 arch/mips/configs/ip32_defconfig             |   1 -
 arch/mips/configs/it8172_defconfig           |   1 -
 arch/mips/configs/ivr_defconfig              |   1 -
 arch/mips/configs/jaguar-atx_defconfig       |   1 -
 arch/mips/configs/jmr3927_defconfig          |   1 -
 arch/mips/configs/lasat200_defconfig         |   1 -
 arch/mips/configs/malta_defconfig            |   1 -
 arch/mips/configs/mipssim_defconfig          |   1 -
 arch/mips/configs/mpc30x_defconfig           |   1 -
 arch/mips/configs/ocelot_3_defconfig         |   1 -
 arch/mips/configs/ocelot_c_defconfig         |   1 -
 arch/mips/configs/ocelot_defconfig           |   1 -
 arch/mips/configs/ocelot_g_defconfig         |   1 -
 arch/mips/configs/pb1100_defconfig           |   1 -
 arch/mips/configs/pb1500_defconfig           |   1 -
 arch/mips/configs/pb1550_defconfig           |   1 -
 arch/mips/configs/pnx8550-jbs_defconfig      |   1 -
 arch/mips/configs/pnx8550-v2pci_defconfig    |   1 -
 arch/mips/configs/qemu_defconfig             |   1 -
 arch/mips/configs/rbhma4500_defconfig        |   1 -
 arch/mips/configs/rm200_defconfig            |   1 -
 arch/mips/configs/sb1250-swarm_defconfig     |   1 -
 arch/mips/configs/sead_defconfig             |   1 -
 arch/mips/configs/tb0226_defconfig           |   1 -
 arch/mips/configs/tb0229_defconfig           |   1 -
 arch/mips/configs/tb0287_defconfig           |   1 -
 arch/mips/configs/workpad_defconfig          |   1 -
 arch/mips/configs/wrppmc_defconfig           |   1 -
 arch/mips/configs/yosemite_defconfig         |   1 -
 arch/mips/defconfig                          |   1 -
 arch/mips/galileo-boards/ev96100/Makefile    |   9 -
 arch/mips/galileo-boards/ev96100/init.c      | 173 ------
 arch/mips/galileo-boards/ev96100/irq.c       |  77 ---
 arch/mips/galileo-boards/ev96100/puts.c      | 138 -----
 arch/mips/galileo-boards/ev96100/reset.c     |  70 ---
 arch/mips/galileo-boards/ev96100/setup.c     | 159 -----
 arch/mips/galileo-boards/ev96100/time.c      |  88 ---
 arch/mips/pci/Makefile                       |   2 -
 arch/mips/pci/fixup-ev96100.c                |  48 --
 arch/mips/pci/ops-gt96100.c                  | 169 ------
 arch/mips/pci/pci-ev96100.c                  |  63 --
 include/asm-mips/bootinfo.h                  |   3 +-
 include/asm-mips/galileo-boards/gt96100.h    | 427 --------------
 include/asm-mips/mach-ev96100/mach-gt64120.h |  46 --
 include/asm-mips/serial.h                    |   4 +-
 include/linux/pci_ids.h                      |   3 -
 68 files changed, 3 insertions(+), 2414 deletions(-)
 delete mode 100644 arch/mips/configs/ev96100_defconfig
 delete mode 100644 arch/mips/galileo-boards/ev96100/Makefile
 delete mode 100644 arch/mips/galileo-boards/ev96100/init.c
 delete mode 100644 arch/mips/galileo-boards/ev96100/irq.c
 delete mode 100644 arch/mips/galileo-boards/ev96100/puts.c
 delete mode 100644 arch/mips/galileo-boards/ev96100/reset.c
 delete mode 100644 arch/mips/galileo-boards/ev96100/setup.c
 delete mode 100644 arch/mips/galileo-boards/ev96100/time.c
 delete mode 100644 arch/mips/pci/fixup-ev96100.c
 delete mode 100644 arch/mips/pci/ops-gt96100.c
 delete mode 100644 arch/mips/pci/pci-ev96100.c
 delete mode 100644 include/asm-mips/galileo-boards/gt96100.h
 delete mode 100644 include/asm-mips/mach-ev96100/mach-gt64120.h

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 611acc32fdf5..bf56b20652b0 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -217,14 +217,6 @@ Who:	Nick Piggin <npiggin@suse.de>
 
 ---------------------------
 
-What:	Support for the MIPS EV96100 evaluation board
-When:	September 2006
-Why:	Does no longer build since at least November 15, 2003, apparently
-	no userbase left.
-Who:	Ralf Baechle <ralf@linux-mips.org>
-
----------------------------
-
 What:	Support for the Momentum / PMC-Sierra Jaguar ATX evaluation board
 When:	September 2006
 Why:	Does no longer build since quite some time, and was never popular,
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c918cc3f65fb..255ec535bba8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -573,8 +573,6 @@ running once the system is up.
 	gscd=		[HW,CD]
 			Format: <io>
 
-	gt96100eth=	[NET] MIPS GT96100 Advanced Communication Controller
-
 	gus=		[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma16>
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 235208d658ff..30750c54bdf5 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -203,26 +203,6 @@ config MIPS_EV64120
 	  <http://www.marvell.com/>.  Say Y here if you wish to build a
 	  kernel for this platform.
 
-config MIPS_EV96100
-	bool "Galileo EV96100 Evaluation board (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	select DMA_NONCOHERENT
-	select HW_HAS_PCI
-	select IRQ_CPU
-	select MIPS_GT96100
-	select RM7000_CPU_SCACHE
-	select SWAP_IO_SPACE
-	select SYS_HAS_CPU_R5000
-	select SYS_HAS_CPU_RM7000
-	select SYS_SUPPORTS_32BIT_KERNEL
-	select SYS_SUPPORTS_64BIT_KERNEL if EXPERIMENTAL
-	select SYS_SUPPORTS_BIG_ENDIAN
-	help
-	  This is an evaluation board based on the Galileo GT-96100 LAN/WAN
-	  communications controllers containing a MIPS R5000 compatible core
-	  running at 83MHz. Their website is <http://www.marvell.com/>. Say Y
-	  here if you wish to build a kernel for this platform.
-
 config MIPS_IVR
 	bool "Globespan IVR board"
 	select DMA_NONCOHERENT
@@ -1069,10 +1049,6 @@ config AU1X00_USB_DEVICE
 	depends on MIPS_PB1500 || MIPS_PB1100 || MIPS_PB1000
 	default n
 
-config MIPS_GT96100
-	bool
-	select MIPS_GT64120
-
 config IT8172_CIR
 	bool
 	depends on MIPS_ITE8172 || MIPS_IVR
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index f4227d24227d..e521826b4234 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -279,13 +279,6 @@ core-$(CONFIG_MIPS_EV64120)	+= arch/mips/gt64120/common/
 cflags-$(CONFIG_MIPS_EV64120)	+= -Iinclude/asm-mips/mach-ev64120
 load-$(CONFIG_MIPS_EV64120)	+= 0xffffffff80100000
 
-#
-# Galileo EV96100 Board
-#
-core-$(CONFIG_MIPS_EV96100)	+= arch/mips/galileo-boards/ev96100/
-cflags-$(CONFIG_MIPS_EV96100)	+= -Iinclude/asm-mips/mach-ev96100
-load-$(CONFIG_MIPS_EV96100)	+= 0xffffffff80100000
-
 #
 # Wind River PPMC Board (4KC + GT64120)
 #
diff --git a/arch/mips/configs/atlas_defconfig b/arch/mips/configs/atlas_defconfig
index 54274065e9a5..2774ba9606b7 100644
--- a/arch/mips/configs/atlas_defconfig
+++ b/arch/mips/configs/atlas_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index 887fd959482a..e12a475dcbf4 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/capcella_defconfig b/arch/mips/configs/capcella_defconfig
index a01344f3a4c2..bfade9abb767 100644
--- a/arch/mips/configs/capcella_defconfig
+++ b/arch/mips/configs/capcella_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig
index c95682445a28..78db2a8a711b 100644
--- a/arch/mips/configs/cobalt_defconfig
+++ b/arch/mips/configs/cobalt_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 CONFIG_MIPS_COBALT=y
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/db1000_defconfig b/arch/mips/configs/db1000_defconfig
index c2f33d3af62c..93cca1585bc3 100644
--- a/arch/mips/configs/db1000_defconfig
+++ b/arch/mips/configs/db1000_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS_DB1000=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/db1100_defconfig b/arch/mips/configs/db1100_defconfig
index 8c44d16ae9a2..ffd99252a837 100644
--- a/arch/mips/configs/db1100_defconfig
+++ b/arch/mips/configs/db1100_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS_DB1100=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/db1200_defconfig b/arch/mips/configs/db1200_defconfig
index c13768e75ac5..63eac5e89b9c 100644
--- a/arch/mips/configs/db1200_defconfig
+++ b/arch/mips/configs/db1200_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS_DB1200=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/db1500_defconfig b/arch/mips/configs/db1500_defconfig
index 8aea73fae7fb..25a095f7dc4e 100644
--- a/arch/mips/configs/db1500_defconfig
+++ b/arch/mips/configs/db1500_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS_DB1500=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/db1550_defconfig b/arch/mips/configs/db1550_defconfig
index 90ccb7359630..dda469c842b3 100644
--- a/arch/mips/configs/db1550_defconfig
+++ b/arch/mips/configs/db1550_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS_DB1550=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ddb5477_defconfig b/arch/mips/configs/ddb5477_defconfig
index b598cf08f156..fcd3dd19bc74 100644
--- a/arch/mips/configs/ddb5477_defconfig
+++ b/arch/mips/configs/ddb5477_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 597150b14077..8683e0df12e0 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 CONFIG_MACH_DECSTATION=y
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/e55_defconfig b/arch/mips/configs/e55_defconfig
index d9a0d0eb0683..4ace61c95778 100644
--- a/arch/mips/configs/e55_defconfig
+++ b/arch/mips/configs/e55_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/emma2rh_defconfig b/arch/mips/configs/emma2rh_defconfig
index 375b2ac24a49..5847c916c130 100644
--- a/arch/mips/configs/emma2rh_defconfig
+++ b/arch/mips/configs/emma2rh_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ev64120_defconfig b/arch/mips/configs/ev64120_defconfig
index b0afc118bd5c..bc4c4f125c48 100644
--- a/arch/mips/configs/ev64120_defconfig
+++ b/arch/mips/configs/ev64120_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 CONFIG_MIPS_EV64120=y
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ev96100_defconfig b/arch/mips/configs/ev96100_defconfig
deleted file mode 100644
index 0bdc10f11610..000000000000
--- a/arch/mips/configs/ev96100_defconfig
+++ /dev/null
@@ -1,850 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.18-rc1
-# Thu Jul  6 10:04:05 2006
-#
-CONFIG_MIPS=y
-
-#
-# Machine selection
-#
-# CONFIG_MIPS_MTX1 is not set
-# CONFIG_MIPS_BOSPORUS is not set
-# CONFIG_MIPS_PB1000 is not set
-# CONFIG_MIPS_PB1100 is not set
-# CONFIG_MIPS_PB1500 is not set
-# CONFIG_MIPS_PB1550 is not set
-# CONFIG_MIPS_PB1200 is not set
-# CONFIG_MIPS_DB1000 is not set
-# CONFIG_MIPS_DB1100 is not set
-# CONFIG_MIPS_DB1500 is not set
-# CONFIG_MIPS_DB1550 is not set
-# CONFIG_MIPS_DB1200 is not set
-# CONFIG_MIPS_MIRAGE is not set
-# CONFIG_BASLER_EXCITE is not set
-# CONFIG_MIPS_COBALT is not set
-# CONFIG_MACH_DECSTATION is not set
-# CONFIG_MIPS_EV64120 is not set
-CONFIG_MIPS_EV96100=y
-# CONFIG_MIPS_IVR is not set
-# CONFIG_MIPS_ITE8172 is not set
-# CONFIG_MACH_JAZZ is not set
-# CONFIG_LASAT is not set
-# CONFIG_MIPS_ATLAS is not set
-# CONFIG_MIPS_MALTA is not set
-# CONFIG_MIPS_SEAD is not set
-# CONFIG_WR_PPMC is not set
-# CONFIG_MIPS_SIM is not set
-# CONFIG_MOMENCO_JAGUAR_ATX is not set
-# CONFIG_MOMENCO_OCELOT is not set
-# CONFIG_MOMENCO_OCELOT_3 is not set
-# CONFIG_MOMENCO_OCELOT_C is not set
-# CONFIG_MOMENCO_OCELOT_G is not set
-# CONFIG_MIPS_XXS1500 is not set
-# CONFIG_PNX8550_V2PCI is not set
-# CONFIG_PNX8550_JBS is not set
-# CONFIG_DDB5477 is not set
-# CONFIG_MACH_VR41XX is not set
-# CONFIG_PMC_YOSEMITE is not set
-# CONFIG_QEMU is not set
-# CONFIG_MARKEINS is not set
-# CONFIG_SGI_IP22 is not set
-# CONFIG_SGI_IP27 is not set
-# CONFIG_SGI_IP32 is not set
-# CONFIG_SIBYTE_BIGSUR is not set
-# CONFIG_SIBYTE_SWARM is not set
-# CONFIG_SIBYTE_SENTOSA is not set
-# CONFIG_SIBYTE_RHONE is not set
-# CONFIG_SIBYTE_CARMEL is not set
-# CONFIG_SIBYTE_PTSWARM is not set
-# CONFIG_SIBYTE_LITTLESUR is not set
-# CONFIG_SIBYTE_CRHINE is not set
-# CONFIG_SIBYTE_CRHONE is not set
-# CONFIG_SNI_RM200_PCI is not set
-# CONFIG_TOSHIBA_JMR3927 is not set
-# CONFIG_TOSHIBA_RBTX4927 is not set
-# CONFIG_TOSHIBA_RBTX4938 is not set
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_DMA_NONCOHERENT=y
-CONFIG_DMA_NEED_PCI_MAP_STATE=y
-CONFIG_CPU_BIG_ENDIAN=y
-# CONFIG_CPU_LITTLE_ENDIAN is not set
-CONFIG_SYS_SUPPORTS_BIG_ENDIAN=y
-CONFIG_IRQ_CPU=y
-CONFIG_MIPS_GT64120=y
-CONFIG_SWAP_IO_SPACE=y
-CONFIG_MIPS_GT96100=y
-CONFIG_MIPS_L1_CACHE_SHIFT=5
-
-#
-# CPU selection
-#
-# CONFIG_CPU_MIPS32_R1 is not set
-# CONFIG_CPU_MIPS32_R2 is not set
-# CONFIG_CPU_MIPS64_R1 is not set
-# CONFIG_CPU_MIPS64_R2 is not set
-# CONFIG_CPU_R3000 is not set
-# CONFIG_CPU_TX39XX is not set
-# CONFIG_CPU_VR41XX is not set
-# CONFIG_CPU_R4300 is not set
-# CONFIG_CPU_R4X00 is not set
-# CONFIG_CPU_TX49XX is not set
-# CONFIG_CPU_R5000 is not set
-# CONFIG_CPU_R5432 is not set
-# CONFIG_CPU_R6000 is not set
-# CONFIG_CPU_NEVADA is not set
-# CONFIG_CPU_R8000 is not set
-# CONFIG_CPU_R10000 is not set
-CONFIG_CPU_RM7000=y
-# CONFIG_CPU_RM9000 is not set
-# CONFIG_CPU_SB1 is not set
-CONFIG_SYS_HAS_CPU_R5000=y
-CONFIG_SYS_HAS_CPU_RM7000=y
-CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y
-CONFIG_SYS_SUPPORTS_64BIT_KERNEL=y
-CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y
-CONFIG_CPU_SUPPORTS_64BIT_KERNEL=y
-
-#
-# Kernel type
-#
-CONFIG_32BIT=y
-# CONFIG_64BIT is not set
-CONFIG_PAGE_SIZE_4KB=y
-# CONFIG_PAGE_SIZE_8KB is not set
-# CONFIG_PAGE_SIZE_16KB is not set
-# CONFIG_PAGE_SIZE_64KB is not set
-CONFIG_BOARD_SCACHE=y
-CONFIG_RM7000_CPU_SCACHE=y
-CONFIG_CPU_HAS_PREFETCH=y
-CONFIG_MIPS_MT_DISABLED=y
-# CONFIG_MIPS_MT_SMTC is not set
-# CONFIG_MIPS_MT_SMP is not set
-# CONFIG_MIPS_VPE_LOADER is not set
-# CONFIG_64BIT_PHYS_ADDR is not set
-CONFIG_CPU_HAS_LLSC=y
-CONFIG_CPU_HAS_SYNC=y
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_CPU_SUPPORTS_HIGHMEM=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-# CONFIG_RESOURCES_64BIT is not set
-# CONFIG_HZ_48 is not set
-# CONFIG_HZ_100 is not set
-# CONFIG_HZ_128 is not set
-# CONFIG_HZ_250 is not set
-# CONFIG_HZ_256 is not set
-CONFIG_HZ_1000=y
-# CONFIG_HZ_1024 is not set
-CONFIG_SYS_SUPPORTS_ARBIT_HZ=y
-CONFIG_HZ=1000
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
-# CONFIG_PREEMPT is not set
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_RELAY=y
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EMBEDDED=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_RT_MUTEXES=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SHMEM=y
-CONFIG_SLAB=y
-CONFIG_VM_EVENT_COUNTERS=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-# CONFIG_KMOD is not set
-
-#
-# Block layer
-#
-# CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
-# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
-
-#
-# Bus options (PCI, PCMCIA, EISA, ISA, TC)
-#
-CONFIG_HW_HAS_PCI=y
-# CONFIG_PCI is not set
-CONFIG_MMU=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# PCI Hotplug Support
-#
-
-#
-# Executable file formats
-#
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-CONFIG_TRAD_SIGNALS=y
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
-# CONFIG_PACKET is not set
-CONFIG_UNIX=y
-CONFIG_XFRM=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-CONFIG_IP_PNP=y
-# CONFIG_IP_PNP_DHCP is not set
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IP_PNP_RARP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-# CONFIG_IPV6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
-CONFIG_NETWORK_SECMARK=y
-# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-CONFIG_IEEE80211=m
-# CONFIG_IEEE80211_DEBUG is not set
-CONFIG_IEEE80211_CRYPT_WEP=m
-CONFIG_IEEE80211_CRYPT_CCMP=m
-CONFIG_IEEE80211_SOFTMAC=m
-# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set
-CONFIG_WIRELESS_EXT=y
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-CONFIG_CONNECTOR=m
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_RAM is not set
-# CONFIG_BLK_DEV_INITRD is not set
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_CDROM_PKTCDVD_BUFFERS=8
-# CONFIG_CDROM_PKTCDVD_WCACHE is not set
-CONFIG_ATA_OVER_ETH=m
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-CONFIG_RAID_ATTRS=m
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# PHY device support
-#
-CONFIG_PHYLIB=m
-
-#
-# MII PHY device drivers
-#
-CONFIG_MARVELL_PHY=m
-CONFIG_DAVICOM_PHY=m
-CONFIG_QSEMI_PHY=m
-CONFIG_LXT_PHY=m
-CONFIG_CICADA_PHY=m
-CONFIG_VITESSE_PHY=m
-CONFIG_SMSC_PHY=m
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-# CONFIG_MII is not set
-CONFIG_MIPS_GT96100ETH=y
-# CONFIG_DM9000 is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-
-#
-# Ethernet (10000 Mbit)
-#
-
-#
-# Token Ring devices
-#
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-# CONFIG_SERIO_I8042 is not set
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_LIBPS2 is not set
-CONFIG_SERIO_RAW=m
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=4
-CONFIG_SERIAL_8250_RUNTIME_UARTS=4
-# CONFIG_SERIAL_8250_EXTENDED is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_HW_RANDOM is not set
-# CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-# CONFIG_HWMON is not set
-# CONFIG_HWMON_VID is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-CONFIG_VIDEO_V4L2=y
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FIRMWARE_EDID is not set
-# CONFIG_FB is not set
-
-#
-# Console display driver support
-#
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_DUMMY_CONSOLE=y
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB_ARCH_HAS_EHCI is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# LED devices
-#
-# CONFIG_NEW_LEDS is not set
-
-#
-# LED drivers
-#
-
-#
-# LED Triggers
-#
-
-#
-# InfiniBand support
-#
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
-
-#
-# Real Time Clock
-#
-# CONFIG_RTC_CLASS is not set
-
-#
-# DMA Engine support
-#
-# CONFIG_DMA_ENGINE is not set
-
-#
-# DMA Clients
-#
-
-#
-# DMA Devices
-#
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT2_FS_XIP is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-# CONFIG_XFS_FS is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_INOTIFY=y
-CONFIG_INOTIFY_USER=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-CONFIG_FUSE_FS=m
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=y
-# CONFIG_NFS_V3 is not set
-# CONFIG_NFS_V4 is not set
-# CONFIG_NFS_DIRECTIO is not set
-# CONFIG_NFSD is not set
-CONFIG_ROOT_NFS=y
-CONFIG_LOCKD=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-# CONFIG_RPCSEC_GSS_KRB5 is not set
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_CIFS_DEBUG2 is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-# CONFIG_MAGIC_SYSRQ is not set
-# CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_DEBUG_FS is not set
-CONFIG_CROSSCOMPILE=y
-CONFIG_CMDLINE=""
-
-#
-# Security options
-#
-CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=m
-CONFIG_CRYPTO_SHA1=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_DES=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_AES=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_DEFLATE=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_CRC32C=m
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-CONFIG_CRC16=m
-CONFIG_CRC32=m
-CONFIG_LIBCRC32C=m
-CONFIG_ZLIB_INFLATE=m
-CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
diff --git a/arch/mips/configs/excite_defconfig b/arch/mips/configs/excite_defconfig
index 045ebd089893..eb87cbbfd037 100644
--- a/arch/mips/configs/excite_defconfig
+++ b/arch/mips/configs/excite_defconfig
@@ -26,7 +26,6 @@ CONFIG_BASLER_EXCITE=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index ef16d1fb5071..9f22d13e729d 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index 4bf1ee7f5f00..414a649dff8a 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index f83dc09c3ca9..dec2ba6ba03f 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/it8172_defconfig b/arch/mips/configs/it8172_defconfig
index a91d72a9ca86..37f9dd7187b1 100644
--- a/arch/mips/configs/it8172_defconfig
+++ b/arch/mips/configs/it8172_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 CONFIG_MIPS_ITE8172=y
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ivr_defconfig b/arch/mips/configs/ivr_defconfig
index cebc67212d06..18874a4c24fe 100644
--- a/arch/mips/configs/ivr_defconfig
+++ b/arch/mips/configs/ivr_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 CONFIG_MIPS_IVR=y
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/jaguar-atx_defconfig b/arch/mips/configs/jaguar-atx_defconfig
index 5d9eb11aba3d..9b529857f802 100644
--- a/arch/mips/configs/jaguar-atx_defconfig
+++ b/arch/mips/configs/jaguar-atx_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/jmr3927_defconfig b/arch/mips/configs/jmr3927_defconfig
index be45a9044d06..fded3f73815f 100644
--- a/arch/mips/configs/jmr3927_defconfig
+++ b/arch/mips/configs/jmr3927_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/lasat200_defconfig b/arch/mips/configs/lasat200_defconfig
index 64dc9f45a19c..0354bfa18b39 100644
--- a/arch/mips/configs/lasat200_defconfig
+++ b/arch/mips/configs/lasat200_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 2690baf15a85..b21ca470273a 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/mipssim_defconfig b/arch/mips/configs/mipssim_defconfig
index c298979c18ae..adbeeadddb8f 100644
--- a/arch/mips/configs/mipssim_defconfig
+++ b/arch/mips/configs/mipssim_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/mpc30x_defconfig b/arch/mips/configs/mpc30x_defconfig
index 4405d127a941..79fd544fcb2a 100644
--- a/arch/mips/configs/mpc30x_defconfig
+++ b/arch/mips/configs/mpc30x_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ocelot_3_defconfig b/arch/mips/configs/ocelot_3_defconfig
index ec5758f22676..4d87da2b99fd 100644
--- a/arch/mips/configs/ocelot_3_defconfig
+++ b/arch/mips/configs/ocelot_3_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ocelot_c_defconfig b/arch/mips/configs/ocelot_c_defconfig
index 0d33d87de1a1..ce7c26475328 100644
--- a/arch/mips/configs/ocelot_c_defconfig
+++ b/arch/mips/configs/ocelot_c_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ocelot_defconfig b/arch/mips/configs/ocelot_defconfig
index 4b999102715e..3da2a43d1608 100644
--- a/arch/mips/configs/ocelot_defconfig
+++ b/arch/mips/configs/ocelot_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/ocelot_g_defconfig b/arch/mips/configs/ocelot_g_defconfig
index 827b344f6010..3a3e5ff4a3ef 100644
--- a/arch/mips/configs/ocelot_g_defconfig
+++ b/arch/mips/configs/ocelot_g_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/pb1100_defconfig b/arch/mips/configs/pb1100_defconfig
index 9ed60fef69e0..1a16e92900cb 100644
--- a/arch/mips/configs/pb1100_defconfig
+++ b/arch/mips/configs/pb1100_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS_PB1100=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/pb1500_defconfig b/arch/mips/configs/pb1500_defconfig
index 6774254b1be6..9ea8edea6f29 100644
--- a/arch/mips/configs/pb1500_defconfig
+++ b/arch/mips/configs/pb1500_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS_PB1500=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/pb1550_defconfig b/arch/mips/configs/pb1550_defconfig
index 1afe5bf6e765..c4a158976f8f 100644
--- a/arch/mips/configs/pb1550_defconfig
+++ b/arch/mips/configs/pb1550_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS_PB1550=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/pnx8550-jbs_defconfig b/arch/mips/configs/pnx8550-jbs_defconfig
index ac616c82d348..1cbf270c301c 100644
--- a/arch/mips/configs/pnx8550-jbs_defconfig
+++ b/arch/mips/configs/pnx8550-jbs_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/pnx8550-v2pci_defconfig b/arch/mips/configs/pnx8550-v2pci_defconfig
index a8eb51bae3f3..bec30b15b9bd 100644
--- a/arch/mips/configs/pnx8550-v2pci_defconfig
+++ b/arch/mips/configs/pnx8550-v2pci_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/qemu_defconfig b/arch/mips/configs/qemu_defconfig
index 6a63a113b7ea..daa40c8ff694 100644
--- a/arch/mips/configs/qemu_defconfig
+++ b/arch/mips/configs/qemu_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/rbhma4500_defconfig b/arch/mips/configs/rbhma4500_defconfig
index 6779f449bd2d..2f5650227ba3 100644
--- a/arch/mips/configs/rbhma4500_defconfig
+++ b/arch/mips/configs/rbhma4500_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index b7826d3a2b77..f26b338333ac 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/sb1250-swarm_defconfig b/arch/mips/configs/sb1250-swarm_defconfig
index 625c1c619b6b..9041f095f96f 100644
--- a/arch/mips/configs/sb1250-swarm_defconfig
+++ b/arch/mips/configs/sb1250-swarm_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/sead_defconfig b/arch/mips/configs/sead_defconfig
index 4401b602118f..02abb2f1bfaf 100644
--- a/arch/mips/configs/sead_defconfig
+++ b/arch/mips/configs/sead_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/tb0226_defconfig b/arch/mips/configs/tb0226_defconfig
index 2ba4e25e8c34..ca3d0c4ba15b 100644
--- a/arch/mips/configs/tb0226_defconfig
+++ b/arch/mips/configs/tb0226_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/tb0229_defconfig b/arch/mips/configs/tb0229_defconfig
index fc8a407c1add..4e2009ace278 100644
--- a/arch/mips/configs/tb0229_defconfig
+++ b/arch/mips/configs/tb0229_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/tb0287_defconfig b/arch/mips/configs/tb0287_defconfig
index effcb63b81a3..535a813d01a9 100644
--- a/arch/mips/configs/tb0287_defconfig
+++ b/arch/mips/configs/tb0287_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/workpad_defconfig b/arch/mips/configs/workpad_defconfig
index 8f66ad71cb7e..3a3ef20b21cc 100644
--- a/arch/mips/configs/workpad_defconfig
+++ b/arch/mips/configs/workpad_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/wrppmc_defconfig b/arch/mips/configs/wrppmc_defconfig
index 3e4b16b39827..e6b1dea55842 100644
--- a/arch/mips/configs/wrppmc_defconfig
+++ b/arch/mips/configs/wrppmc_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/configs/yosemite_defconfig b/arch/mips/configs/yosemite_defconfig
index 3a68d8a25b66..06a072b77b1c 100644
--- a/arch/mips/configs/yosemite_defconfig
+++ b/arch/mips/configs/yosemite_defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/defconfig b/arch/mips/defconfig
index fff6fcc96212..d620c463a78b 100644
--- a/arch/mips/defconfig
+++ b/arch/mips/defconfig
@@ -25,7 +25,6 @@ CONFIG_MIPS=y
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MIPS_EV64120 is not set
-# CONFIG_MIPS_EV96100 is not set
 # CONFIG_MIPS_IVR is not set
 # CONFIG_MIPS_ITE8172 is not set
 # CONFIG_MACH_JAZZ is not set
diff --git a/arch/mips/galileo-boards/ev96100/Makefile b/arch/mips/galileo-boards/ev96100/Makefile
deleted file mode 100644
index cd868ec78cbc..000000000000
--- a/arch/mips/galileo-boards/ev96100/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-#  Copyright 2000 MontaVista Software Inc.
-#  Author: MontaVista Software, Inc.
-#     	ppopov@mvista.com or source@mvista.com
-#
-# Makefile for the Galileo EV96100 board.
-#
-
-obj-y		+= init.o irq.o puts.o reset.o time.o setup.o
diff --git a/arch/mips/galileo-boards/ev96100/init.c b/arch/mips/galileo-boards/ev96100/init.c
deleted file mode 100644
index a01fe9b36f2c..000000000000
--- a/arch/mips/galileo-boards/ev96100/init.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright 2000 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- * This file was derived from Carsten Langgaard's
- * arch/mips/mips-boards/generic/generic.c
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
-#include <asm/gt64120.h>
-
-
-/* Environment variable */
-
-typedef struct {
-	char *name;
-	char *val;
-} t_env_var;
-
-int prom_argc;
-char **prom_argv, **prom_envp;
-
-int init_debug = 0;
-
-char * __init prom_getcmdline(void)
-{
-	return &(arcs_cmdline[0]);
-}
-
-unsigned long __init prom_free_prom_memory(void)
-{
-	return 0;
-}
-
-void  __init prom_init_cmdline(void)
-{
-	char *cp;
-	int actr;
-
-	actr = 1; /* Always ignore argv[0] */
-
-	cp = &(arcs_cmdline[0]);
-	while(actr < prom_argc) {
-	        strcpy(cp, prom_argv[actr]);
-		cp += strlen(prom_argv[actr]);
-		*cp++ = ' ';
-		actr++;
-	}
-	if (cp != &(arcs_cmdline[0])) /* get rid of trailing space */
-		--cp;
-	*cp = '\0';
-}
-
-char *prom_getenv(char *envname)
-{
-	/*
-	 * Return a pointer to the given environment variable.
-	 */
-
-	t_env_var *env = (t_env_var *) prom_envp;
-	int i;
-
-	i = strlen(envname);
-
-	while (env->name) {
-		if (strncmp(envname, env->name, i) == 0) {
-			return (env->val);
-		}
-		env++;
-	}
-	return (NULL);
-}
-
-static inline unsigned char str2hexnum(unsigned char c)
-{
-	if (c >= '0' && c <= '9')
-		return c - '0';
-	if (c >= 'a' && c <= 'f')
-		return c - 'a' + 10;
-	return 0;		/* foo */
-}
-
-static inline void str2eaddr(unsigned char *ea, unsigned char *str)
-{
-	int i;
-
-	for (i = 0; i < 6; i++) {
-		unsigned char num;
-
-		if ((*str == '.') || (*str == ':'))
-			str++;
-		num = str2hexnum(*str++) << 4;
-		num |= (str2hexnum(*str++));
-		ea[i] = num;
-	}
-}
-
-int get_ethernet_addr(char *ethernet_addr)
-{
-	char *ethaddr_str;
-
-	ethaddr_str = prom_getenv("ethaddr");
-	if (!ethaddr_str) {
-		printk("ethaddr not set in boot prom\n");
-		return -1;
-	}
-	str2eaddr(ethernet_addr, ethaddr_str);
-
-	if (init_debug > 1) {
-		int i;
-		printk("get_ethernet_addr: ");
-		for (i = 0; i < 5; i++)
-			printk("%02x:",
-			       (unsigned char) *(ethernet_addr + i));
-		printk("%02x\n", *(ethernet_addr + i));
-	}
-
-	return 0;
-}
-
-const char *get_system_type(void)
-{
-	return "Galileo EV96100";
-}
-
-void __init prom_init(void)
-{
-	volatile unsigned char *uart;
-	char ppbuf[8];
-
-	prom_argc = fw_arg0;
-	prom_argv = (char **) fw_arg1;
-	prom_envp = (char **) fw_arg2;
-
-	mips_machgroup = MACH_GROUP_GALILEO;
-	mips_machtype = MACH_EV96100;
-
-	prom_init_cmdline();
-
-	/* 32 MB upgradable */
-	add_memory_region(0, 32 << 20, BOOT_MEM_RAM);
-}
diff --git a/arch/mips/galileo-boards/ev96100/irq.c b/arch/mips/galileo-boards/ev96100/irq.c
deleted file mode 100644
index ee5d6720f23b..000000000000
--- a/arch/mips/galileo-boards/ev96100/irq.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright 2000 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- * This file was derived from Carsten Langgaard's
- * arch/mips/mips-boards/atlas/atlas_int.c.
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <asm/irq_cpu.h>
-
-static inline unsigned int ffz8(unsigned int word)
-{
-	unsigned long k;
-
-	k = 7;
-	if (word & 0x0fUL) { k -= 4;  word <<= 4;  }
-	if (word & 0x30UL) { k -= 2;  word <<= 2;  }
-	if (word & 0x40UL) { k -= 1; }
-
-	return k;
-}
-
-extern void mips_timer_interrupt(struct pt_regs *regs);
-
-asmlinkage void ev96100_cpu_irq(unsigned int pending, struct pt_regs *regs)
-{
-	do_IRQ(ffz8(pending >> 8), regs);
-}
-
-asmlinkage void plat_irq_dispatch(struct pt_regs *regs)
-{
-	unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM;
-
-	if (pending & CAUSEF_IP7)
-		mips_timer_interrupt(regs);
-	else if (pending)
-		ev96100_cpu_irq(pending, regs);
-	else
-		spurious_interrupt(regs);
-}
-
-void __init arch_init_irq(void)
-{
-	mips_cpu_irq_init(0);
-}
diff --git a/arch/mips/galileo-boards/ev96100/puts.c b/arch/mips/galileo-boards/ev96100/puts.c
deleted file mode 100644
index 49dc6d137b9c..000000000000
--- a/arch/mips/galileo-boards/ev96100/puts.c
+++ /dev/null
@@ -1,138 +0,0 @@
-
-/*
- * Debug routines which directly access the uart.
- */
-
-#include <linux/types.h>
-#include <asm/gt64120.h>
-
-
-//#define SERIAL_BASE    EV96100_UART0_REGS_BASE
-#define SERIAL_BASE    0xBD000020
-#define NS16550_BASE   SERIAL_BASE
-
-#define SERA_CMD       0x0D
-#define SERA_DATA      0x08
-//#define SERB_CMD       0x05
-#define SERB_CMD       20
-#define SERB_DATA      0x00
-#define TX_BUSY        0x20
-
-#define TIMEOUT    0xffff
-#undef SLOW_DOWN
-
-static const char digits[16] = "0123456789abcdef";
-static volatile unsigned char *const com1 = (unsigned char *) SERIAL_BASE;
-
-
-#ifdef SLOW_DOWN
-static inline void slow_down()
-{
-	int k;
-	for (k = 0; k < 10000; k++);
-}
-#else
-#define slow_down()
-#endif
-
-void putch(const unsigned char c)
-{
-	unsigned char ch;
-	int i = 0;
-
-	do {
-		ch = com1[SERB_CMD];
-		slow_down();
-		i++;
-		if (i > TIMEOUT) {
-			break;
-		}
-	} while (0 == (ch & TX_BUSY));
-	com1[SERB_DATA] = c;
-}
-
-void putchar(const unsigned char c)
-{
-	unsigned char ch;
-	int i = 0;
-
-	do {
-		ch = com1[SERB_CMD];
-		slow_down();
-		i++;
-		if (i > TIMEOUT) {
-			break;
-		}
-	} while (0 == (ch & TX_BUSY));
-	com1[SERB_DATA] = c;
-}
-
-void puts(unsigned char *cp)
-{
-	unsigned char ch;
-	int i = 0;
-
-	while (*cp) {
-		do {
-			ch = com1[SERB_CMD];
-			slow_down();
-			i++;
-			if (i > TIMEOUT) {
-				break;
-			}
-		} while (0 == (ch & TX_BUSY));
-		com1[SERB_DATA] = *cp++;
-	}
-	putch('\r');
-	putch('\n');
-}
-
-void fputs(unsigned char *cp)
-{
-	unsigned char ch;
-	int i = 0;
-
-	while (*cp) {
-
-		do {
-			ch = com1[SERB_CMD];
-			slow_down();
-			i++;
-			if (i > TIMEOUT) {
-				break;
-			}
-		} while (0 == (ch & TX_BUSY));
-		com1[SERB_DATA] = *cp++;
-	}
-}
-
-
-void put64(uint64_t ul)
-{
-	int cnt;
-	unsigned ch;
-
-	cnt = 16;		/* 16 nibbles in a 64 bit long */
-	putch('0');
-	putch('x');
-	do {
-		cnt--;
-		ch = (unsigned char) (ul >> cnt * 4) & 0x0F;
-		putch(digits[ch]);
-	} while (cnt > 0);
-}
-
-void put32(unsigned u)
-{
-	int cnt;
-	unsigned ch;
-
-	cnt = 8;		/* 8 nibbles in a 32 bit long */
-	putch('0');
-	putch('x');
-	do {
-		cnt--;
-		ch = (unsigned char) (u >> cnt * 4) & 0x0F;
-		putch(digits[ch]);
-	} while (cnt > 0);
-}
diff --git a/arch/mips/galileo-boards/ev96100/reset.c b/arch/mips/galileo-boards/ev96100/reset.c
deleted file mode 100644
index 5ef9b7f896e6..000000000000
--- a/arch/mips/galileo-boards/ev96100/reset.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * BRIEF MODULE DESCRIPTION
- *	Galileo EV96100 reset routines.
- *
- * Copyright 2000 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- * This file was derived from Carsten Langgaard's
- * arch/mips/mips-boards/generic/reset.c
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/reboot.h>
-#include <asm/system.h>
-#include <asm/gt64120.h>
-
-static void mips_machine_restart(char *command);
-static void mips_machine_halt(void);
-
-static void mips_machine_restart(char *command)
-{
-	set_c0_status(ST0_BEV | ST0_ERL);
-	change_c0_config(CONF_CM_CMASK, CONF_CM_UNCACHED);
-	flush_cache_all();
-	write_c0_wired(0);
-	__asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
-	while (1);
-}
-
-static void mips_machine_halt(void)
-{
-	printk(KERN_NOTICE "You can safely turn off the power\n");
-	while (1)
-		__asm__(".set\tmips3\n\t"
-	                "wait\n\t"
-			".set\tmips0");
-}
-
-void mips_reboot_setup(void)
-{
-	_machine_restart = mips_machine_restart;
-	_machine_halt = mips_machine_halt;
-}
diff --git a/arch/mips/galileo-boards/ev96100/setup.c b/arch/mips/galileo-boards/ev96100/setup.c
deleted file mode 100644
index 639ad5562c63..000000000000
--- a/arch/mips/galileo-boards/ev96100/setup.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * BRIEF MODULE DESCRIPTION
- *	Galileo EV96100 setup.
- *
- * Copyright 2000 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- * This file was derived from Carsten Langgaard's
- * arch/mips/mips-boards/atlas/atlas_setup.c.
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/pci.h>
-
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/mipsregs.h>
-#include <asm/irq.h>
-#include <asm/delay.h>
-#include <asm/gt64120.h>
-#include <asm/galileo-boards/ev96100int.h>
-
-
-extern char *__init prom_getcmdline(void);
-
-extern void mips_reboot_setup(void);
-
-unsigned char mac_0_1[12];
-
-void __init plat_mem_setup(void)
-{
-	unsigned int config = read_c0_config();
-	unsigned int status = read_c0_status();
-	unsigned int info = read_c0_info();
-	u32 tmp;
-
-	char *argptr;
-
-	clear_c0_status(ST0_FR);
-
-	if (config & 0x8)
-		printk("Secondary cache is enabled\n");
-	else
-		printk("Secondary cache is disabled\n");
-
-	if (status & (1 << 27))
-		printk("User-mode cache ops enabled\n");
-	else
-		printk("User-mode cache ops disabled\n");
-
-	printk("CP0 info reg: %x\n", (unsigned) info);
-	if (info & (1 << 28))
-		printk("burst mode Scache RAMS\n");
-	else
-		printk("pipelined Scache RAMS\n");
-
-	if (info & 0x1)
-		printk("Atomic Enable is set\n");
-
-	argptr = prom_getcmdline();
-#ifdef CONFIG_SERIAL_CONSOLE
-	if (strstr(argptr, "console=") == NULL) {
-		argptr = prom_getcmdline();
-		strcat(argptr, " console=ttyS0,115200");
-	}
-#endif
-
-	mips_reboot_setup();
-
-	set_io_port_base(KSEG1);
-	ioport_resource.start = GT_PCI_IO_BASE;
-	ioport_resource.end = GT_PCI_IO_BASE + 0x01ffffff;
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	ROOT_DEV = MKDEV(RAMDISK_MAJOR, 0);
-#endif
-
-
-	/*
-	 * Setup GT controller master bit so we can do config cycles
-	 */
-
-	/* Clear cause register bits */
-	GT_WRITE(GT_INTRCAUSE_OFS, ~(GT_INTRCAUSE_MASABORT0_BIT |
-				     GT_INTRCAUSE_TARABORT0_BIT));
-	/* Setup address */
-	GT_WRITE(GT_PCI0_CFGADDR_OFS,
-		 (0 << GT_PCI0_CFGADDR_BUSNUM_SHF) |
-		 (0 << GT_PCI0_CFGADDR_FUNCTNUM_SHF) |
-		 ((PCI_COMMAND / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) |
-		 GT_PCI0_CFGADDR_CONFIGEN_BIT);
-
-	udelay(2);
-	tmp = GT_READ(GT_PCI0_CFGDATA_OFS);
-
-	tmp |= (PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
-		PCI_COMMAND_MASTER | PCI_COMMAND_SERR);
-	GT_WRITE(GT_PCI0_CFGADDR_OFS,
-		 (0 << GT_PCI0_CFGADDR_BUSNUM_SHF) |
-		 (0 << GT_PCI0_CFGADDR_FUNCTNUM_SHF) |
-		 ((PCI_COMMAND / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) |
-		 GT_PCI0_CFGADDR_CONFIGEN_BIT);
-	udelay(2);
-	GT_WRITE(GT_PCI0_CFGDATA_OFS, tmp);
-
-	/* Setup address */
-	GT_WRITE(GT_PCI0_CFGADDR_OFS,
-		 (0 << GT_PCI0_CFGADDR_BUSNUM_SHF) |
-		 (0 << GT_PCI0_CFGADDR_FUNCTNUM_SHF) |
-		 ((PCI_COMMAND / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) |
-		 GT_PCI0_CFGADDR_CONFIGEN_BIT);
-
-	udelay(2);
-	tmp = GT_READ(GT_PCI0_CFGDATA_OFS);
-}
-
-unsigned short get_gt_devid(void)
-{
-	u32 gt_devid;
-
-	/* Figure out if this is a gt96100 or gt96100A */
-	GT_WRITE(GT_PCI0_CFGADDR_OFS,
-		 (0 << GT_PCI0_CFGADDR_BUSNUM_SHF) |
-		 (0 << GT_PCI0_CFGADDR_FUNCTNUM_SHF) |
-		 ((PCI_VENDOR_ID / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) |
-		 GT_PCI0_CFGADDR_CONFIGEN_BIT);
-
-	udelay(4);
-	gt_devid = GT_READ(GT_PCI0_CFGDATA_OFS);
-
-	return gt_devid >> 16;
-}
diff --git a/arch/mips/galileo-boards/ev96100/time.c b/arch/mips/galileo-boards/ev96100/time.c
deleted file mode 100644
index 8cbe8426491a..000000000000
--- a/arch/mips/galileo-boards/ev96100/time.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * BRIEF MODULE DESCRIPTION
- *	Galileo EV96100 rtc routines.
- *
- * Copyright 2000 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- * This file was derived from Carsten Langgaard's
- * arch/mips/mips-boards/atlas/atlas_rtc.c.
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/timex.h>
-
-#include <asm/mipsregs.h>
-#include <asm/ptrace.h>
-#include <asm/time.h>
-
-
-#define ALLINTS (IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5)
-
-extern volatile unsigned long wall_jiffies;
-unsigned long missed_heart_beats = 0;
-
-static unsigned long r4k_offset; /* Amount to increment compare reg each time */
-static unsigned long r4k_cur;    /* What counter should be at next timer irq */
-
-static inline void ack_r4ktimer(unsigned long newval)
-{
-	write_c0_compare(newval);
-}
-
-/*
- * There are a lot of conceptually broken versions of the MIPS timer interrupt
- * handler floating around.  This one is rather different, but the algorithm
- * is probably more robust.
- */
-void mips_timer_interrupt(struct pt_regs *regs)
-{
-        int irq = 7; /* FIX ME */
-
-	if (r4k_offset == 0) {
-            goto null;
-        }
-
-	do {
-		kstat_this_cpu.irqs[irq]++;
-		do_timer(regs);
-#ifndef CONFIG_SMP
-		update_process_times(user_mode(regs));
-#endif
-		r4k_cur += r4k_offset;
-		ack_r4ktimer(r4k_cur);
-
-	} while (((unsigned long)read_c0_count()
-                    - r4k_cur) < 0x7fffffff);
-	return;
-
-null:
-	ack_r4ktimer(0);
-}
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index 368d3d896165..edefa97b2330 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_ITE_BOARD_GEN)	+= ops-it8172.o
 obj-$(CONFIG_MIPS_BONITO64)	+= ops-bonito64.o
 obj-$(CONFIG_MIPS_GT64111)	+= ops-gt64111.o
 obj-$(CONFIG_MIPS_GT64120)	+= ops-gt64120.o
-obj-$(CONFIG_MIPS_GT96100)	+= ops-gt96100.o
 obj-$(CONFIG_PCI_MARVELL)	+= ops-marvell.o
 obj-$(CONFIG_MIPS_MSC)		+= ops-msc.o
 obj-$(CONFIG_MIPS_NILE4)	+= ops-nile4.o
@@ -29,7 +28,6 @@ obj-$(CONFIG_LASAT)		+= pci-lasat.o
 obj-$(CONFIG_MIPS_ATLAS)	+= fixup-atlas.o
 obj-$(CONFIG_MIPS_COBALT)	+= fixup-cobalt.o
 obj-$(CONFIG_MIPS_EV64120)	+= fixup-ev64120.o
-obj-$(CONFIG_MIPS_EV96100)	+= fixup-ev96100.o pci-ev96100.o
 obj-$(CONFIG_MIPS_ITE8172)	+= fixup-ite8172g.o
 obj-$(CONFIG_MIPS_IVR)		+= fixup-ivr.o
 obj-$(CONFIG_SOC_AU1500)	+= fixup-au1000.o ops-au1000.o
diff --git a/arch/mips/pci/fixup-ev96100.c b/arch/mips/pci/fixup-ev96100.c
deleted file mode 100644
index e2bc977b6d58..000000000000
--- a/arch/mips/pci/fixup-ev96100.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- *
- * BRIEF MODULE DESCRIPTION
- *	EV96100 Board specific pci fixups.
- *
- * Copyright 2001 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-
-static char irq_tab_ev96100[][5] __initdata = {
- [8] = { 0, 5, 5, 5, 5 },
- [9] = { 0, 2, 2, 2, 2 }
-};
-
-int __init pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
-{
-	return irq_tab_ev96100[slot][pin];
-}
-
-/* Do platform specific device initialization at pci_enable_device() time */
-int pcibios_plat_dev_init(struct pci_dev *dev)
-{
-	return 0;
-}
diff --git a/arch/mips/pci/ops-gt96100.c b/arch/mips/pci/ops-gt96100.c
deleted file mode 100644
index 9e4ea6627e21..000000000000
--- a/arch/mips/pci/ops-gt96100.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- *
- * BRIEF MODULE DESCRIPTION
- *	Galileo EV96100 board specific pci support.
- *
- * Copyright 2000 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- * This file was derived from Carsten Langgaard's
- * arch/mips/mips-boards/generic/pci.c
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-#include <asm/delay.h>
-#include <asm/gt64120.h>
-#include <asm/galileo-boards/ev96100.h>
-
-#define PCI_ACCESS_READ  0
-#define PCI_ACCESS_WRITE 1
-
-static int static gt96100_config_access(unsigned char access_type,
-	struct pci_bus *bus, unsigned int devfn, int where, u32 * data)
-{
-	unsigned char bus = bus->number;
-	u32 intr;
-
-	/*
-	 * Because of a bug in the galileo (for slot 31).
-	 */
-	if (bus == 0 && devfn >= PCI_DEVFN(31, 0))
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	/* Clear cause register bits */
-	GT_WRITE(GT_INTRCAUSE_OFS, ~(GT_INTRCAUSE_MASABORT0_BIT |
-				     GT_INTRCAUSE_TARABORT0_BIT));
-
-	/* Setup address */
-	GT_WRITE(GT_PCI0_CFGADDR_OFS,
-		 (bus << GT_PCI0_CFGADDR_BUSNUM_SHF) |
-		 (devfn << GT_PCI0_CFGADDR_FUNCTNUM_SHF) |
-		 ((where / 4) << GT_PCI0_CFGADDR_REGNUM_SHF) |
-		 GT_PCI0_CFGADDR_CONFIGEN_BIT);
-	udelay(2);
-
-
-	if (access_type == PCI_ACCESS_WRITE) {
-		if (devfn != 0)
-			*data = le32_to_cpu(*data);
-		GT_WRITE(GT_PCI0_CFGDATA_OFS, *data);
-	} else {
-		*data = GT_READ(GT_PCI0_CFGDATA_OFS);
-		if (devfn != 0)
-			*data = le32_to_cpu(*data);
-	}
-
-	udelay(2);
-
-	/* Check for master or target abort */
-	intr = GT_READ(GT_INTRCAUSE_OFS);
-
-	if (intr & (GT_INTRCAUSE_MASABORT0_BIT | GT_INTRCAUSE_TARABORT0_BIT)) {
-		/* Error occured */
-
-		/* Clear bits */
-		GT_WRITE(GT_INTRCAUSE_OFS, ~(GT_INTRCAUSE_MASABORT0_BIT |
-					     GT_INTRCAUSE_TARABORT0_BIT));
-		return -1;
-	}
-	return 0;
-}
-
-/*
- * We can't address 8 and 16 bit words directly.  Instead we have to
- * read/write a 32bit word and mask/modify the data we actually want.
- */
-static int gt96100_pcibios_read(struct pci_bus *bus, unsigned int devfn,
-	int where, int size, u32 * val)
-{
-	u32 data = 0;
-
-	if (gt96100_config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	switch (size) {
-	case 1:
-		*val = (data >> ((where & 3) << 3)) & 0xff;
-		break;
-
-	case 2:
-		*val = (data >> ((where & 3) << 3)) & 0xffff;
-		break;
-
-	case 4:
-		*val = data;
-		break;
-	}
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static int gt96100_pcibios_write(struct pci_bus *bus, unsigned int devfn,
-	int where, int size, u32 val)
-{
-	u32 data = 0;
-
-	switch (size) {
-	case 1:
-		if (gt96100_config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
-			return -1;
-
-		data = (data & ~(0xff << ((where & 3) << 3))) |
-		       (val << ((where & 3) << 3));
-
-		if (gt96100_config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
-			return -1;
-
-		return PCIBIOS_SUCCESSFUL;
-
-	case 2:
-		if (gt96100_config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
-			return -1;
-
-		data = (data & ~(0xffff << ((where & 3) << 3))) |
-		       (val << ((where & 3) << 3));
-
-		if (gt96100_config_access(PCI_ACCESS_WRITE, dev, where, &data))
-			return -1;
-
-
-		return PCIBIOS_SUCCESSFUL;
-
-	case 4:
-		if (gt96100_config_access(PCI_ACCESS_WRITE, dev, where, &val))
-			return -1;
-
-		return PCIBIOS_SUCCESSFUL;
-	}
-}
-
-struct pci_ops gt96100_pci_ops = {
-	.read	= gt96100_pcibios_read,
-	.write	= gt96100_pcibios_write
-};
diff --git a/arch/mips/pci/pci-ev96100.c b/arch/mips/pci/pci-ev96100.c
deleted file mode 100644
index f9457ea00def..000000000000
--- a/arch/mips/pci/pci-ev96100.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright 2000 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
- *
- * Copyright (C) 2004 by Ralf Baechle (ralf@linux-mips.org)
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-static struct resource pci_io_resource = {
-	.name	= "io pci IO space",
-	.start	= 0x10000000,
-	.end	= 0x11ffffff,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource pci_mem_resource = {
-	.name	= "ext pci memory space",
-	.start	= 0x12000000,
-	.end	= 0x13ffffff,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops gt96100_pci_ops;
-
-struct pci_controller ev96100_controller = {
-	.pci_ops	= &gt96100_pci_ops,
-	.io_resource	= &pci_io_resource,
-	.mem_resource	= &pci_mem_resource,
-};
-
-static void ev96100_pci_init(void)
-{
-	register_pci_controller(&ev96100_controller);
-}
-
-arch_initcall(ev96100_pci_init);
diff --git a/include/asm-mips/bootinfo.h b/include/asm-mips/bootinfo.h
index 3b745e76f429..78c35ec46362 100644
--- a/include/asm-mips/bootinfo.h
+++ b/include/asm-mips/bootinfo.h
@@ -112,8 +112,7 @@
  * Valid machtype for group GALILEO
  */
 #define MACH_GROUP_GALILEO     11	/* Galileo Eval Boards		*/
-#define  MACH_EV96100		0	/* EV96100 */
-#define  MACH_EV64120A		1	/* EV64120A */
+#define  MACH_EV64120A		0	/* EV64120A */
 
 /*
  * Valid machtype for group MOMENCO
diff --git a/include/asm-mips/galileo-boards/gt96100.h b/include/asm-mips/galileo-boards/gt96100.h
deleted file mode 100644
index aabd1b629c19..000000000000
--- a/include/asm-mips/galileo-boards/gt96100.h
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Copyright 2000 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *         	stevel@mvista.com or source@mvista.com
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Register offsets of the MIPS GT96100 Advanced Communication Controller.
- */
-#ifndef _GT96100_H
-#define _GT96100_H
-
-/*
- * Galileo GT96100 internal register base.
- */
-#define MIPS_GT96100_BASE (KSEG1ADDR(0x14000000))
-
-#define GT96100_WRITE(ofs, data) \
-    *(volatile u32 *)(MIPS_GT96100_BASE+ofs) = cpu_to_le32(data)
-#define GT96100_READ(ofs) \
-    le32_to_cpu(*(volatile u32 *)(MIPS_GT96100_BASE+ofs))
-
-#define GT96100_ETH_IO_SIZE 0x4000
-
-/************************************************************************
- *  Register offset addresses follow
- ************************************************************************/
-
-/* CPU Interface Control Registers */
-#define GT96100_CPU_INTERF_CONFIG 0x000000
-
-/* Ethernet Ports */
-#define GT96100_ETH_PHY_ADDR_REG             0x080800
-#define GT96100_ETH_SMI_REG                  0x080810
-/*
-  These are offsets to port 0 registers. Add GT96100_ETH_IO_SIZE to
-  get offsets to port 1 registers.
-*/
-#define GT96100_ETH_PORT_CONFIG          0x084800
-#define GT96100_ETH_PORT_CONFIG_EXT      0x084808
-#define GT96100_ETH_PORT_COMM            0x084810
-#define GT96100_ETH_PORT_STATUS          0x084818
-#define GT96100_ETH_SER_PARAM            0x084820
-#define GT96100_ETH_HASH_TBL_PTR         0x084828
-#define GT96100_ETH_FLOW_CNTRL_SRC_ADDR_L    0x084830
-#define GT96100_ETH_FLOW_CNTRL_SRC_ADDR_H    0x084838
-#define GT96100_ETH_SDMA_CONFIG          0x084840
-#define GT96100_ETH_SDMA_COMM            0x084848
-#define GT96100_ETH_INT_CAUSE            0x084850
-#define GT96100_ETH_INT_MASK             0x084858
-#define GT96100_ETH_1ST_RX_DESC_PTR0         0x084880
-#define GT96100_ETH_1ST_RX_DESC_PTR1         0x084884
-#define GT96100_ETH_1ST_RX_DESC_PTR2         0x084888
-#define GT96100_ETH_1ST_RX_DESC_PTR3         0x08488C
-#define GT96100_ETH_CURR_RX_DESC_PTR0        0x0848A0
-#define GT96100_ETH_CURR_RX_DESC_PTR1        0x0848A4
-#define GT96100_ETH_CURR_RX_DESC_PTR2        0x0848A8
-#define GT96100_ETH_CURR_RX_DESC_PTR3        0x0848AC
-#define GT96100_ETH_CURR_TX_DESC_PTR0        0x0848E0
-#define GT96100_ETH_CURR_TX_DESC_PTR1        0x0848E4
-#define GT96100_ETH_MIB_COUNT_BASE           0x085800
-
-/* SDMAs */
-#define GT96100_SDMA_GROUP_CONFIG           0x101AF0
-/* SDMA Group 0 */
-#define GT96100_SDMA_G0_CHAN0_CONFIG        0x000900
-#define GT96100_SDMA_G0_CHAN0_COMM          0x000908
-#define GT96100_SDMA_G0_CHAN0_RX_DESC_BASE      0x008900
-#define GT96100_SDMA_G0_CHAN0_CURR_RX_DESC_PTR  0x008910
-#define GT96100_SDMA_G0_CHAN0_TX_DESC_BASE      0x00C900
-#define GT96100_SDMA_G0_CHAN0_CURR_TX_DESC_PTR  0x00C910
-#define GT96100_SDMA_G0_CHAN0_1ST_TX_DESC_PTR   0x00C914
-#define GT96100_SDMA_G0_CHAN1_CONFIG        0x010900
-#define GT96100_SDMA_G0_CHAN1_COMM          0x010908
-#define GT96100_SDMA_G0_CHAN1_RX_DESC_BASE      0x018900
-#define GT96100_SDMA_G0_CHAN1_CURR_RX_DESC_PTR  0x018910
-#define GT96100_SDMA_G0_CHAN1_TX_DESC_BASE      0x01C900
-#define GT96100_SDMA_G0_CHAN1_CURR_TX_DESC_PTR  0x01C910
-#define GT96100_SDMA_G0_CHAN1_1ST_TX_DESC_PTR   0x01C914
-#define GT96100_SDMA_G0_CHAN2_CONFIG        0x020900
-#define GT96100_SDMA_G0_CHAN2_COMM          0x020908
-#define GT96100_SDMA_G0_CHAN2_RX_DESC_BASE      0x028900
-#define GT96100_SDMA_G0_CHAN2_CURR_RX_DESC_PTR  0x028910
-#define GT96100_SDMA_G0_CHAN2_TX_DESC_BASE      0x02C900
-#define GT96100_SDMA_G0_CHAN2_CURR_TX_DESC_PTR  0x02C910
-#define GT96100_SDMA_G0_CHAN2_1ST_TX_DESC_PTR   0x02C914
-#define GT96100_SDMA_G0_CHAN3_CONFIG        0x030900
-#define GT96100_SDMA_G0_CHAN3_COMM          0x030908
-#define GT96100_SDMA_G0_CHAN3_RX_DESC_BASE      0x038900
-#define GT96100_SDMA_G0_CHAN3_CURR_RX_DESC_PTR  0x038910
-#define GT96100_SDMA_G0_CHAN3_TX_DESC_BASE      0x03C900
-#define GT96100_SDMA_G0_CHAN3_CURR_TX_DESC_PTR  0x03C910
-#define GT96100_SDMA_G0_CHAN3_1ST_TX_DESC_PTR   0x03C914
-#define GT96100_SDMA_G0_CHAN4_CONFIG        0x040900
-#define GT96100_SDMA_G0_CHAN4_COMM          0x040908
-#define GT96100_SDMA_G0_CHAN4_RX_DESC_BASE      0x048900
-#define GT96100_SDMA_G0_CHAN4_CURR_RX_DESC_PTR  0x048910
-#define GT96100_SDMA_G0_CHAN4_TX_DESC_BASE      0x04C900
-#define GT96100_SDMA_G0_CHAN4_CURR_TX_DESC_PTR  0x04C910
-#define GT96100_SDMA_G0_CHAN4_1ST_TX_DESC_PTR   0x04C914
-#define GT96100_SDMA_G0_CHAN5_CONFIG        0x050900
-#define GT96100_SDMA_G0_CHAN5_COMM          0x050908
-#define GT96100_SDMA_G0_CHAN5_RX_DESC_BASE      0x058900
-#define GT96100_SDMA_G0_CHAN5_CURR_RX_DESC_PTR  0x058910
-#define GT96100_SDMA_G0_CHAN5_TX_DESC_BASE      0x05C900
-#define GT96100_SDMA_G0_CHAN5_CURR_TX_DESC_PTR  0x05C910
-#define GT96100_SDMA_G0_CHAN5_1ST_TX_DESC_PTR   0x05C914
-#define GT96100_SDMA_G0_CHAN6_CONFIG        0x060900
-#define GT96100_SDMA_G0_CHAN6_COMM          0x060908
-#define GT96100_SDMA_G0_CHAN6_RX_DESC_BASE      0x068900
-#define GT96100_SDMA_G0_CHAN6_CURR_RX_DESC_PTR  0x068910
-#define GT96100_SDMA_G0_CHAN6_TX_DESC_BASE      0x06C900
-#define GT96100_SDMA_G0_CHAN6_CURR_TX_DESC_PTR  0x06C910
-#define GT96100_SDMA_G0_CHAN6_1ST_TX_DESC_PTR   0x06C914
-#define GT96100_SDMA_G0_CHAN7_CONFIG        0x070900
-#define GT96100_SDMA_G0_CHAN7_COMM          0x070908
-#define GT96100_SDMA_G0_CHAN7_RX_DESC_BASE      0x078900
-#define GT96100_SDMA_G0_CHAN7_CURR_RX_DESC_PTR  0x078910
-#define GT96100_SDMA_G0_CHAN7_TX_DESC_BASE      0x07C900
-#define GT96100_SDMA_G0_CHAN7_CURR_TX_DESC_PTR  0x07C910
-#define GT96100_SDMA_G0_CHAN7_1ST_TX_DESC_PTR   0x07C914
-/* SDMA Group 1 */
-#define GT96100_SDMA_G1_CHAN0_CONFIG        0x100900
-#define GT96100_SDMA_G1_CHAN0_COMM          0x100908
-#define GT96100_SDMA_G1_CHAN0_RX_DESC_BASE      0x108900
-#define GT96100_SDMA_G1_CHAN0_CURR_RX_DESC_PTR  0x108910
-#define GT96100_SDMA_G1_CHAN0_TX_DESC_BASE      0x10C900
-#define GT96100_SDMA_G1_CHAN0_CURR_TX_DESC_PTR  0x10C910
-#define GT96100_SDMA_G1_CHAN0_1ST_TX_DESC_PTR   0x10C914
-#define GT96100_SDMA_G1_CHAN1_CONFIG        0x110900
-#define GT96100_SDMA_G1_CHAN1_COMM          0x110908
-#define GT96100_SDMA_G1_CHAN1_RX_DESC_BASE      0x118900
-#define GT96100_SDMA_G1_CHAN1_CURR_RX_DESC_PTR  0x118910
-#define GT96100_SDMA_G1_CHAN1_TX_DESC_BASE      0x11C900
-#define GT96100_SDMA_G1_CHAN1_CURR_TX_DESC_PTR  0x11C910
-#define GT96100_SDMA_G1_CHAN1_1ST_TX_DESC_PTR   0x11C914
-#define GT96100_SDMA_G1_CHAN2_CONFIG        0x120900
-#define GT96100_SDMA_G1_CHAN2_COMM          0x120908
-#define GT96100_SDMA_G1_CHAN2_RX_DESC_BASE      0x128900
-#define GT96100_SDMA_G1_CHAN2_CURR_RX_DESC_PTR  0x128910
-#define GT96100_SDMA_G1_CHAN2_TX_DESC_BASE      0x12C900
-#define GT96100_SDMA_G1_CHAN2_CURR_TX_DESC_PTR  0x12C910
-#define GT96100_SDMA_G1_CHAN2_1ST_TX_DESC_PTR   0x12C914
-#define GT96100_SDMA_G1_CHAN3_CONFIG        0x130900
-#define GT96100_SDMA_G1_CHAN3_COMM          0x130908
-#define GT96100_SDMA_G1_CHAN3_RX_DESC_BASE      0x138900
-#define GT96100_SDMA_G1_CHAN3_CURR_RX_DESC_PTR  0x138910
-#define GT96100_SDMA_G1_CHAN3_TX_DESC_BASE      0x13C900
-#define GT96100_SDMA_G1_CHAN3_CURR_TX_DESC_PTR  0x13C910
-#define GT96100_SDMA_G1_CHAN3_1ST_TX_DESC_PTR   0x13C914
-#define GT96100_SDMA_G1_CHAN4_CONFIG        0x140900
-#define GT96100_SDMA_G1_CHAN4_COMM          0x140908
-#define GT96100_SDMA_G1_CHAN4_RX_DESC_BASE      0x148900
-#define GT96100_SDMA_G1_CHAN4_CURR_RX_DESC_PTR  0x148910
-#define GT96100_SDMA_G1_CHAN4_TX_DESC_BASE      0x14C900
-#define GT96100_SDMA_G1_CHAN4_CURR_TX_DESC_PTR  0x14C910
-#define GT96100_SDMA_G1_CHAN4_1ST_TX_DESC_PTR   0x14C914
-#define GT96100_SDMA_G1_CHAN5_CONFIG        0x150900
-#define GT96100_SDMA_G1_CHAN5_COMM          0x150908
-#define GT96100_SDMA_G1_CHAN5_RX_DESC_BASE      0x158900
-#define GT96100_SDMA_G1_CHAN5_CURR_RX_DESC_PTR  0x158910
-#define GT96100_SDMA_G1_CHAN5_TX_DESC_BASE      0x15C900
-#define GT96100_SDMA_G1_CHAN5_CURR_TX_DESC_PTR  0x15C910
-#define GT96100_SDMA_G1_CHAN5_1ST_TX_DESC_PTR   0x15C914
-#define GT96100_SDMA_G1_CHAN6_CONFIG        0x160900
-#define GT96100_SDMA_G1_CHAN6_COMM          0x160908
-#define GT96100_SDMA_G1_CHAN6_RX_DESC_BASE      0x168900
-#define GT96100_SDMA_G1_CHAN6_CURR_RX_DESC_PTR  0x168910
-#define GT96100_SDMA_G1_CHAN6_TX_DESC_BASE      0x16C900
-#define GT96100_SDMA_G1_CHAN6_CURR_TX_DESC_PTR  0x16C910
-#define GT96100_SDMA_G1_CHAN6_1ST_TX_DESC_PTR   0x16C914
-#define GT96100_SDMA_G1_CHAN7_CONFIG        0x170900
-#define GT96100_SDMA_G1_CHAN7_COMM          0x170908
-#define GT96100_SDMA_G1_CHAN7_RX_DESC_BASE      0x178900
-#define GT96100_SDMA_G1_CHAN7_CURR_RX_DESC_PTR  0x178910
-#define GT96100_SDMA_G1_CHAN7_TX_DESC_BASE      0x17C900
-#define GT96100_SDMA_G1_CHAN7_CURR_TX_DESC_PTR  0x17C910
-#define GT96100_SDMA_G1_CHAN7_1ST_TX_DESC_PTR   0x17C914
-/*  MPSCs  */
-#define GT96100_MPSC0_MAIN_CONFIG_LOW   0x000A00
-#define GT96100_MPSC0_MAIN_CONFIG_HIGH  0x000A04
-#define GT96100_MPSC0_PROTOCOL_CONFIG   0x000A08
-#define GT96100_MPSC_CHAN0_REG1         0x000A0C
-#define GT96100_MPSC_CHAN0_REG2         0x000A10
-#define GT96100_MPSC_CHAN0_REG3         0x000A14
-#define GT96100_MPSC_CHAN0_REG4         0x000A18
-#define GT96100_MPSC_CHAN0_REG5         0x000A1C
-#define GT96100_MPSC_CHAN0_REG6         0x000A20
-#define GT96100_MPSC_CHAN0_REG7         0x000A24
-#define GT96100_MPSC_CHAN0_REG8         0x000A28
-#define GT96100_MPSC_CHAN0_REG9         0x000A2C
-#define GT96100_MPSC_CHAN0_REG10        0x000A30
-#define GT96100_MPSC_CHAN0_REG11        0x000A34
-#define GT96100_MPSC1_MAIN_CONFIG_LOW   0x008A00
-#define GT96100_MPSC1_MAIN_CONFIG_HIGH  0x008A04
-#define GT96100_MPSC1_PROTOCOL_CONFIG   0x008A08
-#define GT96100_MPSC_CHAN1_REG1         0x008A0C
-#define GT96100_MPSC_CHAN1_REG2         0x008A10
-#define GT96100_MPSC_CHAN1_REG3         0x008A14
-#define GT96100_MPSC_CHAN1_REG4         0x008A18
-#define GT96100_MPSC_CHAN1_REG5         0x008A1C
-#define GT96100_MPSC_CHAN1_REG6         0x008A20
-#define GT96100_MPSC_CHAN1_REG7         0x008A24
-#define GT96100_MPSC_CHAN1_REG8         0x008A28
-#define GT96100_MPSC_CHAN1_REG9         0x008A2C
-#define GT96100_MPSC_CHAN1_REG10        0x008A30
-#define GT96100_MPSC_CHAN1_REG11        0x008A34
-#define GT96100_MPSC2_MAIN_CONFIG_LOW   0x010A00
-#define GT96100_MPSC2_MAIN_CONFIG_HIGH  0x010A04
-#define GT96100_MPSC2_PROTOCOL_CONFIG   0x010A08
-#define GT96100_MPSC_CHAN2_REG1         0x010A0C
-#define GT96100_MPSC_CHAN2_REG2         0x010A10
-#define GT96100_MPSC_CHAN2_REG3         0x010A14
-#define GT96100_MPSC_CHAN2_REG4         0x010A18
-#define GT96100_MPSC_CHAN2_REG5         0x010A1C
-#define GT96100_MPSC_CHAN2_REG6         0x010A20
-#define GT96100_MPSC_CHAN2_REG7         0x010A24
-#define GT96100_MPSC_CHAN2_REG8         0x010A28
-#define GT96100_MPSC_CHAN2_REG9         0x010A2C
-#define GT96100_MPSC_CHAN2_REG10        0x010A30
-#define GT96100_MPSC_CHAN2_REG11        0x010A34
-#define GT96100_MPSC3_MAIN_CONFIG_LOW   0x018A00
-#define GT96100_MPSC3_MAIN_CONFIG_HIGH  0x018A04
-#define GT96100_MPSC3_PROTOCOL_CONFIG   0x018A08
-#define GT96100_MPSC_CHAN3_REG1         0x018A0C
-#define GT96100_MPSC_CHAN3_REG2         0x018A10
-#define GT96100_MPSC_CHAN3_REG3         0x018A14
-#define GT96100_MPSC_CHAN3_REG4         0x018A18
-#define GT96100_MPSC_CHAN3_REG5         0x018A1C
-#define GT96100_MPSC_CHAN3_REG6         0x018A20
-#define GT96100_MPSC_CHAN3_REG7         0x018A24
-#define GT96100_MPSC_CHAN3_REG8         0x018A28
-#define GT96100_MPSC_CHAN3_REG9         0x018A2C
-#define GT96100_MPSC_CHAN3_REG10        0x018A30
-#define GT96100_MPSC_CHAN3_REG11        0x018A34
-#define GT96100_MPSC4_MAIN_CONFIG_LOW   0x020A00
-#define GT96100_MPSC4_MAIN_CONFIG_HIGH  0x020A04
-#define GT96100_MPSC4_PROTOCOL_CONFIG   0x020A08
-#define GT96100_MPSC_CHAN4_REG1         0x020A0C
-#define GT96100_MPSC_CHAN4_REG2         0x020A10
-#define GT96100_MPSC_CHAN4_REG3         0x020A14
-#define GT96100_MPSC_CHAN4_REG4         0x020A18
-#define GT96100_MPSC_CHAN4_REG5         0x020A1C
-#define GT96100_MPSC_CHAN4_REG6         0x020A20
-#define GT96100_MPSC_CHAN4_REG7         0x020A24
-#define GT96100_MPSC_CHAN4_REG8         0x020A28
-#define GT96100_MPSC_CHAN4_REG9         0x020A2C
-#define GT96100_MPSC_CHAN4_REG10        0x020A30
-#define GT96100_MPSC_CHAN4_REG11        0x020A34
-#define GT96100_MPSC5_MAIN_CONFIG_LOW   0x028A00
-#define GT96100_MPSC5_MAIN_CONFIG_HIGH  0x028A04
-#define GT96100_MPSC5_PROTOCOL_CONFIG   0x028A08
-#define GT96100_MPSC_CHAN5_REG1         0x028A0C
-#define GT96100_MPSC_CHAN5_REG2         0x028A10
-#define GT96100_MPSC_CHAN5_REG3         0x028A14
-#define GT96100_MPSC_CHAN5_REG4         0x028A18
-#define GT96100_MPSC_CHAN5_REG5         0x028A1C
-#define GT96100_MPSC_CHAN5_REG6         0x028A20
-#define GT96100_MPSC_CHAN5_REG7         0x028A24
-#define GT96100_MPSC_CHAN5_REG8         0x028A28
-#define GT96100_MPSC_CHAN5_REG9         0x028A2C
-#define GT96100_MPSC_CHAN5_REG10        0x028A30
-#define GT96100_MPSC_CHAN5_REG11        0x028A34
-#define GT96100_MPSC6_MAIN_CONFIG_LOW   0x030A00
-#define GT96100_MPSC6_MAIN_CONFIG_HIGH  0x030A04
-#define GT96100_MPSC6_PROTOCOL_CONFIG   0x030A08
-#define GT96100_MPSC_CHAN6_REG1         0x030A0C
-#define GT96100_MPSC_CHAN6_REG2         0x030A10
-#define GT96100_MPSC_CHAN6_REG3         0x030A14
-#define GT96100_MPSC_CHAN6_REG4         0x030A18
-#define GT96100_MPSC_CHAN6_REG5         0x030A1C
-#define GT96100_MPSC_CHAN6_REG6         0x030A20
-#define GT96100_MPSC_CHAN6_REG7         0x030A24
-#define GT96100_MPSC_CHAN6_REG8         0x030A28
-#define GT96100_MPSC_CHAN6_REG9         0x030A2C
-#define GT96100_MPSC_CHAN6_REG10        0x030A30
-#define GT96100_MPSC_CHAN6_REG11        0x030A34
-#define GT96100_MPSC7_MAIN_CONFIG_LOW   0x038A00
-#define GT96100_MPSC7_MAIN_CONFIG_HIGH  0x038A04
-#define GT96100_MPSC7_PROTOCOL_CONFIG   0x038A08
-#define GT96100_MPSC_CHAN7_REG1         0x038A0C
-#define GT96100_MPSC_CHAN7_REG2         0x038A10
-#define GT96100_MPSC_CHAN7_REG3         0x038A14
-#define GT96100_MPSC_CHAN7_REG4         0x038A18
-#define GT96100_MPSC_CHAN7_REG5         0x038A1C
-#define GT96100_MPSC_CHAN7_REG6         0x038A20
-#define GT96100_MPSC_CHAN7_REG7         0x038A24
-#define GT96100_MPSC_CHAN7_REG8         0x038A28
-#define GT96100_MPSC_CHAN7_REG9         0x038A2C
-#define GT96100_MPSC_CHAN7_REG10        0x038A30
-#define GT96100_MPSC_CHAN7_REG11        0x038A34
-/*  FlexTDMs  */
-/* TDPR0 - Transmit Dual Port RAM. block size 0xff */
-#define GT96100_FXTDM0_TDPR0_BLK0_BASE  0x000B00
-#define GT96100_FXTDM0_TDPR0_BLK1_BASE  0x001B00
-#define GT96100_FXTDM0_TDPR0_BLK2_BASE  0x002B00
-#define GT96100_FXTDM0_TDPR0_BLK3_BASE  0x003B00
-/* RDPR0 - Receive Dual Port RAM. block size 0xff */
-#define GT96100_FXTDM0_RDPR0_BLK0_BASE  0x004B00
-#define GT96100_FXTDM0_RDPR0_BLK1_BASE  0x005B00
-#define GT96100_FXTDM0_RDPR0_BLK2_BASE  0x006B00
-#define GT96100_FXTDM0_RDPR0_BLK3_BASE  0x007B00
-#define GT96100_FXTDM0_TX_READ_PTR      0x008B00
-#define GT96100_FXTDM0_RX_READ_PTR      0x008B04
-#define GT96100_FXTDM0_CONFIG       0x008B08
-#define GT96100_FXTDM0_AUX_CHANA_TX 0x008B0C
-#define GT96100_FXTDM0_AUX_CHANA_RX 0x008B10
-#define GT96100_FXTDM0_AUX_CHANB_TX 0x008B14
-#define GT96100_FXTDM0_AUX_CHANB_RX 0x008B18
-#define GT96100_FXTDM1_TDPR1_BLK0_BASE  0x010B00
-#define GT96100_FXTDM1_TDPR1_BLK1_BASE  0x011B00
-#define GT96100_FXTDM1_TDPR1_BLK2_BASE  0x012B00
-#define GT96100_FXTDM1_TDPR1_BLK3_BASE  0x013B00
-#define GT96100_FXTDM1_RDPR1_BLK0_BASE  0x014B00
-#define GT96100_FXTDM1_RDPR1_BLK1_BASE  0x015B00
-#define GT96100_FXTDM1_RDPR1_BLK2_BASE  0x016B00
-#define GT96100_FXTDM1_RDPR1_BLK3_BASE  0x017B00
-#define GT96100_FXTDM1_TX_READ_PTR      0x018B00
-#define GT96100_FXTDM1_RX_READ_PTR      0x018B04
-#define GT96100_FXTDM1_CONFIG       0x018B08
-#define GT96100_FXTDM1_AUX_CHANA_TX 0x018B0C
-#define GT96100_FXTDM1_AUX_CHANA_RX 0x018B10
-#define GT96100_FLTDM1_AUX_CHANB_TX 0x018B14
-#define GT96100_FLTDM1_AUX_CHANB_RX 0x018B18
-#define GT96100_FLTDM2_TDPR2_BLK0_BASE  0x020B00
-#define GT96100_FLTDM2_TDPR2_BLK1_BASE  0x021B00
-#define GT96100_FLTDM2_TDPR2_BLK2_BASE  0x022B00
-#define GT96100_FLTDM2_TDPR2_BLK3_BASE  0x023B00
-#define GT96100_FLTDM2_RDPR2_BLK0_BASE  0x024B00
-#define GT96100_FLTDM2_RDPR2_BLK1_BASE  0x025B00
-#define GT96100_FLTDM2_RDPR2_BLK2_BASE  0x026B00
-#define GT96100_FLTDM2_RDPR2_BLK3_BASE  0x027B00
-#define GT96100_FLTDM2_TX_READ_PTR      0x028B00
-#define GT96100_FLTDM2_RX_READ_PTR      0x028B04
-#define GT96100_FLTDM2_CONFIG       0x028B08
-#define GT96100_FLTDM2_AUX_CHANA_TX 0x028B0C
-#define GT96100_FLTDM2_AUX_CHANA_RX 0x028B10
-#define GT96100_FLTDM2_AUX_CHANB_TX 0x028B14
-#define GT96100_FLTDM2_AUX_CHANB_RX 0x028B18
-#define GT96100_FLTDM3_TDPR3_BLK0_BASE  0x030B00
-#define GT96100_FLTDM3_TDPR3_BLK1_BASE  0x031B00
-#define GT96100_FLTDM3_TDPR3_BLK2_BASE  0x032B00
-#define GT96100_FLTDM3_TDPR3_BLK3_BASE  0x033B00
-#define GT96100_FXTDM3_RDPR3_BLK0_BASE  0x034B00
-#define GT96100_FXTDM3_RDPR3_BLK1_BASE  0x035B00
-#define GT96100_FXTDM3_RDPR3_BLK2_BASE  0x036B00
-#define GT96100_FXTDM3_RDPR3_BLK3_BASE  0x037B00
-#define GT96100_FXTDM3_TX_READ_PTR      0x038B00
-#define GT96100_FXTDM3_RX_READ_PTR      0x038B04
-#define GT96100_FXTDM3_CONFIG       0x038B08
-#define GT96100_FXTDM3_AUX_CHANA_TX 0x038B0C
-#define GT96100_FXTDM3_AUX_CHANA_RX 0x038B10
-#define GT96100_FXTDM3_AUX_CHANB_TX 0x038B14
-#define GT96100_FXTDM3_AUX_CHANB_RX 0x038B18
-/*  Baud Rate Generators  */
-#define GT96100_BRG0_CONFIG     0x102A00
-#define GT96100_BRG0_BAUD_TUNE  0x102A04
-#define GT96100_BRG1_CONFIG     0x102A08
-#define GT96100_BRG1_BAUD_TUNE  0x102A0C
-#define GT96100_BRG2_CONFIG     0x102A10
-#define GT96100_BRG2_BAUD_TUNE  0x102A14
-#define GT96100_BRG3_CONFIG     0x102A18
-#define GT96100_BRG3_BAUD_TUNE  0x102A1C
-#define GT96100_BRG4_CONFIG     0x102A20
-#define GT96100_BRG4_BAUD_TUNE  0x102A24
-#define GT96100_BRG5_CONFIG     0x102A28
-#define GT96100_BRG5_BAUD_TUNE  0x102A2C
-#define GT96100_BRG6_CONFIG     0x102A30
-#define GT96100_BRG6_BAUD_TUNE  0x102A34
-#define GT96100_BRG7_CONFIG     0x102A38
-#define GT96100_BRG7_BAUD_TUNE  0x102A3C
-/*  Routing Registers  */
-#define GT96100_ROUTE_MAIN      0x101A00
-#define GT96100_ROUTE_RX_CLOCK  0x101A10
-#define GT96100_ROUTE_TX_CLOCK  0x101A20
-/*  General Purpose Ports  */
-#define GT96100_GPP_CONFIG0     0x100A00
-#define GT96100_GPP_CONFIG1     0x100A04
-#define GT96100_GPP_CONFIG2     0x100A08
-#define GT96100_GPP_CONFIG3     0x100A0C
-#define GT96100_GPP_IO0         0x100A20
-#define GT96100_GPP_IO1         0x100A24
-#define GT96100_GPP_IO2         0x100A28
-#define GT96100_GPP_IO3         0x100A2C
-#define GT96100_GPP_DATA0       0x100A40
-#define GT96100_GPP_DATA1       0x100A44
-#define GT96100_GPP_DATA2       0x100A48
-#define GT96100_GPP_DATA3       0x100A4C
-#define GT96100_GPP_LEVEL0      0x100A60
-#define GT96100_GPP_LEVEL1      0x100A64
-#define GT96100_GPP_LEVEL2      0x100A68
-#define GT96100_GPP_LEVEL3      0x100A6C
-/*  Watchdog  */
-#define GT96100_WD_CONFIG   0x101A80
-#define GT96100_WD_VALUE    0x101A84
-/* Communication Unit Arbiter  */
-#define GT96100_COMM_UNIT_ARBTR_CONFIG 0x101AC0
-/*  PCI Arbiters  */
-#define GT96100_PCI0_ARBTR_CONFIG 0x101AE0
-#define GT96100_PCI1_ARBTR_CONFIG 0x101AE4
-/* CIU Arbiter */
-#define GT96100_CIU_ARBITER_CONFIG 0x101AC0
-/* Interrupt Controller */
-#define GT96100_MAIN_CAUSE     0x000C18
-#define GT96100_INT0_MAIN_MASK 0x000C1C
-#define GT96100_INT1_MAIN_MASK 0x000C24
-#define GT96100_HIGH_CAUSE     0x000C98
-#define GT96100_INT0_HIGH_MASK 0x000C9C
-#define GT96100_INT1_HIGH_MASK 0x000CA4
-#define GT96100_INT0_SELECT    0x000C70
-#define GT96100_INT1_SELECT    0x000C74
-#define GT96100_SERIAL_CAUSE   0x103A00
-#define GT96100_SERINT0_MASK   0x103A80
-#define GT96100_SERINT1_MASK   0x103A88
-
-#endif /*  _GT96100_H */
diff --git a/include/asm-mips/mach-ev96100/mach-gt64120.h b/include/asm-mips/mach-ev96100/mach-gt64120.h
deleted file mode 100644
index 0ef1e6c25acf..000000000000
--- a/include/asm-mips/mach-ev96100/mach-gt64120.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- *  This is a direct copy of the ev96100.h file, with a global
- * search and replace.  The numbers are the same.
- *
- *  The reason I'm duplicating this is so that the 64120/96100
- * defines won't be confusing in the source code.
- */
-#ifndef _ASM_GT64120_EV96100_GT64120_DEP_H
-#define _ASM_GT64120_EV96100_GT64120_DEP_H
-
-/*
- *   GT96100 config space base address
- */
-#define GT64120_BASE	(KSEG1ADDR(0x14000000))
-
-/*
- *   PCI Bus allocation
- *
- *   (Guessing ...)
- */
-#define GT_PCI_MEM_BASE	0x12000000UL
-#define GT_PCI_MEM_SIZE	0x02000000UL
-#define GT_PCI_IO_BASE	0x10000000UL
-#define GT_PCI_IO_SIZE	0x02000000UL
-#define GT_ISA_IO_BASE	PCI_IO_BASE
-
-/*
- *   Duart I/O ports.
- */
-#define EV96100_COM1_BASE_ADDR	(0xBD000000 + 0x20)
-#define EV96100_COM2_BASE_ADDR	(0xBD000000 + 0x00)
-
-
-/*
- *   EV96100 interrupt controller register base.
- */
-#define EV96100_ICTRL_REGS_BASE	(KSEG1ADDR(0x1f000000))
-
-/*
- *   EV96100 UART register base.
- */
-#define EV96100_UART0_REGS_BASE	EV96100_COM1_BASE_ADDR
-#define EV96100_UART1_REGS_BASE	EV96100_COM2_BASE_ADDR
-#define EV96100_BASE_BAUD	( 3686400 / 16 )
-
-#endif /* _ASM_GT64120_EV96100_GT64120_DEP_H */
diff --git a/include/asm-mips/serial.h b/include/asm-mips/serial.h
index 584bd9c0ab2e..035637c67e7c 100644
--- a/include/asm-mips/serial.h
+++ b/include/asm-mips/serial.h
@@ -52,9 +52,9 @@
 #endif
 
 /*
- * Both Galileo boards have the same UART mappings.
+ * Galileo EV64120 evaluation board
  */
-#if defined (CONFIG_MIPS_EV96100) || defined (CONFIG_MIPS_EV64120)
+#ifdef CONFIG_MIPS_EV64120
 #include <asm/galileo-boards/ev96100.h>
 #include <asm/galileo-boards/ev96100int.h>
 #define EV96100_SERIAL_PORT_DEFNS                                  \
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6a1e09834559..5c1c698a92ac 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1482,9 +1482,6 @@
 #define PCI_DEVICE_ID_MARVELL_GT64260	0x6430
 #define PCI_DEVICE_ID_MARVELL_MV64360	0x6460
 #define PCI_DEVICE_ID_MARVELL_MV64460	0x6480
-#define PCI_DEVICE_ID_MARVELL_GT96100	0x9652
-#define PCI_DEVICE_ID_MARVELL_GT96100A	0x9653
-
 
 #define PCI_VENDOR_ID_V3		0x11b0
 #define PCI_DEVICE_ID_V3_V960		0x0001
-- 
cgit v1.2.3


From ae6ddcc5f24d6b06ae9231dc128904750a4155e0 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 27 Sep 2006 01:49:27 -0700
Subject: [PATCH] ext3 and jbd cleanup: remove whitespace

Remove whitespace from ext3 and jbd, before we clone ext4.

Signed-off-by: Mingming Cao<cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c         |  16 +++---
 fs/ext3/bitmap.c         |   2 +-
 fs/ext3/dir.c            |  14 +++---
 fs/ext3/file.c           |   2 +-
 fs/ext3/fsync.c          |   6 +--
 fs/ext3/hash.c           |   6 +--
 fs/ext3/ialloc.c         |  48 +++++++++---------
 fs/ext3/inode.c          |  64 ++++++++++++------------
 fs/ext3/namei.c          |  28 +++++------
 fs/ext3/super.c          |  24 ++++-----
 fs/jbd/checkpoint.c      |  30 +++++------
 fs/jbd/journal.c         |  56 ++++++++++-----------
 fs/jbd/recovery.c        |  54 ++++++++++----------
 fs/jbd/revoke.c          |  70 +++++++++++++-------------
 fs/jbd/transaction.c     | 128 +++++++++++++++++++++++------------------------
 include/linux/ext3_jbd.h |  10 ++--
 include/linux/jbd.h      |  56 ++++++++++-----------
 17 files changed, 307 insertions(+), 307 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 063d994bda0b..e6b983707008 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -74,7 +74,7 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
 }
 
 /*
- * Read the bitmap for a given block_group, reading into the specified 
+ * Read the bitmap for a given block_group, reading into the specified
  * slot in the superblock's bitmap cache.
  *
  * Return buffer_head on success or NULL in case of failure.
@@ -419,8 +419,8 @@ do_more:
 		}
 		/* @@@ This prevents newly-allocated data from being
 		 * freed and then reallocated within the same
-		 * transaction. 
-		 * 
+		 * transaction.
+		 *
 		 * Ideally we would want to allow that to happen, but to
 		 * do so requires making journal_forget() capable of
 		 * revoking the queued write of a data block, which
@@ -433,7 +433,7 @@ do_more:
 		 * safe not to set the allocation bit in the committed
 		 * bitmap, because we know that there is no outstanding
 		 * activity on the buffer any more and so it is safe to
-		 * reallocate it.  
+		 * reallocate it.
 		 */
 		BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
 		J_ASSERT_BH(bitmap_bh,
@@ -518,7 +518,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
  * data would allow the old block to be overwritten before the
  * transaction committed (because we force data to disk before commit).
  * This would lead to corruption if we crashed between overwriting the
- * data and committing the delete. 
+ * data and committing the delete.
  *
  * @@@ We may want to make this allocation behaviour conditional on
  * data-writes at some point, and disable it for metadata allocations or
@@ -584,7 +584,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
 
 	if (start > 0) {
 		/*
-		 * The goal was occupied; search forward for a free 
+		 * The goal was occupied; search forward for a free
 		 * block within the next XX blocks.
 		 *
 		 * end_goal is more or less random, but it has to be
@@ -1194,7 +1194,7 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries)
 /*
  * ext3_new_block uses a goal block to assist allocation.  If the goal is
  * free, or there is a free block within 32 blocks of the goal, that block
- * is allocated.  Otherwise a forward search is made for a free block; within 
+ * is allocated.  Otherwise a forward search is made for a free block; within
  * each block group the search first looks for an entire free byte in the block
  * bitmap, and then for any free bit if that fails.
  * This function also updates quota and i_blocks field.
@@ -1303,7 +1303,7 @@ retry_alloc:
 	smp_rmb();
 
 	/*
-	 * Now search the rest of the groups.  We assume that 
+	 * Now search the rest of the groups.  We assume that
 	 * i and gdp correctly point to the last group visited.
 	 */
 	for (bgi = 0; bgi < ngroups; bgi++) {
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index ce4f82b9e528..b9176eed98d1 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -20,7 +20,7 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
 	unsigned int i;
 	unsigned long sum = 0;
 
-	if (!map) 
+	if (!map)
 		return (0);
 	for (i = 0; i < numchars; i++)
 		sum += nibblemap[map->b_data[i] & 0xf] +
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index fbb0d4ed07d4..6f9e5a523c87 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -59,7 +59,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
 
 	return (ext3_filetype_table[filetype]);
 }
-			       
+
 
 int ext3_check_dir_entry (const char * function, struct inode * dir,
 			  struct ext3_dir_entry_2 * de,
@@ -162,7 +162,7 @@ revalidate:
 		 * to make sure. */
 		if (filp->f_version != inode->i_version) {
 			for (i = 0; i < sb->s_blocksize && i < offset; ) {
-				de = (struct ext3_dir_entry_2 *) 
+				de = (struct ext3_dir_entry_2 *)
 					(bh->b_data + i);
 				/* It's too expensive to do a full
 				 * dirent test each time round this
@@ -181,7 +181,7 @@ revalidate:
 			filp->f_version = inode->i_version;
 		}
 
-		while (!error && filp->f_pos < inode->i_size 
+		while (!error && filp->f_pos < inode->i_size
 		       && offset < sb->s_blocksize) {
 			de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
 			if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
@@ -229,7 +229,7 @@ out:
 /*
  * These functions convert from the major/minor hash to an f_pos
  * value.
- * 
+ *
  * Currently we only use major hash numer.  This is unfortunate, but
  * on 32-bit machines, the same VFS interface is used for lseek and
  * llseek, so if we use the 64 bit offset, then the 32-bit versions of
@@ -250,7 +250,7 @@ out:
 struct fname {
 	__u32		hash;
 	__u32		minor_hash;
-	struct rb_node	rb_hash; 
+	struct rb_node	rb_hash;
 	struct fname	*next;
 	__u32		inode;
 	__u8		name_len;
@@ -410,7 +410,7 @@ static int call_filldir(struct file * filp, void * dirent,
 	curr_pos = hash2pos(fname->hash, fname->minor_hash);
 	while (fname) {
 		error = filldir(dirent, fname->name,
-				fname->name_len, curr_pos, 
+				fname->name_len, curr_pos,
 				fname->inode,
 				get_dtype(sb, fname->file_type));
 		if (error) {
@@ -465,7 +465,7 @@ static int ext3_dx_readdir(struct file * filp,
 		/*
 		 * Fill the rbtree if we have no more entries,
 		 * or the inode has changed since we last read in the
-		 * cached entries. 
+		 * cached entries.
 		 */
 		if ((!info->curr_node) ||
 		    (filp->f_version != inode->i_version)) {
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1efefb630ea9..994efd189f4e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -100,7 +100,7 @@ ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
 
 force_commit:
 	err = ext3_force_commit(inode->i_sb);
-	if (err) 
+	if (err)
 		return err;
 	return ret;
 }
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 49382a208e05..dd1fd3c0fc05 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -8,14 +8,14 @@
  *                      Universite Pierre et Marie Curie (Paris VI)
  *  from
  *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
- * 
+ *
  *  ext3fs fsync primitive
  *
  *  Big-endian to little-endian byte-swapping/bitmaps by
  *        David S. Miller (davem@caip.rutgers.edu), 1995
- * 
+ *
  *  Removed unnecessary code duplication for little endian machines
- *  and excessive __inline__s. 
+ *  and excessive __inline__s.
  *        Andi Kleen, 1997
  *
  * Major simplications and cleanup - we only need to do the metadata, because
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index 5a2d1235ead0..7fa637cd322a 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -4,7 +4,7 @@
  * Copyright (C) 2002 by Theodore Ts'o
  *
  * This file is released under the GPL v2.
- * 
+ *
  * This file may be redistributed under the terms of the GNU Public
  * License.
  */
@@ -80,11 +80,11 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
  * Returns the hash of a filename.  If len is 0 and name is NULL, then
  * this function can be used to test whether or not a hash version is
  * supported.
- * 
+ *
  * The seed is an 4 longword (32 bits) "secret" which can be used to
  * uniquify a hash.  If the seed is all zero's, then some default seed
  * may be used.
- * 
+ *
  * A particular hash version specifies whether or not the seed is
  * represented, and whether or not the returned hash is 32 bits or 64
  * bits.  32 bit hashes will return 0 for the minor hash.
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 36546ed36a14..5e288368499b 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -216,7 +216,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
 			continue;
 		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
 			continue;
-		if (!best_desc || 
+		if (!best_desc ||
 		    (le16_to_cpu(desc->bg_free_blocks_count) >
 		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
 			best_group = group;
@@ -226,30 +226,30 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
 	return best_group;
 }
 
-/* 
- * Orlov's allocator for directories. 
- * 
+/*
+ * Orlov's allocator for directories.
+ *
  * We always try to spread first-level directories.
  *
- * If there are blockgroups with both free inodes and free blocks counts 
- * not worse than average we return one with smallest directory count. 
- * Otherwise we simply return a random group. 
- * 
- * For the rest rules look so: 
- * 
- * It's OK to put directory into a group unless 
- * it has too many directories already (max_dirs) or 
- * it has too few free inodes left (min_inodes) or 
- * it has too few free blocks left (min_blocks) or 
- * it's already running too large debt (max_debt). 
- * Parent's group is prefered, if it doesn't satisfy these 
- * conditions we search cyclically through the rest. If none 
- * of the groups look good we just look for a group with more 
- * free inodes than average (starting at parent's group). 
- * 
- * Debt is incremented each time we allocate a directory and decremented 
- * when we allocate an inode, within 0--255. 
- */ 
+ * If there are blockgroups with both free inodes and free blocks counts
+ * not worse than average we return one with smallest directory count.
+ * Otherwise we simply return a random group.
+ *
+ * For the rest rules look so:
+ *
+ * It's OK to put directory into a group unless
+ * it has too many directories already (max_dirs) or
+ * it has too few free inodes left (min_inodes) or
+ * it has too few free blocks left (min_blocks) or
+ * it's already running too large debt (max_debt).
+ * Parent's group is prefered, if it doesn't satisfy these
+ * conditions we search cyclically through the rest. If none
+ * of the groups look good we just look for a group with more
+ * free inodes than average (starting at parent's group).
+ *
+ * Debt is incremented each time we allocate a directory and decremented
+ * when we allocate an inode, within 0--255.
+ */
 
 #define INODE_COST 64
 #define BLOCK_COST 256
@@ -454,7 +454,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
 			group = find_group_dir(sb, dir);
 		else
 			group = find_group_orlov(sb, dir);
-	} else 
+	} else
 		group = find_group_other(sb, dir);
 
 	err = -ENOSPC;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 84be02e93652..473d206b1d7e 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -55,7 +55,7 @@ static int ext3_inode_is_fast_symlink(struct inode *inode)
 /*
  * The ext3 forget function must perform a revoke if we are freeing data
  * which has been journaled.  Metadata (eg. indirect blocks) must be
- * revoked in all cases. 
+ * revoked in all cases.
  *
  * "bh" may be NULL: a metadata block may have been freed from memory
  * but there may still be a record of it in the journal, and that record
@@ -105,7 +105,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
  * Work out how many blocks we need to proceed with the next chunk of a
  * truncate transaction.
  */
-static unsigned long blocks_for_truncate(struct inode *inode) 
+static unsigned long blocks_for_truncate(struct inode *inode)
 {
 	unsigned long needed;
 
@@ -122,13 +122,13 @@ static unsigned long blocks_for_truncate(struct inode *inode)
 
 	/* But we need to bound the transaction so we don't overflow the
 	 * journal. */
-	if (needed > EXT3_MAX_TRANS_DATA) 
+	if (needed > EXT3_MAX_TRANS_DATA)
 		needed = EXT3_MAX_TRANS_DATA;
 
 	return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
 }
 
-/* 
+/*
  * Truncate transactions can be complex and absolutely huge.  So we need to
  * be able to restart the transaction at a conventient checkpoint to make
  * sure we don't overflow the journal.
@@ -136,9 +136,9 @@ static unsigned long blocks_for_truncate(struct inode *inode)
  * start_transaction gets us a new handle for a truncate transaction,
  * and extend_transaction tries to extend the existing one a bit.  If
  * extend fails, we need to propagate the failure up and restart the
- * transaction in the top-level truncate loop. --sct 
+ * transaction in the top-level truncate loop. --sct
  */
-static handle_t *start_transaction(struct inode *inode) 
+static handle_t *start_transaction(struct inode *inode)
 {
 	handle_t *result;
 
@@ -215,12 +215,12 @@ void ext3_delete_inode (struct inode * inode)
 	ext3_orphan_del(handle, inode);
 	EXT3_I(inode)->i_dtime	= get_seconds();
 
-	/* 
+	/*
 	 * One subtle ordering requirement: if anything has gone wrong
 	 * (transaction abort, IO errors, whatever), then we can still
 	 * do these next steps (the fs will already have been marked as
 	 * having errors), but we can't free the inode if the mark_dirty
-	 * fails.  
+	 * fails.
 	 */
 	if (ext3_mark_inode_dirty(handle, inode))
 		/* If that failed, just do the required in-core inode clear. */
@@ -398,7 +398,7 @@ no_block:
  *	  + if there is a block to the left of our position - allocate near it.
  *	  + if pointer will live in indirect block - allocate near that block.
  *	  + if pointer will live in inode - allocate in the same
- *	    cylinder group. 
+ *	    cylinder group.
  *
  * In the latter case we colour the starting block by the callers PID to
  * prevent it from clashing with concurrent allocations for a different inode
@@ -744,7 +744,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 		jbd_debug(5, "splicing indirect only\n");
 		BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
 		err = ext3_journal_dirty_metadata(handle, where->bh);
-		if (err) 
+		if (err)
 			goto err_out;
 	} else {
 		/*
@@ -1137,7 +1137,7 @@ static int walk_page_buffers(	handle_t *handle,
  * So what we do is to rely on the fact that journal_stop/journal_start
  * will _not_ run commit under these circumstances because handle->h_ref
  * is elevated.  We'll still have enough credits for the tiny quotafile
- * write.  
+ * write.
  */
 static int do_journal_get_write_access(handle_t *handle,
 					struct buffer_head *bh)
@@ -1282,7 +1282,7 @@ static int ext3_journalled_commit_write(struct file *file,
 	if (inode->i_size > EXT3_I(inode)->i_disksize) {
 		EXT3_I(inode)->i_disksize = inode->i_size;
 		ret2 = ext3_mark_inode_dirty(handle, inode);
-		if (!ret) 
+		if (!ret)
 			ret = ret2;
 	}
 	ret2 = ext3_journal_stop(handle);
@@ -1291,7 +1291,7 @@ static int ext3_journalled_commit_write(struct file *file,
 	return ret;
 }
 
-/* 
+/*
  * bmap() is special.  It gets used by applications such as lilo and by
  * the swapper to find the on-disk block of a specific piece of data.
  *
@@ -1300,10 +1300,10 @@ static int ext3_journalled_commit_write(struct file *file,
  * filesystem and enables swap, then they may get a nasty shock when the
  * data getting swapped to that swapfile suddenly gets overwritten by
  * the original zero's written out previously to the journal and
- * awaiting writeback in the kernel's buffer cache. 
+ * awaiting writeback in the kernel's buffer cache.
  *
  * So, if we see any bmap calls here on a modified, data-journaled file,
- * take extra steps to flush any blocks which might be in the cache. 
+ * take extra steps to flush any blocks which might be in the cache.
  */
 static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
 {
@@ -1312,16 +1312,16 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
 	int err;
 
 	if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
-		/* 
+		/*
 		 * This is a REALLY heavyweight approach, but the use of
 		 * bmap on dirty files is expected to be extremely rare:
 		 * only if we run lilo or swapon on a freshly made file
-		 * do we expect this to happen. 
+		 * do we expect this to happen.
 		 *
 		 * (bmap requires CAP_SYS_RAWIO so this does not
 		 * represent an unprivileged user DOS attack --- we'd be
 		 * in trouble if mortal users could trigger this path at
-		 * will.) 
+		 * will.)
 		 *
 		 * NB. EXT3_STATE_JDATA is not set on files other than
 		 * regular files.  If somebody wants to bmap a directory
@@ -1457,7 +1457,7 @@ static int ext3_ordered_writepage(struct page *page,
 	 */
 
 	/*
-	 * And attach them to the current transaction.  But only if 
+	 * And attach them to the current transaction.  But only if
 	 * block_write_full_page() succeeded.  Otherwise they are unmapped,
 	 * and generally junk.
 	 */
@@ -1644,7 +1644,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 		}
 	}
 
-	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext3_get_block, NULL);
 
@@ -2025,7 +2025,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
 			   __le32 *first, __le32 *last)
 {
 	ext3_fsblk_t block_to_free = 0;    /* Starting block # of a run */
-	unsigned long count = 0;	    /* Number of blocks in the run */ 
+	unsigned long count = 0;	    /* Number of blocks in the run */
 	__le32 *block_to_free_p = NULL;	    /* Pointer into inode/ind
 					       corresponding to
 					       block_to_free */
@@ -2054,7 +2054,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
 			} else if (nr == block_to_free + count) {
 				count++;
 			} else {
-				ext3_clear_blocks(handle, inode, this_bh, 
+				ext3_clear_blocks(handle, inode, this_bh,
 						  block_to_free,
 						  count, block_to_free_p, p);
 				block_to_free = nr;
@@ -2184,7 +2184,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 					*p = 0;
 					BUFFER_TRACE(parent_bh,
 					"call ext3_journal_dirty_metadata");
-					ext3_journal_dirty_metadata(handle, 
+					ext3_journal_dirty_metadata(handle,
 								    parent_bh);
 				}
 			}
@@ -2704,7 +2704,7 @@ void ext3_read_inode(struct inode * inode)
 		if (raw_inode->i_block[0])
 			init_special_inode(inode, inode->i_mode,
 			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
-		else 
+		else
 			init_special_inode(inode, inode->i_mode,
 			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
 	}
@@ -2724,8 +2724,8 @@ bad_inode:
  *
  * The caller must have write access to iloc->bh.
  */
-static int ext3_do_update_inode(handle_t *handle, 
-				struct inode *inode, 
+static int ext3_do_update_inode(handle_t *handle,
+				struct inode *inode,
 				struct ext3_iloc *iloc)
 {
 	struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
@@ -2900,7 +2900,7 @@ int ext3_write_inode(struct inode *inode, int wait)
  * commit will leave the blocks being flushed in an unused state on
  * disk.  (On recovery, the inode will get truncated and the blocks will
  * be freed, so we have a strong guarantee that no future commit will
- * leave these blocks visible to the user.)  
+ * leave these blocks visible to the user.)
  *
  * Called with inode->sem down.
  */
@@ -3043,13 +3043,13 @@ int ext3_mark_iloc_dirty(handle_t *handle,
 	return err;
 }
 
-/* 
+/*
  * On success, We end up with an outstanding reference count against
- * iloc->bh.  This _must_ be cleaned up later. 
+ * iloc->bh.  This _must_ be cleaned up later.
  */
 
 int
-ext3_reserve_inode_write(handle_t *handle, struct inode *inode, 
+ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
 			 struct ext3_iloc *iloc)
 {
 	int err = 0;
@@ -3139,7 +3139,7 @@ out:
 }
 
 #if 0
-/* 
+/*
  * Bind an inode's backing buffer_head into this transaction, to prevent
  * it from being flushed to disk early.  Unlike
  * ext3_reserve_inode_write, this leaves behind no bh reference and
@@ -3157,7 +3157,7 @@ static int ext3_pin_inode(handle_t *handle, struct inode *inode)
 			BUFFER_TRACE(iloc.bh, "get_write_access");
 			err = journal_get_write_access(handle, iloc.bh);
 			if (!err)
-				err = ext3_journal_dirty_metadata(handle, 
+				err = ext3_journal_dirty_metadata(handle,
 								  iloc.bh);
 			brelse(iloc.bh);
 		}
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 2aa7101b27cd..4123f5261bcd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -76,7 +76,7 @@ static struct buffer_head *ext3_append(handle_t *handle,
 #ifdef DX_DEBUG
 #define dxtrace(command) command
 #else
-#define dxtrace(command) 
+#define dxtrace(command)
 #endif
 
 struct fake_dirent
@@ -169,7 +169,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
 static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
 static int ext3_htree_next_block(struct inode *dir, __u32 hash,
 				 struct dx_frame *frame,
-				 struct dx_frame *frames, 
+				 struct dx_frame *frames,
 				 __u32 *start_hash);
 static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
 		       struct ext3_dir_entry_2 **res_dir, int *err);
@@ -250,7 +250,7 @@ static void dx_show_index (char * label, struct dx_entry *entries)
 }
 
 struct stats
-{ 
+{
 	unsigned names;
 	unsigned space;
 	unsigned bcount;
@@ -464,7 +464,7 @@ static void dx_release (struct dx_frame *frames)
  */
 static int ext3_htree_next_block(struct inode *dir, __u32 hash,
 				 struct dx_frame *frame,
-				 struct dx_frame *frames, 
+				 struct dx_frame *frames,
 				 __u32 *start_hash)
 {
 	struct dx_frame *p;
@@ -632,7 +632,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 		}
 		count += ret;
 		hashval = ~0;
-		ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, 
+		ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
 					    frame, frames, &hashval);
 		*next_hash = hashval;
 		if (ret < 0) {
@@ -649,7 +649,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 			break;
 	}
 	dx_release(frames);
-	dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", 
+	dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
 		       count, *next_hash));
 	return count;
 errout:
@@ -1050,7 +1050,7 @@ struct dentry *ext3_get_parent(struct dentry *child)
 		parent = ERR_PTR(-ENOMEM);
 	}
 	return parent;
-} 
+}
 
 #define S_SHIFT 12
 static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -1198,7 +1198,7 @@ errout:
  * add_dirent_to_buf will attempt search the directory block for
  * space.  It will return -ENOSPC if no space is available, and -EIO
  * and -EEXIST if directory entry already exists.
- * 
+ *
  * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
  * all other cases bh is released.
  */
@@ -1572,7 +1572,7 @@ cleanup:
  * ext3_delete_entry deletes a directory entry by merging it with the
  * previous entry
  */
-static int ext3_delete_entry (handle_t *handle, 
+static int ext3_delete_entry (handle_t *handle,
 			      struct inode * dir,
 			      struct ext3_dir_entry_2 * de_del,
 			      struct buffer_head * bh)
@@ -1643,12 +1643,12 @@ static int ext3_add_nondir(handle_t *handle,
  * is so far negative - it has no inode.
  *
  * If the create succeeds, we fill in the inode information
- * with d_instantiate(). 
+ * with d_instantiate().
  */
 static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 		struct nameidata *nd)
 {
-	handle_t *handle; 
+	handle_t *handle;
 	struct inode * inode;
 	int err, retries = 0;
 
@@ -1813,7 +1813,7 @@ static int empty_dir (struct inode * inode)
 	de1 = (struct ext3_dir_entry_2 *)
 			((char *) de + le16_to_cpu(de->rec_len));
 	if (le32_to_cpu(de->inode) != inode->i_ino ||
-			!le32_to_cpu(de1->inode) || 
+			!le32_to_cpu(de1->inode) ||
 			strcmp (".", de->name) ||
 			strcmp ("..", de1->name)) {
 	    	ext3_warning (inode->i_sb, "empty_dir",
@@ -1883,7 +1883,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 	 * being truncated, or files being unlinked. */
 
 	/* @@@ FIXME: Observation from aviro:
-	 * I think I can trigger J_ASSERT in ext3_orphan_add().  We block 
+	 * I think I can trigger J_ASSERT in ext3_orphan_add().  We block
 	 * here (on lock_super()), so race with ext3_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
@@ -2393,4 +2393,4 @@ struct inode_operations ext3_special_inode_operations = {
 	.removexattr	= generic_removexattr,
 #endif
 	.permission	= ext3_permission,
-}; 
+};
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3559086eee5f..4b95bfe4c8f7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -62,13 +62,13 @@ static void ext3_unlockfs(struct super_block *sb);
 static void ext3_write_super (struct super_block * sb);
 static void ext3_write_super_lockfs(struct super_block *sb);
 
-/* 
+/*
  * Wrappers for journal_start/end.
  *
  * The only special thing we need to do here is to make sure that all
  * journal_end calls result in the superblock being marked dirty, so
  * that sync() will call the filesystem's write_super callback if
- * appropriate. 
+ * appropriate.
  */
 handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
 {
@@ -90,11 +90,11 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
 	return journal_start(journal, nblocks);
 }
 
-/* 
+/*
  * The only special thing we need to do here is to make sure that all
  * journal_stop calls result in the superblock being marked dirty, so
  * that sync() will call the filesystem's write_super callback if
- * appropriate. 
+ * appropriate.
  */
 int __ext3_journal_stop(const char *where, handle_t *handle)
 {
@@ -369,7 +369,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
 {
 	struct list_head *l;
 
-	printk(KERN_ERR "sb orphan head is %d\n", 
+	printk(KERN_ERR "sb orphan head is %d\n",
 	       le32_to_cpu(sbi->s_es->s_last_orphan));
 
 	printk(KERN_ERR "sb_info orphan list:\n");
@@ -378,7 +378,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
 		printk(KERN_ERR "  "
 		       "inode %s:%ld at %p: mode %o, nlink %d, next %d\n",
 		       inode->i_sb->s_id, inode->i_ino, inode,
-		       inode->i_mode, inode->i_nlink, 
+		       inode->i_mode, inode->i_nlink,
 		       NEXT_ORPHAN(inode));
 	}
 }
@@ -475,7 +475,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		inode_init_once(&ei->vfs_inode);
 	}
 }
- 
+
 static int init_inodecache(void)
 {
 	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
@@ -1483,7 +1483,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
-		printk(KERN_WARNING 
+		printk(KERN_WARNING
 		       "EXT3-fs warning: feature flags set on rev 0 fs, "
 		       "running e2fsck is recommended\n");
 	/*
@@ -1509,7 +1509,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 
 	if (blocksize < EXT3_MIN_BLOCK_SIZE ||
 	    blocksize > EXT3_MAX_BLOCK_SIZE) {
-		printk(KERN_ERR 
+		printk(KERN_ERR
 		       "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
 		       blocksize, sb->s_id);
 		goto failed_mount;
@@ -1533,14 +1533,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
 		bh = sb_bread(sb, logic_sb_block);
 		if (!bh) {
-			printk(KERN_ERR 
+			printk(KERN_ERR
 			       "EXT3-fs: Can't read superblock on 2nd try.\n");
 			goto failed_mount;
 		}
 		es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
 		sbi->s_es = es;
 		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
-			printk (KERN_ERR 
+			printk (KERN_ERR
 				"EXT3-fs: Magic mismatch, very weird !\n");
 			goto failed_mount;
 		}
@@ -1820,7 +1820,7 @@ out_fail:
 /*
  * Setup any per-fs journal parameters now.  We'll do this both on
  * initial mount, once the journal has been initialised but before we've
- * done any recovery; and again on any subsequent remount. 
+ * done any recovery; and again on any subsequent remount.
  */
 static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
 {
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index d0685596e5a6..961ada28db5e 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -1,6 +1,6 @@
 /*
  * linux/fs/checkpoint.c
- * 
+ *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
  *
  * Copyright 1999 Red Hat Software --- All Rights Reserved
@@ -9,8 +9,8 @@
  * the terms of the GNU General Public License, version 2, or at your
  * option, any later version, incorporated herein by reference.
  *
- * Checkpoint routines for the generic filesystem journaling code.  
- * Part of the ext2fs journaling system.  
+ * Checkpoint routines for the generic filesystem journaling code.
+ * Part of the ext2fs journaling system.
  *
  * Checkpointing is the process of ensuring that a section of the log is
  * committed fully to disk, so that that portion of the log can be
@@ -226,7 +226,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Try to flush one buffer from the checkpoint list to disk.
  *
  * Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.  
+ * scan of the checkpoint list.
  *
  * Called with j_list_lock held and drops it if 1 is returned
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -270,7 +270,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		 * possibly block, while still holding the journal lock.
 		 * We cannot afford to let the transaction logic start
 		 * messing around with this buffer before we write it to
-		 * disk, as that would break recoverability.  
+		 * disk, as that would break recoverability.
 		 */
 		BUFFER_TRACE(bh, "queue");
 		get_bh(bh);
@@ -293,7 +293,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
  * Perform an actual checkpoint. We take the first transaction on the
  * list of transactions to be checkpointed and send all its buffers
  * to disk. We submit larger chunks of data at once.
- * 
+ *
  * The journal should be locked before calling this function.
  */
 int log_do_checkpoint(journal_t *journal)
@@ -304,10 +304,10 @@ int log_do_checkpoint(journal_t *journal)
 
 	jbd_debug(1, "Start checkpoint\n");
 
-	/* 
+	/*
 	 * First thing: if there are any transactions in the log which
 	 * don't need checkpointing, just eliminate them from the
-	 * journal straight away.  
+	 * journal straight away.
 	 */
 	result = cleanup_journal_tail(journal);
 	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
@@ -385,9 +385,9 @@ out:
  * we have already got rid of any since the last update of the log tail
  * in the journal superblock.  If so, we can instantly roll the
  * superblock forward to remove those transactions from the log.
- * 
+ *
  * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
- * 
+ *
  * Called with the journal lock held.
  *
  * This is the only part of the journaling code which really needs to be
@@ -404,8 +404,8 @@ int cleanup_journal_tail(journal_t *journal)
 	unsigned long	blocknr, freed;
 
 	/* OK, work out the oldest transaction remaining in the log, and
-	 * the log block it starts at. 
-	 * 
+	 * the log block it starts at.
+	 *
 	 * If the log is now empty, we need to work out which is the
 	 * next transaction ID we will write, and where it will
 	 * start. */
@@ -558,7 +558,7 @@ out:
 	return ret;
 }
 
-/* 
+/*
  * journal_remove_checkpoint: called after a buffer has been committed
  * to disk (either by being write-back flushed to disk, or being
  * committed to the log).
@@ -636,7 +636,7 @@ out:
  * Called with the journal locked.
  * Called with j_list_lock held.
  */
-void __journal_insert_checkpoint(struct journal_head *jh, 
+void __journal_insert_checkpoint(struct journal_head *jh,
 			       transaction_t *transaction)
 {
 	JBUFFER_TRACE(jh, "entry");
@@ -658,7 +658,7 @@ void __journal_insert_checkpoint(struct journal_head *jh,
 
 /*
  * We've finished with this transaction structure: adios...
- * 
+ *
  * The transaction must have no links except for the checkpoint by this
  * point.
  *
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f66724ce443a..87c5a6d00805 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -578,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
  * this is a no-op.  If needed, we can use j_blk_offset - everything is
  * ready.
  */
-int journal_bmap(journal_t *journal, unsigned long blocknr, 
+int journal_bmap(journal_t *journal, unsigned long blocknr,
 		 unsigned long *retp)
 {
 	int err = 0;
@@ -699,10 +699,10 @@ fail:
  *  @len:  Lenght of the journal in blocks.
  *  @blocksize: blocksize of journalling device
  *  @returns: a newly created journal_t *
- *  
+ *
  *  journal_init_dev creates a journal which maps a fixed contiguous
  *  range of blocks on an arbitrary block device.
- * 
+ *
  */
 journal_t * journal_init_dev(struct block_device *bdev,
 			struct block_device *fs_dev,
@@ -739,11 +739,11 @@ journal_t * journal_init_dev(struct block_device *bdev,
 
 	return journal;
 }
- 
-/** 
+
+/**
  *  journal_t * journal_init_inode () - creates a journal which maps to a inode.
  *  @inode: An inode to create the journal in
- *  
+ *
  * journal_init_inode creates a journal which maps an on-disk inode as
  * the journal.  The inode must exist already, must support bmap() and
  * must have all data blocks preallocated.
@@ -763,7 +763,7 @@ journal_t * journal_init_inode (struct inode *inode)
 	journal->j_inode = inode;
 	jbd_debug(1,
 		  "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
-		  journal, inode->i_sb->s_id, inode->i_ino, 
+		  journal, inode->i_sb->s_id, inode->i_ino,
 		  (long long) inode->i_size,
 		  inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
 
@@ -798,10 +798,10 @@ journal_t * journal_init_inode (struct inode *inode)
 	return journal;
 }
 
-/* 
+/*
  * If the journal init or create aborts, we need to mark the journal
  * superblock as being NULL to prevent the journal destroy from writing
- * back a bogus superblock. 
+ * back a bogus superblock.
  */
 static void journal_fail_superblock (journal_t *journal)
 {
@@ -844,13 +844,13 @@ static int journal_reset(journal_t *journal)
 	return 0;
 }
 
-/** 
+/**
  * int journal_create() - Initialise the new journal file
  * @journal: Journal to create. This structure must have been initialised
- * 
+ *
  * Given a journal_t structure which tells us which disk blocks we can
  * use, create a new journal superblock and initialise all of the
- * journal fields from scratch.  
+ * journal fields from scratch.
  **/
 int journal_create(journal_t *journal)
 {
@@ -915,7 +915,7 @@ int journal_create(journal_t *journal)
 	return journal_reset(journal);
 }
 
-/** 
+/**
  * void journal_update_superblock() - Update journal sb on disk.
  * @journal: The journal to update.
  * @wait: Set to '0' if you don't want to wait for IO completion.
@@ -939,7 +939,7 @@ void journal_update_superblock(journal_t *journal, int wait)
 				journal->j_transaction_sequence) {
 		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
 			"(start %ld, seq %d, errno %d)\n",
-			journal->j_tail, journal->j_tail_sequence, 
+			journal->j_tail, journal->j_tail_sequence,
 			journal->j_errno);
 		goto out;
 	}
@@ -1062,7 +1062,7 @@ static int load_superblock(journal_t *journal)
 /**
  * int journal_load() - Read journal from disk.
  * @journal: Journal to act on.
- * 
+ *
  * Given a journal_t structure which tells us which disk blocks contain
  * a journal, read the journal from disk to initialise the in-memory
  * structures.
@@ -1172,9 +1172,9 @@ void journal_destroy(journal_t *journal)
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
  * @incompat: bitmask of incompatible features
- * 
+ *
  * Check whether the journal uses all of a given set of
- * features.  Return true (non-zero) if it does. 
+ * features.  Return true (non-zero) if it does.
  **/
 
 int journal_check_used_features (journal_t *journal, unsigned long compat,
@@ -1203,7 +1203,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
  * @incompat: bitmask of incompatible features
- * 
+ *
  * Check whether the journaling code supports the use of
  * all of a given set of features on this journal.  Return true
  * (non-zero) if it can. */
@@ -1241,7 +1241,7 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
  * @incompat: bitmask of incompatible features
  *
  * Mark a given journal feature as present on the
- * superblock.  Returns true if the requested features could be set. 
+ * superblock.  Returns true if the requested features could be set.
  *
  */
 
@@ -1327,7 +1327,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
 /**
  * int journal_flush () - Flush journal
  * @journal: Journal to act on.
- * 
+ *
  * Flush all data for a given journal to disk and empty the journal.
  * Filesystems can use this when remounting readonly to ensure that
  * recovery does not need to happen on remount.
@@ -1394,7 +1394,7 @@ int journal_flush(journal_t *journal)
  * int journal_wipe() - Wipe journal contents
  * @journal: Journal to act on.
  * @write: flag (see below)
- * 
+ *
  * Wipe out all of the contents of a journal, safely.  This will produce
  * a warning if the journal contains any valid recovery information.
  * Must be called between journal_init_*() and journal_load().
@@ -1449,7 +1449,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
 
 /*
  * Journal abort has very specific semantics, which we describe
- * for journal abort. 
+ * for journal abort.
  *
  * Two internal function, which provide abort to te jbd layer
  * itself are here.
@@ -1504,7 +1504,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
  * Perform a complete, immediate shutdown of the ENTIRE
  * journal (not of a single transaction).  This operation cannot be
  * undone without closing and reopening the journal.
- *           
+ *
  * The journal_abort function is intended to support higher level error
  * recovery mechanisms such as the ext2/ext3 remount-readonly error
  * mode.
@@ -1538,7 +1538,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
  * supply an errno; a null errno implies that absolutely no further
  * writes are done to the journal (unless there are any already in
  * progress).
- * 
+ *
  */
 
 void journal_abort(journal_t *journal, int errno)
@@ -1546,7 +1546,7 @@ void journal_abort(journal_t *journal, int errno)
 	__journal_abort_soft(journal, errno);
 }
 
-/** 
+/**
  * int journal_errno () - returns the journal's error state.
  * @journal: journal to examine.
  *
@@ -1570,7 +1570,7 @@ int journal_errno(journal_t *journal)
 	return err;
 }
 
-/** 
+/**
  * int journal_clear_err () - clears the journal's error state
  * @journal: journal to act on.
  *
@@ -1590,7 +1590,7 @@ int journal_clear_err(journal_t *journal)
 	return err;
 }
 
-/** 
+/**
  * void journal_ack_err() - Ack journal err.
  * @journal: journal to act on.
  *
@@ -1612,7 +1612,7 @@ int journal_blocks_per_page(struct inode *inode)
 
 /*
  * Simple support for retrying memory allocations.  Introduced to help to
- * debug different VM deadlock avoidance strategies. 
+ * debug different VM deadlock avoidance strategies.
  */
 void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 {
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index de5bafb4e853..73bb64806ed3 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -1,6 +1,6 @@
 /*
  * linux/fs/recovery.c
- * 
+ *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
  *
  * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
@@ -10,7 +10,7 @@
  * option, any later version, incorporated herein by reference.
  *
  * Journal recovery routines for the generic filesystem journaling code;
- * part of the ext2fs journaling system.  
+ * part of the ext2fs journaling system.
  */
 
 #ifndef __KERNEL__
@@ -25,9 +25,9 @@
 
 /*
  * Maintain information about the progress of the recovery job, so that
- * the different passes can carry information between them. 
+ * the different passes can carry information between them.
  */
-struct recovery_info 
+struct recovery_info
 {
 	tid_t		start_transaction;
 	tid_t		end_transaction;
@@ -116,7 +116,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
 	err = 0;
 
 failed:
-	if (nbufs) 
+	if (nbufs)
 		journal_brelse_array(bufs, nbufs);
 	return err;
 }
@@ -128,7 +128,7 @@ failed:
  * Read a block from the journal
  */
 
-static int jread(struct buffer_head **bhp, journal_t *journal, 
+static int jread(struct buffer_head **bhp, journal_t *journal,
 		 unsigned int offset)
 {
 	int err;
@@ -212,14 +212,14 @@ do {									\
 /**
  * journal_recover - recovers a on-disk journal
  * @journal: the journal to recover
- * 
+ *
  * The primary function for recovering the log contents when mounting a
- * journaled device.  
+ * journaled device.
  *
  * Recovery is done in three passes.  In the first pass, we look for the
  * end of the log.  In the second, we assemble the list of revoke
  * blocks.  In the third and final pass, we replay any un-revoked blocks
- * in the log.  
+ * in the log.
  */
 int journal_recover(journal_t *journal)
 {
@@ -231,10 +231,10 @@ int journal_recover(journal_t *journal)
 	memset(&info, 0, sizeof(info));
 	sb = journal->j_superblock;
 
-	/* 
+	/*
 	 * The journal superblock's s_start field (the current log head)
 	 * is always zero if, and only if, the journal was cleanly
-	 * unmounted.  
+	 * unmounted.
 	 */
 
 	if (!sb->s_start) {
@@ -253,7 +253,7 @@ int journal_recover(journal_t *journal)
 	jbd_debug(0, "JBD: recovery, exit status %d, "
 		  "recovered transactions %u to %u\n",
 		  err, info.start_transaction, info.end_transaction);
-	jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", 
+	jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
 		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
 	/* Restart the log at the next transaction ID, thus invalidating
@@ -268,15 +268,15 @@ int journal_recover(journal_t *journal)
 /**
  * journal_skip_recovery - Start journal and wipe exiting records
  * @journal: journal to startup
- * 
+ *
  * Locate any valid recovery information from the journal and set up the
  * journal structures in memory to ignore it (presumably because the
- * caller has evidence that it is out of date).  
+ * caller has evidence that it is out of date).
  * This function does'nt appear to be exorted..
  *
  * We perform one pass over the journal to allow us to tell the user how
  * much recovery information is being erased, and to let us initialise
- * the journal transaction sequence numbers to the next unused ID. 
+ * the journal transaction sequence numbers to the next unused ID.
  */
 int journal_skip_recovery(journal_t *journal)
 {
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
 #ifdef CONFIG_JBD_DEBUG
 		int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
 #endif
-		jbd_debug(0, 
+		jbd_debug(0,
 			  "JBD: ignoring %d transaction%s from the journal.\n",
 			  dropped, (dropped == 1) ? "" : "s");
 		journal->j_transaction_sequence = ++info.end_transaction;
@@ -324,10 +324,10 @@ static int do_one_pass(journal_t *journal,
 	MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
 			       / sizeof(journal_block_tag_t));
 
-	/* 
+	/*
 	 * First thing is to establish what we expect to find in the log
 	 * (in terms of transaction IDs), and where (in terms of log
-	 * block offsets): query the superblock.  
+	 * block offsets): query the superblock.
 	 */
 
 	sb = journal->j_superblock;
@@ -344,7 +344,7 @@ static int do_one_pass(journal_t *journal,
 	 * Now we walk through the log, transaction by transaction,
 	 * making sure that each transaction has a commit block in the
 	 * expected place.  Each complete transaction gets replayed back
-	 * into the main filesystem. 
+	 * into the main filesystem.
 	 */
 
 	while (1) {
@@ -379,8 +379,8 @@ static int do_one_pass(journal_t *journal,
 		next_log_block++;
 		wrap(journal, next_log_block);
 
-		/* What kind of buffer is it? 
-		 * 
+		/* What kind of buffer is it?
+		 *
 		 * If it is a descriptor block, check that it has the
 		 * expected sequence number.  Otherwise, we're all done
 		 * here. */
@@ -394,7 +394,7 @@ static int do_one_pass(journal_t *journal,
 
 		blocktype = be32_to_cpu(tmp->h_blocktype);
 		sequence = be32_to_cpu(tmp->h_sequence);
-		jbd_debug(3, "Found magic %d, sequence %d\n", 
+		jbd_debug(3, "Found magic %d, sequence %d\n",
 			  blocktype, sequence);
 
 		if (sequence != next_commit_ID) {
@@ -438,7 +438,7 @@ static int do_one_pass(journal_t *journal,
 					/* Recover what we can, but
 					 * report failure at the end. */
 					success = err;
-					printk (KERN_ERR 
+					printk (KERN_ERR
 						"JBD: IO error %d recovering "
 						"block %ld in log\n",
 						err, io_block);
@@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal,
 					 * revoked, then we're all done
 					 * here. */
 					if (journal_test_revoke
-					    (journal, blocknr, 
+					    (journal, blocknr,
 					     next_commit_ID)) {
 						brelse(obh);
 						++info->nr_revoke_hits;
@@ -465,7 +465,7 @@ static int do_one_pass(journal_t *journal,
 							blocknr,
 							journal->j_blocksize);
 					if (nbh == NULL) {
-						printk(KERN_ERR 
+						printk(KERN_ERR
 						       "JBD: Out of memory "
 						       "during recovery.\n");
 						err = -ENOMEM;
@@ -537,7 +537,7 @@ static int do_one_pass(journal_t *journal,
 	}
 
  done:
-	/* 
+	/*
 	 * We broke out of the log scan loop: either we came to the
 	 * known end of the log or we found an unexpected block in the
 	 * log.  If the latter happened, then we know that the "current"
@@ -567,7 +567,7 @@ static int do_one_pass(journal_t *journal,
 
 /* Scan a revoke record, marking all blocks mentioned as revoked. */
 
-static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 
+static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 			       tid_t sequence, struct recovery_info *info)
 {
 	journal_revoke_header_t *header;
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index a56144183462..c532429d8d9b 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -1,6 +1,6 @@
 /*
  * linux/fs/revoke.c
- * 
+ *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
  *
  * Copyright 2000 Red Hat corp --- All Rights Reserved
@@ -15,10 +15,10 @@
  * Revoke is the mechanism used to prevent old log records for deleted
  * metadata from being replayed on top of newer data using the same
  * blocks.  The revoke mechanism is used in two separate places:
- * 
+ *
  * + Commit: during commit we write the entire list of the current
  *   transaction's revoked blocks to the journal
- * 
+ *
  * + Recovery: during recovery we record the transaction ID of all
  *   revoked blocks.  If there are multiple revoke records in the log
  *   for a single block, only the last one counts, and if there is a log
@@ -29,7 +29,7 @@
  * single transaction:
  *
  * Block is revoked and then journaled:
- *   The desired end result is the journaling of the new block, so we 
+ *   The desired end result is the journaling of the new block, so we
  *   cancel the revoke before the transaction commits.
  *
  * Block is journaled and then revoked:
@@ -41,7 +41,7 @@
  *   transaction must have happened after the block was journaled and so
  *   the revoke must take precedence.
  *
- * Block is revoked and then written as data: 
+ * Block is revoked and then written as data:
  *   The data write is allowed to succeed, but the revoke is _not_
  *   cancelled.  We still need to prevent old log records from
  *   overwriting the new data.  We don't even need to clear the revoke
@@ -54,7 +54,7 @@
  *			buffer has not been revoked, and cancel_revoke
  *			need do nothing.
  * RevokeValid set, Revoked set:
- *			buffer has been revoked.  
+ *			buffer has been revoked.
  */
 
 #ifndef __KERNEL__
@@ -77,7 +77,7 @@ static kmem_cache_t *revoke_table_cache;
    journal replay, this involves recording the transaction ID of the
    last transaction to revoke this block. */
 
-struct jbd_revoke_record_s 
+struct jbd_revoke_record_s
 {
 	struct list_head  hash;
 	tid_t		  sequence;	/* Used for recovery only */
@@ -90,8 +90,8 @@ struct jbd_revoke_table_s
 {
 	/* It is conceivable that we might want a larger hash table
 	 * for recovery.  Must be a power of two. */
-	int		  hash_size; 
-	int		  hash_shift; 
+	int		  hash_size;
+	int		  hash_shift;
 	struct list_head *hash_table;
 };
 
@@ -301,22 +301,22 @@ void journal_destroy_revoke(journal_t *journal)
 
 #ifdef __KERNEL__
 
-/* 
+/*
  * journal_revoke: revoke a given buffer_head from the journal.  This
  * prevents the block from being replayed during recovery if we take a
  * crash after this current transaction commits.  Any subsequent
  * metadata writes of the buffer in this transaction cancel the
- * revoke.  
+ * revoke.
  *
  * Note that this call may block --- it is up to the caller to make
  * sure that there are no further calls to journal_write_metadata
  * before the revoke is complete.  In ext3, this implies calling the
  * revoke before clearing the block bitmap when we are deleting
- * metadata. 
+ * metadata.
  *
  * Revoke performs a journal_forget on any buffer_head passed in as a
  * parameter, but does _not_ forget the buffer_head if the bh was only
- * found implicitly. 
+ * found implicitly.
  *
  * bh_in may not be a journalled buffer - it may have come off
  * the hash tables without an attached journal_head.
@@ -325,7 +325,7 @@ void journal_destroy_revoke(journal_t *journal)
  * by one.
  */
 
-int journal_revoke(handle_t *handle, unsigned long blocknr, 
+int journal_revoke(handle_t *handle, unsigned long blocknr,
 		   struct buffer_head *bh_in)
 {
 	struct buffer_head *bh = NULL;
@@ -487,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal)
 	else
 		journal->j_revoke = journal->j_revoke_table[0];
 
-	for (i = 0; i < journal->j_revoke->hash_size; i++) 
+	for (i = 0; i < journal->j_revoke->hash_size; i++)
 		INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
 }
 
@@ -498,7 +498,7 @@ void journal_switch_revoke_table(journal_t *journal)
  * Called with the journal lock held.
  */
 
-void journal_write_revoke_records(journal_t *journal, 
+void journal_write_revoke_records(journal_t *journal,
 				  transaction_t *transaction)
 {
 	struct journal_head *descriptor;
@@ -507,7 +507,7 @@ void journal_write_revoke_records(journal_t *journal,
 	struct list_head *hash_list;
 	int i, offset, count;
 
-	descriptor = NULL; 
+	descriptor = NULL;
 	offset = 0;
 	count = 0;
 
@@ -519,10 +519,10 @@ void journal_write_revoke_records(journal_t *journal,
 		hash_list = &revoke->hash_table[i];
 
 		while (!list_empty(hash_list)) {
-			record = (struct jbd_revoke_record_s *) 
+			record = (struct jbd_revoke_record_s *)
 				hash_list->next;
 			write_one_revoke_record(journal, transaction,
-						&descriptor, &offset, 
+						&descriptor, &offset,
 						record);
 			count++;
 			list_del(&record->hash);
@@ -534,14 +534,14 @@ void journal_write_revoke_records(journal_t *journal,
 	jbd_debug(1, "Wrote %d revoke records\n", count);
 }
 
-/* 
+/*
  * Write out one revoke record.  We need to create a new descriptor
- * block if the old one is full or if we have not already created one.  
+ * block if the old one is full or if we have not already created one.
  */
 
-static void write_one_revoke_record(journal_t *journal, 
+static void write_one_revoke_record(journal_t *journal,
 				    transaction_t *transaction,
-				    struct journal_head **descriptorp, 
+				    struct journal_head **descriptorp,
 				    int *offsetp,
 				    struct jbd_revoke_record_s *record)
 {
@@ -584,21 +584,21 @@ static void write_one_revoke_record(journal_t *journal,
 		*descriptorp = descriptor;
 	}
 
-	* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = 
+	* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
 		cpu_to_be32(record->blocknr);
 	offset += 4;
 	*offsetp = offset;
 }
 
-/* 
+/*
  * Flush a revoke descriptor out to the journal.  If we are aborting,
  * this is a noop; otherwise we are generating a buffer which needs to
  * be waited for during commit, so it has to go onto the appropriate
  * journal buffer list.
  */
 
-static void flush_descriptor(journal_t *journal, 
-			     struct journal_head *descriptor, 
+static void flush_descriptor(journal_t *journal,
+			     struct journal_head *descriptor,
 			     int offset)
 {
 	journal_revoke_header_t *header;
@@ -618,7 +618,7 @@ static void flush_descriptor(journal_t *journal,
 }
 #endif
 
-/* 
+/*
  * Revoke support for recovery.
  *
  * Recovery needs to be able to:
@@ -629,7 +629,7 @@ static void flush_descriptor(journal_t *journal,
  *  check whether a given block in a given transaction should be replayed
  *  (ie. has not been revoked by a revoke record in that or a subsequent
  *  transaction)
- * 
+ *
  *  empty the revoke table after recovery.
  */
 
@@ -637,11 +637,11 @@ static void flush_descriptor(journal_t *journal,
  * First, setting revoke records.  We create a new revoke record for
  * every block ever revoked in the log as we scan it for recovery, and
  * we update the existing records if we find multiple revokes for a
- * single block. 
+ * single block.
  */
 
-int journal_set_revoke(journal_t *journal, 
-		       unsigned long blocknr, 
+int journal_set_revoke(journal_t *journal,
+		       unsigned long blocknr,
 		       tid_t sequence)
 {
 	struct jbd_revoke_record_s *record;
@@ -653,18 +653,18 @@ int journal_set_revoke(journal_t *journal,
 		if (tid_gt(sequence, record->sequence))
 			record->sequence = sequence;
 		return 0;
-	} 
+	}
 	return insert_revoke_hash(journal, blocknr, sequence);
 }
 
-/* 
+/*
  * Test revoke records.  For a given block referenced in the log, has
  * that block been revoked?  A revoke record with a given transaction
  * sequence number revokes all blocks in that transaction and earlier
  * ones, but later transactions still need replayed.
  */
 
-int journal_test_revoke(journal_t *journal, 
+int journal_test_revoke(journal_t *journal,
 			unsigned long blocknr,
 			tid_t sequence)
 {
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f5169a96260e..bf7fd7117817 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1,6 +1,6 @@
 /*
  * linux/fs/transaction.c
- * 
+ *
  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
  *
  * Copyright 1998 Red Hat corp --- All Rights Reserved
@@ -10,7 +10,7 @@
  * option, any later version, incorporated herein by reference.
  *
  * Generic filesystem transaction handling code; part of the ext2fs
- * journaling system.  
+ * journaling system.
  *
  * This file manages transactions (compound commits managed by the
  * journaling code) and handles (individual atomic operations by the
@@ -74,7 +74,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
  * start_this_handle: Given a handle, deal with any locking or stalling
  * needed to make sure that there is enough journal space for the handle
  * to begin.  Attach the handle to a transaction and set up the
- * transaction's buffer credits.  
+ * transaction's buffer credits.
  */
 
 static int start_this_handle(journal_t *journal, handle_t *handle)
@@ -117,7 +117,7 @@ repeat_locked:
 	if (is_journal_aborted(journal) ||
 	    (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
 		spin_unlock(&journal->j_state_lock);
-		ret = -EROFS; 
+		ret = -EROFS;
 		goto out;
 	}
 
@@ -182,7 +182,7 @@ repeat_locked:
 		goto repeat;
 	}
 
-	/* 
+	/*
 	 * The commit code assumes that it can get enough log space
 	 * without forcing a checkpoint.  This is *critical* for
 	 * correctness: a checkpoint of a buffer which is also
@@ -191,7 +191,7 @@ repeat_locked:
 	 *
 	 * We must therefore ensure the necessary space in the journal
 	 * *before* starting to dirty potentially checkpointed buffers
-	 * in the new transaction. 
+	 * in the new transaction.
 	 *
 	 * The worst part is, any transaction currently committing can
 	 * reduce the free space arbitrarily.  Be careful to account for
@@ -246,13 +246,13 @@ static handle_t *new_handle(int nblocks)
 }
 
 /**
- * handle_t *journal_start() - Obtain a new handle.  
+ * handle_t *journal_start() - Obtain a new handle.
  * @journal: Journal to start transaction on.
  * @nblocks: number of block buffer we might modify
  *
  * We make sure that the transaction can guarantee at least nblocks of
  * modified buffers in the log.  We block until the log can guarantee
- * that much space.  
+ * that much space.
  *
  * This function is visible to journal users (like ext3fs), so is not
  * called with the journal already locked.
@@ -292,11 +292,11 @@ handle_t *journal_start(journal_t *journal, int nblocks)
  * int journal_extend() - extend buffer credits.
  * @handle:  handle to 'extend'
  * @nblocks: nr blocks to try to extend by.
- * 
+ *
  * Some transactions, such as large extends and truncates, can be done
  * atomically all at once or in several stages.  The operation requests
  * a credit for a number of buffer modications in advance, but can
- * extend its credit if it needs more.  
+ * extend its credit if it needs more.
  *
  * journal_extend tries to give the running handle more buffer credits.
  * It does not guarantee that allocation - this is a best-effort only.
@@ -363,7 +363,7 @@ out:
  * int journal_restart() - restart a handle .
  * @handle:  handle to restart
  * @nblocks: nr credits requested
- * 
+ *
  * Restart a handle for a multi-transaction filesystem
  * operation.
  *
@@ -462,7 +462,7 @@ void journal_lock_updates(journal_t *journal)
 /**
  * void journal_unlock_updates (journal_t* journal) - release barrier
  * @journal:  Journal to release the barrier on.
- * 
+ *
  * Release a transaction barrier obtained with journal_lock_updates().
  *
  * Should be called without the journal lock held.
@@ -547,8 +547,8 @@ repeat:
 	jbd_lock_bh_state(bh);
 
 	/* We now hold the buffer lock so it is safe to query the buffer
-	 * state.  Is the buffer dirty? 
-	 * 
+	 * state.  Is the buffer dirty?
+	 *
 	 * If so, there are two possibilities.  The buffer may be
 	 * non-journaled, and undergoing a quite legitimate writeback.
 	 * Otherwise, it is journaled, and we don't expect dirty buffers
@@ -566,7 +566,7 @@ repeat:
 		 */
 		if (jh->b_transaction) {
 			J_ASSERT_JH(jh,
-				jh->b_transaction == transaction || 
+				jh->b_transaction == transaction ||
 				jh->b_transaction ==
 					journal->j_committing_transaction);
 			if (jh->b_next_transaction)
@@ -653,7 +653,7 @@ repeat:
 		 * buffer had better remain locked during the kmalloc,
 		 * but that should be true --- we hold the journal lock
 		 * still and the buffer is already on the BUF_JOURNAL
-		 * list so won't be flushed. 
+		 * list so won't be flushed.
 		 *
 		 * Subtle point, though: if this is a get_undo_access,
 		 * then we will be relying on the frozen_data to contain
@@ -765,8 +765,8 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
  * manually rather than reading off disk), then we need to keep the
  * buffer_head locked until it has been completely filled with new
  * data.  In this case, we should be able to make the assertion that
- * the bh is not already part of an existing transaction.  
- * 
+ * the bh is not already part of an existing transaction.
+ *
  * The buffer should already be locked by the caller by this point.
  * There is no lock ranking violation: it was a newly created,
  * unlocked buffer beforehand. */
@@ -778,7 +778,7 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
  *
  * Call this if you create a new bh.
  */
-int journal_get_create_access(handle_t *handle, struct buffer_head *bh) 
+int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
@@ -847,13 +847,13 @@ out:
  * do not reuse freed space until the deallocation has been committed,
  * since if we overwrote that space we would make the delete
  * un-rewindable in case of a crash.
- * 
+ *
  * To deal with that, journal_get_undo_access requests write access to a
  * buffer for parts of non-rewindable operations such as delete
  * operations on the bitmaps.  The journaling code must keep a copy of
  * the buffer's contents prior to the undo_access call until such time
  * as we know that the buffer has definitely been committed to disk.
- * 
+ *
  * We never need to know which transaction the committed data is part
  * of, buffers touched here are guaranteed to be dirtied later and so
  * will be committed to a new transaction in due course, at which point
@@ -911,13 +911,13 @@ out:
 	return err;
 }
 
-/** 
+/**
  * int journal_dirty_data() -  mark a buffer as containing dirty data which
  *                             needs to be flushed before we can commit the
- *                             current transaction.  
+ *                             current transaction.
  * @handle: transaction
  * @bh: bufferhead to mark
- * 
+ *
  * The buffer is placed on the transaction's data list and is marked as
  * belonging to the transaction.
  *
@@ -946,15 +946,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 
 	/*
 	 * What if the buffer is already part of a running transaction?
-	 * 
+	 *
 	 * There are two cases:
 	 * 1) It is part of the current running transaction.  Refile it,
 	 *    just in case we have allocated it as metadata, deallocated
-	 *    it, then reallocated it as data. 
+	 *    it, then reallocated it as data.
 	 * 2) It is part of the previous, still-committing transaction.
 	 *    If all we want to do is to guarantee that the buffer will be
 	 *    written to disk before this new transaction commits, then
-	 *    being sure that the *previous* transaction has this same 
+	 *    being sure that the *previous* transaction has this same
 	 *    property is sufficient for us!  Just leave it on its old
 	 *    transaction.
 	 *
@@ -1076,18 +1076,18 @@ no_journal:
 	return 0;
 }
 
-/** 
+/**
  * int journal_dirty_metadata() -  mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
- * @bh: buffer to mark 
- * 
+ * @bh: buffer to mark
+ *
  * mark dirty metadata which needs to be journaled as part of the current
  * transaction.
  *
  * The buffer is placed on the transaction's metadata list and is marked
- * as belonging to the transaction.  
+ * as belonging to the transaction.
  *
- * Returns error number or 0 on success.  
+ * Returns error number or 0 on success.
  *
  * Special care needs to be taken if the buffer already belongs to the
  * current committing transaction (in which case we should have frozen
@@ -1135,11 +1135,11 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
 	set_buffer_jbddirty(bh);
 
-	/* 
+	/*
 	 * Metadata already on the current transaction list doesn't
 	 * need to be filed.  Metadata on another transaction's list must
 	 * be committing, and will be refiled once the commit completes:
-	 * leave it alone for now. 
+	 * leave it alone for now.
 	 */
 	if (jh->b_transaction != transaction) {
 		JBUFFER_TRACE(jh, "already on other transaction");
@@ -1165,7 +1165,7 @@ out:
 	return 0;
 }
 
-/* 
+/*
  * journal_release_buffer: undo a get_write_access without any buffer
  * updates, if the update decided in the end that it didn't need access.
  *
@@ -1176,20 +1176,20 @@ journal_release_buffer(handle_t *handle, struct buffer_head *bh)
 	BUFFER_TRACE(bh, "entry");
 }
 
-/** 
+/**
  * void journal_forget() - bforget() for potentially-journaled buffers.
  * @handle: transaction handle
  * @bh:     bh to 'forget'
  *
  * We can only do the bforget if there are no commits pending against the
  * buffer.  If the buffer is dirty in the current running transaction we
- * can safely unlink it. 
+ * can safely unlink it.
  *
  * bh may not be a journalled buffer at all - it may be a non-JBD
  * buffer which came off the hashtable.  Check for this.
  *
  * Decrements bh->b_count by one.
- * 
+ *
  * Allow this call even if the handle has aborted --- it may be part of
  * the caller's cleanup after an abort.
  */
@@ -1237,7 +1237,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 
 		drop_reserve = 1;
 
-		/* 
+		/*
 		 * We are no longer going to journal this buffer.
 		 * However, the commit of this transaction is still
 		 * important to the buffer: the delete that we are now
@@ -1246,7 +1246,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 		 *
 		 * So, if we have a checkpoint on the buffer, we should
 		 * now refile the buffer on our BJ_Forget list so that
-		 * we know to remove the checkpoint after we commit. 
+		 * we know to remove the checkpoint after we commit.
 		 */
 
 		if (jh->b_cp_transaction) {
@@ -1264,7 +1264,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
 			}
 		}
 	} else if (jh->b_transaction) {
-		J_ASSERT_JH(jh, (jh->b_transaction == 
+		J_ASSERT_JH(jh, (jh->b_transaction ==
 				 journal->j_committing_transaction));
 		/* However, if the buffer is still owned by a prior
 		 * (committing) transaction, we can't drop it yet... */
@@ -1294,7 +1294,7 @@ drop:
 /**
  * int journal_stop() - complete a transaction
  * @handle: tranaction to complete.
- * 
+ *
  * All done for a particular handle.
  *
  * There is not much action needed here.  We just return any remaining
@@ -1303,7 +1303,7 @@ drop:
  * filesystem is marked for synchronous update.
  *
  * journal_stop itself will not usually return an error, but it may
- * do so in unusual circumstances.  In particular, expect it to 
+ * do so in unusual circumstances.  In particular, expect it to
  * return -EIO if a journal_abort has been executed since the
  * transaction began.
  */
@@ -1388,7 +1388,7 @@ int journal_stop(handle_t *handle)
 
 		/*
 		 * Special case: JFS_SYNC synchronous updates require us
-		 * to wait for the commit to complete.  
+		 * to wait for the commit to complete.
 		 */
 		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
 			err = log_wait_commit(journal, tid);
@@ -1439,7 +1439,7 @@ int journal_force_commit(journal_t *journal)
  * jbd_lock_bh_state(jh2bh(jh)) is held.
  */
 
-static inline void 
+static inline void
 __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
 {
 	if (!*list) {
@@ -1454,7 +1454,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
 	}
 }
 
-/* 
+/*
  * Remove a buffer from a transaction list, given the transaction's list
  * head pointer.
  *
@@ -1475,7 +1475,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
 	jh->b_tnext->b_tprev = jh->b_tprev;
 }
 
-/* 
+/*
  * Remove a buffer from the appropriate transaction list.
  *
  * Note that this function can *change* the value of
@@ -1595,17 +1595,17 @@ out:
 }
 
 
-/** 
+/**
  * int journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
  * @page: to try and free
  * @unused_gfp_mask: unused
  *
- * 
+ *
  * For all the buffers on this page,
  * if they are fully written out ordered data, move them onto BUF_CLEAN
  * so try_to_free_buffers() can reap them.
- * 
+ *
  * This function returns non-zero if we wish try_to_free_buffers()
  * to be called. We do this if the page is releasable by try_to_free_buffers().
  * We also do it if the page has locked or dirty buffers and the caller wants
@@ -1629,7 +1629,7 @@ out:
  * cannot happen because we never reallocate freed data as metadata
  * while the data is part of a transaction.  Yes?
  */
-int journal_try_to_free_buffers(journal_t *journal, 
+int journal_try_to_free_buffers(journal_t *journal,
 				struct page *page, gfp_t unused_gfp_mask)
 {
 	struct buffer_head *head;
@@ -1697,7 +1697,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 }
 
 /*
- * journal_invalidatepage 
+ * journal_invalidatepage
  *
  * This code is tricky.  It has a number of cases to deal with.
  *
@@ -1705,15 +1705,15 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
  *
  * i_size must be updated on disk before we start calling invalidatepage on the
  * data.
- * 
+ *
  *  This is done in ext3 by defining an ext3_setattr method which
  *  updates i_size before truncate gets going.  By maintaining this
  *  invariant, we can be sure that it is safe to throw away any buffers
  *  attached to the current transaction: once the transaction commits,
  *  we know that the data will not be needed.
- * 
+ *
  *  Note however that we can *not* throw away data belonging to the
- *  previous, committing transaction!  
+ *  previous, committing transaction!
  *
  * Any disk blocks which *are* part of the previous, committing
  * transaction (and which therefore cannot be discarded immediately) are
@@ -1732,7 +1732,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
  * don't make guarantees about the order in which data hits disk --- in
  * particular we don't guarantee that new dirty data is flushed before
  * transaction commit --- so it is always safe just to discard data
- * immediately in that mode.  --sct 
+ * immediately in that mode.  --sct
  */
 
 /*
@@ -1876,9 +1876,9 @@ zap_buffer_unlocked:
 	return may_free;
 }
 
-/** 
+/**
  * void journal_invalidatepage()
- * @journal: journal to use for flush... 
+ * @journal: journal to use for flush...
  * @page:    page to flush
  * @offset:  length of page to invalidate.
  *
@@ -1886,7 +1886,7 @@ zap_buffer_unlocked:
  *
  */
 void journal_invalidatepage(journal_t *journal,
-		      struct page *page, 
+		      struct page *page,
 		      unsigned long offset)
 {
 	struct buffer_head *head, *bh, *next;
@@ -1924,8 +1924,8 @@ void journal_invalidatepage(journal_t *journal,
 	}
 }
 
-/* 
- * File a buffer on the given transaction list. 
+/*
+ * File a buffer on the given transaction list.
  */
 void __journal_file_buffer(struct journal_head *jh,
 			transaction_t *transaction, int jlist)
@@ -1948,7 +1948,7 @@ void __journal_file_buffer(struct journal_head *jh,
 	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
 	 * state. */
 
-	if (jlist == BJ_Metadata || jlist == BJ_Reserved || 
+	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
 	    jlist == BJ_Shadow || jlist == BJ_Forget) {
 		if (test_clear_buffer_dirty(bh) ||
 		    test_clear_buffer_jbddirty(bh))
@@ -2008,7 +2008,7 @@ void journal_file_buffer(struct journal_head *jh,
 	jbd_unlock_bh_state(jh2bh(jh));
 }
 
-/* 
+/*
  * Remove a buffer from its current buffer list in preparation for
  * dropping it from its current transaction entirely.  If the buffer has
  * already started to be used by a subsequent transaction, refile the
@@ -2060,7 +2060,7 @@ void __journal_refile_buffer(struct journal_head *jh)
  * to the caller to remove the journal_head if necessary.  For the
  * unlocked journal_refile_buffer call, the caller isn't going to be
  * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak. 
+ * ourselves to avoid a jh leak.
  *
  * *** The journal_head may be freed by this call! ***
  */
diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
index c8307c02dd07..ce0e6109aff0 100644
--- a/include/linux/ext3_jbd.h
+++ b/include/linux/ext3_jbd.h
@@ -23,7 +23,7 @@
 
 /* Define the number of blocks we need to account to a transaction to
  * modify one block of data.
- * 
+ *
  * We may have to touch one inode, one bitmap buffer, up to three
  * indirection blocks, the group and superblock summaries, and the data
  * block to complete the transaction.  */
@@ -88,16 +88,16 @@
 #endif
 
 int
-ext3_mark_iloc_dirty(handle_t *handle, 
+ext3_mark_iloc_dirty(handle_t *handle,
 		     struct inode *inode,
 		     struct ext3_iloc *iloc);
 
-/* 
+/*
  * On success, We end up with an outstanding reference count against
- * iloc->bh.  This _must_ be cleaned up later. 
+ * iloc->bh.  This _must_ be cleaned up later.
  */
 
-int ext3_reserve_inode_write(handle_t *handle, struct inode *inode, 
+int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
 			struct ext3_iloc *iloc);
 
 int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index a04c154c5207..7d847931ee5a 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -1,6 +1,6 @@
 /*
  * linux/include/linux/jbd.h
- * 
+ *
  * Written by Stephen C. Tweedie <sct@redhat.com>
  *
  * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
@@ -97,8 +97,8 @@ extern void jbd_slab_free(void *ptr, size_t size);
  * number of outstanding buffers possible at any time.  When the
  * operation completes, any buffer credits not used are credited back to
  * the transaction, so that at all times we know how many buffers the
- * outstanding updates on a transaction might possibly touch. 
- * 
+ * outstanding updates on a transaction might possibly touch.
+ *
  * This is an opaque datatype.
  **/
 typedef struct handle_s		handle_t;	/* Atomic operation type */
@@ -108,7 +108,7 @@ typedef struct handle_s		handle_t;	/* Atomic operation type */
  * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem.
  *
  * journal_t is linked to from the fs superblock structure.
- * 
+ *
  * We use the journal_t to keep track of all outstanding transaction
  * activity on the filesystem, and to manage the state of the log
  * writing process.
@@ -128,7 +128,7 @@ typedef struct journal_s	journal_t;	/* Journal control structure */
  * On-disk structures
  */
 
-/* 
+/*
  * Descriptor block types:
  */
 
@@ -149,8 +149,8 @@ typedef struct journal_header_s
 } journal_header_t;
 
 
-/* 
- * The block tag: used to describe a single buffer in the journal 
+/*
+ * The block tag: used to describe a single buffer in the journal
  */
 typedef struct journal_block_tag_s
 {
@@ -158,9 +158,9 @@ typedef struct journal_block_tag_s
 	__be32		t_flags;	/* See below */
 } journal_block_tag_t;
 
-/* 
+/*
  * The revoke descriptor: used on disk to describe a series of blocks to
- * be revoked from the log 
+ * be revoked from the log
  */
 typedef struct journal_revoke_header_s
 {
@@ -374,10 +374,10 @@ struct jbd_revoke_table_s;
  **/
 
 /* Docbook can't yet cope with the bit fields, but will leave the documentation
- * in so it can be fixed later. 
+ * in so it can be fixed later.
  */
 
-struct handle_s 
+struct handle_s
 {
 	/* Which compound transaction is this update a part of? */
 	transaction_t		*h_transaction;
@@ -435,7 +435,7 @@ struct handle_s
  *
  */
 
-struct transaction_s 
+struct transaction_s
 {
 	/* Pointer to the journal for this transaction. [no locking] */
 	journal_t		*t_journal;
@@ -455,7 +455,7 @@ struct transaction_s
 		T_RUNDOWN,
 		T_FLUSH,
 		T_COMMIT,
-		T_FINISHED 
+		T_FINISHED
 	}			t_state;
 
 	/*
@@ -569,7 +569,7 @@ struct transaction_s
  *     journal_t.
  * @j_flags:  General journaling state flags
  * @j_errno:  Is there an outstanding uncleared error on the journal (from a
- *     prior abort)? 
+ *     prior abort)?
  * @j_sb_buffer: First part of superblock buffer
  * @j_superblock: Second part of superblock buffer
  * @j_format_version: Version of the superblock format
@@ -583,7 +583,7 @@ struct transaction_s
  * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction
  *  to start committing, or for a barrier lock to be released
  * @j_wait_logspace: Wait queue for waiting for checkpointing to complete
- * @j_wait_done_commit: Wait queue for waiting for commit to complete 
+ * @j_wait_done_commit: Wait queue for waiting for commit to complete
  * @j_wait_checkpoint:  Wait queue to trigger checkpointing
  * @j_wait_commit: Wait queue to trigger commit
  * @j_wait_updates: Wait queue to wait for updates to complete
@@ -592,7 +592,7 @@ struct transaction_s
  * @j_tail: Journal tail - identifies the oldest still-used block in the
  *  journal.
  * @j_free: Journal free - how many free blocks are there in the journal?
- * @j_first: The block number of the first usable block 
+ * @j_first: The block number of the first usable block
  * @j_last: The block number one beyond the last usable block
  * @j_dev: Device where we store the journal
  * @j_blocksize: blocksize for the location where we store the journal.
@@ -604,12 +604,12 @@ struct transaction_s
  * @j_list_lock: Protects the buffer lists and internal buffer state.
  * @j_inode: Optional inode where we store the journal.  If present, all journal
  *     block numbers are mapped into this inode via bmap().
- * @j_tail_sequence:  Sequence number of the oldest transaction in the log 
+ * @j_tail_sequence:  Sequence number of the oldest transaction in the log
  * @j_transaction_sequence: Sequence number of the next transaction to grant
  * @j_commit_sequence: Sequence number of the most recently committed
  *  transaction
  * @j_commit_request: Sequence number of the most recent transaction wanting
- *     commit 
+ *     commit
  * @j_uuid: Uuid of client object.
  * @j_task: Pointer to the current commit thread for this journal
  * @j_max_transaction_buffers:  Maximum number of metadata buffers to allow in a
@@ -823,8 +823,8 @@ struct journal_s
 	void *j_private;
 };
 
-/* 
- * Journal flag definitions 
+/*
+ * Journal flag definitions
  */
 #define JFS_UNMOUNT	0x001	/* Journal thread is being destroyed */
 #define JFS_ABORT	0x002	/* Journaling has been aborted for errors. */
@@ -833,7 +833,7 @@ struct journal_s
 #define JFS_LOADED	0x010	/* The journal superblock has been loaded */
 #define JFS_BARRIER	0x020	/* Use IDE barriers */
 
-/* 
+/*
  * Function declarations for the journaling transaction and buffer
  * management
  */
@@ -862,7 +862,7 @@ int __journal_remove_checkpoint(struct journal_head *);
 void __journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 /* Buffer IO */
-extern int 
+extern int
 journal_write_metadata_buffer(transaction_t	  *transaction,
 			      struct journal_head  *jh_in,
 			      struct journal_head **jh_out,
@@ -890,7 +890,7 @@ static inline handle_t *journal_current_handle(void)
 /* The journaling code user interface:
  *
  * Create and destroy handles
- * Register buffer modifications against the current transaction. 
+ * Register buffer modifications against the current transaction.
  */
 
 extern handle_t *journal_start(journal_t *, int nblocks);
@@ -917,11 +917,11 @@ extern journal_t * journal_init_dev(struct block_device *bdev,
 				int start, int len, int bsize);
 extern journal_t * journal_init_inode (struct inode *);
 extern int	   journal_update_format (journal_t *);
-extern int	   journal_check_used_features 
+extern int	   journal_check_used_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
-extern int	   journal_check_available_features 
+extern int	   journal_check_available_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
-extern int	   journal_set_features 
+extern int	   journal_set_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
 extern int	   journal_create     (journal_t *);
 extern int	   journal_load       (journal_t *journal);
@@ -1015,7 +1015,7 @@ do {								           \
  * bit, when set, indicates that we have had a fatal error somewhere,
  * either inside the journaling layer or indicated to us by the client
  * (eg. ext3), and that we and should not commit any further
- * transactions.  
+ * transactions.
  */
 
 static inline int is_journal_aborted(journal_t *journal)
@@ -1082,7 +1082,7 @@ static inline int jbd_space_needed(journal_t *journal)
 #define BJ_Reserved	7	/* Buffer is reserved for access by journal */
 #define BJ_Locked	8	/* Locked for I/O during commit */
 #define BJ_Types	9
- 
+
 extern int jbd_blocks_per_page(struct inode *inode);
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From 37ed322290eb6d5cf2ab33915793ed4219eae1d6 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <esandeen@redhat.com>
Date: Wed, 27 Sep 2006 01:49:31 -0700
Subject: [PATCH] JBD: 16T fixes

These are a few places I've found in jbd that look like they may not be
16T-safe, or consistent with the use of unsigned longs for block
containers.  Problems here would be somewhat hard to hit, would require
journal blocks past the 8T boundary, which would not be terribly common.
Still, should fix.

(some of these have come from the ext4 work on jbd as well).

I think there's one more possibility that the wrap() function may not be
safe IF your last block in the journal butts right up against the 232 block
boundary, but that seems like a VERY remote possibility, and I'm not
worrying about it at this point.

Signed-off-by: Eric Sandeen <esandeen@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/journal.c    | 6 +++---
 include/linux/jbd.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 2613fca92740..88d970e09ce6 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal)
 int journal_write_metadata_buffer(transaction_t *transaction,
 				  struct journal_head  *jh_in,
 				  struct journal_head **jh_out,
-				  int blocknr)
+				  unsigned long blocknr)
 {
 	int need_copy_out = 0;
 	int done_copy_out = 0;
@@ -696,7 +696,7 @@ fail:
  *  @bdev: Block device on which to create the journal
  *  @fs_dev: Device which hold journalled filesystem for this journal.
  *  @start: Block nr Start of journal.
- *  @len:  Lenght of the journal in blocks.
+ *  @len:  Length of the journal in blocks.
  *  @blocksize: blocksize of journalling device
  *  @returns: a newly created journal_t *
  *
@@ -820,7 +820,7 @@ static void journal_fail_superblock (journal_t *journal)
 static int journal_reset(journal_t *journal)
 {
 	journal_superblock_t *sb = journal->j_superblock;
-	unsigned int first, last;
+	unsigned long first, last;
 
 	first = be32_to_cpu(sb->s_first);
 	last = be32_to_cpu(sb->s_maxlen);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 7d847931ee5a..dc262624efa5 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -732,7 +732,7 @@ struct journal_s
 	 */
 	struct block_device	*j_dev;
 	int			j_blocksize;
-	unsigned int		j_blk_offset;
+	unsigned long		j_blk_offset;
 
 	/*
 	 * Device which holds the client fs.  For internal journal this will be
@@ -866,7 +866,7 @@ extern int
 journal_write_metadata_buffer(transaction_t	  *transaction,
 			      struct journal_head  *jh_in,
 			      struct journal_head **jh_out,
-			      int		   blocknr);
+			      unsigned long	   blocknr);
 
 /* Transaction locking */
 extern void		__wait_on_journal (journal_t *);
-- 
cgit v1.2.3


From e9ad5620bfb901df8a7a2603c88689ededeecaf1 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 27 Sep 2006 01:49:35 -0700
Subject: [PATCH] ext3: More whitespace cleanups

More white space cleanups in preparation of cloning ext4 from ext3.
Removing spaces that precede a tab.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/acl.c             |  2 +-
 fs/ext3/balloc.c          | 32 ++++++++++++++++----------------
 fs/ext3/dir.c             |  2 +-
 fs/ext3/hash.c            |  2 +-
 fs/ext3/inode.c           |  8 ++++----
 fs/ext3/namei.c           | 18 +++++++++---------
 fs/ext3/super.c           |  6 +++---
 fs/jbd/checkpoint.c       |  2 +-
 fs/jbd/journal.c          |  2 +-
 fs/jbd/recovery.c         |  2 +-
 fs/jbd/transaction.c      |  6 +++---
 include/linux/ext3_fs.h   |  2 +-
 include/linux/ext3_fs_i.h |  2 +-
 include/linux/jbd.h       | 10 +++++-----
 14 files changed, 48 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 0d21d558b87a..92bf78221429 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -258,7 +258,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 		default:
 			return -EINVAL;
 	}
- 	if (acl) {
+	if (acl) {
 		value = ext3_acl_to_disk(acl, &size);
 		if (IS_ERR(value))
 			return (int)PTR_ERR(value);
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index f1897feb18f4..c19be8fc3e51 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -40,7 +40,7 @@
 
 /**
  * ext3_get_group_desc() -- load group descriptor from disk
- * @sb: 		super block
+ * @sb:			super block
  * @block_group:	given block group
  * @bh:			pointer to the buffer head to store the block
  *			group descriptor
@@ -355,7 +355,7 @@ void ext3_init_block_alloc_info(struct inode *inode)
 		rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
 		rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
 
-	 	/*
+		/*
 		 * if filesystem is mounted with NORESERVATION, the goal
 		 * reservation window size is set to zero to indicate
 		 * block reservation is off
@@ -681,7 +681,7 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
 		jbd_lock_bh_state(bh);
 		if (jh->b_committed_data)
 			start = ext3_find_next_zero_bit(jh->b_committed_data,
-						 	maxblocks, next);
+							maxblocks, next);
 		jbd_unlock_bh_state(bh);
 	}
 	return -1;
@@ -790,10 +790,10 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
  * and at last, allocate the blocks by claiming the found free bit as allocated.
  *
  * To set the range of this allocation:
- * 	if there is a reservation window, only try to allocate block(s) from the
+ *	if there is a reservation window, only try to allocate block(s) from the
  *	file's own reservation window;
  *	Otherwise, the allocation range starts from the give goal block, ends at
- * 	the block group's last block.
+ *	the block group's last block.
  *
  * If we failed to allocate the desired block then we may end up crossing to a
  * new bitmap.  In that case we must release write access to the old one via
@@ -880,12 +880,12 @@ fail_access:
 }
 
 /**
- * 	find_next_reservable_window():
+ *	find_next_reservable_window():
  *		find a reservable space within the given range.
  *		It does not allocate the reservation window for now:
  *		alloc_new_reservation() will do the work later.
  *
- * 	@search_head: the head of the searching list;
+ *	@search_head: the head of the searching list;
  *		This is not necessarily the list head of the whole filesystem
  *
  *		We have both head and start_block to assist the search
@@ -893,12 +893,12 @@ fail_access:
  *		but we will shift to the place where start_block is,
  *		then start from there, when looking for a reservable space.
  *
- * 	@size: the target new reservation window size
+ *	@size: the target new reservation window size
  *
- * 	@group_first_block: the first block we consider to start
+ *	@group_first_block: the first block we consider to start
  *			the real search from
  *
- * 	@last_block:
+ *	@last_block:
  *		the maximum block number that our goal reservable space
  *		could start from. This is normally the last block in this
  *		group. The search will end when we found the start of next
@@ -906,10 +906,10 @@ fail_access:
  *		This could handle the cross boundary reservation window
  *		request.
  *
- * 	basically we search from the given range, rather than the whole
- * 	reservation double linked list, (start_block, last_block)
- * 	to find a free region that is of my size and has not
- * 	been reserved.
+ *	basically we search from the given range, rather than the whole
+ *	reservation double linked list, (start_block, last_block)
+ *	to find a free region that is of my size and has not
+ *	been reserved.
  *
  */
 static int find_next_reservable_window(
@@ -962,7 +962,7 @@ static int find_next_reservable_window(
 			/*
 			 * Found a reserveable space big enough.  We could
 			 * have a reservation across the group boundary here
-		 	 */
+			 */
 			break;
 		}
 	}
@@ -998,7 +998,7 @@ static int find_next_reservable_window(
 }
 
 /**
- * 	alloc_new_reservation()--allocate a new reservation window
+ *	alloc_new_reservation()--allocate a new reservation window
  *
  *		To make a new reservation, we search part of the filesystem
  *		reservation list (the list that inside the group). We try to
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 6f9e5a523c87..4d97f437cd48 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -67,7 +67,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
 			  unsigned long offset)
 {
 	const char * error_msg = NULL;
- 	const int rlen = le16_to_cpu(de->rec_len);
+	const int rlen = le16_to_cpu(de->rec_len);
 
 	if (rlen < EXT3_DIR_REC_LEN(1))
 		error_msg = "rec_len is smaller than minimal";
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index 7fa637cd322a..deeb27b5ba83 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -95,7 +95,7 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
 	__u32	minor_hash = 0;
 	const char	*p;
 	int		i;
-	__u32 		in[8], buf[4];
+	__u32		in[8], buf[4];
 
 	/* Initialize the default seed for the hash checksum functions */
 	buf[0] = 0x67452301;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 56665fab027d..c9fc15f8b609 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -13,11 +13,11 @@
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *
  *  Goal-directed block allocation by Stephen Tweedie
- * 	(sct@redhat.com), 1993, 1998
+ *	(sct@redhat.com), 1993, 1998
  *  Big-endian to little-endian byte-swapping/bitmaps by
  *        David S. Miller (davem@caip.rutgers.edu), 1995
  *  64-bit file support on 64-bit platforms by Jakub Jelinek
- * 	(jj@sunsite.ms.mff.cuni.cz)
+ *	(jj@sunsite.ms.mff.cuni.cz)
  *
  *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
  */
@@ -470,7 +470,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
  *	ext3_blks_to_allocate: Look up the block map and count the number
  *	of direct blocks need to be allocated for the given branch.
  *
- * 	@branch: chain of indirect blocks
+ *	@branch: chain of indirect blocks
  *	@k: number of blocks need for indirect blocks
  *	@blks: number of data blocks to be mapped.
  *	@blocks_to_boundary:  the offset in the indirect block
@@ -1098,7 +1098,7 @@ static int walk_page_buffers(	handle_t *handle,
 
 	for (	bh = head, block_start = 0;
 		ret == 0 && (bh != head || !block_start);
-	    	block_start = block_end, bh = next)
+		block_start = block_end, bh = next)
 	{
 		next = bh->b_this_page;
 		block_end = block_start + blocksize;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 70dc5206ebfa..85d132c37ee0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -15,13 +15,13 @@
  *  Big-endian to little-endian byte-swapping/bitmaps by
  *        David S. Miller (davem@caip.rutgers.edu), 1995
  *  Directory entry file type support and forward compatibility hooks
- *  	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
+ *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
  *  Hash Tree Directory indexing (c)
- *  	Daniel Phillips, 2001
+ *	Daniel Phillips, 2001
  *  Hash Tree Directory indexing porting
- *  	Christopher Li, 2002
+ *	Christopher Li, 2002
  *  Hash Tree Directory indexing cleanup
- * 	Theodore Ts'o, 2002
+ *	Theodore Ts'o, 2002
  */
 
 #include <linux/fs.h>
@@ -278,7 +278,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
 				       ((char *) de - base));
 			}
 			space += EXT3_DIR_REC_LEN(de->name_len);
-	 		names++;
+			names++;
 		}
 		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
 	}
@@ -1688,7 +1688,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
-			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -1816,7 +1816,7 @@ static int empty_dir (struct inode * inode)
 			!le32_to_cpu(de1->inode) ||
 			strcmp (".", de->name) ||
 			strcmp ("..", de1->name)) {
-	    	ext3_warning (inode->i_sb, "empty_dir",
+		ext3_warning (inode->i_sb, "empty_dir",
 			      "bad directory (dir #%lu) - no `.' or `..'",
 			      inode->i_ino);
 		brelse (bh);
@@ -2129,7 +2129,7 @@ static int ext3_symlink (struct inode * dir,
 
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
-			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
+					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
 					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2227,7 +2227,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 		DQUOT_INIT(new_dentry->d_inode);
 	handle = ext3_journal_start(old_dir, 2 *
 					EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
-			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
+					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 4b526b496102..0544325d9d7e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -734,8 +734,8 @@ static match_table_t tokens = {
 
 static ext3_fsblk_t get_sb_block(void **data)
 {
-	ext3_fsblk_t 	sb_block;
-	char 		*options = (char *) *data;
+	ext3_fsblk_t	sb_block;
+	char		*options = (char *) *data;
 
 	if (!options || strncmp(options, "sb=", 3) != 0)
 		return 1;	/* Default location */
@@ -2740,7 +2740,7 @@ static int __init init_ext3_fs(void)
 out:
 	destroy_inodecache();
 out1:
- 	exit_ext3_xattr();
+	exit_ext3_xattr();
 	return err;
 }
 
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 961ada28db5e..0208cc7ac5d0 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -480,7 +480,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
 	if (!jh)
 		return 0;
 
- 	last_jh = jh->b_cpprev;
+	last_jh = jh->b_cpprev;
 	do {
 		jh = next_jh;
 		next_jh = jh->b_cpnext;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 88d970e09ce6..b571209d6a1e 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -181,7 +181,7 @@ loop:
 						transaction->t_expires))
 			should_sleep = 0;
 		if (journal->j_flags & JFS_UNMOUNT)
- 			should_sleep = 0;
+			should_sleep = 0;
 		if (should_sleep) {
 			spin_unlock(&journal->j_state_lock);
 			schedule();
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 73bb64806ed3..445eed6ce5dc 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal,
 	unsigned long		next_log_block;
 	int			err, success = 0;
 	journal_superblock_t *	sb;
-	journal_header_t * 	tmp;
+	journal_header_t *	tmp;
 	struct buffer_head *	bh;
 	unsigned int		sequence;
 	int			blocktype;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index bf7fd7117817..e1b3c8af4d17 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -580,7 +580,7 @@ repeat:
 		 */
 		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
 		jbd_unexpected_dirty_buffer(jh);
- 	}
+	}
 
 	unlock_buffer(bh);
 
@@ -1373,7 +1373,7 @@ int journal_stop(handle_t *handle)
 	if (handle->h_sync ||
 			transaction->t_outstanding_credits >
 				journal->j_max_transaction_buffers ||
-	    		time_after_eq(jiffies, transaction->t_expires)) {
+			time_after_eq(jiffies, transaction->t_expires)) {
 		/* Do this even for aborted journals: an abort still
 		 * completes the commit thread, it just doesn't write
 		 * anything to disk. */
@@ -1908,7 +1908,7 @@ void journal_invalidatepage(journal_t *journal,
 		next = bh->b_this_page;
 
 		if (offset <= curr_off) {
-		 	/* This block is wholly outside the truncation point */
+			/* This block is wholly outside the truncation point */
 			lock_buffer(bh);
 			may_free &= journal_unmap_buffer(journal, bh);
 			unlock_buffer(bh);
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 0eed918b3816..369c67502346 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -473,7 +473,7 @@ struct ext3_super_block {
 	__u8	s_reserved_char_pad;
 	__u16	s_reserved_word_pad;
 	__le32	s_default_mount_opts;
-	__le32	s_first_meta_bg; 	/* First metablock block group */
+	__le32	s_first_meta_bg;	/* First metablock block group */
 	__u32	s_reserved[190];	/* Padding to the end of the block */
 };
 
diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h
index 2f18b9511f21..4395e5206746 100644
--- a/include/linux/ext3_fs_i.h
+++ b/include/linux/ext3_fs_i.h
@@ -35,7 +35,7 @@ struct ext3_reserve_window {
 };
 
 struct ext3_reserve_window_node {
-	struct rb_node	 	rsv_node;
+	struct rb_node		rsv_node;
 	__u32			rsv_goal_size;
 	__u32			rsv_alloc_hit;
 	struct ext3_reserve_window	rsv_window;
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index dc262624efa5..a6d9daa38c6d 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -64,7 +64,7 @@ extern int journal_enable_debug;
 		if ((n) <= journal_enable_debug) {			\
 			printk (KERN_DEBUG "(%s, %d): %s: ",		\
 				__FILE__, __LINE__, __FUNCTION__);	\
-		  	printk (f, ## a);				\
+			printk (f, ## a);				\
 		}							\
 	} while (0)
 #else
@@ -201,9 +201,9 @@ typedef struct journal_superblock_s
 
 /* 0x0024 */
 	/* Remaining fields are only valid in a version-2 superblock */
-	__be32	s_feature_compat; 	/* compatible feature set */
-	__be32	s_feature_incompat; 	/* incompatible feature set */
-	__be32	s_feature_ro_compat; 	/* readonly-compatible feature set */
+	__be32	s_feature_compat;	/* compatible feature set */
+	__be32	s_feature_incompat;	/* incompatible feature set */
+	__be32	s_feature_ro_compat;	/* readonly-compatible feature set */
 /* 0x0030 */
 	__u8	s_uuid[16];		/* 128-bit uuid for journal */
 
@@ -699,7 +699,7 @@ struct journal_s
 	wait_queue_head_t	j_wait_updates;
 
 	/* Semaphore for locking against concurrent checkpoints */
-	struct mutex	 	j_checkpoint_mutex;
+	struct mutex		j_checkpoint_mutex;
 
 	/*
 	 * Journal head: identifies the first unused block in the journal.
-- 
cgit v1.2.3


From a4e4de36dc446b2193bdc8ebb96a96e44b69dd94 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 27 Sep 2006 01:49:36 -0700
Subject: [PATCH] ext3: Fix sparse warnings

Fixing up some endian-ness warnings in preparation to clone ext4 from ext3.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/resize.c        | 21 +++++++++++----------
 fs/ext3/super.c         |  4 +---
 fs/jbd/journal.c        |  2 +-
 include/linux/ext3_fs.h |  2 +-
 4 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5e1337fd878a..e186f7fb698b 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -336,7 +336,7 @@ static int verify_reserved_gdb(struct super_block *sb,
 	unsigned five = 5;
 	unsigned seven = 7;
 	unsigned grp;
-	__u32 *p = (__u32 *)primary->b_data;
+	__le32 *p = (__le32 *)primary->b_data;
 	int gdbackups = 0;
 
 	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
@@ -380,7 +380,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	struct buffer_head *dind;
 	int gdbackups;
 	struct ext3_iloc iloc;
-	__u32 *data;
+	__le32 *data;
 	int err;
 
 	if (test_opt(sb, DEBUG))
@@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 		goto exit_bh;
 	}
 
-	data = (__u32 *)dind->b_data;
+	data = (__le32 *)dind->b_data;
 	if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
 		ext3_warning(sb, __FUNCTION__,
 			     "new group %u GDT block "E3FSBLK" not reserved",
@@ -519,7 +519,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	struct buffer_head *dind;
 	struct ext3_iloc iloc;
 	ext3_fsblk_t blk;
-	__u32 *data, *end;
+	__le32 *data, *end;
 	int gdbackups = 0;
 	int res, i;
 	int err;
@@ -536,8 +536,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	}
 
 	blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
-	data = (__u32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
-	end = (__u32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
+	data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
+	end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
 
 	/* Get each reserved primary GDT block and verify it holds backups */
 	for (res = 0; res < reserved_gdb; res++, blk++) {
@@ -545,7 +545,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 			ext3_warning(sb, __FUNCTION__,
 				     "reserved block "E3FSBLK
 				     " not at offset %ld",
-				     blk, (long)(data - (__u32 *)dind->b_data));
+				     blk,
+				     (long)(data - (__le32 *)dind->b_data));
 			err = -EINVAL;
 			goto exit_bh;
 		}
@@ -560,7 +561,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 			goto exit_bh;
 		}
 		if (++data >= end)
-			data = (__u32 *)dind->b_data;
+			data = (__le32 *)dind->b_data;
 	}
 
 	for (i = 0; i < reserved_gdb; i++) {
@@ -584,7 +585,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
 	for (i = 0; i < reserved_gdb; i++) {
 		int err2;
-		data = (__u32 *)primary[i]->b_data;
+		data = (__le32 *)primary[i]->b_data;
 		/* printk("reserving backup %lu[%u] = %lu\n",
 		       primary[i]->b_blocknr, gdbackups,
 		       blk + primary[i]->b_blocknr); */
@@ -689,7 +690,7 @@ exit_err:
 			     "can't update backup for group %d (err %d), "
 			     "forcing fsck on next reboot", group, err);
 		sbi->s_mount_state &= ~EXT3_VALID_FS;
-		sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
+		sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
 		mark_buffer_dirty(sbi->s_sbh);
 	}
 }
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0544325d9d7e..10985017765b 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2348,10 +2348,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 			 */
 			ext3_clear_journal_err(sb, es);
 			sbi->s_mount_state = le16_to_cpu(es->s_state);
-			if ((ret = ext3_group_extend(sb, es, n_blocks_count))) {
-				err = ret;
+			if ((err = ext3_group_extend(sb, es, n_blocks_count)))
 				goto restore_opts;
-			}
 			if (!ext3_setup_super (sb, es, 0))
 				sb->s_flags &= ~MS_RDONLY;
 		}
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b571209d6a1e..2fc66c3e6681 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1094,7 +1094,7 @@ int journal_load(journal_t *journal)
 	/*
 	 * Create a slab for this blocksize
 	 */
-	err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize));
+	err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
 	if (err)
 		return err;
 
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 369c67502346..cc08f56750da 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -460,7 +460,7 @@ struct ext3_super_block {
 	 */
 	__u8	s_prealloc_blocks;	/* Nr of blocks to try to preallocate*/
 	__u8	s_prealloc_dir_blocks;	/* Nr to preallocate for dirs */
-	__u16	s_reserved_gdt_blocks;	/* Per group desc for online growth */
+	__le16	s_reserved_gdt_blocks;	/* Per group desc for online growth */
 	/*
 	 * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
 	 */
-- 
cgit v1.2.3


From 133d205a18b7a4d8cb52959c5310f6664277cf61 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 27 Sep 2006 01:49:41 -0700
Subject: [PATCH] Make kmem_cache_destroy() return void

un-, de-, -free, -destroy, -exit, etc functions should in general return
void.  Also,

There is very little, say, filesystem driver code can do upon failed
kmem_cache_destroy().  If it will be decided to BUG in this case, BUG
should be put in generic code, instead.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/slab.h | 4 ++--
 mm/slab.c            | 6 ++----
 mm/slob.c            | 3 +--
 3 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 66d6eb78d1c6..a96fd9310d55 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -60,7 +60,7 @@ extern void __init kmem_cache_init(void);
 extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
 				       void (*)(void *, kmem_cache_t *, unsigned long),
 				       void (*)(void *, kmem_cache_t *, unsigned long));
-extern int kmem_cache_destroy(kmem_cache_t *);
+extern void kmem_cache_destroy(kmem_cache_t *);
 extern int kmem_cache_shrink(kmem_cache_t *);
 extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t);
 extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
@@ -249,7 +249,7 @@ struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
 	unsigned long,
 	void (*)(void *, struct kmem_cache *, unsigned long),
 	void (*)(void *, struct kmem_cache *, unsigned long));
-int kmem_cache_destroy(struct kmem_cache *c);
+void kmem_cache_destroy(struct kmem_cache *c);
 void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags);
 void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
 void kmem_cache_free(struct kmem_cache *c, void *b);
diff --git a/mm/slab.c b/mm/slab.c
index 7a48eb1a60c8..c52ebf9c4462 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2442,7 +2442,6 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * @cachep: the cache to destroy
  *
  * Remove a struct kmem_cache object from the slab cache.
- * Returns 0 on success.
  *
  * It is expected this function will be called by a module when it is
  * unloaded.  This will remove the cache completely, and avoid a duplicate
@@ -2454,7 +2453,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * The caller must guarantee that noone will allocate memory from the cache
  * during the kmem_cache_destroy().
  */
-int kmem_cache_destroy(struct kmem_cache *cachep)
+void kmem_cache_destroy(struct kmem_cache *cachep)
 {
 	BUG_ON(!cachep || in_interrupt());
 
@@ -2475,7 +2474,7 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
 		list_add(&cachep->next, &cache_chain);
 		mutex_unlock(&cache_chain_mutex);
 		unlock_cpu_hotplug();
-		return 1;
+		return;
 	}
 
 	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
@@ -2483,7 +2482,6 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
 
 	__kmem_cache_destroy(cachep);
 	unlock_cpu_hotplug();
-	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
diff --git a/mm/slob.c b/mm/slob.c
index 20188627347c..542394184a58 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -270,10 +270,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
-int kmem_cache_destroy(struct kmem_cache *c)
+void kmem_cache_destroy(struct kmem_cache *c)
 {
 	slob_free(c, sizeof(struct kmem_cache));
-	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
-- 
cgit v1.2.3


From c713216deebd95d2b0ab38fef8bb2361c0180c2d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 27 Sep 2006 01:49:43 -0700
Subject: [PATCH] Introduce mechanism for registering active regions of memory

At a basic level, architectures define structures to record where active
ranges of page frames are located.  Once located, the code to calculate zone
sizes and holes in each architecture is very similar.  Some of this zone and
hole sizing code is difficult to read for no good reason.  This set of patches
eliminates the similar-looking architecture-specific code.

The patches introduce a mechanism where architectures register where the
active ranges of page frames are with add_active_range().  When all areas have
been discovered, free_area_init_nodes() is called to initialise the pgdat and
zones.  The zone sizes and holes are then calculated in an architecture
independent manner.

Patch 1 introduces the mechanism for registering and initialising PFN ranges
Patch 2 changes ppc to use the mechanism - 139 arch-specific LOC removed
Patch 3 changes x86 to use the mechanism - 136 arch-specific LOC removed
Patch 4 changes x86_64 to use the mechanism - 74 arch-specific LOC removed
Patch 5 changes ia64 to use the mechanism - 52 arch-specific LOC removed
Patch 6 accounts for mem_map as a memory hole as the pages are not reclaimable.
	It adjusts the watermarks slightly

Tony Luck has successfully tested for ia64 on Itanium with tiger_defconfig,
gensparse_defconfig and defconfig.  Bob Picco has also tested and debugged on
IA64.  Jack Steiner successfully boot tested on a mammoth SGI IA64-based
machine.  These were on patches against 2.6.17-rc1 and release 3 of these
patches but there have been no ia64-changes since release 3.

There are differences in the zone sizes for x86_64 as the arch-specific code
for x86_64 accounts the kernel image and the starting mem_maps as memory holes
but the architecture-independent code accounts the memory as present.

The big benefit of this set of patches is a sizable reduction of
architecture-specific code, some of which is very hairy.  There should be a
greater reduction when other architectures use the same mechanisms for zone
and hole sizing but I lack the hardware to test on.

Additional credit;
	Dave Hansen for the initial suggestion and comments on early patches
	Andy Whitcroft for reviewing early versions and catching numerous
		errors
	Tony Luck for testing and debugging on IA64
	Bob Picco for fixing bugs related to pfn registration, reviewing a
		number of patch revisions, providing a number of suggestions
		on future direction and testing heavily
	Jack Steiner and Robin Holt for testing on IA64 and clarifying
		issues related to memory holes
	Yasunori for testing on IA64
	Andi Kleen for reviewing and feeding back about x86_64
	Christian Kujau for providing valuable information related to ACPI
		problems on x86_64 and testing potential fixes

This patch:

Define the structure to represent an active range of page frames within a node
in an architecture independent manner.  Architectures are expected to register
active ranges of PFNs using add_active_range(nid, start_pfn, end_pfn) and call
free_area_init_nodes() passing the PFNs of the end of each zone.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     |  47 +++++
 include/linux/mmzone.h |  10 +-
 mm/page_alloc.c        | 552 ++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 584 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 856f0ee7e84a..c0402da7cce0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -937,6 +937,53 @@ extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, pg_data_t *pgdat,
 	unsigned long * zones_size, unsigned long zone_start_pfn, 
 	unsigned long *zholes_size);
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/*
+ * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
+ * zones, allocate the backing mem_map and account for memory holes in a more
+ * architecture independent manner. This is a substitute for creating the
+ * zone_sizes[] and zholes_size[] arrays and passing them to
+ * free_area_init_node()
+ *
+ * An architecture is expected to register range of page frames backed by
+ * physical memory with add_active_range() before calling
+ * free_area_init_nodes() passing in the PFN each zone ends at. At a basic
+ * usage, an architecture is expected to do something like
+ *
+ * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
+ * 							 max_highmem_pfn};
+ * for_each_valid_physical_page_range()
+ * 	add_active_range(node_id, start_pfn, end_pfn)
+ * free_area_init_nodes(max_zone_pfns);
+ *
+ * If the architecture guarantees that there are no holes in the ranges
+ * registered with add_active_range(), free_bootmem_active_regions()
+ * will call free_bootmem_node() for each registered physical page range.
+ * Similarly sparse_memory_present_with_active_regions() calls
+ * memory_present() for each range when SPARSEMEM is enabled.
+ *
+ * See mm/page_alloc.c for more information on each function exposed by
+ * CONFIG_ARCH_POPULATES_NODE_MAP
+ */
+extern void free_area_init_nodes(unsigned long *max_zone_pfn);
+extern void add_active_range(unsigned int nid, unsigned long start_pfn,
+					unsigned long end_pfn);
+extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+						unsigned long new_end_pfn);
+extern void remove_all_active_ranges(void);
+extern unsigned long absent_pages_in_range(unsigned long start_pfn,
+						unsigned long end_pfn);
+extern void get_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn);
+extern unsigned long find_min_pfn_with_active_regions(void);
+extern unsigned long find_max_pfn_with_active_regions(void);
+extern void free_bootmem_with_active_regions(int nid,
+						unsigned long max_low_pfn);
+extern void sparse_memory_present_with_active_regions(int nid);
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+extern int early_pfn_to_nid(unsigned long pfn);
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3693f1a52788..7fa1cbe9fa7a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -305,6 +305,13 @@ struct zonelist {
 	struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
 };
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+struct node_active_region {
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+	int nid;
+};
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
 /*
  * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
@@ -518,7 +525,8 @@ extern struct zone *next_zone(struct zone *zone);
 
 #endif
 
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
+	!defined(CONFIG_ARCH_POPULATES_NODE_MAP)
 #define early_pfn_to_nid(nid)  (0UL)
 #endif
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9810f0a60db7..26c9939857fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -37,6 +37,8 @@
 #include <linux/vmalloc.h>
 #include <linux/mempolicy.h>
 #include <linux/stop_machine.h>
+#include <linux/sort.h>
+#include <linux/pfn.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -103,6 +105,33 @@ int min_free_kbytes = 1024;
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+  /*
+   * MAX_ACTIVE_REGIONS determines the maxmimum number of distinct
+   * ranges of memory (RAM) that may be registered with add_active_range().
+   * Ranges passed to add_active_range() will be merged if possible
+   * so the number of times add_active_range() can be called is
+   * related to the number of nodes and the number of holes
+   */
+  #ifdef CONFIG_MAX_ACTIVE_REGIONS
+    /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
+    #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
+  #else
+    #if MAX_NUMNODES >= 32
+      /* If there can be many nodes, allow up to 50 holes per node */
+      #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
+    #else
+      /* By default, allow up to 256 distinct regions */
+      #define MAX_ACTIVE_REGIONS 256
+    #endif
+  #endif
+
+  struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS];
+  int __initdata nr_nodemap_entries;
+  unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
+  unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -1642,25 +1671,6 @@ static inline unsigned long wait_table_bits(unsigned long size)
 
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
-static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
-		unsigned long *zones_size, unsigned long *zholes_size)
-{
-	unsigned long realtotalpages, totalpages = 0;
-	enum zone_type i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		totalpages += zones_size[i];
-	pgdat->node_spanned_pages = totalpages;
-
-	realtotalpages = totalpages;
-	if (zholes_size)
-		for (i = 0; i < MAX_NR_ZONES; i++)
-			realtotalpages -= zholes_size[i];
-	pgdat->node_present_pages = realtotalpages;
-	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
-}
-
-
 /*
  * Initially all pages are reserved - free ones are freed
  * up by free_all_bootmem() once the early boot process is
@@ -1977,6 +1987,272 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 	return 0;
 }
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/*
+ * Basic iterator support. Return the first range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns first region regardless of node
+ */
+static int __init first_active_region_index_in_nid(int nid)
+{
+	int i;
+
+	for (i = 0; i < nr_nodemap_entries; i++)
+		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Basic iterator support. Return the next active range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns next region regardles of node
+ */
+static int __init next_active_region_index_in_nid(int index, int nid)
+{
+	for (index = index + 1; index < nr_nodemap_entries; index++)
+		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+			return index;
+
+	return -1;
+}
+
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+/*
+ * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
+ * Architectures may implement their own version but if add_active_range()
+ * was used and there are no special requirements, this is a convenient
+ * alternative
+ */
+int __init early_pfn_to_nid(unsigned long pfn)
+{
+	int i;
+
+	for (i = 0; i < nr_nodemap_entries; i++) {
+		unsigned long start_pfn = early_node_map[i].start_pfn;
+		unsigned long end_pfn = early_node_map[i].end_pfn;
+
+		if (start_pfn <= pfn && pfn < end_pfn)
+			return early_node_map[i].nid;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+/* Basic iterator support to walk early_node_map[] */
+#define for_each_active_range_index_in_nid(i, nid) \
+	for (i = first_active_region_index_in_nid(nid); i != -1; \
+				i = next_active_region_index_in_nid(i, nid))
+
+/**
+ * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
+ * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed
+ * @max_low_pfn: The highest PFN that till be passed to free_bootmem_node
+ *
+ * If an architecture guarantees that all ranges registered with
+ * add_active_ranges() contain no holes and may be freed, this
+ * this function may be used instead of calling free_bootmem() manually.
+ */
+void __init free_bootmem_with_active_regions(int nid,
+						unsigned long max_low_pfn)
+{
+	int i;
+
+	for_each_active_range_index_in_nid(i, nid) {
+		unsigned long size_pages = 0;
+		unsigned long end_pfn = early_node_map[i].end_pfn;
+
+		if (early_node_map[i].start_pfn >= max_low_pfn)
+			continue;
+
+		if (end_pfn > max_low_pfn)
+			end_pfn = max_low_pfn;
+
+		size_pages = end_pfn - early_node_map[i].start_pfn;
+		free_bootmem_node(NODE_DATA(early_node_map[i].nid),
+				PFN_PHYS(early_node_map[i].start_pfn),
+				size_pages << PAGE_SHIFT);
+	}
+}
+
+/**
+ * sparse_memory_present_with_active_regions - Call memory_present for each active range
+ * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used
+ *
+ * If an architecture guarantees that all ranges registered with
+ * add_active_ranges() contain no holes and may be freed, this
+ * this function may be used instead of calling memory_present() manually.
+ */
+void __init sparse_memory_present_with_active_regions(int nid)
+{
+	int i;
+
+	for_each_active_range_index_in_nid(i, nid)
+		memory_present(early_node_map[i].nid,
+				early_node_map[i].start_pfn,
+				early_node_map[i].end_pfn);
+}
+
+/**
+ * get_pfn_range_for_nid - Return the start and end page frames for a node
+ * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned
+ * @start_pfn: Passed by reference. On return, it will have the node start_pfn
+ * @end_pfn: Passed by reference. On return, it will have the node end_pfn
+ *
+ * It returns the start and end page frame of a node based on information
+ * provided by an arch calling add_active_range(). If called for a node
+ * with no available memory, a warning is printed and the start and end
+ * PFNs will be 0
+ */
+void __init get_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn)
+{
+	int i;
+	*start_pfn = -1UL;
+	*end_pfn = 0;
+
+	for_each_active_range_index_in_nid(i, nid) {
+		*start_pfn = min(*start_pfn, early_node_map[i].start_pfn);
+		*end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
+	}
+
+	if (*start_pfn == -1UL) {
+		printk(KERN_WARNING "Node %u active with no memory\n", nid);
+		*start_pfn = 0;
+	}
+}
+
+/*
+ * Return the number of pages a zone spans in a node, including holes
+ * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node()
+ */
+unsigned long __init zone_spanned_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *ignored)
+{
+	unsigned long node_start_pfn, node_end_pfn;
+	unsigned long zone_start_pfn, zone_end_pfn;
+
+	/* Get the start and end of the node and zone */
+	get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
+	zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+	zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+
+	/* Check that this node has pages within the zone's required range */
+	if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
+		return 0;
+
+	/* Move the zone boundaries inside the node if necessary */
+	zone_end_pfn = min(zone_end_pfn, node_end_pfn);
+	zone_start_pfn = max(zone_start_pfn, node_start_pfn);
+
+	/* Return the spanned pages */
+	return zone_end_pfn - zone_start_pfn;
+}
+
+/*
+ * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
+ * then all holes in the requested range will be accounted for
+ */
+unsigned long __init __absent_pages_in_range(int nid,
+				unsigned long range_start_pfn,
+				unsigned long range_end_pfn)
+{
+	int i = 0;
+	unsigned long prev_end_pfn = 0, hole_pages = 0;
+	unsigned long start_pfn;
+
+	/* Find the end_pfn of the first active range of pfns in the node */
+	i = first_active_region_index_in_nid(nid);
+	if (i == -1)
+		return 0;
+
+	prev_end_pfn = early_node_map[i].start_pfn;
+
+	/* Find all holes for the zone within the node */
+	for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
+
+		/* No need to continue if prev_end_pfn is outside the zone */
+		if (prev_end_pfn >= range_end_pfn)
+			break;
+
+		/* Make sure the end of the zone is not within the hole */
+		start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
+		prev_end_pfn = max(prev_end_pfn, range_start_pfn);
+
+		/* Update the hole size cound and move on */
+		if (start_pfn > range_start_pfn) {
+			BUG_ON(prev_end_pfn > start_pfn);
+			hole_pages += start_pfn - prev_end_pfn;
+		}
+		prev_end_pfn = early_node_map[i].end_pfn;
+	}
+
+	return hole_pages;
+}
+
+/**
+ * absent_pages_in_range - Return number of page frames in holes within a range
+ * @start_pfn: The start PFN to start searching for holes
+ * @end_pfn: The end PFN to stop searching for holes
+ *
+ * It returns the number of pages frames in memory holes within a range
+ */
+unsigned long __init absent_pages_in_range(unsigned long start_pfn,
+							unsigned long end_pfn)
+{
+	return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
+}
+
+/* Return the number of page frames in holes in a zone on a node */
+unsigned long __init zone_absent_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *ignored)
+{
+	return __absent_pages_in_range(nid,
+				arch_zone_lowest_possible_pfn[zone_type],
+				arch_zone_highest_possible_pfn[zone_type]);
+}
+#else
+static inline unsigned long zone_spanned_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *zones_size)
+{
+	return zones_size[zone_type];
+}
+
+static inline unsigned long zone_absent_pages_in_node(int nid,
+						unsigned long zone_type,
+						unsigned long *zholes_size)
+{
+	if (!zholes_size)
+		return 0;
+
+	return zholes_size[zone_type];
+}
+#endif
+
+static void __init calculate_node_totalpages(struct pglist_data *pgdat,
+		unsigned long *zones_size, unsigned long *zholes_size)
+{
+	unsigned long realtotalpages, totalpages = 0;
+	enum zone_type i;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
+								zones_size);
+	pgdat->node_spanned_pages = totalpages;
+
+	realtotalpages = totalpages;
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		realtotalpages -=
+			zone_absent_pages_in_node(pgdat->node_id, i,
+								zholes_size);
+	pgdat->node_present_pages = realtotalpages;
+	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
+							realtotalpages);
+}
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -2000,9 +2276,9 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize;
 
-		realsize = size = zones_size[j];
-		if (zholes_size)
-			realsize -= zholes_size[j];
+		size = zone_spanned_pages_in_node(nid, j, zones_size);
+		realsize = size - zone_absent_pages_in_node(nid, j,
+								zholes_size);
 
 		if (!is_highmem_idx(j))
 			nr_kernel_pages += realsize;
@@ -2073,8 +2349,13 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 	/*
 	 * With no DISCONTIG, the global mem_map is just set as node 0's
 	 */
-	if (pgdat == NODE_DATA(0))
+	if (pgdat == NODE_DATA(0)) {
 		mem_map = NODE_DATA(0)->node_mem_map;
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
+			mem_map -= pgdat->node_start_pfn;
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+	}
 #endif
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 }
@@ -2085,13 +2366,236 @@ void __meminit free_area_init_node(int nid, struct pglist_data *pgdat,
 {
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
-	calculate_zone_totalpages(pgdat, zones_size, zholes_size);
+	calculate_node_totalpages(pgdat, zones_size, zholes_size);
 
 	alloc_node_mem_map(pgdat);
 
 	free_area_init_core(pgdat, zones_size, zholes_size);
 }
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/**
+ * add_active_range - Register a range of PFNs backed by physical memory
+ * @nid: The node ID the range resides on
+ * @start_pfn: The start PFN of the available physical memory
+ * @end_pfn: The end PFN of the available physical memory
+ *
+ * These ranges are stored in an early_node_map[] and later used by
+ * free_area_init_nodes() to calculate zone sizes and holes. If the
+ * range spans a memory hole, it is up to the architecture to ensure
+ * the memory is not freed by the bootmem allocator. If possible
+ * the range being registered will be merged with existing ranges.
+ */
+void __init add_active_range(unsigned int nid, unsigned long start_pfn,
+						unsigned long end_pfn)
+{
+	int i;
+
+	printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) "
+			  "%d entries of %d used\n",
+			  nid, start_pfn, end_pfn,
+			  nr_nodemap_entries, MAX_ACTIVE_REGIONS);
+
+	/* Merge with existing active regions if possible */
+	for (i = 0; i < nr_nodemap_entries; i++) {
+		if (early_node_map[i].nid != nid)
+			continue;
+
+		/* Skip if an existing region covers this new one */
+		if (start_pfn >= early_node_map[i].start_pfn &&
+				end_pfn <= early_node_map[i].end_pfn)
+			return;
+
+		/* Merge forward if suitable */
+		if (start_pfn <= early_node_map[i].end_pfn &&
+				end_pfn > early_node_map[i].end_pfn) {
+			early_node_map[i].end_pfn = end_pfn;
+			return;
+		}
+
+		/* Merge backward if suitable */
+		if (start_pfn < early_node_map[i].end_pfn &&
+				end_pfn >= early_node_map[i].start_pfn) {
+			early_node_map[i].start_pfn = start_pfn;
+			return;
+		}
+	}
+
+	/* Check that early_node_map is large enough */
+	if (i >= MAX_ACTIVE_REGIONS) {
+		printk(KERN_CRIT "More than %d memory regions, truncating\n",
+							MAX_ACTIVE_REGIONS);
+		return;
+	}
+
+	early_node_map[i].nid = nid;
+	early_node_map[i].start_pfn = start_pfn;
+	early_node_map[i].end_pfn = end_pfn;
+	nr_nodemap_entries = i + 1;
+}
+
+/**
+ * shrink_active_range - Shrink an existing registered range of PFNs
+ * @nid: The node id the range is on that should be shrunk
+ * @old_end_pfn: The old end PFN of the range
+ * @new_end_pfn: The new PFN of the range
+ *
+ * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
+ * The map is kept at the end physical page range that has already been
+ * registered with add_active_range(). This function allows an arch to shrink
+ * an existing registered range.
+ */
+void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+						unsigned long new_end_pfn)
+{
+	int i;
+
+	/* Find the old active region end and shrink */
+	for_each_active_range_index_in_nid(i, nid)
+		if (early_node_map[i].end_pfn == old_end_pfn) {
+			early_node_map[i].end_pfn = new_end_pfn;
+			break;
+		}
+}
+
+/**
+ * remove_all_active_ranges - Remove all currently registered regions
+ * During discovery, it may be found that a table like SRAT is invalid
+ * and an alternative discovery method must be used. This function removes
+ * all currently registered regions.
+ */
+void __init remove_all_active_ranges()
+{
+	memset(early_node_map, 0, sizeof(early_node_map));
+	nr_nodemap_entries = 0;
+}
+
+/* Compare two active node_active_regions */
+static int __init cmp_node_active_region(const void *a, const void *b)
+{
+	struct node_active_region *arange = (struct node_active_region *)a;
+	struct node_active_region *brange = (struct node_active_region *)b;
+
+	/* Done this way to avoid overflows */
+	if (arange->start_pfn > brange->start_pfn)
+		return 1;
+	if (arange->start_pfn < brange->start_pfn)
+		return -1;
+
+	return 0;
+}
+
+/* sort the node_map by start_pfn */
+static void __init sort_node_map(void)
+{
+	sort(early_node_map, (size_t)nr_nodemap_entries,
+			sizeof(struct node_active_region),
+			cmp_node_active_region, NULL);
+}
+
+/* Find the lowest pfn for a node. This depends on a sorted early_node_map */
+unsigned long __init find_min_pfn_for_node(unsigned long nid)
+{
+	int i;
+
+	/* Assuming a sorted map, the first range found has the starting pfn */
+	for_each_active_range_index_in_nid(i, nid)
+		return early_node_map[i].start_pfn;
+
+	printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid);
+	return 0;
+}
+
+/**
+ * find_min_pfn_with_active_regions - Find the minimum PFN registered
+ *
+ * It returns the minimum PFN based on information provided via
+ * add_active_range()
+ */
+unsigned long __init find_min_pfn_with_active_regions(void)
+{
+	return find_min_pfn_for_node(MAX_NUMNODES);
+}
+
+/**
+ * find_max_pfn_with_active_regions - Find the maximum PFN registered
+ *
+ * It returns the maximum PFN based on information provided via
+ * add_active_range()
+ */
+unsigned long __init find_max_pfn_with_active_regions(void)
+{
+	int i;
+	unsigned long max_pfn = 0;
+
+	for (i = 0; i < nr_nodemap_entries; i++)
+		max_pfn = max(max_pfn, early_node_map[i].end_pfn);
+
+	return max_pfn;
+}
+
+/**
+ * free_area_init_nodes - Initialise all pg_data_t and zone data
+ * @arch_max_dma_pfn: The maximum PFN usable for ZONE_DMA
+ * @arch_max_dma32_pfn: The maximum PFN usable for ZONE_DMA32
+ * @arch_max_low_pfn: The maximum PFN usable for ZONE_NORMAL
+ * @arch_max_high_pfn: The maximum PFN usable for ZONE_HIGHMEM
+ *
+ * This will call free_area_init_node() for each active node in the system.
+ * Using the page ranges provided by add_active_range(), the size of each
+ * zone in each node and their holes is calculated. If the maximum PFN
+ * between two adjacent zones match, it is assumed that the zone is empty.
+ * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed
+ * that arch_max_dma32_pfn has no pages. It is also assumed that a zone
+ * starts where the previous one ended. For example, ZONE_DMA32 starts
+ * at arch_max_dma_pfn.
+ */
+void __init free_area_init_nodes(unsigned long *max_zone_pfn)
+{
+	unsigned long nid;
+	enum zone_type i;
+
+	/* Record where the zone boundaries are */
+	memset(arch_zone_lowest_possible_pfn, 0,
+				sizeof(arch_zone_lowest_possible_pfn));
+	memset(arch_zone_highest_possible_pfn, 0,
+				sizeof(arch_zone_highest_possible_pfn));
+	arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
+	arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
+	for (i = 1; i < MAX_NR_ZONES; i++) {
+		arch_zone_lowest_possible_pfn[i] =
+			arch_zone_highest_possible_pfn[i-1];
+		arch_zone_highest_possible_pfn[i] =
+			max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
+	}
+
+	/* Regions in the early_node_map can be in any order */
+	sort_node_map();
+
+	/* Print out the zone ranges */
+	printk("Zone PFN ranges:\n");
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		printk("  %-8s %8lu -> %8lu\n",
+				zone_names[i],
+				arch_zone_lowest_possible_pfn[i],
+				arch_zone_highest_possible_pfn[i]);
+
+	/* Print out the early_node_map[] */
+	printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
+	for (i = 0; i < nr_nodemap_entries; i++)
+		printk("  %3d: %8lu -> %8lu\n", early_node_map[i].nid,
+						early_node_map[i].start_pfn,
+						early_node_map[i].end_pfn);
+
+	/* Initialise every node */
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		free_area_init_node(nid, pgdat, NULL,
+				find_min_pfn_for_node(nid), NULL);
+	}
+}
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 static bootmem_data_t contig_bootmem_data;
 struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
-- 
cgit v1.2.3


From 0e0b864e069c52a7b3e4a7da56e29b03a012fd75 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 27 Sep 2006 01:49:56 -0700
Subject: [PATCH] Account for memmap and optionally the kernel image as holes

The x86_64 code accounted for memmap and some portions of the the DMA zone as
holes.  This was because those areas would never be reclaimed and accounting
for them as memory affects min watermarks.  This patch will account for the
memmap as a memory hole.  Architectures may optionally use set_dma_reserve()
if they wish to account for a portion of memory in ZONE_DMA as a hole.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/mm/init.c |  4 +++-
 include/linux/mm.h    |  1 +
 mm/page_alloc.c       | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 63 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 47928399e38a..3e16fe08150e 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -655,8 +655,10 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 #else       		
 	reserve_bootmem(phys, len);    
 #endif
-	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
+	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
 		dma_reserve += len / PAGE_SIZE;
+		set_dma_reserve(dma_reserve);
+	}
 }
 
 int kern_addr_valid(unsigned long addr) 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c0402da7cce0..22936e1fcdf2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -984,6 +984,7 @@ extern void sparse_memory_present_with_active_regions(int nid);
 extern int early_pfn_to_nid(unsigned long pfn);
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 26c9939857fa..8d9a1eb9fbba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,6 +104,7 @@ int min_free_kbytes = 1024;
 
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
+static unsigned long __initdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
   /*
@@ -2213,6 +2214,20 @@ unsigned long __init zone_absent_pages_in_node(int nid,
 				arch_zone_lowest_possible_pfn[zone_type],
 				arch_zone_highest_possible_pfn[zone_type]);
 }
+
+/* Return the zone index a PFN is in */
+int memmap_zone_idx(struct page *lmem_map)
+{
+	int i;
+	unsigned long phys_addr = virt_to_phys(lmem_map);
+	unsigned long pfn = phys_addr >> PAGE_SHIFT;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		if (pfn < arch_zone_highest_possible_pfn[i])
+			break;
+
+	return i;
+}
 #else
 static inline unsigned long zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
@@ -2230,6 +2245,11 @@ static inline unsigned long zone_absent_pages_in_node(int nid,
 
 	return zholes_size[zone_type];
 }
+
+static inline int memmap_zone_idx(struct page *lmem_map)
+{
+	return MAX_NR_ZONES;
+}
 #endif
 
 static void __init calculate_node_totalpages(struct pglist_data *pgdat,
@@ -2274,12 +2294,35 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 	
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize;
+		unsigned long size, realsize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
+		/*
+		 * Adjust realsize so that it accounts for how much memory
+		 * is used by this zone for memmap. This affects the watermark
+		 * and per-cpu initialisations
+		 */
+		memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
+		if (realsize >= memmap_pages) {
+			realsize -= memmap_pages;
+			printk(KERN_DEBUG
+				"  %s zone: %lu pages used for memmap\n",
+				zone_names[j], memmap_pages);
+		} else
+			printk(KERN_WARNING
+				"  %s zone: %lu pages exceeds realsize %lu\n",
+				zone_names[j], memmap_pages, realsize);
+
+		/* Account for reserved DMA pages */
+		if (j == ZONE_DMA && realsize > dma_reserve) {
+			realsize -= dma_reserve;
+			printk(KERN_DEBUG "  DMA zone: %lu pages reserved\n",
+								dma_reserve);
+		}
+
 		if (!is_highmem_idx(j))
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
@@ -2596,6 +2639,21 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 }
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
+/**
+ * set_dma_reserve - Account the specified number of pages reserved in ZONE_DMA
+ * @new_dma_reserve - The number of pages to mark reserved
+ *
+ * The per-cpu batchsize and zone watermarks are determined by present_pages.
+ * In the DMA zone, a significant percentage may be consumed by kernel image
+ * and other unfreeable allocations which can skew the watermarks badly. This
+ * function may optionally be used to account for unfreeable pages in
+ * ZONE_DMA. The effect will be lower watermarks and smaller per-cpu batchsize
+ */
+void __init set_dma_reserve(unsigned long new_dma_reserve)
+{
+	dma_reserve = new_dma_reserve;
+}
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 static bootmem_data_t contig_bootmem_data;
 struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
-- 
cgit v1.2.3


From fb01439c5b778d5974a488c5d4fe85e6d0e18a68 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@skynet.ie>
Date: Wed, 27 Sep 2006 01:49:59 -0700
Subject: [PATCH] Allow an arch to expand node boundaries

Arch-independent zone-sizing determines the size of a node
(pgdat->node_spanned_pages) based on the physical memory that was
registered by the architecture.  However, when
CONFIG_MEMORY_HOTPLUG_RESERVE is set, the architecture expects that the
spanned_pages will be much larger and that mem_map will be allocated that
is used lated on memory hot-add.

This patch allows an architecture that sets CONFIG_MEMORY_HOTPLUG_RESERVE
to call push_node_boundaries() which will set the node beginning and end to
at *least* the requested boundary.

Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/mm/srat.c |  2 ++
 include/linux/mm.h    |  2 ++
 mm/page_alloc.c       | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index db1b2e11cf8f..f8c04d6935c9 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -324,6 +324,8 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 	       nd->start, nd->end);
 	e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
 						nd->end >> PAGE_SHIFT);
+	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
+						nd->end >> PAGE_SHIFT);
 
 #ifdef RESERVE_HOTADD
  	if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 22936e1fcdf2..9d046db31e76 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -970,6 +970,8 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
 						unsigned long new_end_pfn);
+extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
+					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 75133e1dc4b1..acbf58f8a1b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -131,6 +131,10 @@ static unsigned long __initdata dma_reserve;
   int __initdata nr_nodemap_entries;
   unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
+  unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES];
+  unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES];
+#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
 #ifdef CONFIG_DEBUG_VM
@@ -2094,6 +2098,62 @@ void __init sparse_memory_present_with_active_regions(int nid)
 				early_node_map[i].end_pfn);
 }
 
+/**
+ * push_node_boundaries - Push node boundaries to at least the requested boundary
+ * @nid: The nid of the node to push the boundary for
+ * @start_pfn: The start pfn of the node
+ * @end_pfn: The end pfn of the node
+ *
+ * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
+ * time. Specifically, on x86_64, SRAT will report ranges that can potentially
+ * be hotplugged even though no physical memory exists. This function allows
+ * an arch to push out the node boundaries so mem_map is allocated that can
+ * be used later.
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
+void __init push_node_boundaries(unsigned int nid,
+		unsigned long start_pfn, unsigned long end_pfn)
+{
+	printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n",
+			nid, start_pfn, end_pfn);
+
+	/* Initialise the boundary for this node if necessary */
+	if (node_boundary_end_pfn[nid] == 0)
+		node_boundary_start_pfn[nid] = -1UL;
+
+	/* Update the boundaries */
+	if (node_boundary_start_pfn[nid] > start_pfn)
+		node_boundary_start_pfn[nid] = start_pfn;
+	if (node_boundary_end_pfn[nid] < end_pfn)
+		node_boundary_end_pfn[nid] = end_pfn;
+}
+
+/* If necessary, push the node boundary out for reserve hotadd */
+static void __init account_node_boundary(unsigned int nid,
+		unsigned long *start_pfn, unsigned long *end_pfn)
+{
+	printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n",
+			nid, *start_pfn, *end_pfn);
+
+	/* Return if boundary information has not been provided */
+	if (node_boundary_end_pfn[nid] == 0)
+		return;
+
+	/* Check the boundaries and update if necessary */
+	if (node_boundary_start_pfn[nid] < *start_pfn)
+		*start_pfn = node_boundary_start_pfn[nid];
+	if (node_boundary_end_pfn[nid] > *end_pfn)
+		*end_pfn = node_boundary_end_pfn[nid];
+}
+#else
+void __init push_node_boundaries(unsigned int nid,
+		unsigned long start_pfn, unsigned long end_pfn) {}
+
+static void __init account_node_boundary(unsigned int nid,
+		unsigned long *start_pfn, unsigned long *end_pfn) {}
+#endif
+
+
 /**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned
@@ -2121,6 +2181,9 @@ void __init get_pfn_range_for_nid(unsigned int nid,
 		printk(KERN_WARNING "Node %u active with no memory\n", nid);
 		*start_pfn = 0;
 	}
+
+	/* Push the node boundaries out if requested */
+	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -2527,6 +2590,10 @@ void __init remove_all_active_ranges()
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
+	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
+	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
+#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */
-- 
cgit v1.2.3


From e129b5c23c2b471d47f1c5d2b8b193fc2034af43 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 27 Sep 2006 01:50:00 -0700
Subject: [PATCH] vm: add per-zone writeout counter

The VM is supposed to minimise the number of pages which get written off the
LRU (for IO scheduling efficiency, and for high reclaim-success rates).  But
we don't actually have a clear way of showing how true this is.

So add `nr_vmscan_write' to /proc/vmstat and /proc/zoneinfo - the number of
pages which have been written by the vm scanner in this zone and globally.

Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 1 +
 mm/vmscan.c            | 3 ++-
 mm/vmstat.c            | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7fa1cbe9fa7a..1b0680cd84d2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -58,6 +58,7 @@ enum zone_stat_item {
 	NR_WRITEBACK,
 	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_BOUNCE,
+	NR_VMSCAN_WRITE,
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
 	NUMA_MISS,		/* allocated in non intended node */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 87779dda4ec6..87d340999ce8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -19,6 +19,7 @@
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
+#include <linux/vmstat.h>
 #include <linux/file.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
@@ -370,7 +371,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 			/* synchronous write or broken a_ops? */
 			ClearPageReclaim(page);
 		}
-
+		inc_zone_page_state(page, NR_VMSCAN_WRITE);
 		return PAGE_SUCCESS;
 	}
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 490d8c1a0ded..69c657132e1f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -465,6 +465,7 @@ static char *vmstat_text[] = {
 	"nr_writeback",
 	"nr_unstable",
 	"nr_bounce",
+	"nr_vmscan_write",
 
 #ifdef CONFIG_NUMA
 	"numa_hit",
-- 
cgit v1.2.3


From 5b99cd0effaf846240a15441aec459a592577eaf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 27 Sep 2006 01:50:01 -0700
Subject: [PATCH] own header file for struct page

This moves the definition of struct page from mm.h to its own header file
page-struct.h.  This is a prereq to fix SetPageUptodate which is broken on
s390:

#define SetPageUptodate(_page)
       do {
               struct page *__page = (_page);
               if (!test_and_set_bit(PG_uptodate, &__page->flags))
                       page_test_and_clear_dirty(_page);
       } while (0)

_page gets used twice in this macro which can cause subtle bugs.  Using
__page for the page_test_and_clear_dirty call doesn't work since it causes
yet another problem with the page_test_and_clear_dirty macro as well.

In order to avoid all these problems caused by macros it seems to be a good
idea to get rid of them and convert them to static inline functions.
Because of header file include order it's necessary to have a seperate
header file for the struct page definition.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h       | 62 +-------------------------------------------
 include/linux/mm_types.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmzone.h   |  5 ++++
 3 files changed, 73 insertions(+), 61 deletions(-)
 create mode 100644 include/linux/mm_types.h

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d046db31e76..7477fb59c4f2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -16,6 +16,7 @@
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
 #include <linux/backing-dev.h>
+#include <linux/mm_types.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -215,62 +216,6 @@ struct vm_operations_struct {
 struct mmu_gather;
 struct inode;
 
-/*
- * Each physical page in the system has a struct page associated with
- * it to keep track of whatever it is we are using the page for at the
- * moment. Note that we have no way to track which tasks are using
- * a page, though if it is a pagecache page, rmap structures can tell us
- * who is mapping it.
- */
-struct page {
-	unsigned long flags;		/* Atomic flags, some possibly
-					 * updated asynchronously */
-	atomic_t _count;		/* Usage count, see below. */
-	atomic_t _mapcount;		/* Count of ptes mapped in mms,
-					 * to show when page is mapped
-					 * & limit reverse map searches.
-					 */
-	union {
-	    struct {
-		unsigned long private;		/* Mapping-private opaque data:
-					 	 * usually used for buffer_heads
-						 * if PagePrivate set; used for
-						 * swp_entry_t if PageSwapCache;
-						 * indicates order in the buddy
-						 * system if PG_buddy is set.
-						 */
-		struct address_space *mapping;	/* If low bit clear, points to
-						 * inode address_space, or NULL.
-						 * If page mapped as anonymous
-						 * memory, low bit is set, and
-						 * it points to anon_vma object:
-						 * see PAGE_MAPPING_ANON below.
-						 */
-	    };
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
-	    spinlock_t ptl;
-#endif
-	};
-	pgoff_t index;			/* Our offset within mapping. */
-	struct list_head lru;		/* Pageout list, eg. active_list
-					 * protected by zone->lru_lock !
-					 */
-	/*
-	 * On machines where all RAM is mapped into kernel address space,
-	 * we can simply calculate the virtual address. On machines with
-	 * highmem some memory is mapped into kernel virtual memory
-	 * dynamically, so we need a place to store that address.
-	 * Note that this field could be 16 bits on x86 ... ;)
-	 *
-	 * Architectures with slow multiplication can define
-	 * WANT_PAGE_VIRTUAL in asm/page.h
-	 */
-#if defined(WANT_PAGE_VIRTUAL)
-	void *virtual;			/* Kernel virtual address (NULL if
-					   not kmapped, ie. highmem) */
-#endif /* WANT_PAGE_VIRTUAL */
-};
-
 #define page_private(page)		((page)->private)
 #define set_page_private(page, v)	((page)->private = (v))
 
@@ -546,11 +491,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
  */
 #include <linux/vmstat.h>
 
-#ifndef CONFIG_DISCONTIGMEM
-/* The array of struct pages - for discontigmem use pgdat->lmem_map */
-extern struct page *mem_map;
-#endif
-
 static __always_inline void *lowmem_page_address(struct page *page)
 {
 	return __va(page_to_pfn(page) << PAGE_SHIFT);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
new file mode 100644
index 000000000000..c3852fd4a1cc
--- /dev/null
+++ b/include/linux/mm_types.h
@@ -0,0 +1,67 @@
+#ifndef _LINUX_MM_TYPES_H
+#define _LINUX_MM_TYPES_H
+
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct address_space;
+
+/*
+ * Each physical page in the system has a struct page associated with
+ * it to keep track of whatever it is we are using the page for at the
+ * moment. Note that we have no way to track which tasks are using
+ * a page, though if it is a pagecache page, rmap structures can tell us
+ * who is mapping it.
+ */
+struct page {
+	unsigned long flags;		/* Atomic flags, some possibly
+					 * updated asynchronously */
+	atomic_t _count;		/* Usage count, see below. */
+	atomic_t _mapcount;		/* Count of ptes mapped in mms,
+					 * to show when page is mapped
+					 * & limit reverse map searches.
+					 */
+	union {
+	    struct {
+		unsigned long private;		/* Mapping-private opaque data:
+					 	 * usually used for buffer_heads
+						 * if PagePrivate set; used for
+						 * swp_entry_t if PageSwapCache;
+						 * indicates order in the buddy
+						 * system if PG_buddy is set.
+						 */
+		struct address_space *mapping;	/* If low bit clear, points to
+						 * inode address_space, or NULL.
+						 * If page mapped as anonymous
+						 * memory, low bit is set, and
+						 * it points to anon_vma object:
+						 * see PAGE_MAPPING_ANON below.
+						 */
+	    };
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+	    spinlock_t ptl;
+#endif
+	};
+	pgoff_t index;			/* Our offset within mapping. */
+	struct list_head lru;		/* Pageout list, eg. active_list
+					 * protected by zone->lru_lock !
+					 */
+	/*
+	 * On machines where all RAM is mapped into kernel address space,
+	 * we can simply calculate the virtual address. On machines with
+	 * highmem some memory is mapped into kernel virtual memory
+	 * dynamically, so we need a place to store that address.
+	 * Note that this field could be 16 bits on x86 ... ;)
+	 *
+	 * Architectures with slow multiplication can define
+	 * WANT_PAGE_VIRTUAL in asm/page.h
+	 */
+#if defined(WANT_PAGE_VIRTUAL)
+	void *virtual;			/* Kernel virtual address (NULL if
+					   not kmapped, ie. highmem) */
+#endif /* WANT_PAGE_VIRTUAL */
+};
+
+#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b0680cd84d2..562cf7a8f3ee 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -314,6 +314,11 @@ struct node_active_region {
 };
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
+#ifndef CONFIG_DISCONTIGMEM
+/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+extern struct page *mem_map;
+#endif
+
 /*
  * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
  * (mostly NUMA machines?) to denote a higher-level memory zone than the
-- 
cgit v1.2.3


From 08e0f6a9705376732fd3bc9bf8ba97a6b5211eb1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 27 Sep 2006 01:50:06 -0700
Subject: [PATCH] Add NUMA_BUILD definition in kernel.h to avoid #ifdef
 CONFIG_NUMA

The NUMA_BUILD constant is always available and will be set to 1 on
NUMA_BUILDs.  That way checks valid only under CONFIG_NUMA can easily be done
without #ifdef CONFIG_NUMA

F.e.

if (NUMA_BUILD && <numa_condition>) {
...
}

[akpm: not a thing we'd normally do, but CONFIG_NUMA is special: it is
 causing ifdef explosion in core kernel, so let's see if this is a comfortable
 way in whcih to control that]

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kernel.h |  7 +++++++
 mm/page_alloc.c        | 12 +++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4fa373bb18ac..4d00988dad03 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -350,4 +350,11 @@ struct sysinfo {
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
 
+/* This helps us to avoid #ifdef CONFIG_NUMA */
+#ifdef CONFIG_NUMA
+#define NUMA_BUILD 1
+#else
+#define NUMA_BUILD 0
+#endif
+
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8a6418b0d2c5..4c76188b1681 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -942,7 +942,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	 */
 	do {
 		zone = *z;
-		if (unlikely((gfp_mask & __GFP_THISNODE) &&
+		if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) &&
 			zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
 				break;
 		if ((alloc_flags & ALLOC_CPUSET) &&
@@ -1256,14 +1256,12 @@ unsigned int nr_free_pagecache_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
 }
-#ifdef CONFIG_NUMA
-static void show_node(struct zone *zone)
+
+static inline void show_node(struct zone *zone)
 {
-	printk("Node %ld ", zone_to_nid(zone));
+	if (NUMA_BUILD)
+		printk("Node %ld ", zone_to_nid(zone));
 }
-#else
-#define show_node(zone)	do { } while (0)
-#endif
 
 void si_meminfo(struct sysinfo *val)
 {
-- 
cgit v1.2.3


From 77f700dab4c05f8ee17584ec869672796d7bcb87 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 27 Sep 2006 01:50:07 -0700
Subject: [PATCH] Disable GFP_THISNODE in the non-NUMA case

GFP_THISNODE must be set to 0 in the non numa case otherwise we disable retry
and warnings for failing allocations in the SMP and UP case.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 8b34aabfe4c6..bf2b6bc3f6fd 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -67,7 +67,12 @@ struct vm_area_struct;
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
 
+#ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+#else
+#define GFP_THISNODE	0
+#endif
+
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */
-- 
cgit v1.2.3


From d5f541ed6e31518508c688912e7464facf253c87 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 27 Sep 2006 01:50:08 -0700
Subject: [PATCH] Add node to zone for the NUMA case

Add the node in order to optimize zone_to_nid.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     | 6 +++++-
 include/linux/mmzone.h | 1 +
 mm/page_alloc.c        | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7477fb59c4f2..8e433bbc6e7e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -446,7 +446,11 @@ static inline struct zone *page_zone(struct page *page)
 
 static inline unsigned long zone_to_nid(struct zone *zone)
 {
-	return zone->zone_pgdat->node_id;
+#ifdef CONFIG_NUMA
+	return zone->node;
+#else
+	return 0;
+#endif
 }
 
 static inline unsigned long page_to_nid(struct page *page)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 562cf7a8f3ee..59855b8718a0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -168,6 +168,7 @@ struct zone {
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
+	int node;
 	/*
 	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4c76188b1681..d0432e44f77d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2405,6 +2405,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 #ifdef CONFIG_NUMA
+		zone->node = nid;
 		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
 						/ 100;
 		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
-- 
cgit v1.2.3


From f4b81804a2d1ab341a4613089dc31ecce0800ed8 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Wed, 27 Sep 2006 01:50:10 -0700
Subject: [PATCH] do_no_pfn()

Implement do_no_pfn() for handling mapping of memory without a struct page
backing it.  This avoids creating fake page table entries for regions which
are not backed by real memory.

This feature is used by the MSPEC driver and other users, where it is
highly undesirable to have a struct page sitting behind the page (for
instance if the page is accessed in cached mode via the struct page in
parallel to the the driver accessing it uncached, which can result in data
corruption on some architectures, such as ia64).

This version uses specific NOPFN_{SIGBUS,OOM} return values, rather than
expect all negative pfn values would be an error.  It also bugs on cow
mappings as this would not work with the VM.

[akpm@osdl.org: micro-optimise]
Signed-off-by: Jes Sorensen <jes@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h |  7 ++++++
 mm/memory.c        | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 66 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e433bbc6e7e..22165cb18906 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -199,6 +199,7 @@ struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
+	unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
 
 	/* notification that a previously read-only page is about to become
@@ -593,6 +594,12 @@ static inline int page_mapped(struct page *page)
 #define NOPAGE_SIGBUS	(NULL)
 #define NOPAGE_OOM	((struct page *) (-1))
 
+/*
+ * Error return values for the *_nopfn functions
+ */
+#define NOPFN_SIGBUS	((unsigned long) -1)
+#define NOPFN_OOM	((unsigned long) -2)
+
 /*
  * Different kinds of faults, as returned by handle_mm_fault().
  * Used to decide whether a process gets delivered SIGBUS or
diff --git a/mm/memory.c b/mm/memory.c
index 92a3ebd8d795..f2ef1dcfff77 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2255,6 +2255,54 @@ oom:
 	return VM_FAULT_OOM;
 }
 
+/*
+ * do_no_pfn() tries to create a new page mapping for a page without
+ * a struct_page backing it
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ *
+ * It is expected that the ->nopfn handler always returns the same pfn
+ * for a given virtual mapping.
+ *
+ * Mark this `noinline' to prevent it from bloating the main pagefault code.
+ */
+static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long address, pte_t *page_table, pmd_t *pmd,
+		     int write_access)
+{
+	spinlock_t *ptl;
+	pte_t entry;
+	unsigned long pfn;
+	int ret = VM_FAULT_MINOR;
+
+	pte_unmap(page_table);
+	BUG_ON(!(vma->vm_flags & VM_PFNMAP));
+	BUG_ON(is_cow_mapping(vma->vm_flags));
+
+	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
+	if (pfn == NOPFN_OOM)
+		return VM_FAULT_OOM;
+	if (pfn == NOPFN_SIGBUS)
+		return VM_FAULT_SIGBUS;
+
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+
+	/* Only go through if we didn't race with anybody else... */
+	if (pte_none(*page_table)) {
+		entry = pfn_pte(pfn, vma->vm_page_prot);
+		if (write_access)
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		set_pte_at(mm, address, page_table, entry);
+	}
+	pte_unmap_unlock(page_table, ptl);
+	return ret;
+}
+
 /*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
@@ -2317,11 +2365,17 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	old_entry = entry = *pte;
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (!vma->vm_ops || !vma->vm_ops->nopage)
-				return do_anonymous_page(mm, vma, address,
-					pte, pmd, write_access);
-			return do_no_page(mm, vma, address,
-					pte, pmd, write_access);
+			if (vma->vm_ops) {
+				if (vma->vm_ops->nopage)
+					return do_no_page(mm, vma, address,
+							  pte, pmd,
+							  write_access);
+				if (unlikely(vma->vm_ops->nopfn))
+					return do_no_pfn(mm, vma, address, pte,
+							 pmd, write_access);
+			}
+			return do_anonymous_page(mm, vma, address,
+						 pte, pmd, write_access);
 		}
 		if (pte_file(entry))
 			return do_file_page(mm, vma, address,
-- 
cgit v1.2.3


From d24afc57d51b1be41f95521e81399061fa5875a6 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Wed, 27 Sep 2006 01:50:13 -0700
Subject: [PATCH] Mark __remove_vm_area() static

The function is exported but not used from anywhere else.  It's also marked as
"not for driver use" so noone out there should really care.

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/vmalloc.h | 1 -
 mm/vmalloc.c            | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index dee88c6b6fa7..ce5f1482e6be 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -62,7 +62,6 @@ extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 extern struct vm_struct *get_vm_area_node(unsigned long size,
 					unsigned long flags, int node);
 extern struct vm_struct *remove_vm_area(void *addr);
-extern struct vm_struct *__remove_vm_area(void *addr);
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page ***pages);
 extern void unmap_vm_area(struct vm_struct *area);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 659ec634856a..1ac191ce5641 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -272,7 +272,7 @@ static struct vm_struct *__find_vm_area(void *addr)
 }
 
 /* Caller must hold vmlist_lock */
-struct vm_struct *__remove_vm_area(void *addr)
+static struct vm_struct *__remove_vm_area(void *addr)
 {
 	struct vm_struct **p, *tmp;
 
-- 
cgit v1.2.3


From 5da6185bca064e35aa73a7c1f27488d2b96434f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Sep 2006 01:50:16 -0700
Subject: [PATCH] NOMMU: Set BDI capabilities for /dev/mem and /dev/kmem

Set the backing device info capabilities for /dev/mem and /dev/kmem to
permit direct sharing under no-MMU conditions and full mapping capabilities
under MMU conditions.  Make the BDI used by these available to all directly
mappable character devices.

Also comment the capabilities for /dev/zero.

[akpm@osdl.org: ifdef reductions]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/mem.c   | 39 +++++++++++++++++++++++++++++++++++++++
 fs/char_dev.c        | 20 ++++++++++++++++++++
 include/linux/cdev.h |  2 ++
 3 files changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 917b20402664..4ac70ec697f0 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -238,6 +238,32 @@ static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 #endif
 
+#ifndef CONFIG_MMU
+static unsigned long get_unmapped_area_mem(struct file *file,
+					   unsigned long addr,
+					   unsigned long len,
+					   unsigned long pgoff,
+					   unsigned long flags)
+{
+	if (!valid_mmap_phys_addr_range(pgoff, len))
+		return (unsigned long) -EINVAL;
+	return pgoff;
+}
+
+/* can't do an in-place private mapping if there's no MMU */
+static inline int private_mapping_ok(struct vm_area_struct *vma)
+{
+	return vma->vm_flags & VM_MAYSHARE;
+}
+#else
+#define get_unmapped_area_mem	NULL
+
+static inline int private_mapping_ok(struct vm_area_struct *vma)
+{
+	return 1;
+}
+#endif
+
 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 {
 	size_t size = vma->vm_end - vma->vm_start;
@@ -245,6 +271,9 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
 		return -EINVAL;
 
+	if (!private_mapping_ok(vma))
+		return -ENOSYS;
+
 	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
 						 size,
 						 vma->vm_page_prot);
@@ -782,6 +811,7 @@ static const struct file_operations mem_fops = {
 	.write		= write_mem,
 	.mmap		= mmap_mem,
 	.open		= open_mem,
+	.get_unmapped_area = get_unmapped_area_mem,
 };
 
 static const struct file_operations kmem_fops = {
@@ -790,6 +820,7 @@ static const struct file_operations kmem_fops = {
 	.write		= write_kmem,
 	.mmap		= mmap_kmem,
 	.open		= open_kmem,
+	.get_unmapped_area = get_unmapped_area_mem,
 };
 
 static const struct file_operations null_fops = {
@@ -815,6 +846,10 @@ static const struct file_operations zero_fops = {
 	.mmap		= mmap_zero,
 };
 
+/*
+ * capabilities for /dev/zero
+ * - permits private mappings, "copies" are taken of the source of zeros
+ */
 static struct backing_dev_info zero_bdi = {
 	.capabilities	= BDI_CAP_MAP_COPY,
 };
@@ -862,9 +897,13 @@ static int memory_open(struct inode * inode, struct file * filp)
 	switch (iminor(inode)) {
 		case 1:
 			filp->f_op = &mem_fops;
+			filp->f_mapping->backing_dev_info =
+				&directly_mappable_cdev_bdi;
 			break;
 		case 2:
 			filp->f_op = &kmem_fops;
+			filp->f_mapping->backing_dev_info =
+				&directly_mappable_cdev_bdi;
 			break;
 		case 3:
 			filp->f_op = &null_fops;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3483d3cf8087..0009346d827f 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -19,11 +19,30 @@
 #include <linux/kobj_map.h>
 #include <linux/cdev.h>
 #include <linux/mutex.h>
+#include <linux/backing-dev.h>
 
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
 
+/*
+ * capabilities for /dev/mem, /dev/kmem and similar directly mappable character
+ * devices
+ * - permits shared-mmap for read, write and/or exec
+ * - does not permit private mmap in NOMMU mode (can't do COW)
+ * - no readahead or I/O queue unplugging required
+ */
+struct backing_dev_info directly_mappable_cdev_bdi = {
+	.capabilities	= (
+#ifdef CONFIG_MMU
+		/* permit private copies of the data to be taken */
+		BDI_CAP_MAP_COPY |
+#endif
+		/* permit direct mmap, for read, write or exec */
+		BDI_CAP_MAP_DIRECT |
+		BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
+};
+
 static struct kobj_map *cdev_map;
 
 static DEFINE_MUTEX(chrdevs_lock);
@@ -461,3 +480,4 @@ EXPORT_SYMBOL(cdev_del);
 EXPORT_SYMBOL(cdev_add);
 EXPORT_SYMBOL(register_chrdev);
 EXPORT_SYMBOL(unregister_chrdev);
+EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index 2216638962d2..ee5f53f2ca15 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -23,5 +23,7 @@ void cdev_del(struct cdev *);
 
 void cd_forget(struct inode *);
 
+extern struct backing_dev_info directly_mappable_cdev_bdi;
+
 #endif
 #endif
-- 
cgit v1.2.3


From dbf8685c8e21404e3a8ed244bd0219d3c4b89101 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Sep 2006 01:50:19 -0700
Subject: [PATCH] NOMMU: Implement /proc/pid/maps for NOMMU

Implement /proc/pid/maps for NOMMU by reading the vm_area_list attached to
current->mm->context.vmlist.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/nommu-mmap.txt |  3 ++
 fs/proc/internal.h           |  1 +
 fs/proc/nommu.c              | 20 ++++++++----
 fs/proc/task_nommu.c         | 74 +++++++++++++++++++++++++++++++++++---------
 include/linux/proc_fs.h      |  2 ++
 5 files changed, 80 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt
index b88ebe4d808c..83f4b083d57e 100644
--- a/Documentation/nommu-mmap.txt
+++ b/Documentation/nommu-mmap.txt
@@ -116,6 +116,9 @@ FURTHER NOTES ON NO-MMU MMAP
  (*) A list of all the mappings on the system is visible through /proc/maps in
      no-MMU mode.
 
+ (*) A list of all the mappings in use by a process is visible through
+     /proc/<pid>/maps in no-MMU mode.
+
  (*) Supplying MAP_FIXED or a requesting a particular mapping address will
      result in an error.
 
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 146a434ba944..987c773dbb20 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -28,6 +28,7 @@ do {						\
 	(vmi)->largest_chunk = 0;		\
 } while(0)
 
+extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
 #endif
 
 extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f);
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index cff10ab1af63..d7dbdf9e0f49 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -33,19 +33,15 @@
 #include "internal.h"
 
 /*
- * display a list of all the VMAs the kernel knows about
- * - nommu kernals have a single flat list
+ * display a single VMA to a sequenced file
  */
-static int nommu_vma_list_show(struct seq_file *m, void *v)
+int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
 {
-	struct vm_area_struct *vma;
 	unsigned long ino = 0;
 	struct file *file;
 	dev_t dev = 0;
 	int flags, len;
 
-	vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
-
 	flags = vma->vm_flags;
 	file = vma->vm_file;
 
@@ -78,6 +74,18 @@ static int nommu_vma_list_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+/*
+ * display a list of all the VMAs the kernel knows about
+ * - nommu kernals have a single flat list
+ */
+static int nommu_vma_list_show(struct seq_file *m, void *v)
+{
+	struct vm_area_struct *vma;
+
+	vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
+	return nommu_vma_show(m, vma);
+}
+
 static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
 {
 	struct rb_node *_rb;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 4616ed50ffcd..091aa8e48e02 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -138,25 +138,63 @@ out:
 }
 
 /*
- * Albert D. Cahalan suggested to fake entries for the traditional
- * sections here.  This might be worth investigating.
+ * display mapping lines for a particular process's /proc/pid/maps
  */
-static int show_map(struct seq_file *m, void *v)
+static int show_map(struct seq_file *m, void *_vml)
 {
-	return 0;
+	struct vm_list_struct *vml = _vml;
+	return nommu_vma_show(m, vml->vma);
 }
+
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
+	struct proc_maps_private *priv = m->private;
+	struct vm_list_struct *vml;
+	struct mm_struct *mm;
+	loff_t n = *pos;
+
+	/* pin the task and mm whilst we play with them */
+	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+	if (!priv->task)
+		return NULL;
+
+	mm = get_task_mm(priv->task);
+	if (!mm) {
+		put_task_struct(priv->task);
+		priv->task = NULL;
+		return NULL;
+	}
+
+	down_read(&mm->mmap_sem);
+
+	/* start from the Nth VMA */
+	for (vml = mm->context.vmlist; vml; vml = vml->next)
+		if (n-- == 0)
+			return vml;
 	return NULL;
 }
-static void m_stop(struct seq_file *m, void *v)
+
+static void m_stop(struct seq_file *m, void *_vml)
 {
+	struct proc_maps_private *priv = m->private;
+
+	if (priv->task) {
+		struct mm_struct *mm = priv->task->mm;
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+		put_task_struct(priv->task);
+	}
 }
-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+
+static void *m_next(struct seq_file *m, void *_vml, loff_t *pos)
 {
-	return NULL;
+	struct vm_list_struct *vml = _vml;
+
+	(*pos)++;
+	return vml ? vml->next : NULL;
 }
-static struct seq_operations proc_pid_maps_op = {
+
+static struct seq_operations proc_pid_maps_ops = {
 	.start	= m_start,
 	.next	= m_next,
 	.stop	= m_stop,
@@ -165,11 +203,19 @@ static struct seq_operations proc_pid_maps_op = {
 
 static int maps_open(struct inode *inode, struct file *file)
 {
-	int ret;
-	ret = seq_open(file, &proc_pid_maps_op);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		m->private = NULL;
+	struct proc_maps_private *priv;
+	int ret = -ENOMEM;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv) {
+		priv->pid = proc_pid(inode);
+		ret = seq_open(file, &proc_pid_maps_ops);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			m->private = priv;
+		} else {
+			kfree(priv);
+		}
 	}
 	return ret;
 }
@@ -178,6 +224,6 @@ struct file_operations proc_maps_operations = {
 	.open		= maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release,
+	.release	= seq_release_private,
 };
 
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3435ca38dd14..57f70bc8b24b 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -268,7 +268,9 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
 struct proc_maps_private {
 	struct pid *pid;
 	struct task_struct *task;
+#ifdef CONFIG_MMU
 	struct vm_area_struct *tail_vma;
+#endif
 };
 
 #endif /* _LINUX_PROC_FS_H */
-- 
cgit v1.2.3


From f269fdd1829acc5e53bf57b145003e5733133f2b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Sep 2006 01:50:23 -0700
Subject: [PATCH] NOMMU: move the fallback arch_vma_name() to a sensible place

Move the fallback arch_vma_name() to a sensible place (kernel/signal.c).

Currently it's in fs/proc/task_mmu.c, a file that is dependent on both
CONFIG_PROC_FS and CONFIG_MMU being enabled, but it's used from
kernel/signal.c from where it is called unconditionally.

[akpm@osdl.org: build fix]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/task_mmu.c | 5 -----
 include/linux/mm.h | 2 +-
 kernel/signal.c    | 5 +++++
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 0a163a4f7764..6b769afac55a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -122,11 +122,6 @@ struct mem_size_stats
 	unsigned long private_dirty;
 };
 
-__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
-{
-	return NULL;
-}
-
 static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
 {
 	struct proc_maps_private *priv = m->private;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 22165cb18906..7b703b6d4358 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1131,7 +1131,7 @@ void drop_slab(void);
 extern int randomize_va_space;
 #endif
 
-const char *arch_vma_name(struct vm_area_struct *vma);
+__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index bfdb5686fa3e..05853a7337e3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2577,6 +2577,11 @@ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize)
 }
 #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
 
+__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+
 void __init signals_init(void)
 {
 	sigqueue_cachep =
-- 
cgit v1.2.3


From 7e96287ddc4f42081e18248b6167041c0908004c Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Wed, 27 Sep 2006 01:50:44 -0700
Subject: [PATCH] kdump: introduce "reset_devices" command line option

Resetting the devices during driver initialization can be a costly
operation in terms of time (especially scsi devices).  This option can be
used by drivers to know that user forcibly wants the devices to be reset
during initialization.

This option can be useful while kernel is booting in unreliable
environment.  For ex.  during kdump boot where devices are in unknown
random state and BIOS execution has been skipped.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt |  3 +++
 include/linux/init.h                |  1 +
 init/main.c                         | 20 ++++++++++++++++++++
 3 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 255ec535bba8..54983246930d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1370,6 +1370,9 @@ running once the system is up.
 			Reserves a hole at the top of the kernel virtual
 			address space.
 
+	reset_devices	[KNL] Force drivers to reset the underlying device
+			during initialization.
+
 	resume=		[SWSUSP]
 			Specify the partition device for software suspend
 
diff --git a/include/linux/init.h b/include/linux/init.h
index 6667785dd1ff..e92b1455d7af 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -68,6 +68,7 @@ extern initcall_t __security_initcall_start[], __security_initcall_end[];
 
 /* Defined in init/main.c */
 extern char saved_command_line[];
+extern unsigned int reset_devices;
 
 /* used by init/main.c */
 extern void setup_arch(char **);
diff --git a/init/main.c b/init/main.c
index 913e48d658ee..0766e69712b2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -127,6 +127,18 @@ static char *ramdisk_execute_command;
 /* Setup configured maximum number of CPUs to activate */
 static unsigned int max_cpus = NR_CPUS;
 
+/*
+ * If set, this is an indication to the drivers that reset the underlying
+ * device before going ahead with the initialization otherwise driver might
+ * rely on the BIOS and skip the reset operation.
+ *
+ * This is useful if kernel is booting in an unreliable environment.
+ * For ex. kdump situaiton where previous kernel has crashed, BIOS has been
+ * skipped and devices will be in unknown state.
+ */
+unsigned int reset_devices;
+EXPORT_SYMBOL(reset_devices);
+
 /*
  * Setup routine for controlling SMP activation
  *
@@ -153,6 +165,14 @@ static int __init maxcpus(char *str)
 
 __setup("maxcpus=", maxcpus);
 
+static int __init set_reset_devices(char *str)
+{
+	reset_devices = 1;
+	return 1;
+}
+
+__setup("reset_devices", set_reset_devices);
+
 static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
 char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
 static const char *panic_later, *panic_param;
-- 
cgit v1.2.3


From 8e18e2941c53416aa219708e7dcad21fb4bd6794 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:46 -0700
Subject: [PATCH] inode_diet: Replace inode.u.generic_ip with inode.i_private

The following patches reduce the size of the VFS inode structure by 28 bytes
on a UP x86.  (It would be more on an x86_64 system).  This is a 10% reduction
in the inode size on a UP kernel that is configured in a production mode
(i.e., with no spinlock or other debugging functions enabled; if you want to
save memory taken up by in-core inodes, the first thing you should do is
disable the debugging options; they are responsible for a huge amount of bloat
in the VFS inode structure).

This patch:

The filesystem or device-specific pointer in the inode is inside a union,
which is pretty pointless given that all 30+ users of this field have been
using the void pointer.  Get rid of the union and rename it to i_private, with
a comment to explain who is allowed to use the void pointer.  This is just a
cleanup, but it allows us to reuse the union 'u' for something something where
the union will actually be used.

[judith@osdl.org: powerpc build fix]
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Judith Lebzelter <judith@osdl.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/powerpc/platforms/cell/spufs/inode.c      |  2 +-
 arch/powerpc/platforms/pseries/hvCall_inst.c   |  2 +-
 arch/s390/hypfs/inode.c                        |  6 ++---
 arch/s390/kernel/debug.c                       |  2 +-
 block/blktrace.c                               |  2 +-
 drivers/i2c/chips/tps65010.c                   |  2 +-
 drivers/infiniband/hw/ipath/ipath_fs.c         | 12 ++++-----
 drivers/infiniband/ulp/ipoib/ipoib_fs.c        |  4 +--
 drivers/misc/ibmasm/ibmasmfs.c                 | 16 ++++++------
 drivers/net/irda/vlsi_ir.h                     |  2 +-
 drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c |  2 +-
 drivers/oprofile/oprofilefs.c                  | 10 ++++----
 drivers/pci/hotplug/cpqphp_sysfs.c             |  2 +-
 drivers/usb/core/devio.c                       |  2 +-
 drivers/usb/core/inode.c                       |  6 ++---
 drivers/usb/gadget/inode.c                     |  6 ++---
 drivers/usb/host/isp116x-hcd.c                 |  2 +-
 drivers/usb/host/uhci-debug.c                  |  2 +-
 drivers/usb/mon/mon_stat.c                     |  2 +-
 drivers/usb/mon/mon_text.c                     |  4 +--
 fs/autofs/inode.c                              |  2 +-
 fs/autofs/symlink.c                            |  2 +-
 fs/binfmt_misc.c                               |  8 +++---
 fs/debugfs/file.c                              |  4 +--
 fs/debugfs/inode.c                             |  4 +--
 fs/devpts/inode.c                              |  4 +--
 fs/freevxfs/vxfs.h                             |  2 +-
 fs/freevxfs/vxfs_inode.c                       |  4 +--
 fs/fuse/control.c                              |  6 ++---
 fs/inode.c                                     |  2 +-
 fs/jffs/inode-v23.c                            | 34 +++++++++++++-------------
 fs/libfs.c                                     |  2 +-
 fs/ocfs2/dlmglue.c                             |  2 +-
 include/linux/fs.h                             |  4 +--
 kernel/relay.c                                 |  2 +-
 security/inode.c                               |  8 +++---
 36 files changed, 88 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 7b4572805db9..b837f12a84ae 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -120,7 +120,7 @@ spufs_new_file(struct super_block *sb, struct dentry *dentry,
 	ret = 0;
 	inode->i_op = &spufs_file_iops;
 	inode->i_fop = fops;
-	inode->u.generic_ip = SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
+	inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
 	d_add(dentry, inode);
 out:
 	return ret;
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 641e6511cf06..446e17d162a5 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -85,7 +85,7 @@ static int hcall_inst_seq_open(struct inode *inode, struct file *file)
 
 	rc = seq_open(file, &hcall_inst_seq_ops);
 	seq = file->private_data;
-	seq->private = file->f_dentry->d_inode->u.generic_ip;
+	seq->private = file->f_dentry->d_inode->i_private;
 
 	return rc;
 }
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index bdade5f2e325..8735024d235b 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -104,13 +104,13 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode)
 
 static void hypfs_drop_inode(struct inode *inode)
 {
-	kfree(inode->u.generic_ip);
+	kfree(inode->i_private);
 	generic_delete_inode(inode);
 }
 
 static int hypfs_open(struct inode *inode, struct file *filp)
 {
-	char *data = filp->f_dentry->d_inode->u.generic_ip;
+	char *data = filp->f_dentry->d_inode->i_private;
 	struct hypfs_sb_info *fs_info;
 
 	if (filp->f_mode & FMODE_WRITE) {
@@ -352,7 +352,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb,
 		parent->d_inode->i_nlink++;
 	} else
 		BUG();
-	inode->u.generic_ip = data;
+	inode->i_private = data;
 	d_instantiate(dentry, inode);
 	dget(dentry);
 	return dentry;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 7ba20922a535..43f3d0c7e132 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -603,7 +603,7 @@ debug_open(struct inode *inode, struct file *file)
 	debug_info_t *debug_info, *debug_info_snapshot;
 
 	down(&debug_lock);
-	debug_info = (struct debug_info*)file->f_dentry->d_inode->u.generic_ip;
+	debug_info = file->f_dentry->d_inode->i_private;
 	/* find debug view */
 	for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
 		if (!debug_info->views[i])
diff --git a/block/blktrace.c b/block/blktrace.c
index 265f7a830619..2b4ef2b89b8d 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -217,7 +217,7 @@ static int blk_trace_remove(request_queue_t *q)
 
 static int blk_dropped_open(struct inode *inode, struct file *filp)
 {
-	filp->private_data = inode->u.generic_ip;
+	filp->private_data = inode->i_private;
 
 	return 0;
 }
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index 0be6fd6a267d..6a7578217177 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -305,7 +305,7 @@ static int dbg_show(struct seq_file *s, void *_)
 
 static int dbg_tps_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, dbg_show, inode->u.generic_ip);
+	return single_open(file, dbg_show, inode->i_private);
 }
 
 static struct file_operations debug_fops = {
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index a5eb30a06a5c..055cdd089b28 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -64,7 +64,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
 	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	inode->u.generic_ip = data;
+	inode->i_private = data;
 	if ((mode & S_IFMT) == S_IFDIR) {
 		inode->i_op = &simple_dir_inode_operations;
 		inode->i_nlink++;
@@ -119,7 +119,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf,
 	u16 i;
 	struct ipath_devdata *dd;
 
-	dd = file->f_dentry->d_inode->u.generic_ip;
+	dd = file->f_dentry->d_inode->i_private;
 
 	for (i = 0; i < NUM_COUNTERS; i++)
 		counters[i] = ipath_snap_cntr(dd, i);
@@ -139,7 +139,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
 	struct ipath_devdata *dd;
 	u64 guid;
 
-	dd = file->f_dentry->d_inode->u.generic_ip;
+	dd = file->f_dentry->d_inode->i_private;
 
 	guid = be64_to_cpu(dd->ipath_guid);
 
@@ -178,7 +178,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
 	u32 tmp, tmp2;
 	struct ipath_devdata *dd;
 
-	dd = file->f_dentry->d_inode->u.generic_ip;
+	dd = file->f_dentry->d_inode->i_private;
 
 	/* so we only initialize non-zero fields. */
 	memset(portinfo, 0, sizeof portinfo);
@@ -325,7 +325,7 @@ static ssize_t flash_read(struct file *file, char __user *buf,
 		goto bail;
 	}
 
-	dd = file->f_dentry->d_inode->u.generic_ip;
+	dd = file->f_dentry->d_inode->i_private;
 	if (ipath_eeprom_read(dd, pos, tmp, count)) {
 		ipath_dev_err(dd, "failed to read from flash\n");
 		ret = -ENXIO;
@@ -381,7 +381,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
 		goto bail_tmp;
 	}
 
-	dd = file->f_dentry->d_inode->u.generic_ip;
+	dd = file->f_dentry->d_inode->i_private;
 	if (ipath_eeprom_write(dd, pos, tmp, count)) {
 		ret = -ENXIO;
 		ipath_dev_err(dd, "failed to write to flash\n");
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 5dde380e8dbe..f1cb83688b31 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -141,7 +141,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file)
 		return ret;
 
 	seq = file->private_data;
-	seq->private = inode->u.generic_ip;
+	seq->private = inode->i_private;
 
 	return 0;
 }
@@ -247,7 +247,7 @@ static int ipoib_path_open(struct inode *inode, struct file *file)
 		return ret;
 
 	seq = file->private_data;
-	seq->private = inode->u.generic_ip;
+	seq->private = inode->i_private;
 
 	return 0;
 }
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 4a35caff5d02..0e909b617226 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -175,7 +175,7 @@ static struct dentry *ibmasmfs_create_file (struct super_block *sb,
 	}
 
 	inode->i_fop = fops;
-	inode->u.generic_ip = data;
+	inode->i_private = data;
 
 	d_add(dentry, inode);
 	return dentry;
@@ -244,7 +244,7 @@ static int command_file_open(struct inode *inode, struct file *file)
 {
 	struct ibmasmfs_command_data *command_data;
 
-	if (!inode->u.generic_ip)
+	if (!inode->i_private)
 		return -ENODEV;
 
 	command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL);
@@ -252,7 +252,7 @@ static int command_file_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	command_data->command = NULL;
-	command_data->sp = inode->u.generic_ip;
+	command_data->sp = inode->i_private;
 	file->private_data = command_data;
 	return 0;
 }
@@ -351,10 +351,10 @@ static int event_file_open(struct inode *inode, struct file *file)
 	struct ibmasmfs_event_data *event_data;
 	struct service_processor *sp; 
 
-	if (!inode->u.generic_ip)
+	if (!inode->i_private)
 		return -ENODEV;
 
-	sp = inode->u.generic_ip;
+	sp = inode->i_private;
 
 	event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL);
 	if (!event_data)
@@ -439,14 +439,14 @@ static int r_heartbeat_file_open(struct inode *inode, struct file *file)
 {
 	struct ibmasmfs_heartbeat_data *rhbeat;
 
-	if (!inode->u.generic_ip)
+	if (!inode->i_private)
 		return -ENODEV;
 
 	rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL);
 	if (!rhbeat)
 		return -ENOMEM;
 
-	rhbeat->sp = (struct service_processor *)inode->u.generic_ip;
+	rhbeat->sp = inode->i_private;
 	rhbeat->active = 0;
 	ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat);
 	file->private_data = rhbeat;
@@ -508,7 +508,7 @@ static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf,
 
 static int remote_settings_file_open(struct inode *inode, struct file *file)
 {
-	file->private_data = inode->u.generic_ip;
+	file->private_data = inode->i_private;
 	return 0;
 }
 
diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/net/irda/vlsi_ir.h
index a82a4ba8de4f..c37f0bc4c7f9 100644
--- a/drivers/net/irda/vlsi_ir.h
+++ b/drivers/net/irda/vlsi_ir.h
@@ -58,7 +58,7 @@ typedef void irqreturn_t;
 
 /* PDE() introduced in 2.5.4 */
 #ifdef CONFIG_PROC_FS
-#define PDE(inode) ((inode)->u.generic_ip)
+#define PDE(inode) ((inode)->i_private)
 #endif
 
 /* irda crc16 calculation exported in 2.5.42 */
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c b/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c
index 923275ea0789..b9df06a06ea9 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c
@@ -54,7 +54,7 @@ static ssize_t write_file_dummy(struct file *file, const char __user *buf,
 
 static int open_file_generic(struct inode *inode, struct file *file)
 {
-	file->private_data = inode->u.generic_ip;
+	file->private_data = inode->i_private;
 	return 0;
 }
 
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index 71c2da277d6e..deb37354785b 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -110,8 +110,8 @@ static ssize_t ulong_write_file(struct file * file, char const __user * buf, siz
 
 static int default_open(struct inode * inode, struct file * filp)
 {
-	if (inode->u.generic_ip)
-		filp->private_data = inode->u.generic_ip;
+	if (inode->i_private)
+		filp->private_data = inode->i_private;
 	return 0;
 }
 
@@ -158,7 +158,7 @@ int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root,
 	if (!d)
 		return -EFAULT;
 
-	d->d_inode->u.generic_ip = val;
+	d->d_inode->i_private = val;
 	return 0;
 }
 
@@ -171,7 +171,7 @@ int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root,
 	if (!d)
 		return -EFAULT;
 
-	d->d_inode->u.generic_ip = val;
+	d->d_inode->i_private = val;
 	return 0;
 }
 
@@ -197,7 +197,7 @@ int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
 	if (!d)
 		return -EFAULT;
 
-	d->d_inode->u.generic_ip = val;
+	d->d_inode->i_private = val;
 	return 0;
 }
 
diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c
index 8b3da007e859..5bab666cd67e 100644
--- a/drivers/pci/hotplug/cpqphp_sysfs.c
+++ b/drivers/pci/hotplug/cpqphp_sysfs.c
@@ -140,7 +140,7 @@ struct ctrl_dbg {
 
 static int open(struct inode *inode, struct file *file)
 {
-	struct controller *ctrl = inode->u.generic_ip;
+	struct controller *ctrl = inode->i_private;
 	struct ctrl_dbg *dbg;
 	int retval = -ENOMEM;
 
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 218621b9958e..32e03000420c 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -555,7 +555,7 @@ static int usbdev_open(struct inode *inode, struct file *file)
 	if (imajor(inode) == USB_DEVICE_MAJOR)
 		dev = usbdev_lookup_minor(iminor(inode));
 	if (!dev)
-		dev = inode->u.generic_ip;
+		dev = inode->i_private;
 	if (!dev) {
 		kfree(ps);
 		goto out;
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 3182c2224ba2..482f253085e5 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -402,8 +402,8 @@ static loff_t default_file_lseek (struct file *file, loff_t offset, int orig)
 
 static int default_open (struct inode *inode, struct file *file)
 {
-	if (inode->u.generic_ip)
-		file->private_data = inode->u.generic_ip;
+	if (inode->i_private)
+		file->private_data = inode->i_private;
 
 	return 0;
 }
@@ -509,7 +509,7 @@ static struct dentry *fs_create_file (const char *name, mode_t mode,
 	} else {
 		if (dentry->d_inode) {
 			if (data)
-				dentry->d_inode->u.generic_ip = data;
+				dentry->d_inode->i_private = data;
 			if (fops)
 				dentry->d_inode->i_fop = fops;
 			dentry->d_inode->i_uid = uid;
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 3bdc5e3ba234..ffaa8c1afad8 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -844,7 +844,7 @@ fail1:
 static int
 ep_open (struct inode *inode, struct file *fd)
 {
-	struct ep_data		*data = inode->u.generic_ip;
+	struct ep_data		*data = inode->i_private;
 	int			value = -EBUSY;
 
 	if (down_interruptible (&data->lock) != 0)
@@ -1909,7 +1909,7 @@ fail:
 static int
 dev_open (struct inode *inode, struct file *fd)
 {
-	struct dev_data		*dev = inode->u.generic_ip;
+	struct dev_data		*dev = inode->i_private;
 	int			value = -EBUSY;
 
 	if (dev->state == STATE_DEV_DISABLED) {
@@ -1970,7 +1970,7 @@ gadgetfs_make_inode (struct super_block *sb,
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime
 				= CURRENT_TIME;
-		inode->u.generic_ip = data;
+		inode->i_private = data;
 		inode->i_fop = fops;
 	}
 	return inode;
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index 5147ed4a6662..8c6b38a0b5bb 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -1204,7 +1204,7 @@ static int isp116x_show_dbg(struct seq_file *s, void *unused)
 
 static int isp116x_open_seq(struct inode *inode, struct file *file)
 {
-	return single_open(file, isp116x_show_dbg, inode->u.generic_ip);
+	return single_open(file, isp116x_show_dbg, inode->i_private);
 }
 
 static struct file_operations isp116x_debug_fops = {
diff --git a/drivers/usb/host/uhci-debug.c b/drivers/usb/host/uhci-debug.c
index dc286a48cafd..d1372cb27f33 100644
--- a/drivers/usb/host/uhci-debug.c
+++ b/drivers/usb/host/uhci-debug.c
@@ -428,7 +428,7 @@ struct uhci_debug {
 
 static int uhci_debug_open(struct inode *inode, struct file *file)
 {
-	struct uhci_hcd *uhci = inode->u.generic_ip;
+	struct uhci_hcd *uhci = inode->i_private;
 	struct uhci_debug *up;
 	int ret = -ENOMEM;
 	unsigned long flags;
diff --git a/drivers/usb/mon/mon_stat.c b/drivers/usb/mon/mon_stat.c
index 1fe01d994a79..86ad2b381c4b 100644
--- a/drivers/usb/mon/mon_stat.c
+++ b/drivers/usb/mon/mon_stat.c
@@ -28,7 +28,7 @@ static int mon_stat_open(struct inode *inode, struct file *file)
 	if ((sp = kmalloc(sizeof(struct snap), GFP_KERNEL)) == NULL)
 		return -ENOMEM;
 
-	mbus = inode->u.generic_ip;
+	mbus = inode->i_private;
 
 	sp->slen = snprintf(sp->str, STAT_BUF_SIZE,
 	    "nreaders %d events %u text_lost %u\n",
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index f961a770cee2..2fd39b4fa166 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -238,7 +238,7 @@ static int mon_text_open(struct inode *inode, struct file *file)
 	int rc;
 
 	mutex_lock(&mon_lock);
-	mbus = inode->u.generic_ip;
+	mbus = inode->i_private;
 	ubus = mbus->u_bus;
 
 	rp = kzalloc(sizeof(struct mon_reader_text), GFP_KERNEL);
@@ -401,7 +401,7 @@ static int mon_text_release(struct inode *inode, struct file *file)
 	struct mon_event_text *ep;
 
 	mutex_lock(&mon_lock);
-	mbus = inode->u.generic_ip;
+	mbus = inode->i_private;
 
 	if (mbus->nreaders <= 0) {
 		printk(KERN_ERR TAG ": consistency error on close\n");
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index c81d6b8c2828..d39d2acd9b38 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -241,7 +241,7 @@ static void autofs_read_inode(struct inode *inode)
 		
 		inode->i_op = &autofs_symlink_inode_operations;
 		sl = &sbi->symlink[n];
-		inode->u.generic_ip = sl;
+		inode->i_private = sl;
 		inode->i_mode = S_IFLNK | S_IRWXUGO;
 		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime;
 		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
index 52e8772b066e..c74f2eb65775 100644
--- a/fs/autofs/symlink.c
+++ b/fs/autofs/symlink.c
@@ -15,7 +15,7 @@
 /* Nothing to release.. */
 static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-	char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data;
+	char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data;
 	nd_set_link(nd, s);
 	return NULL;
 }
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 34ebbc191e46..6759b9839ce8 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -517,7 +517,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 
 static void bm_clear_inode(struct inode *inode)
 {
-	kfree(inode->u.generic_ip);
+	kfree(inode->i_private);
 }
 
 static void kill_node(Node *e)
@@ -545,7 +545,7 @@ static void kill_node(Node *e)
 static ssize_t
 bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
 {
-	Node *e = file->f_dentry->d_inode->u.generic_ip;
+	Node *e = file->f_dentry->d_inode->i_private;
 	loff_t pos = *ppos;
 	ssize_t res;
 	char *page;
@@ -579,7 +579,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 				size_t count, loff_t *ppos)
 {
 	struct dentry *root;
-	Node *e = file->f_dentry->d_inode->u.generic_ip;
+	Node *e = file->f_dentry->d_inode->i_private;
 	int res = parse_command(buffer, count);
 
 	switch (res) {
@@ -646,7 +646,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 	}
 
 	e->dentry = dget(dentry);
-	inode->u.generic_ip = e;
+	inode->i_private = e;
 	inode->i_fop = &bm_entry_operations;
 
 	d_instantiate(dentry, inode);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index e4b430552c88..bf3901ab1744 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -32,8 +32,8 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
 
 static int default_open(struct inode *inode, struct file *file)
 {
-	if (inode->u.generic_ip)
-		file->private_data = inode->u.generic_ip;
+	if (inode->i_private)
+		file->private_data = inode->i_private;
 
 	return 0;
 }
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3ca268d2e5a2..717f4821ed02 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -168,7 +168,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
  *          directory dentry if set.  If this paramater is NULL, then the
  *          file will be created in the root of the debugfs filesystem.
  * @data: a pointer to something that the caller will want to get to later
- *        on.  The inode.u.generic_ip pointer will point to this value on
+ *        on.  The inode.i_private pointer will point to this value on
  *        the open() call.
  * @fops: a pointer to a struct file_operations that should be used for
  *        this file.
@@ -209,7 +209,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode,
 
 	if (dentry->d_inode) {
 		if (data)
-			dentry->d_inode->u.generic_ip = data;
+			dentry->d_inode->i_private = data;
 		if (fops)
 			dentry->d_inode->i_fop = fops;
 	}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f7aef5bb584a..5bf06a10dddf 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -177,7 +177,7 @@ int devpts_pty_new(struct tty_struct *tty)
 	inode->i_gid = config.setgid ? config.gid : current->fsgid;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	init_special_inode(inode, S_IFCHR|config.mode, device);
-	inode->u.generic_ip = tty;
+	inode->i_private = tty;
 
 	dentry = get_node(number);
 	if (!IS_ERR(dentry) && !dentry->d_inode)
@@ -196,7 +196,7 @@ struct tty_struct *devpts_get_tty(int number)
 	tty = NULL;
 	if (!IS_ERR(dentry)) {
 		if (dentry->d_inode)
-			tty = dentry->d_inode->u.generic_ip;
+			tty = dentry->d_inode->i_private;
 		dput(dentry);
 	}
 
diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h
index d35979a58743..c8a92652612a 100644
--- a/fs/freevxfs/vxfs.h
+++ b/fs/freevxfs/vxfs.h
@@ -252,7 +252,7 @@ enum {
  * Get filesystem private data from VFS inode.
  */
 #define VXFS_INO(ip) \
-	((struct vxfs_inode_info *)(ip)->u.generic_ip)
+	((struct vxfs_inode_info *)(ip)->i_private)
 
 /*
  * Get filesystem private data from VFS superblock.
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ca6a39714771..32a82ed108e4 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -243,7 +243,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
 	ip->i_blocks = vip->vii_blocks;
 	ip->i_generation = vip->vii_gen;
 
-	ip->u.generic_ip = (void *)vip;
+	ip->i_private = vip;
 	
 }
 
@@ -338,5 +338,5 @@ vxfs_read_inode(struct inode *ip)
 void
 vxfs_clear_inode(struct inode *ip)
 {
-	kmem_cache_free(vxfs_inode_cachep, ip->u.generic_ip);
+	kmem_cache_free(vxfs_inode_cachep, ip->i_private);
 }
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 46fe60b2da23..79ec1f23d4d2 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
 {
 	struct fuse_conn *fc;
 	mutex_lock(&fuse_mutex);
-	fc = file->f_dentry->d_inode->u.generic_ip;
+	fc = file->f_dentry->d_inode->i_private;
 	if (fc)
 		fc = fuse_conn_get(fc);
 	mutex_unlock(&fuse_mutex);
@@ -98,7 +98,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
 		inode->i_op = iop;
 	inode->i_fop = fop;
 	inode->i_nlink = nlink;
-	inode->u.generic_ip = fc;
+	inode->i_private = fc;
 	d_add(dentry, inode);
 	return dentry;
 }
@@ -150,7 +150,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
 
 	for (i = fc->ctl_ndents - 1; i >= 0; i--) {
 		struct dentry *dentry = fc->ctl_dentry[i];
-		dentry->d_inode->u.generic_ip = NULL;
+		dentry->d_inode->i_private = NULL;
 		d_drop(dentry);
 		dput(dentry);
 	}
diff --git a/fs/inode.c b/fs/inode.c
index 0bf9f0444a96..77e254792025 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -163,7 +163,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 				bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
 			mapping->backing_dev_info = bdi;
 		}
-		memset(&inode->u, 0, sizeof(inode->u));
+		inode->i_private = 0;
 		inode->i_mapping = mapping;
 	}
 	return inode;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index b59553d28d13..7358ef87f16b 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -369,7 +369,7 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode,
 
 	f = jffs_find_file(c, raw_inode->ino);
 
-	inode->u.generic_ip = (void *)f;
+	inode->i_private = (void *)f;
 	insert_inode_hash(inode);
 
 	return inode;
@@ -442,7 +442,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	});
 
 	result = -ENOTDIR;
-	if (!(old_dir_f = (struct jffs_file *)old_dir->u.generic_ip)) {
+	if (!(old_dir_f = old_dir->i_private)) {
 		D(printk("jffs_rename(): Old dir invalid.\n"));
 		goto jffs_rename_end;
 	}
@@ -456,7 +456,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	/* Find the new directory.  */
 	result = -ENOTDIR;
-	if (!(new_dir_f = (struct jffs_file *)new_dir->u.generic_ip)) {
+	if (!(new_dir_f = new_dir->i_private)) {
 		D(printk("jffs_rename(): New dir invalid.\n"));
 		goto jffs_rename_end;
 	}
@@ -593,7 +593,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		else {
 			ddino = ((struct jffs_file *)
-				 inode->u.generic_ip)->pino;
+				 inode->i_private)->pino;
 		}
 		D3(printk("jffs_readdir(): \"..\" %u\n", ddino));
 		if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) {
@@ -604,7 +604,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		filp->f_pos++;
 	}
-	f = ((struct jffs_file *)inode->u.generic_ip)->children;
+	f = ((struct jffs_file *)inode->i_private)->children;
 
 	j = 2;
 	while(f && (f->deleted || j++ < filp->f_pos )) {
@@ -668,7 +668,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	}
 
 	r = -EACCES;
-	if (!(d = (struct jffs_file *)dir->u.generic_ip)) {
+	if (!(d = (struct jffs_file *)dir->i_private)) {
 		D(printk("jffs_lookup(): No such inode! (%lu)\n",
 			 dir->i_ino));
 		goto jffs_lookup_end;
@@ -739,7 +739,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
 	unsigned long read_len;
 	int result;
 	struct inode *inode = (struct inode*)page->mapping->host;
-	struct jffs_file *f = (struct jffs_file *)inode->u.generic_ip;
+	struct jffs_file *f = (struct jffs_file *)inode->i_private;
 	struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info;
 	int r;
 	loff_t offset;
@@ -828,7 +828,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	});
 
 	lock_kernel();
-	dir_f = (struct jffs_file *)dir->u.generic_ip;
+	dir_f = dir->i_private;
 
 	ASSERT(if (!dir_f) {
 		printk(KERN_ERR "jffs_mkdir(): No reference to a "
@@ -972,7 +972,7 @@ jffs_remove(struct inode *dir, struct dentry *dentry, int type)
 		kfree(_name);
 	});
 
-	dir_f = (struct jffs_file *) dir->u.generic_ip;
+	dir_f = dir->i_private;
 	c = dir_f->c;
 
 	result = -ENOENT;
@@ -1082,7 +1082,7 @@ jffs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	if (!old_valid_dev(rdev))
 		return -EINVAL;
 	lock_kernel();
-	dir_f = (struct jffs_file *)dir->u.generic_ip;
+	dir_f = dir->i_private;
 	c = dir_f->c;
 
 	D3(printk (KERN_NOTICE "mknod(): down biglock\n"));
@@ -1186,7 +1186,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 		kfree(_symname);
 	});
 
-	dir_f = (struct jffs_file *)dir->u.generic_ip;
+	dir_f = dir->i_private;
 	ASSERT(if (!dir_f) {
 		printk(KERN_ERR "jffs_symlink(): No reference to a "
 		       "jffs_file struct in inode.\n");
@@ -1289,7 +1289,7 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode,
 		kfree(s);
 	});
 
-	dir_f = (struct jffs_file *)dir->u.generic_ip;
+	dir_f = dir->i_private;
 	ASSERT(if (!dir_f) {
 		printk(KERN_ERR "jffs_create(): No reference to a "
 		       "jffs_file struct in inode.\n");
@@ -1403,9 +1403,9 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
 		goto out_isem;
 	}
 
-	if (!(f = (struct jffs_file *)inode->u.generic_ip)) {
-		D(printk("jffs_file_write(): inode->u.generic_ip = 0x%p\n",
-				inode->u.generic_ip));
+	if (!(f = inode->i_private)) {
+		D(printk("jffs_file_write(): inode->i_private = 0x%p\n",
+				inode->i_private));
 		goto out_isem;
 	}
 
@@ -1693,7 +1693,7 @@ jffs_read_inode(struct inode *inode)
 		mutex_unlock(&c->fmc->biglock);
 		return;
 	}
-	inode->u.generic_ip = (void *)f;
+	inode->i_private = f;
 	inode->i_mode = f->mode;
 	inode->i_nlink = f->nlink;
 	inode->i_uid = f->uid;
@@ -1748,7 +1748,7 @@ jffs_delete_inode(struct inode *inode)
 	lock_kernel();
 	inode->i_size = 0;
 	inode->i_blocks = 0;
-	inode->u.generic_ip = NULL;
+	inode->i_private = NULL;
 	clear_inode(inode);
 	if (inode->i_nlink == 0) {
 		c = (struct jffs_control *) inode->i_sb->s_fs_info;
diff --git a/fs/libfs.c b/fs/libfs.c
index ac02ea602c3d..2751793beeaa 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -547,7 +547,7 @@ int simple_attr_open(struct inode *inode, struct file *file,
 
 	attr->get = get;
 	attr->set = set;
-	attr->data = inode->u.generic_ip;
+	attr->data = inode->i_private;
 	attr->fmt = fmt;
 	mutex_init(&attr->mutex);
 
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index de887063dcfc..8801e41afe80 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2052,7 +2052,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
 		mlog_errno(ret);
 		goto out;
 	}
-	osb = (struct ocfs2_super *) inode->u.generic_ip;
+	osb = inode->i_private;
 	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
 	priv->p_dlm_debug = osb->osb_dlm_debug;
 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1d3e601ece73..4f77ec9c3353 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -554,9 +554,7 @@ struct inode {
 
 	atomic_t		i_writecount;
 	void			*i_security;
-	union {
-		void		*generic_ip;
-	} u;
+	void			*i_private; /* fs or device private pointer */
 #ifdef __NEED_I_SIZE_ORDERED
 	seqcount_t		i_size_seqcount;
 #endif
diff --git a/kernel/relay.c b/kernel/relay.c
index 33345e73485c..85786ff2a4f9 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -669,7 +669,7 @@ EXPORT_SYMBOL_GPL(relay_flush);
  */
 static int relay_file_open(struct inode *inode, struct file *filp)
 {
-	struct rchan_buf *buf = inode->u.generic_ip;
+	struct rchan_buf *buf = inode->i_private;
 	kref_get(&buf->kref);
 	filp->private_data = buf;
 
diff --git a/security/inode.c b/security/inode.c
index 47eb63480dac..176aacea8ca4 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -44,8 +44,8 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
 
 static int default_open(struct inode *inode, struct file *file)
 {
-	if (inode->u.generic_ip)
-		file->private_data = inode->u.generic_ip;
+	if (inode->i_private)
+		file->private_data = inode->i_private;
 
 	return 0;
 }
@@ -194,7 +194,7 @@ static int create_by_name(const char *name, mode_t mode,
  *          directory dentry if set.  If this paramater is NULL, then the
  *          file will be created in the root of the securityfs filesystem.
  * @data: a pointer to something that the caller will want to get to later
- *        on.  The inode.u.generic_ip pointer will point to this value on
+ *        on.  The inode.i_private pointer will point to this value on
  *        the open() call.
  * @fops: a pointer to a struct file_operations that should be used for
  *        this file.
@@ -240,7 +240,7 @@ struct dentry *securityfs_create_file(const char *name, mode_t mode,
 		if (fops)
 			dentry->d_inode->i_fop = fops;
 		if (data)
-			dentry->d_inode->u.generic_ip = data;
+			dentry->d_inode->i_private = data;
 	}
 exit:
 	return dentry;
-- 
cgit v1.2.3


From 4c1541680f8d189d21dd07b053bc12996574646e Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:47 -0700
Subject: [PATCH] inode-diet: Move i_pipe into a union

Move the i_pipe pointer into a union that will be shared with i_bdev and
i_cdev.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4f77ec9c3353..ca695fc8d69c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -528,9 +528,10 @@ struct inode {
 #ifdef CONFIG_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
-	/* These three should probably be a union */
 	struct list_head	i_devices;
-	struct pipe_inode_info	*i_pipe;
+	union {
+		struct pipe_inode_info	*i_pipe;
+	};
 	struct block_device	*i_bdev;
 	struct cdev		*i_cdev;
 	int			i_cindex;
-- 
cgit v1.2.3


From eaf796e7ef6014f208c409b2b14fddcfaafe7e3a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:48 -0700
Subject: [PATCH] inode-diet: Move i_bdev into a union

Move the i_bdev pointer in struct inode into a union.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c         | 2 +-
 include/linux/fs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 77e254792025..41b462232f7f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -254,7 +254,7 @@ void clear_inode(struct inode *inode)
 	DQUOT_DROP(inode);
 	if (inode->i_sb && inode->i_sb->s_op->clear_inode)
 		inode->i_sb->s_op->clear_inode(inode);
-	if (inode->i_bdev)
+	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
 		bd_forget(inode);
 	if (inode->i_cdev)
 		cd_forget(inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ca695fc8d69c..98ff684a5b1c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -531,8 +531,8 @@ struct inode {
 	struct list_head	i_devices;
 	union {
 		struct pipe_inode_info	*i_pipe;
+		struct block_device	*i_bdev;
 	};
-	struct block_device	*i_bdev;
 	struct cdev		*i_cdev;
 	int			i_cindex;
 
-- 
cgit v1.2.3


From 577c4eb09d1034d0739e3135fd2cff50588024be Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:49 -0700
Subject: [PATCH] inode-diet: Move i_cdev into a union

Move the i_cdev pointer in struct inode into a union.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file_table.c    | 2 +-
 fs/inode.c         | 2 +-
 include/linux/fs.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index 0131ba06e1ee..bc35a40417d7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -169,7 +169,7 @@ void fastcall __fput(struct file *file)
 	if (file->f_op && file->f_op->release)
 		file->f_op->release(inode, file);
 	security_file_free(file);
-	if (unlikely(inode->i_cdev != NULL))
+	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
 		cdev_put(inode->i_cdev);
 	fops_put(file->f_op);
 	if (file->f_mode & FMODE_WRITE)
diff --git a/fs/inode.c b/fs/inode.c
index 41b462232f7f..f5c04dd9ae8a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -256,7 +256,7 @@ void clear_inode(struct inode *inode)
 		inode->i_sb->s_op->clear_inode(inode);
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
 		bd_forget(inode);
-	if (inode->i_cdev)
+	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
 		cd_forget(inode);
 	inode->i_state = I_CLEAR;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98ff684a5b1c..192e69bb55b5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -532,8 +532,8 @@ struct inode {
 	union {
 		struct pipe_inode_info	*i_pipe;
 		struct block_device	*i_bdev;
+		struct cdev		*i_cdev;
 	};
-	struct cdev		*i_cdev;
 	int			i_cindex;
 
 	__u32			i_generation;
-- 
cgit v1.2.3


From ba52de123d454b57369f291348266d86f4b35070 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:49 -0700
Subject: [PATCH] inode-diet: Eliminate i_blksize from the inode structure

This eliminates the i_blksize field from struct inode.  Filesystems that want
to provide a per-inode st_blksize can do so by providing their own getattr
routine instead of using the generic_fillattr() function.

Note that some filesystems were providing pretty much random (and incorrect)
values for i_blksize.

[bunk@stusta.de: cleanup]
[akpm@osdl.org: generic_fillattr() fix]
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/powerpc/platforms/cell/spufs/inode.c |  1 -
 arch/s390/hypfs/inode.c                   |  1 -
 drivers/block/loop.c                      |  7 +++++--
 drivers/infiniband/hw/ipath/ipath_fs.c    |  1 -
 drivers/isdn/capi/capifs.c                |  2 --
 drivers/misc/ibmasm/ibmasmfs.c            |  1 -
 drivers/oprofile/oprofilefs.c             |  1 -
 drivers/usb/core/inode.c                  |  1 -
 drivers/usb/gadget/inode.c                |  1 -
 fs/9p/vfs_inode.c                         |  4 +---
 fs/adfs/inode.c                           |  1 -
 fs/afs/inode.c                            |  1 -
 fs/autofs/inode.c                         |  1 -
 fs/autofs4/inode.c                        |  1 -
 fs/befs/linuxvfs.c                        |  1 -
 fs/bfs/dir.c                              |  2 +-
 fs/bfs/inode.c                            |  1 -
 fs/binfmt_misc.c                          |  1 -
 fs/cifs/cifsfs.c                          |  1 -
 fs/cifs/readdir.c                         |  5 ++---
 fs/coda/coda_linux.c                      |  2 --
 fs/configfs/inode.c                       |  1 -
 fs/cramfs/inode.c                         |  1 -
 fs/debugfs/inode.c                        |  1 -
 fs/devpts/inode.c                         |  2 --
 fs/eventpoll.c                            |  1 -
 fs/ext2/ialloc.c                          |  1 -
 fs/ext2/inode.c                           |  1 -
 fs/ext3/ialloc.c                          |  1 -
 fs/ext3/inode.c                           |  3 ---
 fs/fat/inode.c                            |  3 ---
 fs/freevxfs/vxfs_inode.c                  |  1 -
 fs/fuse/inode.c                           |  1 -
 fs/hfs/inode.c                            |  2 --
 fs/hfsplus/inode.c                        |  2 --
 fs/hostfs/hostfs_kern.c                   |  1 -
 fs/hpfs/inode.c                           |  1 -
 fs/hppfs/hppfs_kern.c                     |  1 -
 fs/hugetlbfs/inode.c                      |  1 -
 fs/isofs/inode.c                          |  3 +--
 fs/jffs/inode-v23.c                       |  2 --
 fs/jffs2/fs.c                             |  2 --
 fs/jfs/jfs_extent.c                       |  2 +-
 fs/jfs/jfs_imap.c                         |  1 -
 fs/jfs/jfs_inode.c                        |  1 -
 fs/jfs/jfs_metapage.c                     |  2 +-
 fs/libfs.c                                |  2 --
 fs/minix/bitmap.c                         |  2 +-
 fs/minix/inode.c                          |  4 ++--
 fs/ncpfs/inode.c                          |  1 -
 fs/nfs/inode.c                            |  4 ----
 fs/ntfs/inode.c                           |  4 ----
 fs/ntfs/mft.c                             |  5 -----
 fs/ocfs2/dlm/dlmfs.c                      |  2 --
 fs/ocfs2/inode.c                          |  4 ----
 fs/pipe.c                                 |  1 -
 fs/qnx4/inode.c                           |  1 -
 fs/ramfs/inode.c                          |  1 -
 fs/reiserfs/inode.c                       |  4 ----
 fs/smbfs/inode.c                          |  2 --
 fs/smbfs/proc.c                           |  1 -
 fs/stat.c                                 |  3 ++-
 fs/sysfs/inode.c                          |  1 -
 fs/sysv/ialloc.c                          |  2 +-
 fs/sysv/inode.c                           |  2 +-
 fs/udf/ialloc.c                           |  1 -
 fs/udf/inode.c                            |  2 --
 fs/ufs/ialloc.c                           |  1 -
 fs/ufs/inode.c                            |  1 -
 fs/xfs/linux-2.6/xfs_super.c              |  1 -
 fs/xfs/linux-2.6/xfs_vnode.c              |  1 -
 include/linux/fs.h                        |  1 -
 include/linux/nfsd/nfsfh.h                | 10 ++--------
 include/linux/smb.h                       |  1 -
 ipc/mqueue.c                              |  1 -
 kernel/cpuset.c                           |  1 -
 mm/shmem.c                                |  1 -
 net/sunrpc/rpc_pipe.c                     |  1 -
 security/inode.c                          |  1 -
 security/selinux/selinuxfs.c              |  1 -
 80 files changed, 21 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index b837f12a84ae..3950ddccb2c8 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -82,7 +82,6 @@ spufs_new_inode(struct super_block *sb, int mode)
 	inode->i_mode = mode;
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 out:
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 8735024d235b..813fc21358f9 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -91,7 +91,6 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode)
 		ret->i_mode = mode;
 		ret->i_uid = hypfs_info->uid;
 		ret->i_gid = hypfs_info->gid;
-		ret->i_blksize = PAGE_CACHE_SIZE;
 		ret->i_blocks = 0;
 		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
 		if (mode & S_IFDIR)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 7b3b94ddddcc..c774121684d7 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -662,7 +662,8 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
 
 	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
 	lo->lo_backing_file = file;
-	lo->lo_blocksize = mapping->host->i_blksize;
+	lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
+		mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 	complete(&p->wait);
@@ -794,7 +795,9 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
 		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
 			lo_flags |= LO_FLAGS_READ_ONLY;
 
-		lo_blocksize = inode->i_blksize;
+		lo_blocksize = S_ISBLK(inode->i_mode) ?
+			inode->i_bdev->bd_block_size : PAGE_SIZE;
+
 		error = 0;
 	} else {
 		goto out_putf;
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 055cdd089b28..c8a8af0fe471 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -61,7 +61,6 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
 	inode->i_mode = mode;
 	inode->i_uid = 0;
 	inode->i_gid = 0;
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_private = data;
diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c
index 9ea6bd0ddc35..2dd1b57b0ba4 100644
--- a/drivers/isdn/capi/capifs.c
+++ b/drivers/isdn/capi/capifs.c
@@ -104,7 +104,6 @@ capifs_fill_super(struct super_block *s, void *data, int silent)
 	inode->i_ino = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_blocks = 0;
-	inode->i_blksize = 1024;
 	inode->i_uid = inode->i_gid = 0;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
 	inode->i_op = &simple_dir_inode_operations;
@@ -149,7 +148,6 @@ void capifs_new_ncci(unsigned int number, dev_t device)
 	if (!inode)
 		return;
 	inode->i_ino = number+2;
-	inode->i_blksize = 1024;
 	inode->i_uid = config.setuid ? config.uid : current->fsuid;
 	inode->i_gid = config.setgid ? config.gid : current->fsgid;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 0e909b617226..b99dc507de2e 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -147,7 +147,6 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
 	if (ret) {
 		ret->i_mode = mode;
 		ret->i_uid = ret->i_gid = 0;
-		ret->i_blksize = PAGE_CACHE_SIZE;
 		ret->i_blocks = 0;
 		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
 	}
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index deb37354785b..5756401fb15b 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -31,7 +31,6 @@ static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode)
 		inode->i_mode = mode;
 		inode->i_uid = 0;
 		inode->i_gid = 0;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	}
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 482f253085e5..58b4b1012120 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -249,7 +249,6 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de
 		inode->i_mode = mode;
 		inode->i_uid = current->fsuid;
 		inode->i_gid = current->fsgid;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index ffaa8c1afad8..2a7162d89799 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -1966,7 +1966,6 @@ gadgetfs_make_inode (struct super_block *sb,
 		inode->i_mode = mode;
 		inode->i_uid = default_uid;
 		inode->i_gid = default_gid;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime
 				= CURRENT_TIME;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index eae50c9d6dc4..7a7ec2d1d2f4 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -204,7 +204,6 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		inode->i_mode = mode;
 		inode->i_uid = current->fsuid;
 		inode->i_gid = current->fsgid;
-		inode->i_blksize = sb->s_blocksize;
 		inode->i_blocks = 0;
 		inode->i_rdev = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -950,9 +949,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
 
 	inode->i_size = stat->length;
 
-	inode->i_blksize = sb->s_blocksize;
 	inode->i_blocks =
-	    (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits;
+	    (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
 }
 
 /**
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 534f3eecc985..7e7a04be1278 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -269,7 +269,6 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
 	inode->i_ino	 = obj->file_id;
 	inode->i_size	 = obj->size;
 	inode->i_nlink	 = 2;
-	inode->i_blksize = PAGE_SIZE;
 	inode->i_blocks	 = (inode->i_size + sb->s_blocksize - 1) >>
 			    sb->s_blocksize_bits;
 
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4ebb30a50ed5..6f37754906c2 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -72,7 +72,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
 	inode->i_ctime.tv_sec	= vnode->status.mtime_server;
 	inode->i_ctime.tv_nsec	= 0;
 	inode->i_atime		= inode->i_mtime = inode->i_ctime;
-	inode->i_blksize	= PAGE_CACHE_SIZE;
 	inode->i_blocks		= 0;
 	inode->i_version	= vnode->fid.unique;
 	inode->i_mapping->a_ops	= &afs_fs_aops;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index d39d2acd9b38..2c9759baad61 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -216,7 +216,6 @@ static void autofs_read_inode(struct inode *inode)
 	inode->i_nlink = 2;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_blocks = 0;
-	inode->i_blksize = 1024;
 
 	if ( ino == AUTOFS_ROOT_INO ) {
 		inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 11a6a9ae51b7..800ce876caec 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -447,7 +447,6 @@ struct inode *autofs4_get_inode(struct super_block *sb,
 		inode->i_uid = 0;
 		inode->i_gid = 0;
 	}
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index f6676fbe9484..57020c7a7e65 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -365,7 +365,6 @@ befs_read_inode(struct inode *inode)
 	inode->i_mtime.tv_nsec = 0;   /* lower 16 bits are not a time */	
 	inode->i_ctime = inode->i_mtime;
 	inode->i_atime = inode->i_mtime;
-	inode->i_blksize = befs_sb->block_size;
 
 	befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num);
 	befs_ino->i_parent = fsrun_to_cpu(sb, raw_inode->parent);
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 26fad9621738..dcf04cb13283 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -102,7 +102,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode,
 	inode->i_uid = current->fsuid;
 	inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-	inode->i_blocks = inode->i_blksize = 0;
+	inode->i_blocks = 0;
 	inode->i_op = &bfs_file_inops;
 	inode->i_fop = &bfs_file_operations;
 	inode->i_mapping->a_ops = &bfs_aops;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8fc2e8e49dbe..ed27ffb3459e 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -76,7 +76,6 @@ static void bfs_read_inode(struct inode * inode)
 	inode->i_size = BFS_FILESIZE(di);
 	inode->i_blocks = BFS_FILEBLOCKS(di);
         if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
-	inode->i_blksize = PAGE_SIZE;
 	inode->i_atime.tv_sec =  le32_to_cpu(di->i_atime);
 	inode->i_mtime.tv_sec =  le32_to_cpu(di->i_mtime);
 	inode->i_ctime.tv_sec =  le32_to_cpu(di->i_ctime);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 6759b9839ce8..66ba137f8661 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -507,7 +507,6 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 		inode->i_mode = mode;
 		inode->i_uid = 0;
 		inode->i_gid = 0;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime =
 			current_fs_time(inode->i_sb);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4197a5043f13..22bcf4d7e7ae 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -253,7 +253,6 @@ cifs_alloc_inode(struct super_block *sb)
 	file data or metadata */
 	cifs_inode->clientCanCacheRead = FALSE;
 	cifs_inode->clientCanCacheAll = FALSE;
-	cifs_inode->vfs_inode.i_blksize = CIFS_MAX_MSGSIZE;
 	cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
 	cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;
 	INIT_LIST_HEAD(&cifs_inode->openFileList);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 9aeb58a7d369..b27b34537bf2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -216,10 +216,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 
 	if (allocation_size < end_of_file)
 		cFYI(1, ("May be sparse file, allocation less than file size"));
-	cFYI(1, ("File Size %ld and blocks %llu and blocksize %ld",
+	cFYI(1, ("File Size %ld and blocks %llu",
 		(unsigned long)tmp_inode->i_size,
-		(unsigned long long)tmp_inode->i_blocks,
-		tmp_inode->i_blksize));
+		(unsigned long long)tmp_inode->i_blocks));
 	if (S_ISREG(tmp_inode->i_mode)) {
 		cFYI(1, ("File inode"));
 		tmp_inode->i_op = &cifs_file_inode_ops;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 5597080cb811..95a54253c047 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -110,8 +110,6 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
 	        inode->i_nlink = attr->va_nlink;
 	if (attr->va_size != -1)
 	        inode->i_size = attr->va_size;
-	if (attr->va_blocksize != -1)
-		inode->i_blksize = attr->va_blocksize;
 	if (attr->va_size != -1)
 		inode->i_blocks = (attr->va_size + 511) >> 9;
 	if (attr->va_atime.tv_sec != -1) 
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 5047e6a7581e..fb18917954a9 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -135,7 +135,6 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
 {
 	struct inode * inode = new_inode(configfs_sb);
 	if (inode) {
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_mapping->a_ops = &configfs_aops;
 		inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index d09b6777c41a..ad96b6990715 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -73,7 +73,6 @@ static int cramfs_iget5_set(struct inode *inode, void *opaque)
 	inode->i_uid = cramfs_inode->uid;
 	inode->i_size = cramfs_inode->size;
 	inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_gid = cramfs_inode->gid;
 	/* Struct copy intentional */
 	inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 717f4821ed02..269e649e6dc6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -40,7 +40,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
 		inode->i_mode = mode;
 		inode->i_uid = 0;
 		inode->i_gid = 0;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5bf06a10dddf..5f7b5a6025bf 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -113,7 +113,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	inode->i_ino = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_blocks = 0;
-	inode->i_blksize = 1024;
 	inode->i_uid = inode->i_gid = 0;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
 	inode->i_op = &simple_dir_inode_operations;
@@ -172,7 +171,6 @@ int devpts_pty_new(struct tty_struct *tty)
 		return -ENOMEM;
 
 	inode->i_ino = number+2;
-	inode->i_blksize = 1024;
 	inode->i_uid = config.setuid ? config.uid : current->fsuid;
 	inode->i_gid = config.setgid ? config.gid : current->fsgid;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3a3567433b92..8d544334bcd2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1590,7 +1590,6 @@ static struct inode *ep_eventpoll_inode(void)
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	inode->i_blksize = PAGE_SIZE;
 	return inode;
 
 eexit_1:
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 695f69ccf908..2cb545bf0f3c 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -574,7 +574,6 @@ got:
 	inode->i_mode = mode;
 
 	inode->i_ino = ino;
-	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
 	memset(ei->i_data, 0, sizeof(ei->i_data));
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fb4d3220eb8d..dd4e14c221e0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1094,7 +1094,6 @@ void ext2_read_inode (struct inode * inode)
 		brelse (bh);
 		goto bad_inode;
 	}
-	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
 	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
 	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
 	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 1e4ded8aa3cd..e45dbd651736 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -559,7 +559,6 @@ got:
 
 	inode->i_ino = ino;
 	/* This is the optimal IO size (for stat), not the fs block size */
-	inode->i_blksize = PAGE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c9fc15f8b609..dcf4f1dd108b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2632,9 +2632,6 @@ void ext3_read_inode(struct inode * inode)
 		 * recovery code: that's fine, we're about to complete
 		 * the process of deleting those. */
 	}
-	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size
-					 * (for stat), not the fs block
-					 * size */  
 	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
 	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
 #ifdef EXT3_FRAGMENTS
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index e1035a590664..ab96ae823753 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -370,8 +370,6 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 			inode->i_flags |= S_IMMUTABLE;
 	}
 	MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED;
-	/* this is as close to the truth as we can get ... */
-	inode->i_blksize = sbi->cluster_size;
 	inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
 			   & ~((loff_t)sbi->cluster_size - 1)) >> 9;
 	inode->i_mtime.tv_sec =
@@ -1131,7 +1129,6 @@ static int fat_read_root(struct inode *inode)
 		MSDOS_I(inode)->i_start = 0;
 		inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry);
 	}
-	inode->i_blksize = sbi->cluster_size;
 	inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
 			   & ~((loff_t)sbi->cluster_size - 1)) >> 9;
 	MSDOS_I(inode)->i_logstart = 0;
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 32a82ed108e4..4786d51ad3bd 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -239,7 +239,6 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
 	ip->i_ctime.tv_nsec = 0;
 	ip->i_mtime.tv_nsec = 0;
 
-	ip->i_blksize = PAGE_SIZE;
 	ip->i_blocks = vip->vii_blocks;
 	ip->i_generation = vip->vii_gen;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7d25092262ae..cb7cadb0b790 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -118,7 +118,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
 	inode->i_uid     = attr->uid;
 	inode->i_gid     = attr->gid;
 	i_size_write(inode, attr->size);
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks  = attr->blocks;
 	inode->i_atime.tv_sec   = attr->atime;
 	inode->i_atime.tv_nsec  = attr->atimensec;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 315cf44a90b2..d05641c35fc9 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -154,7 +154,6 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
 	inode->i_gid = current->fsgid;
 	inode->i_nlink = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-	inode->i_blksize = HFS_SB(sb)->alloc_blksz;
 	HFS_I(inode)->flags = 0;
 	HFS_I(inode)->rsrc_inode = NULL;
 	HFS_I(inode)->fs_blocks = 0;
@@ -284,7 +283,6 @@ static int hfs_read_inode(struct inode *inode, void *data)
 	inode->i_uid = hsb->s_uid;
 	inode->i_gid = hsb->s_gid;
 	inode->i_nlink = 1;
-	inode->i_blksize = HFS_SB(inode->i_sb)->alloc_blksz;
 
 	if (idata->key)
 		HFS_I(inode)->cat_key = *idata->key;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 924ecdef8091..0eb1a6092668 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -304,7 +304,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
 	inode->i_gid = current->fsgid;
 	inode->i_nlink = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-	inode->i_blksize = HFSPLUS_SB(sb).alloc_blksz;
 	INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
 	init_MUTEX(&HFSPLUS_I(inode).extents_lock);
 	atomic_set(&HFSPLUS_I(inode).opencnt, 0);
@@ -407,7 +406,6 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
 	type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset);
 
 	HFSPLUS_I(inode).dev = 0;
-	inode->i_blksize = HFSPLUS_SB(inode->i_sb).alloc_blksz;
 	if (type == HFSPLUS_FOLDER) {
 		struct hfsplus_cat_folder *folder = &entry.folder;
 
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b82e3d9c8790..322e876c35ed 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -156,7 +156,6 @@ static int read_name(struct inode *ino, char *name)
 	ino->i_mode = i_mode;
 	ino->i_nlink = i_nlink;
 	ino->i_size = i_size;
-	ino->i_blksize = i_blksize;
 	ino->i_blocks = i_blocks;
 	return(0);
 }
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 56f2c338c4d9..bcf6ee36e065 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i)
 	i->i_gid = hpfs_sb(sb)->sb_gid;
 	i->i_mode = hpfs_sb(sb)->sb_mode;
 	hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv;
-	i->i_blksize = 512;
 	i->i_size = -1;
 	i->i_blocks = -1;
 	
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 3a9bdf58166f..dcb6d2e988b8 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -152,7 +152,6 @@ static void hppfs_read_inode(struct inode *ino)
 	ino->i_mode = proc_ino->i_mode;
 	ino->i_nlink = proc_ino->i_nlink;
 	ino->i_size = proc_ino->i_size;
-	ino->i_blksize = proc_ino->i_blksize;
 	ino->i_blocks = proc_ino->i_blocks;
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c3920c96dadf..e025a31b4c64 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -357,7 +357,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 		inode->i_mode = mode;
 		inode->i_uid = uid;
 		inode->i_gid = gid;
-		inode->i_blksize = HPAGE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 10e47897bac7..4527692f432b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1235,7 +1235,7 @@ static void isofs_read_inode(struct inode *inode)
 	}
 	inode->i_uid = sbi->s_uid;
 	inode->i_gid = sbi->s_gid;
-	inode->i_blocks = inode->i_blksize = 0;
+	inode->i_blocks = 0;
 
 	ei->i_format_parm[0] = 0;
 	ei->i_format_parm[1] = 0;
@@ -1291,7 +1291,6 @@ static void isofs_read_inode(struct inode *inode)
 			      isonum_711 (de->ext_attr_length));
 
 	/* Set the number of blocks for stat() - should be done before RR */
-	inode->i_blksize = PAGE_CACHE_SIZE; /* For stat() only */
 	inode->i_blocks  = (inode->i_size + 511) >> 9;
 
 	/*
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 7358ef87f16b..f5cf9c93e243 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -364,7 +364,6 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode,
 	inode->i_ctime.tv_nsec = 0;
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_nsec = 0;
-	inode->i_blksize = PAGE_SIZE;
 	inode->i_blocks = (inode->i_size + 511) >> 9;
 
 	f = jffs_find_file(c, raw_inode->ino);
@@ -1706,7 +1705,6 @@ jffs_read_inode(struct inode *inode)
 	inode->i_mtime.tv_nsec = 
 	inode->i_ctime.tv_nsec = 0;
 
-	inode->i_blksize = PAGE_SIZE;
 	inode->i_blocks = (inode->i_size + 511) >> 9;
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &jffs_file_inode_operations;
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 4780f82825d6..72d9909d95ff 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -263,7 +263,6 @@ void jffs2_read_inode (struct inode *inode)
 
 	inode->i_nlink = f->inocache->nlink;
 
-	inode->i_blksize = PAGE_SIZE;
 	inode->i_blocks = (inode->i_size + 511) >> 9;
 
 	switch (inode->i_mode & S_IFMT) {
@@ -449,7 +448,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
 	inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
 	ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime));
 
-	inode->i_blksize = PAGE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_size = 0;
 
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 4d52593a5fc6..4c74f0944f7e 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -468,7 +468,7 @@ int extRecord(struct inode *ip, xad_t * xp)
 int extFill(struct inode *ip, xad_t * xp)
 {
 	int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
-	s64 blkno = offsetXAD(xp) >> ip->i_blksize;
+	s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
 
 //      assert(ISSPARSE(ip));
 
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index ccbe60aff83d..369d7f39c040 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3115,7 +3115,6 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 	ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
 	ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
 	ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
-	ip->i_blksize = ip->i_sb->s_blocksize;
 	ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
 	ip->i_generation = le32_to_cpu(dip->di_gen);
 
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 495df402916d..bffaca9ae3a2 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -115,7 +115,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	}
 	jfs_inode->mode2 |= mode;
 
-	inode->i_blksize = sb->s_blocksize;
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	jfs_inode->otime = inode->i_ctime.tv_sec;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index e1e0a6e6ebdf..f5afc129d6b1 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -257,7 +257,7 @@ static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
 	int rc = 0;
 	int xflag;
 	s64 xaddr;
-	sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >>
+	sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
 			       inode->i_blkbits;
 
 	if (lblock >= file_blocks)
diff --git a/fs/libfs.c b/fs/libfs.c
index 2751793beeaa..8db5afb7b0a7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -383,7 +383,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
 		return -ENOMEM;
 	inode->i_mode = S_IFDIR | 0755;
 	inode->i_uid = inode->i_gid = 0;
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_op = &simple_dir_inode_operations;
@@ -405,7 +404,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
 			goto out;
 		inode->i_mode = S_IFREG | files->mode;
 		inode->i_uid = inode->i_gid = 0;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inode->i_fop = files->ops;
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 4a6abc49418e..df6b1075b549 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -254,7 +254,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
 	inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
 	inode->i_ino = j;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-	inode->i_blocks = inode->i_blksize = 0;
+	inode->i_blocks = 0;
 	memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u));
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 826b9d830650..c11a4b9fb863 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -396,7 +396,7 @@ static void V1_minix_read_inode(struct inode * inode)
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_nsec = 0;
 	inode->i_ctime.tv_nsec = 0;
-	inode->i_blocks = inode->i_blksize = 0;
+	inode->i_blocks = 0;
 	for (i = 0; i < 9; i++)
 		minix_inode->u.i1_data[i] = raw_inode->i_zone[i];
 	minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0]));
@@ -429,7 +429,7 @@ static void V2_minix_read_inode(struct inode * inode)
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_nsec = 0;
 	inode->i_ctime.tv_nsec = 0;
-	inode->i_blocks = inode->i_blksize = 0;
+	inode->i_blocks = 0;
 	for (i = 0; i < 10; i++)
 		minix_inode->u.i2_data[i] = raw_inode->i_zone[i];
 	minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0]));
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8244710e97dd..42e3bef270c9 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -223,7 +223,6 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
 	inode->i_nlink = 1;
 	inode->i_uid = server->m.uid;
 	inode->i_gid = server->m.gid;
-	inode->i_blksize = NCP_BLOCK_SIZE;
 
 	ncp_update_dates(inode, &nwinfo->i);
 	ncp_update_inode(inode, nwinfo);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 931f52a19579..bc9376ca86cd 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -277,10 +277,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 			 * report the blocks in 512byte units
 			 */
 			inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-			inode->i_blksize = inode->i_sb->s_blocksize;
 		} else {
 			inode->i_blocks = fattr->du.nfs2.blocks;
-			inode->i_blksize = fattr->du.nfs2.blocksize;
 		}
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
@@ -969,10 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		 * report the blocks in 512byte units
 		 */
 		inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-		inode->i_blksize = inode->i_sb->s_blocksize;
  	} else {
  		inode->i_blocks = fattr->du.nfs2.blocks;
- 		inode->i_blksize = fattr->du.nfs2.blocksize;
  	}
 
 	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 31852121b3f5..933dbd89c2a4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -556,8 +556,6 @@ static int ntfs_read_locked_inode(struct inode *vi)
 
 	/* Setup the generic vfs inode parts now. */
 
-	/* This is the optimal IO size (for stat), not the fs block size. */
-	vi->i_blksize = PAGE_CACHE_SIZE;
 	/*
 	 * This is for checking whether an inode has changed w.r.t. a file so
 	 * that the file can be updated if necessary (compare with f_version).
@@ -1234,7 +1232,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 	base_ni = NTFS_I(base_vi);
 
 	/* Just mirror the values from the base inode. */
-	vi->i_blksize	= base_vi->i_blksize;
 	vi->i_version	= base_vi->i_version;
 	vi->i_uid	= base_vi->i_uid;
 	vi->i_gid	= base_vi->i_gid;
@@ -1504,7 +1501,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 	ni	= NTFS_I(vi);
 	base_ni = NTFS_I(base_vi);
 	/* Just mirror the values from the base inode. */
-	vi->i_blksize	= base_vi->i_blksize;
 	vi->i_version	= base_vi->i_version;
 	vi->i_uid	= base_vi->i_uid;
 	vi->i_gid	= base_vi->i_gid;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 578fb3d5e803..584260fd6848 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2637,11 +2637,6 @@ mft_rec_already_initialized:
 			goto undo_mftbmp_alloc;
 		}
 		vi->i_ino = bit;
-		/*
-		 * This is the optimal IO size (for stat), not the fs block
-		 * size.
-		 */
-		vi->i_blksize = PAGE_CACHE_SIZE;
 		/*
 		 * This is for checking whether an inode has changed w.r.t. a
 		 * file so that the file can be updated if necessary (compare
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 0ff0898a0b9c..0368c6402182 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -335,7 +335,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
 		inode->i_mode = mode;
 		inode->i_uid = current->fsuid;
 		inode->i_gid = current->fsgid;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -362,7 +361,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
 	inode->i_mode = mode;
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 69d3db569166..16e8e74dc966 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -269,7 +269,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	inode->i_mode = le16_to_cpu(fe->i_mode);
 	inode->i_uid = le32_to_cpu(fe->i_uid);
 	inode->i_gid = le32_to_cpu(fe->i_gid);
-	inode->i_blksize = (u32)osb->s_clustersize;
 
 	/* Fast symlinks will have i_size but no allocated clusters. */
 	if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
@@ -1258,8 +1257,6 @@ leave:
 void ocfs2_refresh_inode(struct inode *inode,
 			 struct ocfs2_dinode *fe)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
@@ -1270,7 +1267,6 @@ void ocfs2_refresh_inode(struct inode *inode,
 	inode->i_uid = le32_to_cpu(fe->i_uid);
 	inode->i_gid = le32_to_cpu(fe->i_gid);
 	inode->i_mode = le16_to_cpu(fe->i_mode);
-	inode->i_blksize = (u32) osb->s_clustersize;
 	if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0)
 		inode->i_blocks = 0;
 	else
diff --git a/fs/pipe.c b/fs/pipe.c
index 20352573e025..f3b6f71e9d0b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -879,7 +879,6 @@ static struct inode * get_pipe_inode(void)
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	inode->i_blksize = PAGE_SIZE;
 
 	return inode;
 
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fddbd61c68d0..5a41db2a218d 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -496,7 +496,6 @@ static void qnx4_read_inode(struct inode *inode)
 	inode->i_ctime.tv_sec   = le32_to_cpu(raw_inode->di_ctime);
 	inode->i_ctime.tv_nsec = 0;
 	inode->i_blocks  = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size);
-	inode->i_blksize = QNX4_DIR_ENTRY_SIZE;
 
 	memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE);
 	if (S_ISREG(inode->i_mode)) {
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b9677335cc8d..bc0e51662424 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -58,7 +58,6 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
 		inode->i_mode = mode;
 		inode->i_uid = current->fsuid;
 		inode->i_gid = current->fsgid;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_mapping->a_ops = &ramfs_aops;
 		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 52f1e2136546..8810fda0da46 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -17,8 +17,6 @@
 #include <linux/writeback.h>
 #include <linux/quotaops.h>
 
-extern int reiserfs_default_io_size;	/* default io size devuned in super.c */
-
 static int reiserfs_commit_write(struct file *f, struct page *page,
 				 unsigned from, unsigned to);
 static int reiserfs_prepare_write(struct file *f, struct page *page,
@@ -1122,7 +1120,6 @@ static void init_inode(struct inode *inode, struct path *path)
 	ih = PATH_PITEM_HEAD(path);
 
 	copy_key(INODE_PKEY(inode), &(ih->ih_key));
-	inode->i_blksize = reiserfs_default_io_size;
 
 	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
 	REISERFS_I(inode)->i_flags = 0;
@@ -1877,7 +1874,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	}
 	// these do not go to on-disk stat data
 	inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
-	inode->i_blksize = reiserfs_default_io_size;
 
 	// store in in-core inode the key of stat data and version all
 	// object items will have (directory items will have old offset
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 92cf60aa6121..2c122ee83adb 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -166,7 +166,6 @@ smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
 	fattr->f_mtime	= inode->i_mtime;
 	fattr->f_ctime	= inode->i_ctime;
 	fattr->f_atime	= inode->i_atime;
-	fattr->f_blksize= inode->i_blksize;
 	fattr->f_blocks	= inode->i_blocks;
 
 	fattr->attr	= SMB_I(inode)->attr;
@@ -200,7 +199,6 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
 	inode->i_uid	= fattr->f_uid;
 	inode->i_gid	= fattr->f_gid;
 	inode->i_ctime	= fattr->f_ctime;
-	inode->i_blksize= fattr->f_blksize;
 	inode->i_blocks = fattr->f_blocks;
 	inode->i_size	= fattr->f_size;
 	inode->i_mtime	= fattr->f_mtime;
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index c3495059889d..40e174db9872 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -1826,7 +1826,6 @@ smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
 	fattr->f_nlink = 1;
 	fattr->f_uid = server->mnt->uid;
 	fattr->f_gid = server->mnt->gid;
-	fattr->f_blksize = SMB_ST_BLKSIZE;
 	fattr->f_unix = 0;
 }
 
diff --git a/fs/stat.c b/fs/stat.c
index 3a44dcf97da2..60a31d5e5966 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -14,6 +14,7 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/pagemap.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -32,7 +33,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
 	stat->ctime = inode->i_ctime;
 	stat->size = i_size_read(inode);
 	stat->blocks = inode->i_blocks;
-	stat->blksize = inode->i_blksize;
+	stat->blksize = (1 << inode->i_blkbits);
 }
 
 EXPORT_SYMBOL(generic_fillattr);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index fd7cd5f843d2..e79e38d52c00 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -125,7 +125,6 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
 {
 	struct inode * inode = new_inode(sysfs_sb);
 	if (inode) {
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_mapping->a_ops = &sysfs_aops;
 		inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 9b585d1081c0..115ab0d6f4bc 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -170,7 +170,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
 	inode->i_uid = current->fsuid;
 	inode->i_ino = fs16_to_cpu(sbi, ino);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-	inode->i_blocks = inode->i_blksize = 0;
+	inode->i_blocks = 0;
 	memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data));
 	SYSV_I(inode)->i_dir_start_lookup = 0;
 	insert_inode_hash(inode);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 58b2d22142ba..d63c5e48b050 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -201,7 +201,7 @@ static void sysv_read_inode(struct inode *inode)
 	inode->i_ctime.tv_nsec = 0;
 	inode->i_atime.tv_nsec = 0;
 	inode->i_mtime.tv_nsec = 0;
-	inode->i_blocks = inode->i_blksize = 0;
+	inode->i_blocks = 0;
 
 	si = SYSV_I(inode);
 	for (block = 0; block < 10+1+1+1; block++)
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index d1d38238ce65..8206983f2ebf 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -121,7 +121,6 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
 	UDF_I_LOCATION(inode).logicalBlockNum = block;
 	UDF_I_LOCATION(inode).partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum;
 	inode->i_ino = udf_get_lb_pblock(sb, UDF_I_LOCATION(inode), 0);
-	inode->i_blksize = PAGE_SIZE;
 	inode->i_blocks = 0;
 	UDF_I_LENEATTR(inode) = 0;
 	UDF_I_LENALLOC(inode) = 0;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 605f5111b6d8..b223b32db991 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -916,8 +916,6 @@ __udf_read_inode(struct inode *inode)
 	 *      i_nlink = 1
 	 *      i_op = NULL;
 	 */
-	inode->i_blksize = PAGE_SIZE;
-
 	bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident);
 
 	if (!bh)
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 9501dcd3b213..2ad1259c6eca 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -255,7 +255,6 @@ cg_found:
 		inode->i_gid = current->fsgid;
 
 	inode->i_ino = cg * uspi->s_ipg + bit;
-	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
 	ufsi->i_flags = UFS_I(dir)->i_flags;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 30c6e8a9446c..ee1eaa6f4ec2 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -741,7 +741,6 @@ void ufs_read_inode(struct inode * inode)
 		ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
 	}
 
-	inode->i_blksize = PAGE_SIZE;/*This is the optimal IO size (for stat)*/
 	inode->i_version++;
 	ufsi->i_lastfrag =
 		(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4754f342a5d3..9df9ed37d219 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -171,7 +171,6 @@ xfs_revalidate_inode(
 		break;
 	}
 
-	inode->i_blksize = xfs_preferred_iosize(mp);
 	inode->i_generation = ip->i_d.di_gen;
 	i_size_write(inode, ip->i_d.di_size);
 	inode->i_blocks =
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 6628d96b6fd6..553fa731ade5 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -122,7 +122,6 @@ vn_revalidate_core(
 	inode->i_blocks	    = vap->va_nblocks;
 	inode->i_mtime	    = vap->va_mtime;
 	inode->i_ctime	    = vap->va_ctime;
-	inode->i_blksize    = vap->va_blocksize;
 	if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
 		inode->i_flags |= S_IMMUTABLE;
 	else
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 192e69bb55b5..8f74dfbb2edd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -512,7 +512,6 @@ struct inode {
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
 	unsigned int		i_blkbits;
-	unsigned long		i_blksize;
 	unsigned long		i_version;
 	blkcnt_t		i_blocks;
 	unsigned short          i_bytes;
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index f9edcd2ff3c8..31a3cb617ce0 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -269,14 +269,8 @@ fill_post_wcc(struct svc_fh *fhp)
 	fhp->fh_post_uid	= inode->i_uid;
 	fhp->fh_post_gid	= inode->i_gid;
 	fhp->fh_post_size       = inode->i_size;
-	if (inode->i_blksize) {
-		fhp->fh_post_blksize    = inode->i_blksize;
-		fhp->fh_post_blocks     = inode->i_blocks;
-	} else {
-		fhp->fh_post_blksize    = BLOCK_SIZE;
-		/* how much do we care for accuracy with MinixFS? */
-		fhp->fh_post_blocks     = (inode->i_size+511) >> 9;
-	}
+	fhp->fh_post_blksize    = BLOCK_SIZE;
+	fhp->fh_post_blocks     = inode->i_blocks;
 	fhp->fh_post_rdev[0]    = htonl((u32)imajor(inode));
 	fhp->fh_post_rdev[1]    = htonl((u32)iminor(inode));
 	fhp->fh_post_atime      = inode->i_atime;
diff --git a/include/linux/smb.h b/include/linux/smb.h
index 6df3b1501559..f098dff93f6b 100644
--- a/include/linux/smb.h
+++ b/include/linux/smb.h
@@ -89,7 +89,6 @@ struct smb_fattr {
 	struct timespec	f_atime;
 	struct timespec f_mtime;
 	struct timespec f_ctime;
-	unsigned long	f_blksize;
 	unsigned long	f_blocks;
 	int		f_unix;
 };
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index f8381f0660b2..840f8a6fb85f 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -115,7 +115,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 		inode->i_mode = mode;
 		inode->i_uid = current->fsuid;
 		inode->i_gid = current->fsgid;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_mtime = inode->i_ctime = inode->i_atime =
 				CURRENT_TIME;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index cff41511269f..1b32c2c04c15 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -289,7 +289,6 @@ static struct inode *cpuset_new_inode(mode_t mode)
 		inode->i_mode = mode;
 		inode->i_uid = current->fsuid;
 		inode->i_gid = current->fsgid;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
diff --git a/mm/shmem.c b/mm/shmem.c
index 0a8e29cf87e0..eda907c3a86a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1351,7 +1351,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
 		inode->i_mode = mode;
 		inode->i_uid = current->fsuid;
 		inode->i_gid = current->fsgid;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index f65bea74734d..700c6e061a04 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -488,7 +488,6 @@ rpc_get_inode(struct super_block *sb, int mode)
 		return NULL;
 	inode->i_mode = mode;
 	inode->i_uid = inode->i_gid = 0;
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	switch(mode & S_IFMT) {
diff --git a/security/inode.c b/security/inode.c
index 176aacea8ca4..49ee51529396 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -64,7 +64,6 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev)
 		inode->i_mode = mode;
 		inode->i_uid = 0;
 		inode->i_gid = 0;
-		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 00534c302ba2..bab7b386cb8d 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -771,7 +771,6 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode)
 	if (ret) {
 		ret->i_mode = mode;
 		ret->i_uid = ret->i_gid = 0;
-		ret->i_blksize = PAGE_CACHE_SIZE;
 		ret->i_blocks = 0;
 		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
 	}
-- 
cgit v1.2.3


From ebba5f9fcb882306bef7175dee987342ec6fcf2f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 27 Sep 2006 01:50:55 -0700
Subject: [PATCH] consistently use MAX_ERRNO in __syscall_return

Consistently use MAX_ERRNO when checking for errors in __syscall_return().

[ralf@linux-mips.org: build fix]
Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-arm/unistd.h       | 3 ++-
 include/asm-arm26/unistd.h     | 3 ++-
 include/asm-frv/unistd.h       | 3 ++-
 include/asm-h8300/unistd.h     | 6 +++---
 include/asm-i386/unistd.h      | 5 +++--
 include/asm-m32r/unistd.h      | 5 +++--
 include/asm-m68k/unistd.h      | 5 +++--
 include/asm-m68knommu/unistd.h | 5 +++--
 include/asm-s390/unistd.h      | 4 +++-
 include/asm-sh/unistd.h        | 7 +++++--
 include/asm-sh64/unistd.h      | 6 ++++--
 include/asm-v850/unistd.h      | 5 +++--
 include/asm-x86_64/unistd.h    | 5 +++--
 include/linux/err.h            | 4 ++++
 14 files changed, 43 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index 1e891f860ef3..2ab4078334bf 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -377,6 +377,7 @@
 #endif
 
 #ifdef __KERNEL__
+#include <linux/err.h>
 #include <linux/linkage.h>
 
 #define __sys2(x) #x
@@ -396,7 +397,7 @@
 
 #define __syscall_return(type, res)					\
 do {									\
-	if ((unsigned long)(res) >= (unsigned long)(-129)) {		\
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) {	\
 		errno = -(res);						\
 		res = -1;						\
 	}								\
diff --git a/include/asm-arm26/unistd.h b/include/asm-arm26/unistd.h
index 70eb6d91cfd0..c6d2436c9d34 100644
--- a/include/asm-arm26/unistd.h
+++ b/include/asm-arm26/unistd.h
@@ -311,6 +311,7 @@
 #define __ARM_NR_usr26			(__ARM_NR_BASE+3)
 
 #ifdef __KERNEL__
+#include <linux/err.h>
 #include <linux/linkage.h>
 
 #define __sys2(x) #x
@@ -322,7 +323,7 @@
 
 #define __syscall_return(type, res)					\
 do {									\
-	if ((unsigned long)(res) >= (unsigned long)(-125)) {		\
+	if ((unsigned long)(res) >= (unsigned long)-MAX_ERRNO) {	\
 		errno = -(res);						\
 		res = -1;						\
 	}								\
diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h
index b80dbd839475..d104d1b91d39 100644
--- a/include/asm-frv/unistd.h
+++ b/include/asm-frv/unistd.h
@@ -320,6 +320,7 @@
 #ifdef __KERNEL__
 
 #define NR_syscalls 310
+#include <linux/err.h>
 
 /*
  * process the return value of a syscall, consigning it to one of two possible fates
@@ -329,7 +330,7 @@
 #define __syscall_return(type, res)					\
 do {									\
         unsigned long __sr2 = (res);					\
-	if (__builtin_expect(__sr2 >= (unsigned long)(-4095), 0)) {	\
+	if (__builtin_expect(__sr2 >= (unsigned long)(-MAX_ERRNO), 0)) { \
 		errno = (-__sr2);					\
 		__sr2 = ~0UL;						\
 	}								\
diff --git a/include/asm-h8300/unistd.h b/include/asm-h8300/unistd.h
index 226dd596c2da..a2dd90462d80 100644
--- a/include/asm-h8300/unistd.h
+++ b/include/asm-h8300/unistd.h
@@ -295,14 +295,14 @@
 #ifdef __KERNEL__
 
 #define NR_syscalls 289
+#include <linux/err.h>
 
-
-/* user-visible error numbers are in the range -1 - -122: see
+/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see
    <asm-m68k/errno.h> */
 
 #define __syscall_return(type, res) \
 do { \
-	if ((unsigned long)(res) >= (unsigned long)(-125)) { \
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \
 	/* avoid using res which is declared to be in register d0; \
 	   errno might expand to a function call and clobber it.  */ \
 		int __err = -(res); \
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 565d0897b205..bd9987087adc 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -328,14 +328,15 @@
 #ifdef __KERNEL__
 
 #define NR_syscalls 319
+#include <linux/err.h>
 
 /*
- * user-visible error numbers are in the range -1 - -128: see
+ * user-visible error numbers are in the range -1 - -MAX_ERRNO: see
  * <asm-i386/errno.h>
  */
 #define __syscall_return(type, res) \
 do { \
-	if ((unsigned long)(res) >= (unsigned long)(-(128 + 1))) { \
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \
 		errno = -(res); \
 		res = -1; \
 	} \
diff --git a/include/asm-m32r/unistd.h b/include/asm-m32r/unistd.h
index 89f376e6229f..5c6a9ac6cf1a 100644
--- a/include/asm-m32r/unistd.h
+++ b/include/asm-m32r/unistd.h
@@ -296,8 +296,9 @@
 #ifdef __KERNEL__
 
 #define NR_syscalls 285
+#include <linux/err.h>
 
-/* user-visible error numbers are in the range -1 - -124: see
+/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see
  * <asm-m32r/errno.h>
  */
 
@@ -305,7 +306,7 @@
 
 #define __syscall_return(type, res) \
 do { \
-	if ((unsigned long)(res) >= (unsigned long)(-(124 + 1))) { \
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \
 	/* Avoid using "res" which is declared to be in register r0; \
 	   errno might expand to a function call and clobber it.  */ \
 		int __err = -(res); \
diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h
index 7c0b6296b45c..751632b904db 100644
--- a/include/asm-m68k/unistd.h
+++ b/include/asm-m68k/unistd.h
@@ -288,13 +288,14 @@
 #ifdef __KERNEL__
 
 #define NR_syscalls		282
+#include <linux/err.h>
 
-/* user-visible error numbers are in the range -1 - -124: see
+/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see
    <asm-m68k/errno.h> */
 
 #define __syscall_return(type, res) \
 do { \
-	if ((unsigned long)(res) >= (unsigned long)(-125)) { \
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \
 	/* avoid using res which is declared to be in register d0; \
 	   errno might expand to a function call and clobber it.  */ \
 		int __err = -(res); \
diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h
index 1b2abdf281e1..21fdc37c5c2c 100644
--- a/include/asm-m68knommu/unistd.h
+++ b/include/asm-m68knommu/unistd.h
@@ -289,13 +289,14 @@
 #ifdef __KERNEL__
 
 #define NR_syscalls		282
+#include <linux/err.h>
 
-/* user-visible error numbers are in the range -1 - -122: see
+/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see
    <asm-m68k/errno.h> */
 
 #define __syscall_return(type, res) \
 do { \
-	if ((unsigned long)(res) >= (unsigned long)(-125)) { \
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \
 	/* avoid using res which is declared to be in register d0; \
 	   errno might expand to a function call and clobber it.  */ \
 		int __err = -(res); \
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index 02b942d85c37..d49c54cb5505 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -342,9 +342,11 @@
 
 #ifdef __KERNEL__
 
+#include <linux/err.h>
+
 #define __syscall_return(type, res)			     \
 do {							     \
-	if ((unsigned long)(res) >= (unsigned long)(-4095)) {\
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \
 		errno = -(res);				     \
 		res = -1;				     \
 	}						     \
diff --git a/include/asm-sh/unistd.h b/include/asm-sh/unistd.h
index 76b5430cb458..da127d7901af 100644
--- a/include/asm-sh/unistd.h
+++ b/include/asm-sh/unistd.h
@@ -306,11 +306,14 @@
 
 #ifdef __KERNEL__
 
-/* user-visible error numbers are in the range -1 - -124: see <asm-sh/errno.h> */
+#include <linux/err.h>
+
+/* user-visible error numbers are in the range -1 - -MAX_ERRNO:
+ * see <asm-sh/errno.h> */
 
 #define __syscall_return(type, res) \
 do { \
-	if ((unsigned long)(res) >= (unsigned long)(-124)) { \
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \
 	/* Avoid using "res" which is declared to be in register r0; \
 	   errno might expand to a function call and clobber it.  */ \
 		int __err = -(res); \
diff --git a/include/asm-sh64/unistd.h b/include/asm-sh64/unistd.h
index 9a1590fffc15..c113566bef33 100644
--- a/include/asm-sh64/unistd.h
+++ b/include/asm-sh64/unistd.h
@@ -347,8 +347,10 @@
 #ifdef __KERNEL__ 
 
 #define NR_syscalls 321
+#include <linux/err.h>
 
-/* user-visible error numbers are in the range -1 - -125: see <asm-sh64/errno.h> */
+/* user-visible error numbers are in the range -1 - -MAX_ERRNO:
+ * see <asm-sh64/errno.h> */
 
 #define __syscall_return(type, res) \
 do { \
@@ -358,7 +360,7 @@ do { \
 	**       life easier in the system call epilogue (see entry.S)      \
 	*/								    \
         register unsigned long __sr2 __asm__ ("r2") = res;		    \
-	if ((unsigned long)(res) >= (unsigned long)(-125)) { \
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) {	    \
 		errno = -(res);						    \
 		__sr2 = -1; 						    \
 	} \
diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h
index bcb44bfe577a..552b7c873a57 100644
--- a/include/asm-v850/unistd.h
+++ b/include/asm-v850/unistd.h
@@ -238,12 +238,13 @@
 #ifdef __KERNEL__
 
 #include <asm/clinkage.h>
+#include <linux/err.h>
 
 #define __syscall_return(type, res)					      \
   do {									      \
-	  /* user-visible error numbers are in the range -1 - -124:	      \
+	  /* user-visible error numbers are in the range -1 - -MAX_ERRNO:      \
 	     see <asm-v850/errno.h> */					      \
-	  if (__builtin_expect ((unsigned long)(res) >= (unsigned long)(-125), 0)) { \
+	  if (__builtin_expect ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO), 0)) { \
 		  errno = -(res);					      \
 		  res = -1;						      \
 	  }								      \
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index eeb98c168e98..6137146516d3 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -623,16 +623,17 @@ __SYSCALL(__NR_move_pages, sys_move_pages)
 #ifdef __KERNEL__
 
 #define __NR_syscall_max __NR_move_pages
+#include <linux/err.h>
 
 #ifndef __NO_STUBS
 
-/* user-visible error numbers are in the range -1 - -4095 */
+/* user-visible error numbers are in the range -1 - -MAX_ERRNO */
 
 #define __syscall_clobber "r11","rcx","memory" 
 
 #define __syscall_return(type, res) \
 do { \
-	if ((unsigned long)(res) >= (unsigned long)(-127)) { \
+	if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \
 		errno = -(res); \
 		res = -1; \
 	} \
diff --git a/include/linux/err.h b/include/linux/err.h
index cd3b367f7445..1ab1d44f8d3b 100644
--- a/include/linux/err.h
+++ b/include/linux/err.h
@@ -15,6 +15,8 @@
  */
 #define MAX_ERRNO	4095
 
+#ifndef __ASSEMBLY__
+
 #define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
 
 static inline void *ERR_PTR(long error)
@@ -32,4 +34,6 @@ static inline long IS_ERR(const void *ptr)
 	return IS_ERR_VALUE((unsigned long)ptr);
 }
 
+#endif
+
 #endif /* _LINUX_ERR_H */
-- 
cgit v1.2.3


From 07563c711fbc25389e58ab9c9f0b9de2fce56760 Mon Sep 17 00:00:00 2001
From: Michael Tokarev <mjt@tls.mks.ru>
Date: Wed, 27 Sep 2006 01:50:56 -0700
Subject: [PATCH] EISA bus MODALIAS attributes support

Add modalias attribute support for the almost forgotten now EISA bus and
(at least some) EISA-aware modules.

The modalias entry looks like (for an 3c509 NIC):

 eisa:sTCM5093

and the in-module alias like:

 eisa:sTCM5093*

The patch moves struct eisa_device_id declaration from include/linux/eisa.h
to include/linux/mod_devicetable.h (so that the former now #includes the
latter), adds proper MODULE_DEVICE_TABLE(eisa, ...) statements for all
drivers with EISA IDs I found (some drivers already have that DEVICE_TABLE
declared), and adds recognision of __mod_eisa_device_table to
scripts/mod/file2alias.c so that proper modules.alias will be generated.

There's no support for /lib/modules/$kver/modules.eisamap, as it's not used
by any existing tools, and because with in-kernel modalias mechanism those
maps are obsolete anyway.

The rationale for this patch is:

 a) to make EISA bus to act as other busses with modalias
    support, to unify driver loading

 b) to foget about EISA finally - with this patch, kernel
    (who still supports EISA) will be the only one who knows
    how to choose the necessary drivers for this bus ;)

[akpm@osdl.org: fix the kbuild bit]
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Acked-the-net-bits-by: Jeff Garzik <jeff@garzik.org>
Acked-the-tulip-bit-by: Valerie Henson <val_henson@linux.intel.com>
Cc: James Bottomley <James.Bottomley@steeleye.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/eisa/eisa-bus.c            | 23 +++++++++++++++++++++++
 drivers/net/3c509.c                |  1 +
 drivers/net/3c59x.c                |  1 +
 drivers/net/ne3210.c               |  1 +
 drivers/net/tulip/de4x5.c          |  1 +
 drivers/scsi/aha1740.c             |  1 +
 drivers/scsi/aic7xxx/aic7770_osm.c |  3 ++-
 drivers/scsi/sim710.c              |  1 +
 include/linux/eisa.h               |  8 +-------
 include/linux/mod_devicetable.h    | 12 ++++++++++++
 scripts/mod/file2alias.c           | 12 ++++++++++++
 11 files changed, 56 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c
index 6078e2f58817..3a365e159d89 100644
--- a/drivers/eisa/eisa-bus.c
+++ b/drivers/eisa/eisa-bus.c
@@ -128,9 +128,23 @@ static int eisa_bus_match (struct device *dev, struct device_driver *drv)
 	return 0;
 }
 
+static int eisa_bus_uevent(struct device *dev, char **envp, int num_envp,
+			   char *buffer, int buffer_size)
+{
+	struct eisa_device *edev = to_eisa_device(dev);
+	int i = 0;
+	int length = 0;
+
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "MODALIAS=" EISA_DEVICE_MODALIAS_FMT, edev->id.sig);
+	envp[i] = NULL;
+	return 0;
+}
+
 struct bus_type eisa_bus_type = {
 	.name  = "eisa",
 	.match = eisa_bus_match,
+	.uevent = eisa_bus_uevent,
 };
 
 int eisa_driver_register (struct eisa_driver *edrv)
@@ -160,6 +174,14 @@ static ssize_t eisa_show_state (struct device *dev, struct device_attribute *att
 
 static DEVICE_ATTR(enabled, S_IRUGO, eisa_show_state, NULL);
 
+static ssize_t eisa_show_modalias (struct device *dev, struct device_attribute *attr, char *buf)
+{
+        struct eisa_device *edev = to_eisa_device (dev);
+        return sprintf (buf, EISA_DEVICE_MODALIAS_FMT "\n", edev->id.sig);
+}
+
+static DEVICE_ATTR(modalias, S_IRUGO, eisa_show_modalias, NULL);
+
 static int __init eisa_init_device (struct eisa_root_device *root,
 				    struct eisa_device *edev,
 				    int slot)
@@ -209,6 +231,7 @@ static int __init eisa_register_device (struct eisa_device *edev)
 
 	device_create_file (&edev->dev, &dev_attr_signature);
 	device_create_file (&edev->dev, &dev_attr_enabled);
+	device_create_file (&edev->dev, &dev_attr_modalias);
 
 	return 0;
 }
diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c
index 59c33925be62..b936373ab2a5 100644
--- a/drivers/net/3c509.c
+++ b/drivers/net/3c509.c
@@ -225,6 +225,7 @@ static struct eisa_device_id el3_eisa_ids[] = {
 		{ "TCM5095" },
 		{ "" }
 };
+MODULE_DEVICE_TABLE(eisa, el3_eisa_ids);
 
 static int el3_eisa_probe (struct device *device);
 
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index af301f09d674..df42e28cc80f 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -851,6 +851,7 @@ static struct eisa_device_id vortex_eisa_ids[] = {
 	{ "TCM5970", CH_3C597 },
 	{ "" }
 };
+MODULE_DEVICE_TABLE(eisa, vortex_eisa_ids);
 
 static int vortex_eisa_probe(struct device *device);
 static int vortex_eisa_remove(struct device *device);
diff --git a/drivers/net/ne3210.c b/drivers/net/ne3210.c
index 0fa8e4d22769..d66328975425 100644
--- a/drivers/net/ne3210.c
+++ b/drivers/net/ne3210.c
@@ -343,6 +343,7 @@ static struct eisa_device_id ne3210_ids[] = {
 	{ "NVL1801" },
 	{ "" },
 };
+MODULE_DEVICE_TABLE(eisa, ne3210_ids);
 
 static struct eisa_driver ne3210_eisa_driver = {
 	.id_table = ne3210_ids,
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index e661d0a9cc64..fb5fa7d68888 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -2114,6 +2114,7 @@ static struct eisa_device_id de4x5_eisa_ids[] = {
         { "DEC4250", 0 },	/* 0 is the board name index... */
         { "" }
 };
+MODULE_DEVICE_TABLE(eisa, de4x5_eisa_ids);
 
 static struct eisa_driver de4x5_eisa_driver = {
         .id_table = de4x5_eisa_ids,
diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c
index 0e4a7ebe300a..6b35ed8301e0 100644
--- a/drivers/scsi/aha1740.c
+++ b/drivers/scsi/aha1740.c
@@ -681,6 +681,7 @@ static struct eisa_device_id aha1740_ids[] = {
 	{ "ADP0400" },		/* 1744  */
 	{ "" }
 };
+MODULE_DEVICE_TABLE(eisa, aha1740_ids);
 
 static struct eisa_driver aha1740_driver = {
 	.id_table = aha1740_ids,
diff --git a/drivers/scsi/aic7xxx/aic7770_osm.c b/drivers/scsi/aic7xxx/aic7770_osm.c
index 867cbe23579b..1ac119733bac 100644
--- a/drivers/scsi/aic7xxx/aic7770_osm.c
+++ b/drivers/scsi/aic7xxx/aic7770_osm.c
@@ -132,7 +132,8 @@ static struct eisa_device_id aic7770_ids[] = {
 	{ "ADP7770", 5 }, /* AIC7770 generic */
 	{ "" }
 };
-  
+MODULE_DEVICE_TABLE(eisa, aic7770_ids);
+
 static struct eisa_driver aic7770_driver = {
 	.id_table	= aic7770_ids,
 	.driver = {
diff --git a/drivers/scsi/sim710.c b/drivers/scsi/sim710.c
index b27e85428daa..551baccec523 100644
--- a/drivers/scsi/sim710.c
+++ b/drivers/scsi/sim710.c
@@ -282,6 +282,7 @@ static struct eisa_device_id sim710_eisa_ids[] = {
 	{ "HWP0C80" },
 	{ "" }
 };
+MODULE_DEVICE_TABLE(eisa, sim710_eisa_ids);
 
 static __init int
 sim710_eisa_probe(struct device *dev)
diff --git a/include/linux/eisa.h b/include/linux/eisa.h
index 4079242dced8..1ff7c1392525 100644
--- a/include/linux/eisa.h
+++ b/include/linux/eisa.h
@@ -3,8 +3,8 @@
 
 #include <linux/ioport.h>
 #include <linux/device.h>
+#include <linux/mod_devicetable.h>
 
-#define EISA_SIG_LEN   8
 #define EISA_MAX_SLOTS 8
 
 #define EISA_MAX_RESOURCES 4
@@ -27,12 +27,6 @@
 #define EISA_CONFIG_ENABLED         1
 #define EISA_CONFIG_FORCED          2
 
-/* The EISA signature, in ASCII form, null terminated */
-struct eisa_device_id {
-	char          sig[EISA_SIG_LEN];
-	unsigned long driver_data;
-};
-
 /* There is not much we can say about an EISA device, apart from
  * signature, slot number, and base address. dma_mask is set by
  * default to parent device mask..*/
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index f7ca0b09075d..e0c393cc7240 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -308,4 +308,16 @@ struct input_device_id {
 	kernel_ulong_t driver_info;
 };
 
+/* EISA */
+
+#define EISA_SIG_LEN   8
+
+/* The EISA signature, in ASCII form, null terminated */
+struct eisa_device_id {
+	char          sig[EISA_SIG_LEN];
+	kernel_ulong_t driver_data;
+};
+
+#define EISA_DEVICE_MODALIAS_FMT "eisa:s%s"
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index de76da80443f..f61c9ccef6aa 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -444,6 +444,14 @@ static int do_input_entry(const char *filename, struct input_device_id *id,
 	return 1;
 }
 
+static int do_eisa_entry(const char *filename, struct eisa_device_id *eisa,
+		char *alias)
+{
+	if (eisa->sig[0])
+		sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", eisa->sig);
+	return 1;
+}
+
 /* Ignore any prefix, eg. v850 prepends _ */
 static inline int sym_is(const char *symbol, const char *name)
 {
@@ -547,6 +555,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
 		do_table(symval, sym->st_size,
 			 sizeof(struct input_device_id), "input",
 			 do_input_entry, mod);
+	else if (sym_is(symname, "__mod_eisa_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct eisa_device_id), "eisa",
+			 do_eisa_entry, mod);
 }
 
 /* Now add out buffered information to the generated C source */
-- 
cgit v1.2.3


From c18258c6f0848f97e85287f6271c511a092bb784 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 27 Sep 2006 01:51:06 -0700
Subject: [PATCH] pid: Implement transfer_pid and use it to simplify de_thread

In de_thread we move pids from one process to another, a rather ugly case.
The function transfer_pid makes it clear what we are doing, and makes the
action atomic.  This is useful we ever want to atomically traverse the
process group and session lists, in a rcu safe manner.

Even if the atomic properties this change should be a win as transfer_pid
should be less code to execute than executing both attach_pid and
detach_pid, and this should make de_thread slightly smaller as only a
single function call needs to be emitted.  The only downside is that the
code might be slower to execute as the odds are against transfer_pid being
in cache.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c           | 11 ++++-------
 include/linux/pid.h |  2 ++
 kernel/pid.c        |  9 +++++++++
 3 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 54135df2a966..b7aa3d6422d6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -696,23 +696,20 @@ static int de_thread(struct task_struct *tsk)
 		 */
 
 		/* Become a process group leader with the old leader's pid.
-		 * Note: The old leader also uses thispid until release_task
+		 * The old leader becomes a thread of the this thread group.
+		 * Note: The old leader also uses this pid until release_task
 		 *       is called.  Odd but simple and correct.
 		 */
 		detach_pid(current, PIDTYPE_PID);
 		current->pid = leader->pid;
 		attach_pid(current, PIDTYPE_PID,  current->pid);
-		attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
-		attach_pid(current, PIDTYPE_SID,  current->signal->session);
+		transfer_pid(leader, current, PIDTYPE_PGID);
+		transfer_pid(leader, current, PIDTYPE_SID);
 		list_replace_rcu(&leader->tasks, &current->tasks);
 
 		current->group_leader = current;
 		leader->group_leader = current;
 
-		/* Reduce leader to a thread */
-		detach_pid(leader, PIDTYPE_PGID);
-		detach_pid(leader, PIDTYPE_SID);
-
 		current->exit_signal = SIGCHLD;
 
 		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 29960b03bef7..93da7e2d9f30 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -76,6 +76,8 @@ extern int FASTCALL(attach_pid(struct task_struct *task,
 				enum pid_type type, int nr));
 
 extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type));
+extern void FASTCALL(transfer_pid(struct task_struct *old,
+				  struct task_struct *new, enum pid_type));
 
 /*
  * look up a PID in the hash table. Must be called with the tasklist_lock
diff --git a/kernel/pid.c b/kernel/pid.c
index 93e212f20671..6db82b68e2f8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -252,6 +252,15 @@ void fastcall detach_pid(struct task_struct *task, enum pid_type type)
 	free_pid(pid);
 }
 
+/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
+void fastcall transfer_pid(struct task_struct *old, struct task_struct *new,
+			   enum pid_type type)
+{
+	new->pids[type].pid = old->pids[type].pid;
+	hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
+	old->pids[type].pid = NULL;
+}
+
 struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
 {
 	struct task_struct *result = NULL;
-- 
cgit v1.2.3


From 66f37509fc7191df468a8d183374f48b13bacb73 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 27 Sep 2006 01:51:13 -0700
Subject: [PATCH] fs/nfs/: make code static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/namespace.c     | 12 +++++++++---
 include/linux/nfs_fs.h |  4 ----
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 77b00684894d..60408646176b 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -26,6 +26,11 @@ LIST_HEAD(nfs_automount_list);
 static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
+static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
+					const struct dentry *dentry,
+					struct nfs_fh *fh,
+					struct nfs_fattr *fattr);
+
 /*
  * nfs_path - reconstruct the path given an arbitrary dentry
  * @base - arbitrary string to prepend to the path
@@ -209,9 +214,10 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
  * @fattr - attributes for new root inode
  *
  */
-struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
-		const struct dentry *dentry, struct nfs_fh *fh,
-		struct nfs_fattr *fattr)
+static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
+					const struct dentry *dentry,
+					struct nfs_fh *fh,
+					struct nfs_fattr *fattr)
 {
 	struct nfs_clone_mount mountdata = {
 		.sb = mnt_parent->mnt_sb,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 36f5bcf513b0..98c9b9f667a5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -315,10 +315,6 @@ extern void nfs_end_data_update(struct inode *);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
-extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
-					const struct dentry *dentry,
-					struct nfs_fh *fh,
-					struct nfs_fattr *fattr);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
-- 
cgit v1.2.3


From 1b79e5513d52e8533a08af35a3595dad80c74d1f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 27 Sep 2006 01:51:14 -0700
Subject: [PATCH] add probe_kernel_address()

Add a version of __get_user() which is safe to call inside mmap_sem.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/uaccess.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 391e7ed1eb3f..a48d7f11c7be 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -19,4 +19,26 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 
 #endif		/* ARCH_HAS_NOCACHE_UACCESS */
 
+/**
+ * probe_kernel_address(): safely attempt to read from a location
+ * @addr: address to read from - its type is type typeof(retval)*
+ * @retval: read into this variable
+ *
+ * Safely read from address @addr into variable @revtal.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ * We ensure that the __get_user() is executed in atomic context so that
+ * do_page_fault() doesn't attempt to take mmap_sem.  This makes
+ * probe_kernel_address() suitable for use within regions where the caller
+ * already holds mmap_sem, or other locks which nest inside mmap_sem.
+ */
+#define probe_kernel_address(addr, retval)		\
+	({						\
+		long ret;				\
+							\
+		inc_preempt_count();			\
+		ret = __get_user(retval, addr);		\
+		dec_preempt_count();			\
+		ret;					\
+	})
+
 #endif		/* __LINUX_UACCESS_H__ */
-- 
cgit v1.2.3


From 3a16f7b4a75d68364c3278523f51ac141a12758a Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Thu, 29 Jun 2006 12:27:23 -0700
Subject: USB: move <linux/usb_otg.h> to <linux/usb/otg.h>

Move <linux/usb_otg.h> to <linux/usb/otg.h>.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/plat-omap/usb.c         |   2 +-
 drivers/i2c/chips/isp1301_omap.c |   2 +-
 drivers/usb/gadget/omap_udc.c    |   2 +-
 drivers/usb/host/ohci-hcd.c      |   2 +-
 include/linux/usb/otg.h          | 131 +++++++++++++++++++++++++++++++++++++++
 include/linux/usb_otg.h          | 131 ---------------------------------------
 6 files changed, 135 insertions(+), 135 deletions(-)
 create mode 100644 include/linux/usb/otg.h
 delete mode 100644 include/linux/usb_otg.h

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/usb.c b/arch/arm/plat-omap/usb.c
index 9b815327b6a5..7e8096809be2 100644
--- a/arch/arm/plat-omap/usb.c
+++ b/arch/arm/plat-omap/usb.c
@@ -26,7 +26,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/usb_otg.h>
+#include <linux/usb/otg.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/i2c/chips/isp1301_omap.c b/drivers/i2c/chips/isp1301_omap.c
index f92505b94c61..182f04953466 100644
--- a/drivers/i2c/chips/isp1301_omap.c
+++ b/drivers/i2c/chips/isp1301_omap.c
@@ -30,7 +30,7 @@
 #include <linux/usb_ch9.h>
 #include <linux/usb_gadget.h>
 #include <linux/usb.h>
-#include <linux/usb_otg.h>
+#include <linux/usb/otg.h>
 #include <linux/i2c.h>
 #include <linux/workqueue.h>
 
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 2de9748ee673..81f0389fcc94 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -40,7 +40,7 @@
 #include <linux/platform_device.h>
 #include <linux/usb_ch9.h>
 #include <linux/usb_gadget.h>
-#include <linux/usb_otg.h>
+#include <linux/usb/otg.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/byteorder.h>
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index cbf38ae5ae76..7c3d8c60a60f 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -88,7 +88,7 @@
 #include <linux/timer.h>
 #include <linux/list.h>
 #include <linux/usb.h>
-#include <linux/usb_otg.h>
+#include <linux/usb/otg.h>
 #include <linux/dma-mapping.h> 
 #include <linux/dmapool.h>
 #include <linux/reboot.h>
diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
new file mode 100644
index 000000000000..9897f7a818c5
--- /dev/null
+++ b/include/linux/usb/otg.h
@@ -0,0 +1,131 @@
+// include/linux/usb/otg.h
+
+/*
+ * These APIs may be used between USB controllers.  USB device drivers
+ * (for either host or peripheral roles) don't use these calls; they
+ * continue to use just usb_device and usb_gadget.
+ */
+
+
+/* OTG defines lots of enumeration states before device reset */
+enum usb_otg_state {
+	OTG_STATE_UNDEFINED = 0,
+
+	/* single-role peripheral, and dual-role default-b */
+	OTG_STATE_B_IDLE,
+	OTG_STATE_B_SRP_INIT,
+	OTG_STATE_B_PERIPHERAL,
+
+	/* extra dual-role default-b states */
+	OTG_STATE_B_WAIT_ACON,
+	OTG_STATE_B_HOST,
+
+	/* dual-role default-a */
+	OTG_STATE_A_IDLE,
+	OTG_STATE_A_WAIT_VRISE,
+	OTG_STATE_A_WAIT_BCON,
+	OTG_STATE_A_HOST,
+	OTG_STATE_A_SUSPEND,
+	OTG_STATE_A_PERIPHERAL,
+	OTG_STATE_A_WAIT_VFALL,
+	OTG_STATE_A_VBUS_ERR,
+};
+
+/*
+ * the otg driver needs to interact with both device side and host side
+ * usb controllers.  it decides which controller is active at a given
+ * moment, using the transceiver, ID signal, HNP and sometimes static
+ * configuration information (including "board isn't wired for otg").
+ */
+struct otg_transceiver {
+	struct device		*dev;
+	const char		*label;
+
+	u8			default_a;
+	enum usb_otg_state	state;
+
+	struct usb_bus		*host;
+	struct usb_gadget	*gadget;
+
+	/* to pass extra port status to the root hub */
+	u16			port_status;
+	u16			port_change;
+
+	/* bind/unbind the host controller */
+	int	(*set_host)(struct otg_transceiver *otg,
+				struct usb_bus *host);
+
+	/* bind/unbind the peripheral controller */
+	int	(*set_peripheral)(struct otg_transceiver *otg,
+				struct usb_gadget *gadget);
+
+	/* effective for B devices, ignored for A-peripheral */
+	int	(*set_power)(struct otg_transceiver *otg,
+				unsigned mA);
+
+	/* for non-OTG B devices: set transceiver into suspend mode */
+	int	(*set_suspend)(struct otg_transceiver *otg,
+				int suspend);
+
+	/* for B devices only:  start session with A-Host */
+	int	(*start_srp)(struct otg_transceiver *otg);
+
+	/* start or continue HNP role switch */
+	int	(*start_hnp)(struct otg_transceiver *otg);
+
+};
+
+
+/* for board-specific init logic */
+extern int otg_set_transceiver(struct otg_transceiver *);
+
+
+/* for usb host and peripheral controller drivers */
+extern struct otg_transceiver *otg_get_transceiver(void);
+
+static inline int
+otg_start_hnp(struct otg_transceiver *otg)
+{
+	return otg->start_hnp(otg);
+}
+
+
+/* for HCDs */
+static inline int
+otg_set_host(struct otg_transceiver *otg, struct usb_bus *host)
+{
+	return otg->set_host(otg, host);
+}
+
+
+/* for usb peripheral controller drivers */
+static inline int
+otg_set_peripheral(struct otg_transceiver *otg, struct usb_gadget *periph)
+{
+	return otg->set_peripheral(otg, periph);
+}
+
+static inline int
+otg_set_power(struct otg_transceiver *otg, unsigned mA)
+{
+	return otg->set_power(otg, mA);
+}
+
+static inline int
+otg_set_suspend(struct otg_transceiver *otg, int suspend)
+{
+	if (otg->set_suspend != NULL)
+		return otg->set_suspend(otg, suspend);
+	else
+		return 0;
+}
+
+static inline int
+otg_start_srp(struct otg_transceiver *otg)
+{
+	return otg->start_srp(otg);
+}
+
+
+/* for OTG controller drivers (and maybe other stuff) */
+extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num);
diff --git a/include/linux/usb_otg.h b/include/linux/usb_otg.h
deleted file mode 100644
index f827f6e203c2..000000000000
--- a/include/linux/usb_otg.h
+++ /dev/null
@@ -1,131 +0,0 @@
-// include/linux/usb_otg.h 
-
-/*
- * These APIs may be used between USB controllers.  USB device drivers
- * (for either host or peripheral roles) don't use these calls; they
- * continue to use just usb_device and usb_gadget.
- */
-
-
-/* OTG defines lots of enumeration states before device reset */
-enum usb_otg_state {
-	OTG_STATE_UNDEFINED = 0,
-
-	/* single-role peripheral, and dual-role default-b */
-	OTG_STATE_B_IDLE,
-	OTG_STATE_B_SRP_INIT,
-	OTG_STATE_B_PERIPHERAL,
-
-	/* extra dual-role default-b states */
-	OTG_STATE_B_WAIT_ACON,
-	OTG_STATE_B_HOST,
-
-	/* dual-role default-a */
-	OTG_STATE_A_IDLE,
-	OTG_STATE_A_WAIT_VRISE,
-	OTG_STATE_A_WAIT_BCON,
-	OTG_STATE_A_HOST,
-	OTG_STATE_A_SUSPEND,
-	OTG_STATE_A_PERIPHERAL,
-	OTG_STATE_A_WAIT_VFALL,
-	OTG_STATE_A_VBUS_ERR,
-};
-
-/*
- * the otg driver needs to interact with both device side and host side
- * usb controllers.  it decides which controller is active at a given
- * moment, using the transceiver, ID signal, HNP and sometimes static
- * configuration information (including "board isn't wired for otg").
- */
-struct otg_transceiver {
-	struct device		*dev;
-	const char		*label;
-
-	u8			default_a;
-	enum usb_otg_state	state;
-
-	struct usb_bus		*host;
-	struct usb_gadget	*gadget;
-
-	/* to pass extra port status to the root hub */
-	u16			port_status;
-	u16			port_change;
-
-	/* bind/unbind the host controller */
-	int 	(*set_host)(struct otg_transceiver *otg,
-				struct usb_bus *host);
-
-	/* bind/unbind the peripheral controller */
-	int	(*set_peripheral)(struct otg_transceiver *otg,
-				struct usb_gadget *gadget);
-
-	/* effective for B devices, ignored for A-peripheral */
-	int	(*set_power)(struct otg_transceiver *otg,
-				unsigned mA);
-
-	/* for non-OTG B devices: set transceiver into suspend mode */
-	int	(*set_suspend)(struct otg_transceiver *otg,
-				int suspend);
-
-	/* for B devices only:  start session with A-Host */
-	int	(*start_srp)(struct otg_transceiver *otg);
-
-	/* start or continue HNP role switch */
-	int	(*start_hnp)(struct otg_transceiver *otg);
-
-};
-
-
-/* for board-specific init logic */
-extern int otg_set_transceiver(struct otg_transceiver *);
-
-
-/* for usb host and peripheral controller drivers */
-extern struct otg_transceiver *otg_get_transceiver(void);
-
-static inline int
-otg_start_hnp(struct otg_transceiver *otg)
-{
-	return otg->start_hnp(otg);
-}
-
-
-/* for HCDs */
-static inline int
-otg_set_host(struct otg_transceiver *otg, struct usb_bus *host)
-{
-	return otg->set_host(otg, host);
-}
-
-
-/* for usb peripheral controller drivers */
-static inline int
-otg_set_peripheral(struct otg_transceiver *otg, struct usb_gadget *periph)
-{
-	return otg->set_peripheral(otg, periph);
-}
-
-static inline int
-otg_set_power(struct otg_transceiver *otg, unsigned mA)
-{
-	return otg->set_power(otg, mA);
-}
-
-static inline int
-otg_set_suspend(struct otg_transceiver *otg, int suspend)
-{
-	if (otg->set_suspend != NULL)
-		return otg->set_suspend(otg, suspend);
-	else
-		return 0;
-}
-
-static inline int
-otg_start_srp(struct otg_transceiver *otg)
-{
-	return otg->start_srp(otg);
-}
-
-
-/* for OTG controller drivers (and maybe other stuff) */
-extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num);
-- 
cgit v1.2.3


From 8bb54ab573ecd1b4fe2ed66416a8d99a86e65316 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 1 Jul 2006 22:08:49 -0400
Subject: usbcore: add usb_device_driver definition

This patch (as732) adds a usb_device_driver structure, for representing
drivers that manage an entire USB device as opposed to just an
interface.  Support routines like usb_register_device_driver,
usb_deregister_device_driver, usb_probe_device, and usb_unbind_device
are also added.

Unlike an earlier version of this patch, the new code is type-safe.  To
accomplish this, the existing struct driver embedded in struct
usb_driver had to be wrapped in an intermediate wrapper.  This enables
the core to tell at runtime whether a particular struct driver belongs
to a device driver or to an interface driver.


Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c  | 219 +++++++++++++++++++++++++++++++++------------
 drivers/usb/core/generic.c |  17 +---
 drivers/usb/core/usb.c     |  15 ++--
 drivers/usb/core/usb.h     |  20 ++++-
 include/linux/usb.h        |  58 ++++++++++--
 5 files changed, 245 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 8dcf2cd0c569..0d4b5dcee3ab 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -84,7 +84,7 @@ static int usb_create_newid_file(struct usb_driver *usb_drv)
 		goto exit;
 
 	if (usb_drv->probe != NULL)
-		error = sysfs_create_file(&usb_drv->driver.kobj,
+		error = sysfs_create_file(&usb_drv->drvwrap.driver.kobj,
 					  &driver_attr_new_id.attr);
 exit:
 	return error;
@@ -96,7 +96,7 @@ static void usb_remove_newid_file(struct usb_driver *usb_drv)
 		return;
 
 	if (usb_drv->probe != NULL)
-		sysfs_remove_file(&usb_drv->driver.kobj,
+		sysfs_remove_file(&usb_drv->drvwrap.driver.kobj,
 				  &driver_attr_new_id.attr);
 }
 
@@ -143,18 +143,55 @@ static const struct usb_device_id *usb_match_dynamic_id(struct usb_interface *in
 }
 
 
-/* called from driver core with usb_bus_type.subsys writelock */
+/* called from driver core with dev locked */
+static int usb_probe_device(struct device *dev)
+{
+	struct usb_device_driver *udriver = to_usb_device_driver(dev->driver);
+	struct usb_device *udev;
+	int error = -ENODEV;
+
+	dev_dbg(dev, "%s\n", __FUNCTION__);
+
+	if (!is_usb_device(dev))	/* Sanity check */
+		return error;
+
+	udev = to_usb_device(dev);
+
+	/* FIXME: resume a suspended device */
+	if (udev->state == USB_STATE_SUSPENDED)
+		return -EHOSTUNREACH;
+
+	/* TODO: Add real matching code */
+
+	error = udriver->probe(udev);
+	return error;
+}
+
+/* called from driver core with dev locked */
+static int usb_unbind_device(struct device *dev)
+{
+	struct usb_device_driver *udriver = to_usb_device_driver(dev->driver);
+
+	udriver->disconnect(to_usb_device(dev));
+	return 0;
+}
+
+
+/* called from driver core with dev locked */
 static int usb_probe_interface(struct device *dev)
 {
-	struct usb_interface * intf = to_usb_interface(dev);
-	struct usb_driver * driver = to_usb_driver(dev->driver);
+	struct usb_driver *driver = to_usb_driver(dev->driver);
+	struct usb_interface *intf;
 	const struct usb_device_id *id;
 	int error = -ENODEV;
 
 	dev_dbg(dev, "%s\n", __FUNCTION__);
 
-	if (!driver->probe)
+	if (is_usb_device(dev))		/* Sanity check */
 		return error;
+
+	intf = to_usb_interface(dev);
+
 	/* FIXME we'd much prefer to just resume it ... */
 	if (interface_to_usbdev(intf)->state == USB_STATE_SUSPENDED)
 		return -EHOSTUNREACH;
@@ -182,19 +219,18 @@ static int usb_probe_interface(struct device *dev)
 	return error;
 }
 
-/* called from driver core with usb_bus_type.subsys writelock */
+/* called from driver core with dev locked */
 static int usb_unbind_interface(struct device *dev)
 {
+	struct usb_driver *driver = to_usb_driver(dev->driver);
 	struct usb_interface *intf = to_usb_interface(dev);
-	struct usb_driver *driver = to_usb_driver(intf->dev.driver);
 
 	intf->condition = USB_INTERFACE_UNBINDING;
 
 	/* release all urbs for this interface */
 	usb_disable_interface(interface_to_usbdev(intf), intf);
 
-	if (driver && driver->disconnect)
-		driver->disconnect(intf);
+	driver->disconnect(intf);
 
 	/* reset other interface state */
 	usb_set_interface(interface_to_usbdev(intf),
@@ -235,7 +271,7 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 	if (dev->driver)
 		return -EBUSY;
 
-	dev->driver = &driver->driver;
+	dev->driver = &driver->drvwrap.driver;
 	usb_set_intfdata(iface, priv);
 	iface->condition = USB_INTERFACE_BOUND;
 	mark_active(iface);
@@ -270,7 +306,7 @@ void usb_driver_release_interface(struct usb_driver *driver,
 	struct device *dev = &iface->dev;
 
 	/* this should never happen, don't release something that's not ours */
-	if (!dev->driver || dev->driver != &driver->driver)
+	if (!dev->driver || dev->driver != &driver->drvwrap.driver)
 		return;
 
 	/* don't release from within disconnect() */
@@ -433,24 +469,37 @@ EXPORT_SYMBOL_GPL_FUTURE(usb_match_id);
 
 int usb_device_match(struct device *dev, struct device_driver *drv)
 {
-	struct usb_interface *intf;
-	struct usb_driver *usb_drv;
-	const struct usb_device_id *id;
-
-	/* check for generic driver, which we don't match any device with */
-	if (drv == &usb_generic_driver)
-		return 0;
+	/* devices and interfaces are handled separately */
+	if (is_usb_device(dev)) {
 
-	intf = to_usb_interface(dev);
-	usb_drv = to_usb_driver(drv);
+		/* interface drivers never match devices */
+		if (!is_usb_device_driver(drv))
+			return 0;
 
-	id = usb_match_id(intf, usb_drv->id_table);
-	if (id)
+		/* TODO: Add real matching code */
 		return 1;
 
-	id = usb_match_dynamic_id(intf, usb_drv);
-	if (id)
-		return 1;
+	} else {
+		struct usb_interface *intf;
+		struct usb_driver *usb_drv;
+		const struct usb_device_id *id;
+
+		/* device drivers never match interfaces */
+		if (is_usb_device_driver(drv))
+			return 0;
+
+		intf = to_usb_interface(dev);
+		usb_drv = to_usb_driver(drv);
+
+		id = usb_match_id(intf, usb_drv->id_table);
+		if (id)
+			return 1;
+
+		id = usb_match_dynamic_id(intf, usb_drv);
+		if (id)
+			return 1;
+	}
+
 	return 0;
 }
 
@@ -481,14 +530,13 @@ static int usb_uevent(struct device *dev, char **envp, int num_envp,
 	/* driver is often null here; dev_dbg() would oops */
 	pr_debug ("usb %s: uevent\n", dev->bus_id);
 
-	/* Must check driver_data here, as on remove driver is always NULL */
-	if ((dev->driver == &usb_generic_driver) ||
-	    (dev->driver_data == &usb_generic_driver_data))
+	if (is_usb_device(dev))
 		return 0;
-
-	intf = to_usb_interface(dev);
-	usb_dev = interface_to_usbdev (intf);
-	alt = intf->cur_altsetting;
+	else {
+		intf = to_usb_interface(dev);
+		usb_dev = interface_to_usbdev(intf);
+		alt = intf->cur_altsetting;
+	}
 
 	if (usb_dev->devnum < 0) {
 		pr_debug ("usb %s: already deleted?\n", dev->bus_id);
@@ -569,13 +617,71 @@ static int usb_uevent(struct device *dev, char **envp,
 #endif	/* CONFIG_HOTPLUG */
 
 /**
- * usb_register_driver - register a USB driver
- * @new_driver: USB operations for the driver
+ * usb_register_device_driver - register a USB device (not interface) driver
+ * @new_udriver: USB operations for the device driver
  * @owner: module owner of this driver.
  *
- * Registers a USB driver with the USB core.  The list of unattached
- * interfaces will be rescanned whenever a new driver is added, allowing
- * the new driver to attach to any recognized devices.
+ * Registers a USB device driver with the USB core.  The list of
+ * unattached devices will be rescanned whenever a new driver is
+ * added, allowing the new driver to attach to any recognized devices.
+ * Returns a negative error code on failure and 0 on success.
+ */
+int usb_register_device_driver(struct usb_device_driver *new_udriver,
+		struct module *owner)
+{
+	int retval = 0;
+
+	if (usb_disabled())
+		return -ENODEV;
+
+	new_udriver->drvwrap.for_devices = 1;
+	new_udriver->drvwrap.driver.name = (char *) new_udriver->name;
+	new_udriver->drvwrap.driver.bus = &usb_bus_type;
+	new_udriver->drvwrap.driver.probe = usb_probe_device;
+	new_udriver->drvwrap.driver.remove = usb_unbind_device;
+	new_udriver->drvwrap.driver.owner = owner;
+
+	retval = driver_register(&new_udriver->drvwrap.driver);
+
+	if (!retval) {
+		pr_info("%s: registered new device driver %s\n",
+			usbcore_name, new_udriver->name);
+		usbfs_update_special();
+	} else {
+		printk(KERN_ERR "%s: error %d registering device "
+			"	driver %s\n",
+			usbcore_name, retval, new_udriver->name);
+	}
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(usb_register_device_driver);
+
+/**
+ * usb_deregister_device_driver - unregister a USB device (not interface) driver
+ * @udriver: USB operations of the device driver to unregister
+ * Context: must be able to sleep
+ *
+ * Unlinks the specified driver from the internal USB driver list.
+ */
+void usb_deregister_device_driver(struct usb_device_driver *udriver)
+{
+	pr_info("%s: deregistering device driver %s\n",
+			usbcore_name, udriver->name);
+
+	driver_unregister(&udriver->drvwrap.driver);
+	usbfs_update_special();
+}
+EXPORT_SYMBOL_GPL(usb_deregister_device_driver);
+
+/**
+ * usb_register_driver - register a USB interface driver
+ * @new_driver: USB operations for the interface driver
+ * @owner: module owner of this driver.
+ *
+ * Registers a USB interface driver with the USB core.  The list of
+ * unattached interfaces will be rescanned whenever a new driver is
+ * added, allowing the new driver to attach to any recognized interfaces.
  * Returns a negative error code on failure and 0 on success.
  *
  * NOTE: if you want your driver to use the USB major number, you must call
@@ -589,23 +695,25 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner)
 	if (usb_disabled())
 		return -ENODEV;
 
-	new_driver->driver.name = (char *)new_driver->name;
-	new_driver->driver.bus = &usb_bus_type;
-	new_driver->driver.probe = usb_probe_interface;
-	new_driver->driver.remove = usb_unbind_interface;
-	new_driver->driver.owner = owner;
+	new_driver->drvwrap.for_devices = 0;
+	new_driver->drvwrap.driver.name = (char *) new_driver->name;
+	new_driver->drvwrap.driver.bus = &usb_bus_type;
+	new_driver->drvwrap.driver.probe = usb_probe_interface;
+	new_driver->drvwrap.driver.remove = usb_unbind_interface;
+	new_driver->drvwrap.driver.owner = owner;
 	spin_lock_init(&new_driver->dynids.lock);
 	INIT_LIST_HEAD(&new_driver->dynids.list);
 
-	retval = driver_register(&new_driver->driver);
+	retval = driver_register(&new_driver->drvwrap.driver);
 
 	if (!retval) {
-		pr_info("%s: registered new driver %s\n",
+		pr_info("%s: registered new interface driver %s\n",
 			usbcore_name, new_driver->name);
 		usbfs_update_special();
 		usb_create_newid_file(new_driver);
 	} else {
-		printk(KERN_ERR "%s: error %d registering driver %s\n",
+		printk(KERN_ERR "%s: error %d registering interface "
+			"	driver %s\n",
 			usbcore_name, retval, new_driver->name);
 	}
 
@@ -614,8 +722,8 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner)
 EXPORT_SYMBOL_GPL_FUTURE(usb_register_driver);
 
 /**
- * usb_deregister - unregister a USB driver
- * @driver: USB operations of the driver to unregister
+ * usb_deregister - unregister a USB interface driver
+ * @driver: USB operations of the interface driver to unregister
  * Context: must be able to sleep
  *
  * Unlinks the specified driver from the internal USB driver list.
@@ -626,11 +734,12 @@ EXPORT_SYMBOL_GPL_FUTURE(usb_register_driver);
  */
 void usb_deregister(struct usb_driver *driver)
 {
-	pr_info("%s: deregistering driver %s\n", usbcore_name, driver->name);
+	pr_info("%s: deregistering interface driver %s\n",
+			usbcore_name, driver->name);
 
 	usb_remove_newid_file(driver);
 	usb_free_dynids(driver);
-	driver_unregister(&driver->driver);
+	driver_unregister(&driver->drvwrap.driver);
 
 	usbfs_update_special();
 }
@@ -655,7 +764,7 @@ static int usb_generic_suspend(struct device *dev, pm_message_t message)
 	 * marked for FREEZE as soon as their children are already idled.
 	 * But those semantics are useless, so we equate the two (sigh).
 	 */
-	if (dev->driver == &usb_generic_driver) {
+	if (is_usb_device(dev)) {
 		if (dev->power.power_state.event == message.event)
 			return 0;
 		/* we need to rule out bogus requests through sysfs */
@@ -665,8 +774,7 @@ static int usb_generic_suspend(struct device *dev, pm_message_t message)
  		return usb_port_suspend(to_usb_device(dev));
 	}
 
-	if ((dev->driver == NULL) ||
-	    (dev->driver_data == &usb_generic_driver_data))
+	if (dev->driver == NULL)
 		return 0;
 
 	intf = to_usb_interface(dev);
@@ -705,15 +813,14 @@ static int usb_generic_resume(struct device *dev)
 	dev->power.power_state.event = PM_EVENT_ON;
 
 	/* devices resume through their hubs */
-	if (dev->driver == &usb_generic_driver) {
+	if (is_usb_device(dev)) {
 		udev = to_usb_device(dev);
 		if (udev->state == USB_STATE_NOTATTACHED)
 			return 0;
 		return usb_port_resume(udev);
 	}
 
-	if ((dev->driver == NULL) ||
-	    (dev->driver_data == &usb_generic_driver_data)) {
+	if (dev->driver == NULL) {
 		dev->power.power_state.event = PM_EVENT_FREEZE;
 		return 0;
 	}
diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c
index 7bab9769b34f..fa6f34a12b4b 100644
--- a/drivers/usb/core/generic.c
+++ b/drivers/usb/core/generic.c
@@ -21,14 +21,12 @@
 #include <linux/usb.h>
 #include "usb.h"
 
-static int generic_probe(struct device *dev)
+static int generic_probe(struct usb_device *udev)
 {
 	return 0;
 }
-static int generic_remove(struct device *dev)
+static void generic_disconnect(struct usb_device *udev)
 {
-	struct usb_device *udev = to_usb_device(dev);
-
 	/* if this is only an unbind, not a physical disconnect, then
 	 * unconfigure the device */
 	if (udev->state == USB_STATE_CONFIGURED)
@@ -37,17 +35,10 @@ static int generic_remove(struct device *dev)
 	/* in case the call failed or the device was suspended */
 	if (udev->state >= USB_STATE_CONFIGURED)
 		usb_disable_device(udev, 0);
-	return 0;
 }
 
-struct device_driver usb_generic_driver = {
-	.owner = THIS_MODULE,
+struct usb_device_driver usb_generic_driver = {
 	.name =	"usb",
-	.bus = &usb_bus_type,
 	.probe = generic_probe,
-	.remove = generic_remove,
+	.disconnect = generic_disconnect,
 };
-
-/* Fun hack to determine if the struct device is a
- * usb device or a usb interface. */
-int usb_generic_driver_data;
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 0b8c67bcde60..6dfbc284369b 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -123,7 +123,7 @@ static int __find_interface(struct device * dev, void * data)
 	struct usb_interface *intf;
 
 	/* can't look at usb devices, only interfaces */
-	if (dev->driver == &usb_generic_driver)
+	if (is_usb_device(dev))
 		return 0;
 
 	intf = to_usb_interface(dev);
@@ -149,7 +149,8 @@ struct usb_interface *usb_find_interface(struct usb_driver *drv, int minor)
 
 	argb.minor = minor;
 	argb.interface = NULL;
-	driver_for_each_device(&drv->driver, NULL, &argb, __find_interface);
+	driver_for_each_device(&drv->drvwrap.driver, NULL, &argb,
+			__find_interface);
 	return argb.interface;
 }
 
@@ -204,11 +205,13 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1)
 	device_initialize(&dev->dev);
 	dev->dev.bus = &usb_bus_type;
 	dev->dev.dma_mask = bus->controller->dma_mask;
-	dev->dev.driver_data = &usb_generic_driver_data;
-	dev->dev.driver = &usb_generic_driver;
+	dev->dev.driver = &usb_generic_driver.drvwrap.driver;
 	dev->dev.release = usb_release_dev;
 	dev->state = USB_STATE_ATTACHED;
 
+	/* This magic assignment distinguishes devices from interfaces */
+	dev->dev.platform_data = &usb_generic_driver;
+
 	INIT_LIST_HEAD(&dev->ep0.urb_list);
 	dev->ep0.desc.bLength = USB_DT_ENDPOINT_SIZE;
 	dev->ep0.desc.bDescriptorType = USB_DT_ENDPOINT;
@@ -838,7 +841,7 @@ static int __init usb_init(void)
 	retval = usb_hub_init();
 	if (retval)
 		goto hub_init_failed;
-	retval = driver_register(&usb_generic_driver);
+	retval = usb_register_device_driver(&usb_generic_driver, THIS_MODULE);
 	if (!retval)
 		goto out;
 
@@ -868,7 +871,7 @@ static void __exit usb_exit(void)
 	if (nousb)
 		return;
 
-	driver_unregister(&usb_generic_driver);
+	usb_deregister_device_driver(&usb_generic_driver);
 	usb_major_cleanup();
 	usbfs_cleanup();
 	usb_deregister(&usbfs_driver);
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 82d397a6f773..1d25ccac7832 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -34,8 +34,24 @@ extern int usb_port_suspend(struct usb_device *dev);
 extern int usb_port_resume(struct usb_device *dev);
 
 extern struct bus_type usb_bus_type;
-extern struct device_driver usb_generic_driver;
-extern int usb_generic_driver_data;
+extern struct usb_device_driver usb_generic_driver;
+
+/* Here's how we tell apart devices and interfaces.  Luckily there's
+ * no such thing as a platform USB device, so we can steal the use
+ * of the platform_data field. */
+
+static inline int is_usb_device(struct device *dev)
+{
+	return dev->platform_data == &usb_generic_driver;
+}
+
+/* Do the same for device drivers and interface drivers. */
+
+static inline int is_usb_device_driver(struct device_driver *drv)
+{
+	return container_of(drv, struct usbdrv_wrap, driver)->
+			for_devices;
+}
 
 /* Interfaces and their "power state" are owned by usbcore */
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index d2bd0c8e0154..b4ccce6d0982 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -540,7 +540,17 @@ struct usb_dynids {
 };
 
 /**
- * struct usb_driver - identifies USB driver to usbcore
+ * struct usbdrv_wrap - wrapper for driver-model structure
+ * @driver: The driver-model core driver structure.
+ * @for_devices: Non-zero for device drivers, 0 for interface drivers.
+ */
+struct usbdrv_wrap {
+	struct device_driver driver;
+	int for_devices;
+};
+
+/**
+ * struct usb_driver - identifies USB interface driver to usbcore
  * @name: The driver name should be unique among USB drivers,
  *	and should normally be the same as the module name.
  * @probe: Called to see if the driver is willing to manage a particular
@@ -567,12 +577,12 @@ struct usb_dynids {
  *	or your driver's probe function will never get called.
  * @dynids: used internally to hold the list of dynamically added device
  *	ids for this driver.
- * @driver: the driver model core driver structure.
+ * @drvwrap: Driver-model core structure wrapper.
  * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be
  *	added to this driver by preventing the sysfs file from being created.
  *
- * USB drivers must provide a name, probe() and disconnect() methods,
- * and an id_table.  Other driver fields are optional.
+ * USB interface drivers must provide a name, probe() and disconnect()
+ * methods, and an id_table.  Other driver fields are optional.
  *
  * The id_table is used in hotplugging.  It holds a set of descriptors,
  * and specialized data may be associated with each entry.  That table
@@ -606,10 +616,40 @@ struct usb_driver {
 	const struct usb_device_id *id_table;
 
 	struct usb_dynids dynids;
-	struct device_driver driver;
+	struct usbdrv_wrap drvwrap;
 	unsigned int no_dynamic_id:1;
 };
-#define	to_usb_driver(d) container_of(d, struct usb_driver, driver)
+#define	to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver)
+
+/**
+ * struct usb_device_driver - identifies USB device driver to usbcore
+ * @name: The driver name should be unique among USB drivers,
+ *	and should normally be the same as the module name.
+ * @probe: Called to see if the driver is willing to manage a particular
+ *	device.  If it is, probe returns zero and uses dev_set_drvdata()
+ *	to associate driver-specific data with the device.  If unwilling
+ *	to manage the device, return a negative errno value.
+ * @disconnect: Called when the device is no longer accessible, usually
+ *	because it has been (or is being) disconnected or the driver's
+ *	module is being unloaded.
+ * @suspend: Called when the device is going to be suspended by the system.
+ * @resume: Called when the device is being resumed by the system.
+ * @drvwrap: Driver-model core structure wrapper.
+ *
+ * USB drivers must provide all the fields listed above except drvwrap.
+ */
+struct usb_device_driver {
+	const char *name;
+
+	int (*probe) (struct usb_device *udev);
+	void (*disconnect) (struct usb_device *udev);
+
+	int (*suspend) (struct usb_device *udev, pm_message_t message);
+	int (*resume) (struct usb_device *udev);
+	struct usbdrv_wrap drvwrap;
+};
+#define	to_usb_device_driver(d) container_of(d, struct usb_device_driver, \
+		drvwrap.driver)
 
 extern struct bus_type usb_bus_type;
 
@@ -633,13 +673,17 @@ struct usb_class_driver {
  * use these in module_init()/module_exit()
  * and don't forget MODULE_DEVICE_TABLE(usb, ...)
  */
-int usb_register_driver(struct usb_driver *, struct module *);
+extern int usb_register_driver(struct usb_driver *, struct module *);
 static inline int usb_register(struct usb_driver *driver)
 {
 	return usb_register_driver(driver, THIS_MODULE);
 }
 extern void usb_deregister(struct usb_driver *);
 
+extern int usb_register_device_driver(struct usb_device_driver *,
+			struct module *);
+extern void usb_deregister_device_driver(struct usb_device_driver *);
+
 extern int usb_register_dev(struct usb_interface *intf,
 			    struct usb_class_driver *class_driver);
 extern void usb_deregister_dev(struct usb_interface *intf,
-- 
cgit v1.2.3


From 4d064c080265a41324d108fccc26b72106d43db3 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 1 Jul 2006 22:11:44 -0400
Subject: usbcore: track whether interfaces are suspended

Currently we rely on intf->dev.power.power_state.event for tracking
whether intf is suspended.  This is not a reliable technique because
that value is owned by the PM core, not by usbcore.  This patch (as718b)
adds a new flag so that we can accurately tell which interfaces are
suspended and which aren't.

At first one might think these flags aren't needed, since interfaces
will be suspended along with their devices.  It turns out there are a
couple of intermediate situations where that's not quite true, such as
while processing a remote-wakeup request.


Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.h | 6 +++---
 include/linux/usb.h    | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index cc42972b6bb0..74df0db954c9 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -59,17 +59,17 @@ static inline int is_usb_device_driver(struct device_driver *drv)
 
 static inline void mark_active(struct usb_interface *f)
 {
-	f->dev.power.power_state.event = PM_EVENT_ON;
+	f->is_active = 1;
 }
 
 static inline void mark_quiesced(struct usb_interface *f)
 {
-	f->dev.power.power_state.event = PM_EVENT_FREEZE;
+	f->is_active = 0;
 }
 
 static inline int is_active(struct usb_interface *f)
 {
-	return f->dev.power.power_state.event == PM_EVENT_ON;
+	return f->is_active;
 }
 
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index b4ccce6d0982..e22f4b386605 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -102,6 +102,7 @@ enum usb_interface_condition {
  *	number from the USB core by calling usb_register_dev().
  * @condition: binding state of the interface: not bound, binding
  *	(in probe()), bound to a driver, or unbinding (in disconnect())
+ * @is_active: flag set when the interface is bound and not suspended.
  * @dev: driver model's view of this device
  * @class_dev: driver model's class view of this device.
  *
@@ -142,6 +143,8 @@ struct usb_interface {
 	int minor;			/* minor number this interface is
 					 * bound to */
 	enum usb_interface_condition condition;		/* state of binding */
+	unsigned is_active:1;		/* the interface is not suspended */
+
 	struct device dev;		/* interface specific device info */
 	struct class_device *class_dev;
 };
-- 
cgit v1.2.3


From f2ebf92c9e1930a8f79b7eb49a32122931929014 Mon Sep 17 00:00:00 2001
From: Ben Williamson <ben.williamson@greyinnovation.com>
Date: Tue, 1 Aug 2006 11:28:16 +1000
Subject: USB: gmidi: New USB MIDI Gadget class driver.

This driver is glue between the USB gadget interface
and the ALSA MIDI interface. It allows us to appear
as a MIDI Streaming device to a host system on the
other end of a USB cable.

This includes linux/usb/audio.h and linux/usb/midi.h
containing definitions from the relevant USB specifications
for USB audio and USB MIDI devices.

The following changes have been made since the first RFC
posting:

* Bug fixes to endpoint handling.
* Workaround for USB_REQ_SET_CONFIGURATION handling,
  not understood yet.
* Added SND and SND_RAWMIDI dependencies in Kconfig.
* Moved usb_audio.h and usb_midi.h to usb/*.h
* Added module parameters for ALSA card index and id.
* Added module parameters for USB descriptor IDs and strings.
* Removed some unneeded stuff inherited from zero.c, more to go.
* Provide DECLARE_* macros for the variable-length structs.
* Use kmalloc instead of usb_ep_alloc_buffer.
* Limit source to 80 columns.
* Return actual error code instead of -ENOMEM in a few places.

Signed-off-by: Ben Williamson <ben.williamson@greyinnovation.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/Kconfig  |   14 +
 drivers/usb/gadget/Makefile |    2 +
 drivers/usb/gadget/gmidi.c  | 1337 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/audio.h   |   53 ++
 include/linux/usb/midi.h    |  112 ++++
 5 files changed, 1518 insertions(+)
 create mode 100644 drivers/usb/gadget/gmidi.c
 create mode 100644 include/linux/usb/audio.h
 create mode 100644 include/linux/usb/midi.h

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 1a32d96774b4..4301e96c417b 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -404,6 +404,20 @@ config USB_G_SERIAL
 	  which includes instructions and a "driver info file" needed to
 	  make MS-Windows work with this driver.
 
+config USB_MIDI_GADGET
+	tristate "MIDI Gadget (EXPERIMENTAL)"
+	depends on SND && EXPERIMENTAL
+	select SND_RAWMIDI
+	help
+	  The MIDI Gadget acts as a USB Audio device, with one MIDI
+	  input and one MIDI output. These MIDI jacks appear as
+	  a sound "card" in the ALSA sound system. Other MIDI
+	  connections can then be made on the gadget system, using
+	  ALSA's aconnect utility etc.
+
+	  Say "y" to link the driver statically, or "m" to build a
+	  dynamically linked module called "g_midi".
+
 
 # put drivers that need isochronous transfer support (for audio
 # or video class gadget drivers), or specific hardware, here.
diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile
index 5a28e61392ec..e71e086a1cfa 100644
--- a/drivers/usb/gadget/Makefile
+++ b/drivers/usb/gadget/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_USB_AT91)		+= at91_udc.o
 g_zero-objs			:= zero.o usbstring.o config.o epautoconf.o
 g_ether-objs			:= ether.o usbstring.o config.o epautoconf.o
 g_serial-objs			:= serial.o usbstring.o config.o epautoconf.o
+g_midi-objs			:= gmidi.o usbstring.o config.o epautoconf.o
 gadgetfs-objs			:= inode.o
 g_file_storage-objs		:= file_storage.o usbstring.o config.o \
 					epautoconf.o
@@ -28,4 +29,5 @@ obj-$(CONFIG_USB_ETH)		+= g_ether.o
 obj-$(CONFIG_USB_GADGETFS)	+= gadgetfs.o
 obj-$(CONFIG_USB_FILE_STORAGE)	+= g_file_storage.o
 obj-$(CONFIG_USB_G_SERIAL)	+= g_serial.o
+obj-$(CONFIG_USB_MIDI_GADGET)	+= g_midi.o
 
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
new file mode 100644
index 000000000000..b68cecd57411
--- /dev/null
+++ b/drivers/usb/gadget/gmidi.c
@@ -0,0 +1,1337 @@
+/*
+ * gmidi.c -- USB MIDI Gadget Driver
+ *
+ * Copyright (C) 2006 Thumtronics Pty Ltd.
+ * Developed for Thumtronics by Grey Innovation
+ * Ben Williamson <ben.williamson@greyinnovation.com>
+ *
+ * This software is distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, as published by the Free Software Foundation.
+ *
+ * This code is based in part on:
+ *
+ * Gadget Zero driver, Copyright (C) 2003-2004 David Brownell.
+ * USB Audio driver, Copyright (C) 2002 by Takashi Iwai.
+ * USB MIDI driver, Copyright (C) 2002-2005 Clemens Ladisch.
+ *
+ * Refer to the USB Device Class Definition for MIDI Devices:
+ * http://www.usb.org/developers/devclass_docs/midi10.pdf
+ */
+
+#define DEBUG 1
+// #define VERBOSE
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/utsname.h>
+#include <linux/device.h>
+#include <linux/moduleparam.h>
+
+#include <sound/driver.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/rawmidi.h>
+
+#include <linux/usb_ch9.h>
+#include <linux/usb_gadget.h>
+#include <linux/usb/audio.h>
+#include <linux/usb/midi.h>
+
+#include "gadget_chips.h"
+
+MODULE_AUTHOR("Ben Williamson");
+MODULE_LICENSE("GPL v2");
+
+#define DRIVER_VERSION "25 Jul 2006"
+
+static const char shortname[] = "g_midi";
+static const char longname[] = "MIDI Gadget";
+
+static int index = SNDRV_DEFAULT_IDX1;
+static char *id = SNDRV_DEFAULT_STR1;
+
+module_param(index, int, 0444);
+MODULE_PARM_DESC(index, "Index value for the USB MIDI Gadget adapter.");
+module_param(id, charp, 0444);
+MODULE_PARM_DESC(id, "ID string for the USB MIDI Gadget adapter.");
+
+/* Some systems will want different product identifers published in the
+ * device descriptor, either numbers or strings or both.  These string
+ * parameters are in UTF-8 (superset of ASCII's 7 bit characters).
+ */
+
+static ushort idVendor;
+module_param(idVendor, ushort, S_IRUGO);
+MODULE_PARM_DESC(idVendor, "USB Vendor ID");
+
+static ushort idProduct;
+module_param(idProduct, ushort, S_IRUGO);
+MODULE_PARM_DESC(idProduct, "USB Product ID");
+
+static ushort bcdDevice;
+module_param(bcdDevice, ushort, S_IRUGO);
+MODULE_PARM_DESC(bcdDevice, "USB Device version (BCD)");
+
+static char *iManufacturer;
+module_param(iManufacturer, charp, S_IRUGO);
+MODULE_PARM_DESC(iManufacturer, "USB Manufacturer string");
+
+static char *iProduct;
+module_param(iProduct, charp, S_IRUGO);
+MODULE_PARM_DESC(iProduct, "USB Product string");
+
+static char *iSerialNumber;
+module_param(iSerialNumber, charp, S_IRUGO);
+MODULE_PARM_DESC(iSerialNumber, "SerialNumber");
+
+/*
+ * this version autoconfigures as much as possible,
+ * which is reasonable for most "bulk-only" drivers.
+ */
+static const char *EP_IN_NAME;
+static const char *EP_OUT_NAME;
+
+
+/* big enough to hold our biggest descriptor */
+#define USB_BUFSIZ 256
+
+
+/* This is a gadget, and the IN/OUT naming is from the host's perspective.
+   USB -> OUT endpoint -> rawmidi
+   USB <- IN endpoint  <- rawmidi */
+struct gmidi_in_port {
+	struct gmidi_device* dev;
+	int active;
+	uint8_t cable;		/* cable number << 4 */
+	uint8_t state;
+#define STATE_UNKNOWN	0
+#define STATE_1PARAM	1
+#define STATE_2PARAM_1	2
+#define STATE_2PARAM_2	3
+#define STATE_SYSEX_0	4
+#define STATE_SYSEX_1	5
+#define STATE_SYSEX_2	6
+	uint8_t data[2];
+};
+
+struct gmidi_device {
+	spinlock_t		lock;
+	struct usb_gadget	*gadget;
+	struct usb_request	*req;		/* for control responses */
+	u8			config;
+	struct usb_ep		*in_ep, *out_ep;
+	struct snd_card 	*card;
+	struct snd_rawmidi	*rmidi;
+	struct snd_rawmidi_substream *in_substream;
+	struct snd_rawmidi_substream *out_substream;
+
+	/* For the moment we only support one port in
+	   each direction, but in_port is kept as a
+	   separate struct so we can have more later. */
+	struct gmidi_in_port	in_port;
+	unsigned long		out_triggered;
+	struct tasklet_struct	tasklet;
+};
+
+static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req);
+
+
+#define xprintk(d,level,fmt,args...) \
+	dev_printk(level , &(d)->gadget->dev , fmt , ## args)
+
+#ifdef DEBUG
+#define DBG(dev,fmt,args...) \
+	xprintk(dev , KERN_DEBUG , fmt , ## args)
+#else
+#define DBG(dev,fmt,args...) \
+	do { } while (0)
+#endif /* DEBUG */
+
+#ifdef VERBOSE
+#define VDBG	DBG
+#else
+#define VDBG(dev,fmt,args...) \
+	do { } while (0)
+#endif /* VERBOSE */
+
+#define ERROR(dev,fmt,args...) \
+	xprintk(dev , KERN_ERR , fmt , ## args)
+#define WARN(dev,fmt,args...) \
+	xprintk(dev , KERN_WARNING , fmt , ## args)
+#define INFO(dev,fmt,args...) \
+	xprintk(dev , KERN_INFO , fmt , ## args)
+
+
+static unsigned buflen = 256;
+static unsigned qlen = 32;
+
+module_param(buflen, uint, S_IRUGO);
+module_param(qlen, uint, S_IRUGO);
+
+
+/* Thanks to Grey Innovation for donating this product ID.
+ *
+ * DO NOT REUSE THESE IDs with a protocol-incompatible driver!!  Ever!!
+ * Instead:  allocate your own, using normal USB-IF procedures.
+ */
+#define DRIVER_VENDOR_NUM	0x17b3		/* Grey Innovation */
+#define DRIVER_PRODUCT_NUM	0x0004		/* Linux-USB "MIDI Gadget" */
+
+
+/*
+ * DESCRIPTORS ... most are static, but strings and (full)
+ * configuration descriptors are built on demand.
+ */
+
+#define STRING_MANUFACTURER	25
+#define STRING_PRODUCT		42
+#define STRING_SERIAL		101
+#define STRING_MIDI_GADGET	250
+
+/* We only have the one configuration, it's number 1. */
+#define	GMIDI_CONFIG		1
+
+/* We have two interfaces- AudioControl and MIDIStreaming */
+#define GMIDI_AC_INTERFACE	0
+#define GMIDI_MS_INTERFACE	1
+#define GMIDI_NUM_INTERFACES	2
+
+DECLARE_USB_AC_HEADER_DESCRIPTOR(1);
+DECLARE_USB_MIDI_OUT_JACK_DESCRIPTOR(1);
+DECLARE_USB_MS_ENDPOINT_DESCRIPTOR(1);
+
+/* B.1  Device Descriptor */
+static struct usb_device_descriptor device_desc = {
+	.bLength =		USB_DT_DEVICE_SIZE,
+	.bDescriptorType =	USB_DT_DEVICE,
+	.bcdUSB =		__constant_cpu_to_le16(0x0200),
+	.bDeviceClass =		USB_CLASS_PER_INTERFACE,
+	.idVendor =		__constant_cpu_to_le16(DRIVER_VENDOR_NUM),
+	.idProduct =		__constant_cpu_to_le16(DRIVER_PRODUCT_NUM),
+	.iManufacturer =	STRING_MANUFACTURER,
+	.iProduct =		STRING_PRODUCT,
+	.bNumConfigurations =	1,
+};
+
+/* B.2  Configuration Descriptor */
+static struct usb_config_descriptor config_desc = {
+	.bLength =		USB_DT_CONFIG_SIZE,
+	.bDescriptorType =	USB_DT_CONFIG,
+	/* compute wTotalLength on the fly */
+	.bNumInterfaces =	GMIDI_NUM_INTERFACES,
+	.bConfigurationValue =	GMIDI_CONFIG,
+	.iConfiguration =	STRING_MIDI_GADGET,
+	/*
+	 * FIXME: When embedding this driver in a device,
+	 * these need to be set to reflect the actual
+	 * power properties of the device. Is it selfpowered?
+	 */
+	.bmAttributes =		USB_CONFIG_ATT_ONE,
+	.bMaxPower =		1,
+};
+
+/* B.3.1  Standard AC Interface Descriptor */
+static const struct usb_interface_descriptor ac_interface_desc = {
+	.bLength =		USB_DT_INTERFACE_SIZE,
+	.bDescriptorType =	USB_DT_INTERFACE,
+	.bInterfaceNumber =	GMIDI_AC_INTERFACE,
+	.bNumEndpoints =	0,
+	.bInterfaceClass =	USB_CLASS_AUDIO,
+	.bInterfaceSubClass =	USB_SUBCLASS_AUDIOCONTROL,
+	.iInterface =		STRING_MIDI_GADGET,
+};
+
+/* B.3.2  Class-Specific AC Interface Descriptor */
+static const struct usb_ac_header_descriptor_1 ac_header_desc = {
+	.bLength =		USB_DT_AC_HEADER_SIZE(1),
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	USB_MS_HEADER,
+	.bcdADC =		__constant_cpu_to_le16(0x0100),
+	.wTotalLength =		USB_DT_AC_HEADER_SIZE(1),
+	.bInCollection =	1,
+	.baInterfaceNr = {
+		[0] =		GMIDI_MS_INTERFACE,
+	}
+};
+
+/* B.4.1  Standard MS Interface Descriptor */
+static const struct usb_interface_descriptor ms_interface_desc = {
+	.bLength =		USB_DT_INTERFACE_SIZE,
+	.bDescriptorType =	USB_DT_INTERFACE,
+	.bInterfaceNumber =	GMIDI_MS_INTERFACE,
+	.bNumEndpoints =	2,
+	.bInterfaceClass =	USB_CLASS_AUDIO,
+	.bInterfaceSubClass =	USB_SUBCLASS_MIDISTREAMING,
+	.iInterface =		STRING_MIDI_GADGET,
+};
+
+/* B.4.2  Class-Specific MS Interface Descriptor */
+static const struct usb_ms_header_descriptor ms_header_desc = {
+	.bLength =		USB_DT_MS_HEADER_SIZE,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	USB_MS_HEADER,
+	.bcdMSC =		__constant_cpu_to_le16(0x0100),
+	.wTotalLength =		USB_DT_MS_HEADER_SIZE
+				+ 2*USB_DT_MIDI_IN_SIZE
+				+ 2*USB_DT_MIDI_OUT_SIZE(1),
+};
+
+#define JACK_IN_EMB	1
+#define JACK_IN_EXT	2
+#define JACK_OUT_EMB	3
+#define JACK_OUT_EXT	4
+
+/* B.4.3  MIDI IN Jack Descriptors */
+static const struct usb_midi_in_jack_descriptor jack_in_emb_desc = {
+	.bLength =		USB_DT_MIDI_IN_SIZE,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	USB_MS_MIDI_IN_JACK,
+	.bJackType =		USB_MS_EMBEDDED,
+	.bJackID =		JACK_IN_EMB,
+};
+
+static const struct usb_midi_in_jack_descriptor jack_in_ext_desc = {
+	.bLength =		USB_DT_MIDI_IN_SIZE,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	USB_MS_MIDI_IN_JACK,
+	.bJackType =		USB_MS_EXTERNAL,
+	.bJackID =		JACK_IN_EXT,
+};
+
+/* B.4.4  MIDI OUT Jack Descriptors */
+static const struct usb_midi_out_jack_descriptor_1 jack_out_emb_desc = {
+	.bLength =		USB_DT_MIDI_OUT_SIZE(1),
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	USB_MS_MIDI_OUT_JACK,
+	.bJackType =		USB_MS_EMBEDDED,
+	.bJackID =		JACK_OUT_EMB,
+	.bNrInputPins =		1,
+	.pins = {
+		[0] = {
+			.baSourceID =	JACK_IN_EXT,
+			.baSourcePin =	1,
+		}
+	}
+};
+
+static const struct usb_midi_out_jack_descriptor_1 jack_out_ext_desc = {
+	.bLength =		USB_DT_MIDI_OUT_SIZE(1),
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	USB_MS_MIDI_OUT_JACK,
+	.bJackType =		USB_MS_EXTERNAL,
+	.bJackID =		JACK_OUT_EXT,
+	.bNrInputPins =		1,
+	.pins = {
+		[0] = {
+			.baSourceID =	JACK_IN_EMB,
+			.baSourcePin =	1,
+		}
+	}
+};
+
+/* B.5.1  Standard Bulk OUT Endpoint Descriptor */
+static struct usb_endpoint_descriptor bulk_out_desc = {
+	.bLength =		USB_DT_ENDPOINT_AUDIO_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+	.bEndpointAddress =	USB_DIR_OUT,
+	.bmAttributes =		USB_ENDPOINT_XFER_BULK,
+};
+
+/* B.5.2  Class-specific MS Bulk OUT Endpoint Descriptor */
+static const struct usb_ms_endpoint_descriptor_1 ms_out_desc = {
+	.bLength =		USB_DT_MS_ENDPOINT_SIZE(1),
+	.bDescriptorType =	USB_DT_CS_ENDPOINT,
+	.bDescriptorSubtype =	USB_MS_GENERAL,
+	.bNumEmbMIDIJack =	1,
+	.baAssocJackID = {
+		[0] =		JACK_IN_EMB,
+	}
+};
+
+/* B.6.1  Standard Bulk IN Endpoint Descriptor */
+static struct usb_endpoint_descriptor bulk_in_desc = {
+	.bLength =		USB_DT_ENDPOINT_AUDIO_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_XFER_BULK,
+};
+
+/* B.6.2  Class-specific MS Bulk IN Endpoint Descriptor */
+static const struct usb_ms_endpoint_descriptor_1 ms_in_desc = {
+	.bLength =		USB_DT_MS_ENDPOINT_SIZE(1),
+	.bDescriptorType =	USB_DT_CS_ENDPOINT,
+	.bDescriptorSubtype =	USB_MS_GENERAL,
+	.bNumEmbMIDIJack =	1,
+	.baAssocJackID = {
+		[0] =		JACK_OUT_EMB,
+	}
+};
+
+static const struct usb_descriptor_header *gmidi_function [] = {
+	(struct usb_descriptor_header *)&ac_interface_desc,
+	(struct usb_descriptor_header *)&ac_header_desc,
+	(struct usb_descriptor_header *)&ms_interface_desc,
+
+	(struct usb_descriptor_header *)&ms_header_desc,
+	(struct usb_descriptor_header *)&jack_in_emb_desc,
+	(struct usb_descriptor_header *)&jack_in_ext_desc,
+	(struct usb_descriptor_header *)&jack_out_emb_desc,
+	(struct usb_descriptor_header *)&jack_out_ext_desc,
+	/* If you add more jacks, update ms_header_desc.wTotalLength */
+
+	(struct usb_descriptor_header *)&bulk_out_desc,
+	(struct usb_descriptor_header *)&ms_out_desc,
+	(struct usb_descriptor_header *)&bulk_in_desc,
+	(struct usb_descriptor_header *)&ms_in_desc,
+	NULL,
+};
+
+static char manufacturer[50];
+static char product_desc[40] = "MIDI Gadget";
+static char serial_number[20];
+
+/* static strings, in UTF-8 */
+static struct usb_string strings [] = {
+	{ STRING_MANUFACTURER, manufacturer, },
+	{ STRING_PRODUCT, product_desc, },
+	{ STRING_SERIAL, serial_number, },
+	{ STRING_MIDI_GADGET, longname, },
+	{  }			/* end of list */
+};
+
+static struct usb_gadget_strings stringtab = {
+	.language	= 0x0409,	/* en-us */
+	.strings	= strings,
+};
+
+static int config_buf(struct usb_gadget *gadget,
+		u8 *buf, u8 type, unsigned index)
+{
+	int len;
+
+	/* only one configuration */
+	if (index != 0) {
+		return -EINVAL;
+	}
+	len = usb_gadget_config_buf(&config_desc,
+			buf, USB_BUFSIZ, gmidi_function);
+	if (len < 0) {
+		return len;
+	}
+	((struct usb_config_descriptor *)buf)->bDescriptorType = type;
+	return len;
+}
+
+static struct usb_request* alloc_ep_req(struct usb_ep *ep, unsigned length)
+{
+	struct usb_request	*req;
+
+	req = usb_ep_alloc_request(ep, GFP_ATOMIC);
+	if (req) {
+		req->length = length;
+		req->buf = kmalloc(length, GFP_ATOMIC);
+		if (!req->buf) {
+			usb_ep_free_request(ep, req);
+			req = NULL;
+		}
+	}
+	return req;
+}
+
+static void free_ep_req(struct usb_ep *ep, struct usb_request *req)
+{
+	kfree(req->buf);
+	usb_ep_free_request(ep, req);
+}
+
+static const uint8_t gmidi_cin_length[] = {
+	0, 0, 2, 3, 3, 1, 2, 3, 3, 3, 3, 3, 2, 2, 3, 1
+};
+
+/*
+ * Receives a chunk of MIDI data.
+ */
+static void gmidi_read_data(struct usb_ep *ep, int cable,
+				   uint8_t* data, int length)
+{
+	struct gmidi_device *dev = ep->driver_data;
+	/* cable is ignored, because for now we only have one. */
+
+	if (!dev->out_substream) {
+		/* Nobody is listening - throw it on the floor. */
+		return;
+	}
+	if (!test_bit(dev->out_substream->number, &dev->out_triggered)) {
+		return;
+	}
+	snd_rawmidi_receive(dev->out_substream, data, length);
+}
+
+static void gmidi_handle_out_data(struct usb_ep *ep, struct usb_request *req)
+{
+	unsigned i;
+	u8 *buf = req->buf;
+
+	for (i = 0; i + 3 < req->actual; i += 4) {
+		if (buf[i] != 0) {
+			int cable = buf[i] >> 4;
+			int length = gmidi_cin_length[buf[i] & 0x0f];
+			gmidi_read_data(ep, cable, &buf[i + 1], length);
+		}
+	}
+}
+
+static void gmidi_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	struct gmidi_device *dev = ep->driver_data;
+	int status = req->status;
+
+	switch (status) {
+	case 0: 			/* normal completion */
+		if (ep == dev->out_ep) {
+			/* we received stuff.
+			   req is queued again, below */
+			gmidi_handle_out_data(ep, req);
+		} else if (ep == dev->in_ep) {
+			/* our transmit completed.
+			   see if there's more to go.
+			   gmidi_transmit eats req, don't queue it again. */
+			gmidi_transmit(dev, req);
+			return;
+		}
+		break;
+
+	/* this endpoint is normally active while we're configured */
+	case -ECONNABORTED: 		/* hardware forced ep reset */
+	case -ECONNRESET:		/* request dequeued */
+	case -ESHUTDOWN:		/* disconnect from host */
+		VDBG(dev, "%s gone (%d), %d/%d\n", ep->name, status,
+				req->actual, req->length);
+		if (ep == dev->out_ep) {
+			gmidi_handle_out_data(ep, req);
+		}
+		free_ep_req(ep, req);
+		return;
+
+	case -EOVERFLOW:		/* buffer overrun on read means that
+					 * we didn't provide a big enough
+					 * buffer.
+					 */
+	default:
+		DBG(dev, "%s complete --> %d, %d/%d\n", ep->name,
+				status, req->actual, req->length);
+		break;
+	case -EREMOTEIO:		/* short read */
+		break;
+	}
+
+	status = usb_ep_queue(ep, req, GFP_ATOMIC);
+	if (status) {
+		ERROR(dev, "kill %s:  resubmit %d bytes --> %d\n",
+				ep->name, req->length, status);
+		usb_ep_set_halt(ep);
+		/* FIXME recover later ... somehow */
+	}
+}
+
+static int set_gmidi_config(struct gmidi_device *dev, gfp_t gfp_flags)
+{
+	int err = 0;
+	struct usb_request *req;
+	struct usb_ep* ep;
+	unsigned i;
+
+	err = usb_ep_enable(dev->in_ep, &bulk_in_desc);
+	if (err) {
+		ERROR(dev, "can't start %s: %d\n", dev->in_ep->name, err);
+		goto fail;
+	}
+	dev->in_ep->driver_data = dev;
+
+	err = usb_ep_enable(dev->out_ep, &bulk_out_desc);
+	if (err) {
+		ERROR(dev, "can't start %s: %d\n", dev->out_ep->name, err);
+		goto fail;
+	}
+	dev->out_ep->driver_data = dev;
+
+	/* allocate a bunch of read buffers and queue them all at once. */
+	ep = dev->out_ep;
+	for (i = 0; i < qlen && err == 0; i++) {
+		req = alloc_ep_req(ep, buflen);
+		if (req) {
+			req->complete = gmidi_complete;
+			err = usb_ep_queue(ep, req, GFP_ATOMIC);
+			if (err) {
+				DBG(dev, "%s queue req: %d\n", ep->name, err);
+			}
+		} else {
+			err = -ENOMEM;
+		}
+	}
+fail:
+	/* caller is responsible for cleanup on error */
+	return err;
+}
+
+
+static void gmidi_reset_config(struct gmidi_device *dev)
+{
+	if (dev->config == 0) {
+		return;
+	}
+
+	DBG(dev, "reset config\n");
+
+	/* just disable endpoints, forcing completion of pending i/o.
+	 * all our completion handlers free their requests in this case.
+	 */
+	usb_ep_disable(dev->in_ep);
+	usb_ep_disable(dev->out_ep);
+	dev->config = 0;
+}
+
+/* change our operational config.  this code must agree with the code
+ * that returns config descriptors, and altsetting code.
+ *
+ * it's also responsible for power management interactions. some
+ * configurations might not work with our current power sources.
+ *
+ * note that some device controller hardware will constrain what this
+ * code can do, perhaps by disallowing more than one configuration or
+ * by limiting configuration choices (like the pxa2xx).
+ */
+static int
+gmidi_set_config(struct gmidi_device *dev, unsigned number, gfp_t gfp_flags)
+{
+	int result = 0;
+	struct usb_gadget *gadget = dev->gadget;
+
+#if 0
+	/* FIXME */
+	/* Hacking this bit out fixes a bug where on receipt of two
+	   USB_REQ_SET_CONFIGURATION messages, we end up with no
+	   buffered OUT requests waiting for data. This is clearly
+	   hiding a bug elsewhere, because if the config didn't
+	   change then we really shouldn't do anything. */
+	/* Having said that, when we do "change" from config 1
+	   to config 1, we at least gmidi_reset_config() which
+	   clears out any requests on endpoints, so it's not like
+	   we leak or anything. */
+	if (number == dev->config) {
+		return 0;
+	}
+#endif
+
+	if (gadget_is_sa1100(gadget) && dev->config) {
+		/* tx fifo is full, but we can't clear it...*/
+		INFO(dev, "can't change configurations\n");
+		return -ESPIPE;
+	}
+	gmidi_reset_config(dev);
+
+	switch (number) {
+	case GMIDI_CONFIG:
+		result = set_gmidi_config(dev, gfp_flags);
+		break;
+	default:
+		result = -EINVAL;
+		/* FALL THROUGH */
+	case 0:
+		return result;
+	}
+
+	if (!result && (!dev->in_ep || !dev->out_ep)) {
+		result = -ENODEV;
+	}
+	if (result) {
+		gmidi_reset_config(dev);
+	} else {
+		char *speed;
+
+		switch (gadget->speed) {
+		case USB_SPEED_LOW:	speed = "low"; break;
+		case USB_SPEED_FULL:	speed = "full"; break;
+		case USB_SPEED_HIGH:	speed = "high"; break;
+		default: 		speed = "?"; break;
+		}
+
+		dev->config = number;
+		INFO(dev, "%s speed\n", speed);
+	}
+	return result;
+}
+
+
+static void gmidi_setup_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	if (req->status || req->actual != req->length) {
+		DBG((struct gmidi_device *) ep->driver_data,
+				"setup complete --> %d, %d/%d\n",
+				req->status, req->actual, req->length);
+	}
+}
+
+/*
+ * The setup() callback implements all the ep0 functionality that's
+ * not handled lower down, in hardware or the hardware driver (like
+ * device and endpoint feature flags, and their status).  It's all
+ * housekeeping for the gadget function we're implementing.  Most of
+ * the work is in config-specific setup.
+ */
+static int gmidi_setup(struct usb_gadget *gadget,
+			const struct usb_ctrlrequest *ctrl)
+{
+	struct gmidi_device *dev = get_gadget_data(gadget);
+	struct usb_request *req = dev->req;
+	int value = -EOPNOTSUPP;
+	u16 w_index = le16_to_cpu(ctrl->wIndex);
+	u16 w_value = le16_to_cpu(ctrl->wValue);
+	u16 w_length = le16_to_cpu(ctrl->wLength);
+
+	/* usually this stores reply data in the pre-allocated ep0 buffer,
+	 * but config change events will reconfigure hardware.
+	 */
+	req->zero = 0;
+	switch (ctrl->bRequest) {
+
+	case USB_REQ_GET_DESCRIPTOR:
+		if (ctrl->bRequestType != USB_DIR_IN) {
+			goto unknown;
+		}
+		switch (w_value >> 8) {
+
+		case USB_DT_DEVICE:
+			value = min(w_length, (u16) sizeof(device_desc));
+			memcpy(req->buf, &device_desc, value);
+			break;
+		case USB_DT_CONFIG:
+			value = config_buf(gadget, req->buf,
+					w_value >> 8,
+					w_value & 0xff);
+			if (value >= 0) {
+				value = min(w_length, (u16)value);
+			}
+			break;
+
+		case USB_DT_STRING:
+			/* wIndex == language code.
+			 * this driver only handles one language, you can
+			 * add string tables for other languages, using
+			 * any UTF-8 characters
+			 */
+			value = usb_gadget_get_string(&stringtab,
+					w_value & 0xff, req->buf);
+			if (value >= 0) {
+				value = min(w_length, (u16)value);
+			}
+			break;
+		}
+		break;
+
+	/* currently two configs, two speeds */
+	case USB_REQ_SET_CONFIGURATION:
+		if (ctrl->bRequestType != 0) {
+			goto unknown;
+		}
+		if (gadget->a_hnp_support) {
+			DBG(dev, "HNP available\n");
+		} else if (gadget->a_alt_hnp_support) {
+			DBG(dev, "HNP needs a different root port\n");
+		} else {
+			VDBG(dev, "HNP inactive\n");
+		}
+		spin_lock(&dev->lock);
+		value = gmidi_set_config(dev, w_value, GFP_ATOMIC);
+		spin_unlock(&dev->lock);
+		break;
+	case USB_REQ_GET_CONFIGURATION:
+		if (ctrl->bRequestType != USB_DIR_IN) {
+			goto unknown;
+		}
+		*(u8 *)req->buf = dev->config;
+		value = min(w_length, (u16)1);
+		break;
+
+	/* until we add altsetting support, or other interfaces,
+	 * only 0/0 are possible.  pxa2xx only supports 0/0 (poorly)
+	 * and already killed pending endpoint I/O.
+	 */
+	case USB_REQ_SET_INTERFACE:
+		if (ctrl->bRequestType != USB_RECIP_INTERFACE) {
+			goto unknown;
+		}
+		spin_lock(&dev->lock);
+		if (dev->config && w_index < GMIDI_NUM_INTERFACES
+			&& w_value == 0)
+		{
+			u8 config = dev->config;
+
+			/* resets interface configuration, forgets about
+			 * previous transaction state (queued bufs, etc)
+			 * and re-inits endpoint state (toggle etc)
+			 * no response queued, just zero status == success.
+			 * if we had more than one interface we couldn't
+			 * use this "reset the config" shortcut.
+			 */
+			gmidi_reset_config(dev);
+			gmidi_set_config(dev, config, GFP_ATOMIC);
+			value = 0;
+		}
+		spin_unlock(&dev->lock);
+		break;
+	case USB_REQ_GET_INTERFACE:
+		if (ctrl->bRequestType != (USB_DIR_IN|USB_RECIP_INTERFACE)) {
+			goto unknown;
+		}
+		if (!dev->config) {
+			break;
+		}
+		if (w_index >= GMIDI_NUM_INTERFACES) {
+			value = -EDOM;
+			break;
+		}
+		*(u8 *)req->buf = 0;
+		value = min(w_length, (u16)1);
+		break;
+
+	default:
+unknown:
+		VDBG(dev, "unknown control req%02x.%02x v%04x i%04x l%d\n",
+			ctrl->bRequestType, ctrl->bRequest,
+			w_value, w_index, w_length);
+	}
+
+	/* respond with data transfer before status phase? */
+	if (value >= 0) {
+		req->length = value;
+		req->zero = value < w_length;
+		value = usb_ep_queue(gadget->ep0, req, GFP_ATOMIC);
+		if (value < 0) {
+			DBG(dev, "ep_queue --> %d\n", value);
+			req->status = 0;
+			gmidi_setup_complete(gadget->ep0, req);
+		}
+	}
+
+	/* device either stalls (value < 0) or reports success */
+	return value;
+}
+
+static void gmidi_disconnect(struct usb_gadget *gadget)
+{
+	struct gmidi_device *dev = get_gadget_data(gadget);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	gmidi_reset_config(dev);
+
+	/* a more significant application might have some non-usb
+	 * activities to quiesce here, saving resources like power
+	 * or pushing the notification up a network stack.
+	 */
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	/* next we may get setup() calls to enumerate new connections;
+	 * or an unbind() during shutdown (including removing module).
+	 */
+}
+
+static void /* __init_or_exit */ gmidi_unbind(struct usb_gadget *gadget)
+{
+	struct gmidi_device *dev = get_gadget_data(gadget);
+	struct snd_card* card;
+
+	DBG(dev, "unbind\n");
+
+	card = dev->card;
+	dev->card = NULL;
+	if (card) {
+		snd_card_free(card);
+	}
+
+	/* we've already been disconnected ... no i/o is active */
+	if (dev->req) {
+		dev->req->length = USB_BUFSIZ;
+		free_ep_req(gadget->ep0, dev->req);
+	}
+	kfree(dev);
+	set_gadget_data(gadget, NULL);
+}
+
+static int gmidi_snd_free(struct snd_device *device)
+{
+	return 0;
+}
+
+static void gmidi_transmit_packet(struct usb_request* req, uint8_t p0,
+					uint8_t p1, uint8_t p2, uint8_t p3)
+{
+	unsigned length = req->length;
+
+	uint8_t* buf = (uint8_t*)req->buf + length;
+	buf[0] = p0;
+	buf[1] = p1;
+	buf[2] = p2;
+	buf[3] = p3;
+	req->length = length + 4;
+}
+
+/*
+ * Converts MIDI commands to USB MIDI packets.
+ */
+static void gmidi_transmit_byte(struct usb_request* req,
+				struct gmidi_in_port* port, uint8_t b)
+{
+	uint8_t p0 = port->cable;
+
+	if (b >= 0xf8) {
+		gmidi_transmit_packet(req, p0 | 0x0f, b, 0, 0);
+	} else if (b >= 0xf0) {
+		switch (b) {
+		case 0xf0:
+			port->data[0] = b;
+			port->state = STATE_SYSEX_1;
+			break;
+		case 0xf1:
+		case 0xf3:
+			port->data[0] = b;
+			port->state = STATE_1PARAM;
+			break;
+		case 0xf2:
+			port->data[0] = b;
+			port->state = STATE_2PARAM_1;
+			break;
+		case 0xf4:
+		case 0xf5:
+			port->state = STATE_UNKNOWN;
+			break;
+		case 0xf6:
+			gmidi_transmit_packet(req, p0 | 0x05, 0xf6, 0, 0);
+			port->state = STATE_UNKNOWN;
+			break;
+		case 0xf7:
+			switch (port->state) {
+			case STATE_SYSEX_0:
+				gmidi_transmit_packet(req,
+					p0 | 0x05, 0xf7, 0, 0);
+				break;
+			case STATE_SYSEX_1:
+				gmidi_transmit_packet(req,
+					p0 | 0x06, port->data[0], 0xf7, 0);
+				break;
+			case STATE_SYSEX_2:
+				gmidi_transmit_packet(req,
+					p0 | 0x07, port->data[0],
+					port->data[1], 0xf7);
+				break;
+			}
+			port->state = STATE_UNKNOWN;
+			break;
+		}
+	} else if (b >= 0x80) {
+		port->data[0] = b;
+		if (b >= 0xc0 && b <= 0xdf)
+			port->state = STATE_1PARAM;
+		else
+			port->state = STATE_2PARAM_1;
+	} else { /* b < 0x80 */
+		switch (port->state) {
+		case STATE_1PARAM:
+			if (port->data[0] < 0xf0) {
+				p0 |= port->data[0] >> 4;
+			} else {
+				p0 |= 0x02;
+				port->state = STATE_UNKNOWN;
+			}
+			gmidi_transmit_packet(req, p0, port->data[0], b, 0);
+			break;
+		case STATE_2PARAM_1:
+			port->data[1] = b;
+			port->state = STATE_2PARAM_2;
+			break;
+		case STATE_2PARAM_2:
+			if (port->data[0] < 0xf0) {
+				p0 |= port->data[0] >> 4;
+				port->state = STATE_2PARAM_1;
+			} else {
+				p0 |= 0x03;
+				port->state = STATE_UNKNOWN;
+			}
+			gmidi_transmit_packet(req,
+				p0, port->data[0], port->data[1], b);
+			break;
+		case STATE_SYSEX_0:
+			port->data[0] = b;
+			port->state = STATE_SYSEX_1;
+			break;
+		case STATE_SYSEX_1:
+			port->data[1] = b;
+			port->state = STATE_SYSEX_2;
+			break;
+		case STATE_SYSEX_2:
+			gmidi_transmit_packet(req,
+				p0 | 0x04, port->data[0], port->data[1], b);
+			port->state = STATE_SYSEX_0;
+			break;
+		}
+	}
+}
+
+static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req)
+{
+	struct usb_ep* ep = dev->in_ep;
+	struct gmidi_in_port* port = &dev->in_port;
+
+	if (!ep) {
+		return;
+	}
+	if (!req) {
+		req = alloc_ep_req(ep, buflen);
+	}
+	if (!req) {
+		ERROR(dev, "gmidi_transmit: alloc_ep_request failed\n");
+		return;
+	}
+	req->length = 0;
+	req->complete = gmidi_complete;
+
+	if (port->active) {
+		while (req->length + 3 < buflen) {
+			uint8_t b;
+			if (snd_rawmidi_transmit(dev->in_substream, &b, 1)
+				!= 1)
+			{
+				port->active = 0;
+				break;
+			}
+			gmidi_transmit_byte(req, port, b);
+		}
+	}
+	if (req->length > 0) {
+		usb_ep_queue(ep, req, GFP_ATOMIC);
+	} else {
+		free_ep_req(ep, req);
+	}
+}
+
+static void gmidi_in_tasklet(unsigned long data)
+{
+	struct gmidi_device* dev = (struct gmidi_device*)data;
+
+	gmidi_transmit(dev, NULL);
+}
+
+static int gmidi_in_open(struct snd_rawmidi_substream *substream)
+{
+	struct gmidi_device* dev = substream->rmidi->private_data;
+
+	VDBG(dev, "gmidi_in_open\n");
+	dev->in_substream = substream;
+	dev->in_port.state = STATE_UNKNOWN;
+	return 0;
+}
+
+static int gmidi_in_close(struct snd_rawmidi_substream *substream)
+{
+	VDBG(dev, "gmidi_in_close\n");
+	return 0;
+}
+
+static void gmidi_in_trigger(struct snd_rawmidi_substream *substream, int up)
+{
+	struct gmidi_device* dev = substream->rmidi->private_data;
+
+	VDBG(dev, "gmidi_in_trigger %d\n", up);
+	dev->in_port.active = up;
+	if (up) {
+		tasklet_hi_schedule(&dev->tasklet);
+	}
+}
+
+static int gmidi_out_open(struct snd_rawmidi_substream *substream)
+{
+	struct gmidi_device* dev = substream->rmidi->private_data;
+
+	VDBG(dev, "gmidi_out_open\n");
+	dev->out_substream = substream;
+	return 0;
+}
+
+static int gmidi_out_close(struct snd_rawmidi_substream *substream)
+{
+	VDBG(dev, "gmidi_out_close\n");
+	return 0;
+}
+
+static void gmidi_out_trigger(struct snd_rawmidi_substream *substream, int up)
+{
+	struct gmidi_device* dev = substream->rmidi->private_data;
+
+	VDBG(dev, "gmidi_out_trigger %d\n", up);
+	if (up) {
+		set_bit(substream->number, &dev->out_triggered);
+	} else {
+		clear_bit(substream->number, &dev->out_triggered);
+	}
+}
+
+static struct snd_rawmidi_ops gmidi_in_ops = {
+	.open = gmidi_in_open,
+	.close = gmidi_in_close,
+	.trigger = gmidi_in_trigger,
+};
+
+static struct snd_rawmidi_ops gmidi_out_ops = {
+	.open = gmidi_out_open,
+	.close = gmidi_out_close,
+	.trigger = gmidi_out_trigger
+};
+
+/* register as a sound "card" */
+static int gmidi_register_card(struct gmidi_device *dev)
+{
+	struct snd_card *card;
+	struct snd_rawmidi *rmidi;
+	int err;
+	int out_ports = 1;
+	int in_ports = 1;
+	static struct snd_device_ops ops = {
+		.dev_free = gmidi_snd_free,
+	};
+
+	card = snd_card_new(index, id, THIS_MODULE, 0);
+	if (!card) {
+		ERROR(dev, "snd_card_new failed\n");
+		err = -ENOMEM;
+		goto fail;
+	}
+	dev->card = card;
+
+	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, dev, &ops);
+	if (err < 0) {
+		ERROR(dev, "snd_device_new failed: error %d\n", err);
+		goto fail;
+	}
+
+	strcpy(card->driver, longname);
+	strcpy(card->longname, longname);
+	strcpy(card->shortname, shortname);
+
+	/* Set up rawmidi */
+	dev->in_port.dev = dev;
+	dev->in_port.active = 0;
+	snd_component_add(card, "MIDI");
+	err = snd_rawmidi_new(card, "USB MIDI Gadget", 0,
+			      out_ports, in_ports, &rmidi);
+	if (err < 0) {
+		ERROR(dev, "snd_rawmidi_new failed: error %d\n", err);
+		goto fail;
+	}
+	dev->rmidi = rmidi;
+	strcpy(rmidi->name, card->shortname);
+	rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT |
+			    SNDRV_RAWMIDI_INFO_INPUT |
+			    SNDRV_RAWMIDI_INFO_DUPLEX;
+	rmidi->private_data = dev;
+
+	/* Yes, rawmidi OUTPUT = USB IN, and rawmidi INPUT = USB OUT.
+	   It's an upside-down world being a gadget. */
+	snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &gmidi_in_ops);
+	snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &gmidi_out_ops);
+
+	snd_card_set_dev(card, &dev->gadget->dev);
+
+	/* register it - we're ready to go */
+	err = snd_card_register(card);
+	if (err < 0) {
+		ERROR(dev, "snd_card_register failed\n");
+		goto fail;
+	}
+
+	VDBG(dev, "gmidi_register_card finished ok\n");
+	return 0;
+
+fail:
+	if (dev->card) {
+		snd_card_free(dev->card);
+		dev->card = NULL;
+	}
+	return err;
+}
+
+/*
+ * Creates an output endpoint, and initializes output ports.
+ */
+static int __devinit gmidi_bind(struct usb_gadget *gadget)
+{
+	struct gmidi_device *dev;
+	struct usb_ep *in_ep, *out_ep;
+	int gcnum, err = 0;
+
+	/* support optional vendor/distro customization */
+	if (idVendor) {
+		if (!idProduct) {
+			printk(KERN_ERR "idVendor needs idProduct!\n");
+			return -ENODEV;
+		}
+		device_desc.idVendor = cpu_to_le16(idVendor);
+		device_desc.idProduct = cpu_to_le16(idProduct);
+		if (bcdDevice) {
+			device_desc.bcdDevice = cpu_to_le16(bcdDevice);
+		}
+	}
+	if (iManufacturer) {
+		strlcpy(manufacturer, iManufacturer, sizeof(manufacturer));
+	} else {
+		snprintf(manufacturer, sizeof(manufacturer), "%s %s with %s",
+			system_utsname.sysname, system_utsname.release,
+			gadget->name);
+	}
+	if (iProduct) {
+		strlcpy(product_desc, iProduct, sizeof(product_desc));
+	}
+	if (iSerialNumber) {
+		device_desc.iSerialNumber = STRING_SERIAL,
+		strlcpy(serial_number, iSerialNumber, sizeof(serial_number));
+	}
+
+	/* Bulk-only drivers like this one SHOULD be able to
+	 * autoconfigure on any sane usb controller driver,
+	 * but there may also be important quirks to address.
+	 */
+	usb_ep_autoconfig_reset(gadget);
+	in_ep = usb_ep_autoconfig(gadget, &bulk_in_desc);
+	if (!in_ep) {
+autoconf_fail:
+		printk(KERN_ERR "%s: can't autoconfigure on %s\n",
+			shortname, gadget->name);
+		return -ENODEV;
+	}
+	EP_IN_NAME = in_ep->name;
+	in_ep->driver_data = in_ep;	/* claim */
+
+	out_ep = usb_ep_autoconfig(gadget, &bulk_out_desc);
+	if (!out_ep) {
+		goto autoconf_fail;
+	}
+	EP_OUT_NAME = out_ep->name;
+	out_ep->driver_data = out_ep;	/* claim */
+
+	gcnum = usb_gadget_controller_number(gadget);
+	if (gcnum >= 0) {
+		device_desc.bcdDevice = cpu_to_le16(0x0200 + gcnum);
+	} else {
+		/* gmidi is so simple (no altsettings) that
+		 * it SHOULD NOT have problems with bulk-capable hardware.
+		 * so warn about unrecognized controllers, don't panic.
+		 */
+		printk(KERN_WARNING "%s: controller '%s' not recognized\n",
+			shortname, gadget->name);
+		device_desc.bcdDevice = __constant_cpu_to_le16(0x9999);
+	}
+
+
+	/* ok, we made sense of the hardware ... */
+	dev = kzalloc(sizeof(*dev), SLAB_KERNEL);
+	if (!dev) {
+		return -ENOMEM;
+	}
+	spin_lock_init(&dev->lock);
+	dev->gadget = gadget;
+	dev->in_ep = in_ep;
+	dev->out_ep = out_ep;
+	set_gadget_data(gadget, dev);
+	tasklet_init(&dev->tasklet, gmidi_in_tasklet, (unsigned long)dev);
+
+	/* preallocate control response and buffer */
+	dev->req = usb_ep_alloc_request(gadget->ep0, GFP_KERNEL);
+	if (!dev->req) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	dev->req->buf = usb_ep_alloc_buffer(gadget->ep0, USB_BUFSIZ,
+				&dev->req->dma, GFP_KERNEL);
+	if (!dev->req->buf) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	dev->req->complete = gmidi_setup_complete;
+
+	device_desc.bMaxPacketSize0 = gadget->ep0->maxpacket;
+
+	gadget->ep0->driver_data = dev;
+
+	INFO(dev, "%s, version: " DRIVER_VERSION "\n", longname);
+	INFO(dev, "using %s, OUT %s IN %s\n", gadget->name,
+		EP_OUT_NAME, EP_IN_NAME);
+
+	/* register as an ALSA sound card */
+	err = gmidi_register_card(dev);
+	if (err < 0) {
+		goto fail;
+	}
+
+	VDBG(dev, "gmidi_bind finished ok\n");
+	return 0;
+
+fail:
+	gmidi_unbind(gadget);
+	return err;
+}
+
+
+static void gmidi_suspend(struct usb_gadget *gadget)
+{
+	struct gmidi_device *dev = get_gadget_data(gadget);
+
+	if (gadget->speed == USB_SPEED_UNKNOWN) {
+		return;
+	}
+
+	DBG(dev, "suspend\n");
+}
+
+static void gmidi_resume(struct usb_gadget *gadget)
+{
+	struct gmidi_device *dev = get_gadget_data(gadget);
+
+	DBG(dev, "resume\n");
+}
+
+
+static struct usb_gadget_driver gmidi_driver = {
+	.speed		= USB_SPEED_FULL,
+	.function	= (char *)longname,
+	.bind		= gmidi_bind,
+	.unbind		= __exit_p(gmidi_unbind),
+
+	.setup		= gmidi_setup,
+	.disconnect	= gmidi_disconnect,
+
+	.suspend	= gmidi_suspend,
+	.resume		= gmidi_resume,
+
+	.driver 	= {
+		.name		= (char *)shortname,
+		.owner		= THIS_MODULE,
+	},
+};
+
+static int __init gmidi_init(void)
+{
+	return usb_gadget_register_driver(&gmidi_driver);
+}
+module_init(gmidi_init);
+
+static void __exit gmidi_cleanup(void)
+{
+	usb_gadget_unregister_driver(&gmidi_driver);
+}
+module_exit(gmidi_cleanup);
+
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
new file mode 100644
index 000000000000..6bd235994dc2
--- /dev/null
+++ b/include/linux/usb/audio.h
@@ -0,0 +1,53 @@
+/*
+ * <linux/usb/audio.h> -- USB Audio definitions.
+ *
+ * Copyright (C) 2006 Thumtronics Pty Ltd.
+ * Developed for Thumtronics by Grey Innovation
+ * Ben Williamson <ben.williamson@greyinnovation.com>
+ *
+ * This software is distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, as published by the Free Software Foundation.
+ *
+ * This file holds USB constants and structures defined
+ * by the USB Device Class Definition for Audio Devices.
+ * Comments below reference relevant sections of that document:
+ *
+ * http://www.usb.org/developers/devclass_docs/audio10.pdf
+ */
+
+#ifndef __LINUX_USB_AUDIO_H
+#define __LINUX_USB_AUDIO_H
+
+#include <linux/types.h>
+
+/* A.2 Audio Interface Subclass Codes */
+#define USB_SUBCLASS_AUDIOCONTROL	0x01
+#define USB_SUBCLASS_AUDIOSTREAMING	0x02
+#define USB_SUBCLASS_MIDISTREAMING	0x03
+
+/* 4.3.2  Class-Specific AC Interface Descriptor */
+struct usb_ac_header_descriptor {
+	__u8  bLength;			// 8+n
+	__u8  bDescriptorType;		// USB_DT_CS_INTERFACE
+	__u8  bDescriptorSubtype;	// USB_MS_HEADER
+	__le16 bcdADC;			// 0x0100
+	__le16 wTotalLength;		// includes Unit and Terminal desc.
+	__u8  bInCollection;		// n
+	__u8  baInterfaceNr[];		// [n]
+} __attribute__ ((packed));
+
+#define USB_DT_AC_HEADER_SIZE(n)	(8+(n))
+
+/* As above, but more useful for defining your own descriptors: */
+#define DECLARE_USB_AC_HEADER_DESCRIPTOR(n) 			\
+struct usb_ac_header_descriptor_##n {				\
+	__u8  bLength;						\
+	__u8  bDescriptorType;					\
+	__u8  bDescriptorSubtype;				\
+	__le16 bcdADC;						\
+	__le16 wTotalLength;					\
+	__u8  bInCollection;					\
+	__u8  baInterfaceNr[n];					\
+} __attribute__ ((packed))
+
+#endif
diff --git a/include/linux/usb/midi.h b/include/linux/usb/midi.h
new file mode 100644
index 000000000000..11a97d5ffd34
--- /dev/null
+++ b/include/linux/usb/midi.h
@@ -0,0 +1,112 @@
+/*
+ * <linux/usb/midi.h> -- USB MIDI definitions.
+ *
+ * Copyright (C) 2006 Thumtronics Pty Ltd.
+ * Developed for Thumtronics by Grey Innovation
+ * Ben Williamson <ben.williamson@greyinnovation.com>
+ *
+ * This software is distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, as published by the Free Software Foundation.
+ *
+ * This file holds USB constants and structures defined
+ * by the USB Device Class Definition for MIDI Devices.
+ * Comments below reference relevant sections of that document:
+ *
+ * http://www.usb.org/developers/devclass_docs/midi10.pdf
+ */
+
+#ifndef __LINUX_USB_MIDI_H
+#define __LINUX_USB_MIDI_H
+
+#include <linux/types.h>
+
+/* A.1  MS Class-Specific Interface Descriptor Subtypes */
+#define USB_MS_HEADER		0x01
+#define USB_MS_MIDI_IN_JACK	0x02
+#define USB_MS_MIDI_OUT_JACK	0x03
+#define USB_MS_ELEMENT		0x04
+
+/* A.2  MS Class-Specific Endpoint Descriptor Subtypes */
+#define USB_MS_GENERAL		0x01
+
+/* A.3  MS MIDI IN and OUT Jack Types */
+#define USB_MS_EMBEDDED		0x01
+#define USB_MS_EXTERNAL		0x02
+
+/* 6.1.2.1  Class-Specific MS Interface Header Descriptor */
+struct usb_ms_header_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubtype;
+	__le16 bcdMSC;
+	__le16 wTotalLength;
+} __attribute__ ((packed));
+
+#define USB_DT_MS_HEADER_SIZE	7
+
+/* 6.1.2.2  MIDI IN Jack Descriptor */
+struct usb_midi_in_jack_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;		// USB_DT_CS_INTERFACE
+	__u8  bDescriptorSubtype;	// USB_MS_MIDI_IN_JACK
+	__u8  bJackType;		// USB_MS_EMBEDDED/EXTERNAL
+	__u8  bJackID;
+	__u8  iJack;
+} __attribute__ ((packed));
+
+#define USB_DT_MIDI_IN_SIZE	6
+
+struct usb_midi_source_pin {
+	__u8  baSourceID;
+	__u8  baSourcePin;
+} __attribute__ ((packed));
+
+/* 6.1.2.3  MIDI OUT Jack Descriptor */
+struct usb_midi_out_jack_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;		// USB_DT_CS_INTERFACE
+	__u8  bDescriptorSubtype;	// USB_MS_MIDI_OUT_JACK
+	__u8  bJackType;		// USB_MS_EMBEDDED/EXTERNAL
+	__u8  bJackID;
+	__u8  bNrInputPins;		// p
+	struct usb_midi_source_pin pins[]; // [p]
+	/*__u8  iJack;  -- ommitted due to variable-sized pins[] */
+} __attribute__ ((packed));
+
+#define USB_DT_MIDI_OUT_SIZE(p)	(7 + 2 * (p))
+
+/* As above, but more useful for defining your own descriptors: */
+#define DECLARE_USB_MIDI_OUT_JACK_DESCRIPTOR(p)			\
+struct usb_midi_out_jack_descriptor_##p {			\
+	__u8  bLength;						\
+	__u8  bDescriptorType;					\
+	__u8  bDescriptorSubtype;				\
+	__u8  bJackType;					\
+	__u8  bJackID;						\
+	__u8  bNrInputPins;					\
+	struct usb_midi_source_pin pins[p];			\
+	__u8  iJack;						\
+} __attribute__ ((packed))
+
+/* 6.2.2  Class-Specific MS Bulk Data Endpoint Descriptor */
+struct usb_ms_endpoint_descriptor {
+	__u8  bLength;			// 4+n
+	__u8  bDescriptorType;		// USB_DT_CS_ENDPOINT
+	__u8  bDescriptorSubtype;	// USB_MS_GENERAL
+	__u8  bNumEmbMIDIJack;		// n
+	__u8  baAssocJackID[];		// [n]
+} __attribute__ ((packed));
+
+#define USB_DT_MS_ENDPOINT_SIZE(n)	(4 + (n))
+
+/* As above, but more useful for defining your own descriptors: */
+#define DECLARE_USB_MS_ENDPOINT_DESCRIPTOR(n)			\
+struct usb_ms_endpoint_descriptor_##n {				\
+	__u8  bLength;						\
+	__u8  bDescriptorType;					\
+	__u8  bDescriptorSubtype;				\
+	__u8  bNumEmbMIDIJack;					\
+	__u8  baAssocJackID[n];					\
+} __attribute__ ((packed))
+
+#endif
-- 
cgit v1.2.3


From 3d5b2510f6e361e2203e163c03b93d0026de5629 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Sun, 30 Jul 2006 18:43:43 +0200
Subject: USB: making the kernel -Wshadow clean - USB & completion

include/linux/usb.h causes a lot of -Wshadow warnings - fix them.

  include/linux/usb.h:901: warning: declaration of 'complete' shadows a global declaration
  include/linux/completion.h:52: warning: shadowed declaration is here
  include/linux/usb.h:932: warning: declaration of 'complete' shadows a global declaration
  include/linux/completion.h:52: warning: shadowed declaration is here
  include/linux/usb.h:967: warning: declaration of 'complete' shadows a global declaration
  include/linux/completion.h:52: warning: shadowed declaration is here


Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index e22f4b386605..3d5cfa731680 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -932,7 +932,7 @@ struct urb
  * @setup_packet: pointer to the setup_packet buffer
  * @transfer_buffer: pointer to the transfer buffer
  * @buffer_length: length of the transfer buffer
- * @complete: pointer to the usb_complete_t function
+ * @complete_fn: pointer to the usb_complete_t function
  * @context: what to set the urb context to.
  *
  * Initializes a control urb with the proper information needed to submit
@@ -944,7 +944,7 @@ static inline void usb_fill_control_urb (struct urb *urb,
 					 unsigned char *setup_packet,
 					 void *transfer_buffer,
 					 int buffer_length,
-					 usb_complete_t complete,
+					 usb_complete_t complete_fn,
 					 void *context)
 {
 	spin_lock_init(&urb->lock);
@@ -953,7 +953,7 @@ static inline void usb_fill_control_urb (struct urb *urb,
 	urb->setup_packet = setup_packet;
 	urb->transfer_buffer = transfer_buffer;
 	urb->transfer_buffer_length = buffer_length;
-	urb->complete = complete;
+	urb->complete = complete_fn;
 	urb->context = context;
 }
 
@@ -964,7 +964,7 @@ static inline void usb_fill_control_urb (struct urb *urb,
  * @pipe: the endpoint pipe
  * @transfer_buffer: pointer to the transfer buffer
  * @buffer_length: length of the transfer buffer
- * @complete: pointer to the usb_complete_t function
+ * @complete_fn: pointer to the usb_complete_t function
  * @context: what to set the urb context to.
  *
  * Initializes a bulk urb with the proper information needed to submit it
@@ -975,7 +975,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb,
 				      unsigned int pipe,
 				      void *transfer_buffer,
 				      int buffer_length,
-				      usb_complete_t complete,
+				      usb_complete_t complete_fn,
 				      void *context)
 {
 	spin_lock_init(&urb->lock);
@@ -983,7 +983,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb,
 	urb->pipe = pipe;
 	urb->transfer_buffer = transfer_buffer;
 	urb->transfer_buffer_length = buffer_length;
-	urb->complete = complete;
+	urb->complete = complete_fn;
 	urb->context = context;
 }
 
@@ -994,7 +994,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb,
  * @pipe: the endpoint pipe
  * @transfer_buffer: pointer to the transfer buffer
  * @buffer_length: length of the transfer buffer
- * @complete: pointer to the usb_complete_t function
+ * @complete_fn: pointer to the usb_complete_t function
  * @context: what to set the urb context to.
  * @interval: what to set the urb interval to, encoded like
  *	the endpoint descriptor's bInterval value.
@@ -1010,7 +1010,7 @@ static inline void usb_fill_int_urb (struct urb *urb,
 				     unsigned int pipe,
 				     void *transfer_buffer,
 				     int buffer_length,
-				     usb_complete_t complete,
+				     usb_complete_t complete_fn,
 				     void *context,
 				     int interval)
 {
@@ -1019,7 +1019,7 @@ static inline void usb_fill_int_urb (struct urb *urb,
 	urb->pipe = pipe;
 	urb->transfer_buffer = transfer_buffer;
 	urb->transfer_buffer_length = buffer_length;
-	urb->complete = complete;
+	urb->complete = complete_fn;
 	urb->context = context;
 	if (dev->speed == USB_SPEED_HIGH)
 		urb->interval = 1 << (interval - 1);
-- 
cgit v1.2.3


From b7cfaaaf86571732c7728e95a2231a860385463c Mon Sep 17 00:00:00 2001
From: "Luiz Fernando N. Capitulino" <lcapitulino@mandriva.com.br>
Date: Wed, 27 Sep 2006 11:58:53 -0700
Subject: USB: New functions to check endpoints info.

These functions makes USB driver's code simpler when dealing with endpoints
by avoiding them from accessing the endpoint's descriptor structure directly
when they only need to know the endpoint's transfer type and/or
direction.

Please, read each functions' documentation in order to know how to use
them.

Signed-off-by: Luiz Fernando N. Capitulino <lcapitulino@mandriva.com.br>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb.h    |  14 +++++
 2 files changed, 158 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 9ebfc0fe819d..82837d45b484 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -482,6 +482,138 @@ int usb_get_current_frame_number(struct usb_device *dev)
 	return dev->bus->op->get_frame_number (dev);
 }
 
+/**
+ * usb_endpoint_dir_in - check if the endpoint has IN direction
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint is of type IN, otherwise it returns false.
+ */
+int usb_endpoint_dir_in(const struct usb_endpoint_descriptor *epd)
+{
+	return ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_IN);
+}
+
+/**
+ * usb_endpoint_dir_out - check if the endpoint has OUT direction
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint is of type OUT, otherwise it returns false.
+ */
+int usb_endpoint_dir_out(const struct usb_endpoint_descriptor *epd)
+{
+	return ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT);
+}
+
+/**
+ * usb_endpoint_xfer_bulk - check if the endpoint has bulk transfer type
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint is of type bulk, otherwise it returns false.
+ */
+int usb_endpoint_xfer_bulk(const struct usb_endpoint_descriptor *epd)
+{
+	return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
+		USB_ENDPOINT_XFER_BULK);
+}
+
+/**
+ * usb_endpoint_xfer_int - check if the endpoint has interrupt transfer type
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint is of type interrupt, otherwise it returns
+ * false.
+ */
+int usb_endpoint_xfer_int(const struct usb_endpoint_descriptor *epd)
+{
+	return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
+		USB_ENDPOINT_XFER_INT);
+}
+
+/**
+ * usb_endpoint_xfer_isoc - check if the endpoint has isochronous transfer type
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint is of type isochronous, otherwise it returns
+ * false.
+ */
+int usb_endpoint_xfer_isoc(const struct usb_endpoint_descriptor *epd)
+{
+	return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
+		USB_ENDPOINT_XFER_ISOC);
+}
+
+/**
+ * usb_endpoint_is_bulk_in - check if the endpoint is bulk IN
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint has bulk transfer type and IN direction,
+ * otherwise it returns false.
+ */
+int usb_endpoint_is_bulk_in(const struct usb_endpoint_descriptor *epd)
+{
+	return (usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_in(epd));
+}
+
+/**
+ * usb_endpoint_is_bulk_out - check if the endpoint is bulk OUT
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint has bulk transfer type and OUT direction,
+ * otherwise it returns false.
+ */
+int usb_endpoint_is_bulk_out(const struct usb_endpoint_descriptor *epd)
+{
+	return (usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_out(epd));
+}
+
+/**
+ * usb_endpoint_is_int_in - check if the endpoint is interrupt IN
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint has interrupt transfer type and IN direction,
+ * otherwise it returns false.
+ */
+int usb_endpoint_is_int_in(const struct usb_endpoint_descriptor *epd)
+{
+	return (usb_endpoint_xfer_int(epd) && usb_endpoint_dir_in(epd));
+}
+
+/**
+ * usb_endpoint_is_int_out - check if the endpoint is interrupt OUT
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint has interrupt transfer type and OUT direction,
+ * otherwise it returns false.
+ */
+int usb_endpoint_is_int_out(const struct usb_endpoint_descriptor *epd)
+{
+	return (usb_endpoint_xfer_int(epd) && usb_endpoint_dir_out(epd));
+}
+
+/**
+ * usb_endpoint_is_isoc_in - check if the endpoint is isochronous IN
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint has isochronous transfer type and IN direction,
+ * otherwise it returns false.
+ */
+int usb_endpoint_is_isoc_in(const struct usb_endpoint_descriptor *epd)
+{
+	return (usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_in(epd));
+}
+
+/**
+ * usb_endpoint_is_isoc_out - check if the endpoint is isochronous OUT
+ * @epd: endpoint to be checked
+ *
+ * Returns true if the endpoint has isochronous transfer type and OUT direction,
+ * otherwise it returns false.
+ */
+int usb_endpoint_is_isoc_out(const struct usb_endpoint_descriptor *epd)
+{
+	return (usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_out(epd));
+}
+
 /*-------------------------------------------------------------------*/
 /*
  * __usb_get_extra_descriptor() finds a descriptor of specific type in the
@@ -909,6 +1041,18 @@ EXPORT_SYMBOL(__usb_get_extra_descriptor);
 EXPORT_SYMBOL(usb_find_device);
 EXPORT_SYMBOL(usb_get_current_frame_number);
 
+EXPORT_SYMBOL_GPL(usb_endpoint_dir_in);
+EXPORT_SYMBOL_GPL(usb_endpoint_dir_out);
+EXPORT_SYMBOL_GPL(usb_endpoint_xfer_bulk);
+EXPORT_SYMBOL_GPL(usb_endpoint_xfer_int);
+EXPORT_SYMBOL_GPL(usb_endpoint_xfer_isoc);
+EXPORT_SYMBOL_GPL(usb_endpoint_is_bulk_in);
+EXPORT_SYMBOL_GPL(usb_endpoint_is_bulk_out);
+EXPORT_SYMBOL_GPL(usb_endpoint_is_int_in);
+EXPORT_SYMBOL_GPL(usb_endpoint_is_int_out);
+EXPORT_SYMBOL_GPL(usb_endpoint_is_isoc_in);
+EXPORT_SYMBOL_GPL(usb_endpoint_is_isoc_out);
+
 EXPORT_SYMBOL (usb_buffer_alloc);
 EXPORT_SYMBOL (usb_buffer_free);
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3d5cfa731680..f807479ef65b 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -467,6 +467,20 @@ static inline int usb_make_path (struct usb_device *dev, char *buf,
 
 /*-------------------------------------------------------------------------*/
 
+extern int usb_endpoint_dir_in(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_dir_out(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_xfer_bulk(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_xfer_int(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_xfer_isoc(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_bulk_in(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_bulk_out(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_int_in(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_int_out(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_isoc_in(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_isoc_out(const struct usb_endpoint_descriptor *epd);
+
+/*-------------------------------------------------------------------------*/
+
 #define USB_DEVICE_ID_MATCH_DEVICE \
 		(USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT)
 #define USB_DEVICE_ID_MATCH_DEV_RANGE \
-- 
cgit v1.2.3


From dfe0d3ba20e860d0b9a16c4c6524180b8f93be05 Mon Sep 17 00:00:00 2001
From: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Date: Sun, 13 Aug 2006 17:30:14 -0700
Subject: USB Storage: add rio karma eject support

This changeset from Keith Bennett (via Bob Copeland) moves the Karma
initializer to its own file and adds trapping of the START_STOP command to
enable eject of the device.

Signed-off-by: Keith Bennett <keith@mcs.st-and.ac.uk>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/storage/Kconfig        |  12 +++
 drivers/usb/storage/Makefile       |   1 +
 drivers/usb/storage/initializers.c |  73 -----------------
 drivers/usb/storage/initializers.h |   1 -
 drivers/usb/storage/karma.c        | 155 +++++++++++++++++++++++++++++++++++++
 drivers/usb/storage/karma.h        |   7 ++
 drivers/usb/storage/unusual_devs.h |   2 +-
 drivers/usb/storage/usb.c          |  11 +++
 include/linux/usb_usual.h          |   3 +
 9 files changed, 190 insertions(+), 75 deletions(-)
 create mode 100644 drivers/usb/storage/karma.c
 create mode 100644 drivers/usb/storage/karma.h

(limited to 'include/linux')

diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig
index be9eec225743..86e48c42d6af 100644
--- a/drivers/usb/storage/Kconfig
+++ b/drivers/usb/storage/Kconfig
@@ -135,6 +135,18 @@ config USB_STORAGE_ONETOUCH
 	  this input in any keybinding software. (e.g. gnome's keyboard short-
 	  cuts)
 
+config USB_STORAGE_KARMA
+	bool "Support for Rio Karma music player"
+	depends on USB_STORAGE
+	help
+	  Say Y here to include additional code to support the Rio Karma
+	  USB interface.
+
+	  This code places the Rio Karma into mass storage mode, enabling
+	  it to be mounted as an ordinary filesystem. Performing an eject
+	  on the resulting scsi device node returns the Karma to normal
+	  operation.
+
 config USB_LIBUSUAL
 	bool "The shared table of common (or usual) storage devices"
 	depends on USB
diff --git a/drivers/usb/storage/Makefile b/drivers/usb/storage/Makefile
index 8cbba22508a4..023969b4385b 100644
--- a/drivers/usb/storage/Makefile
+++ b/drivers/usb/storage/Makefile
@@ -20,6 +20,7 @@ usb-storage-obj-$(CONFIG_USB_STORAGE_DATAFAB)	+= datafab.o
 usb-storage-obj-$(CONFIG_USB_STORAGE_JUMPSHOT)	+= jumpshot.o
 usb-storage-obj-$(CONFIG_USB_STORAGE_ALAUDA)	+= alauda.o
 usb-storage-obj-$(CONFIG_USB_STORAGE_ONETOUCH)	+= onetouch.o
+usb-storage-obj-$(CONFIG_USB_STORAGE_KARMA)	+= karma.o
 
 usb-storage-objs :=	scsiglue.o protocol.o transport.o usb.o \
 			initializers.o $(usb-storage-obj-y)
diff --git a/drivers/usb/storage/initializers.c b/drivers/usb/storage/initializers.c
index ab173b30076e..5b06f9240d05 100644
--- a/drivers/usb/storage/initializers.c
+++ b/drivers/usb/storage/initializers.c
@@ -45,12 +45,6 @@
 #include "debug.h"
 #include "transport.h"
 
-#define RIO_MSC 0x08
-#define RIOP_INIT "RIOP\x00\x01\x08"
-#define RIOP_INIT_LEN 7
-#define RIO_SEND_LEN 40
-#define RIO_RECV_LEN 0x200
-
 /* This places the Shuttle/SCM USB<->SCSI bridge devices in multi-target
  * mode */
 int usb_stor_euscsi_init(struct us_data *us)
@@ -97,70 +91,3 @@ int usb_stor_ucr61s2b_init(struct us_data *us)
 
 	return (res ? -1 : 0);
 }
-
-/* Place the Rio Karma into mass storage mode.
- *
- * The initialization begins by sending 40 bytes starting
- * RIOP\x00\x01\x08\x00, which the device will ack with a 512-byte
- * packet with the high four bits set and everything else null.
- *
- * Next, we send RIOP\x80\x00\x08\x00.  Each time, a 512 byte response
- * must be read, but we must loop until byte 5 in the response is 0x08,
- * indicating success.  */
-int rio_karma_init(struct us_data *us)
-{
-	int result, partial;
-	char *recv;
-	unsigned long timeout;
-
-	// us->iobuf is big enough to hold cmd but not receive
-	if (!(recv = kmalloc(RIO_RECV_LEN, GFP_KERNEL)))
-		goto die_nomem;
-
-	US_DEBUGP("Initializing Karma...\n");
-
-	memset(us->iobuf, 0, RIO_SEND_LEN);
-	memcpy(us->iobuf, RIOP_INIT, RIOP_INIT_LEN);
-
-	result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
-		us->iobuf, RIO_SEND_LEN, &partial);
-	if (result != USB_STOR_XFER_GOOD)
-		goto die;
-
-	result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
-		recv, RIO_RECV_LEN, &partial);
-	if (result != USB_STOR_XFER_GOOD)
-		goto die;
-
-	us->iobuf[4] = 0x80;
-	us->iobuf[5] = 0;
-	timeout = jiffies + msecs_to_jiffies(3000);
-	for (;;) {
-		US_DEBUGP("Sending init command\n");
-		result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
-			us->iobuf, RIO_SEND_LEN, &partial);
-		if (result != USB_STOR_XFER_GOOD)
-			goto die;
-
-		result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
-			recv, RIO_RECV_LEN, &partial);
-		if (result != USB_STOR_XFER_GOOD)
-			goto die;
-
-		if (recv[5] == RIO_MSC)
-			break;
-		if (time_after(jiffies, timeout))
-			goto die;
-		msleep(10);
-	}
-	US_DEBUGP("Karma initialized.\n");
-	kfree(recv);
-	return 0;
-
-die:
-	kfree(recv);
-die_nomem:
-	US_DEBUGP("Could not initialize karma.\n");
-	return USB_STOR_TRANSPORT_FAILED;
-}
-
diff --git a/drivers/usb/storage/initializers.h b/drivers/usb/storage/initializers.h
index 927f7781080f..e2967a4d48a2 100644
--- a/drivers/usb/storage/initializers.h
+++ b/drivers/usb/storage/initializers.h
@@ -47,4 +47,3 @@ int usb_stor_euscsi_init(struct us_data *us);
 /* This function is required to activate all four slots on the UCR-61S2B
  * flash reader */
 int usb_stor_ucr61s2b_init(struct us_data *us);
-int rio_karma_init(struct us_data *us);
diff --git a/drivers/usb/storage/karma.c b/drivers/usb/storage/karma.c
new file mode 100644
index 000000000000..0d79ae5683f7
--- /dev/null
+++ b/drivers/usb/storage/karma.c
@@ -0,0 +1,155 @@
+/* Driver for Rio Karma
+ *
+ *   (c) 2006 Bob Copeland <me@bobcopeland.com>
+ *   (c) 2006 Keith Bennett <keith@mcs.st-and.ac.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+
+#include "usb.h"
+#include "transport.h"
+#include "debug.h"
+#include "karma.h"
+
+#define RIO_PREFIX "RIOP\x00"
+#define RIO_PREFIX_LEN 5
+#define RIO_SEND_LEN 40
+#define RIO_RECV_LEN 0x200
+
+#define RIO_ENTER_STORAGE 0x1
+#define RIO_LEAVE_STORAGE 0x2
+#define RIO_RESET 0xC
+
+extern int usb_stor_Bulk_transport(struct scsi_cmnd *, struct us_data *);
+
+struct karma_data {
+	int in_storage;
+	char *recv;
+};
+
+/*
+ * Send commands to Rio Karma.
+ *
+ * For each command we send 40 bytes starting 'RIOP\0' followed by
+ * the command number and a sequence number, which the device will ack
+ * with a 512-byte packet with the high four bits set and everything
+ * else null.  Then we send 'RIOP\x80' followed by a zero and the
+ * sequence number, until byte 5 in the response repeats the sequence
+ * number.
+ */
+static int rio_karma_send_command(char cmd, struct us_data *us)
+{
+	int result, partial;
+	unsigned long timeout;
+	static unsigned char seq = 1;
+	struct karma_data *data = (struct karma_data *) us->extra;
+
+	US_DEBUGP("karma: sending command %04x\n", cmd);
+	memset(us->iobuf, 0, RIO_SEND_LEN);
+	memcpy(us->iobuf, RIO_PREFIX, RIO_PREFIX_LEN);
+	us->iobuf[5] = cmd;
+	us->iobuf[6] = seq;
+
+	timeout = jiffies + msecs_to_jiffies(6000);
+	for (;;) {
+		result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
+			us->iobuf, RIO_SEND_LEN, &partial);
+		if (result != USB_STOR_XFER_GOOD)
+			goto err;
+
+		result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
+			data->recv, RIO_RECV_LEN, &partial);
+		if (result != USB_STOR_XFER_GOOD)
+			goto err;
+
+		if (data->recv[5] == seq)
+			break;
+
+		if (time_after(jiffies, timeout))
+			goto err;
+
+		us->iobuf[4] = 0x80;
+		us->iobuf[5] = 0;
+		msleep(50);
+	}
+
+	seq++;
+	if (seq == 0)
+		seq = 1;
+
+	US_DEBUGP("karma: sent command %04x\n", cmd);
+	return 0;
+err:
+	US_DEBUGP("karma: command %04x failed\n", cmd);
+	return USB_STOR_TRANSPORT_FAILED;
+}
+
+/*
+ * Trap START_STOP and READ_10 to leave/re-enter storage mode.
+ * Everything else is propagated to the normal bulk layer.
+ */
+int rio_karma_transport(struct scsi_cmnd *srb, struct us_data *us)
+{
+	int ret;
+	struct karma_data *data = (struct karma_data *) us->extra;
+
+	if (srb->cmnd[0] == READ_10 && !data->in_storage) {
+		ret = rio_karma_send_command(RIO_ENTER_STORAGE, us);
+		if (ret)
+			return ret;
+
+		data->in_storage = 1;
+		return usb_stor_Bulk_transport(srb, us);
+	} else if (srb->cmnd[0] == START_STOP) {
+		ret = rio_karma_send_command(RIO_LEAVE_STORAGE, us);
+		if (ret)
+			return ret;
+
+		data->in_storage = 0;
+		return rio_karma_send_command(RIO_RESET, us);
+	}
+	return usb_stor_Bulk_transport(srb, us);
+}
+
+static void rio_karma_destructor(void *extra)
+{
+	struct karma_data *data = (struct karma_data *) extra;
+	kfree(data->recv);
+}
+
+int rio_karma_init(struct us_data *us)
+{
+	int ret = 0;
+	struct karma_data *data = kzalloc(sizeof(struct karma_data), GFP_NOIO);
+	if (!data)
+		goto out;
+
+	data->recv = kmalloc(RIO_RECV_LEN, GFP_NOIO);
+	if (!data->recv) {
+		kfree(data);
+		goto out;
+	}
+
+	us->extra = data;
+	us->extra_destructor = rio_karma_destructor;
+	ret = rio_karma_send_command(RIO_ENTER_STORAGE, us);
+	data->in_storage = (ret == 0);
+out:
+	return ret;
+}
diff --git a/drivers/usb/storage/karma.h b/drivers/usb/storage/karma.h
new file mode 100644
index 000000000000..8a60972af8c5
--- /dev/null
+++ b/drivers/usb/storage/karma.h
@@ -0,0 +1,7 @@
+#ifndef _KARMA_USB_H
+#define _KARMA_USB_H
+
+extern int rio_karma_init(struct us_data *us);
+extern int rio_karma_transport(struct scsi_cmnd *srb, struct us_data *us);
+
+#endif
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index fa49357289c4..1f11c9d44eaa 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -221,7 +221,7 @@ UNUSUAL_DEV(  0x0457, 0x0151, 0x0100, 0x0100,
 UNUSUAL_DEV(  0x045a, 0x5210, 0x0101, 0x0101,
 		"Rio",
 		"Rio Karma",
-		US_SC_SCSI, US_PR_BULK, rio_karma_init, 0),
+		US_SC_SCSI, US_PR_KARMA, rio_karma_init, 0),
 
 /* Patch submitted by Philipp Friedrich <philipp@void.at> */
 UNUSUAL_DEV(  0x0482, 0x0100, 0x0100, 0x0100,
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index 8d7bdcb5924d..b8d6031b0975 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -98,6 +98,9 @@
 #ifdef CONFIG_USB_STORAGE_ALAUDA
 #include "alauda.h"
 #endif
+#ifdef CONFIG_USB_STORAGE_KARMA
+#include "karma.h"
+#endif
 
 /* Some informational data */
 MODULE_AUTHOR("Matthew Dharm <mdharm-usb@one-eyed-alien.net>");
@@ -646,6 +649,14 @@ static int get_transport(struct us_data *us)
 		break;
 #endif
 
+#ifdef CONFIG_USB_STORAGE_KARMA
+	case US_PR_KARMA:
+		us->transport_name = "Rio Karma/Bulk";
+		us->transport = rio_karma_transport;
+		us->transport_reset = usb_stor_Bulk_reset;
+		break;
+#endif
+
 	default:
 		return -EIO;
 	}
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index e7fc5fed5b98..2ae76fe52ff7 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -108,6 +108,9 @@ enum { US_DO_ALL_FLAGS };
 #ifdef CONFIG_USB_STORAGE_ALAUDA
 #define US_PR_ALAUDA    0xf4		/* Alauda chipsets */
 #endif
+#ifdef CONFIG_USB_STORAGE_KARMA
+#define US_PR_KARMA     0xf5		/* Rio Karma */
+#endif
 
 #define US_PR_DEVICE	0xff		/* Use device's value */
 
-- 
cgit v1.2.3


From 095bc335360a51623dd8571839bbf465851a7f4b Mon Sep 17 00:00:00 2001
From: "Luiz Fernando N. Capitulino" <lcapitulino@mandriva.com.br>
Date: Sat, 26 Aug 2006 23:48:11 -0300
Subject: USB core: Use const where possible.

This patch marks some USB core's functions parameters as const. This
improves the design (we're saying to the caller that its parameter is
not going to be modified) and may help in compiler's optimisation work.

Signed-off-by: Luiz Fernando N. Capitulino <lcapitulino@mandriva.com.br>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.c | 21 +++++++++++----------
 drivers/usb/core/usb.h |  4 ++--
 include/linux/usb.h    | 18 +++++++++---------
 3 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 82837d45b484..4eb98eb3804f 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -67,7 +67,8 @@ static int nousb;	/* Disable USB when built into kernel image */
  * Don't call this function unless you are bound to one of the interfaces
  * on this device or you have locked the device!
  */
-struct usb_interface *usb_ifnum_to_if(struct usb_device *dev, unsigned ifnum)
+struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev,
+				      unsigned ifnum)
 {
 	struct usb_host_config *config = dev->actconfig;
 	int i;
@@ -100,8 +101,8 @@ struct usb_interface *usb_ifnum_to_if(struct usb_device *dev, unsigned ifnum)
  * Don't call this function unless you are bound to the intf interface
  * or you have locked the device!
  */
-struct usb_host_interface *usb_altnum_to_altsetting(struct usb_interface *intf,
-		unsigned int altnum)
+struct usb_host_interface *usb_altnum_to_altsetting(const struct usb_interface *intf,
+						    unsigned int altnum)
 {
 	int i;
 
@@ -356,7 +357,7 @@ void usb_put_intf(struct usb_interface *intf)
  * case the driver already owns the device lock.)
  */
 int usb_lock_device_for_reset(struct usb_device *udev,
-		struct usb_interface *iface)
+			      const struct usb_interface *iface)
 {
 	unsigned long jiffies_expire = jiffies + HZ;
 
@@ -852,8 +853,8 @@ void usb_buffer_unmap (struct urb *urb)
  *
  * Reverse the effect of this call with usb_buffer_unmap_sg().
  */
-int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int nents)
+int usb_buffer_map_sg(const struct usb_device *dev, unsigned pipe,
+		      struct scatterlist *sg, int nents)
 {
 	struct usb_bus		*bus;
 	struct device		*controller;
@@ -887,8 +888,8 @@ int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe,
  * Use this when you are re-using a scatterlist's data buffers for
  * another USB request.
  */
-void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int n_hw_ents)
+void usb_buffer_dmasync_sg(const struct usb_device *dev, unsigned pipe,
+			   struct scatterlist *sg, int n_hw_ents)
 {
 	struct usb_bus		*bus;
 	struct device		*controller;
@@ -913,8 +914,8 @@ void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe,
  *
  * Reverses the effect of usb_buffer_map_sg().
  */
-void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int n_hw_ents)
+void usb_buffer_unmap_sg(const struct usb_device *dev, unsigned pipe,
+			 struct scatterlist *sg, int n_hw_ents)
 {
 	struct usb_bus		*bus;
 	struct device		*controller;
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 6096ead2758c..67da6d0b316f 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -53,7 +53,7 @@ extern struct usb_device_driver usb_generic_driver;
  * no such thing as a platform USB device, so we can steal the use
  * of the platform_data field. */
 
-static inline int is_usb_device(struct device *dev)
+static inline int is_usb_device(const struct device *dev)
 {
 	return dev->platform_data == &usb_generic_driver;
 }
@@ -78,7 +78,7 @@ static inline void mark_quiesced(struct usb_interface *f)
 	f->is_active = 0;
 }
 
-static inline int is_active(struct usb_interface *f)
+static inline int is_active(const struct usb_interface *f)
 {
 	return f->is_active;
 }
diff --git a/include/linux/usb.h b/include/linux/usb.h
index f807479ef65b..26d8a5f36896 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -387,7 +387,7 @@ extern void usb_put_dev(struct usb_device *dev);
 #define usb_unlock_device(udev)		up(&(udev)->dev.sem)
 #define usb_trylock_device(udev)	down_trylock(&(udev)->dev.sem)
 extern int usb_lock_device_for_reset(struct usb_device *udev,
-		struct usb_interface *iface);
+				     const struct usb_interface *iface);
 
 /* USB port reset for device reinitialization */
 extern int usb_reset_device(struct usb_device *dev);
@@ -426,10 +426,10 @@ const struct usb_device_id *usb_match_id(struct usb_interface *interface,
 
 extern struct usb_interface *usb_find_interface(struct usb_driver *drv,
 		int minor);
-extern struct usb_interface *usb_ifnum_to_if(struct usb_device *dev,
+extern struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev,
 		unsigned ifnum);
 extern struct usb_host_interface *usb_altnum_to_altsetting(
-		struct usb_interface *intf, unsigned int altnum);
+		const struct usb_interface *intf, unsigned int altnum);
 
 
 /**
@@ -1064,14 +1064,14 @@ void usb_buffer_unmap (struct urb *urb);
 #endif
 
 struct scatterlist;
-int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int nents);
+int usb_buffer_map_sg(const struct usb_device *dev, unsigned pipe,
+		      struct scatterlist *sg, int nents);
 #if 0
-void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int n_hw_ents);
+void usb_buffer_dmasync_sg(const struct usb_device *dev, unsigned pipe,
+			   struct scatterlist *sg, int n_hw_ents);
 #endif
-void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int n_hw_ents);
+void usb_buffer_unmap_sg(const struct usb_device *dev, unsigned pipe,
+			 struct scatterlist *sg, int n_hw_ents);
 
 /*-------------------------------------------------------------------*
  *                         SYNCHRONOUS CALL SUPPORT                  *
-- 
cgit v1.2.3


From 088dc270e1da03744d977cbd9edd4311af142348 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 21 Aug 2006 12:08:19 -0400
Subject: usbcore: help drivers to change device configs

It's generally a bad idea for USB interface drivers to try to change a
device's configuration, and usbcore doesn't provide any way for them
to do it.  However in a few exceptional circumstances it can make
sense.  This patch (as767) adds a roundabout mechanism to help drivers
that may need it.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/message.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb.h        |  3 +++
 2 files changed, 62 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 49cfd7928a1c..333b22c68aa4 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1493,6 +1493,65 @@ free_interfaces:
 	return 0;
 }
 
+struct set_config_request {
+	struct usb_device	*udev;
+	int			config;
+	struct work_struct	work;
+};
+
+/* Worker routine for usb_driver_set_configuration() */
+static void driver_set_config_work(void *_req)
+{
+	struct set_config_request *req = _req;
+
+	usb_lock_device(req->udev);
+	usb_set_configuration(req->udev, req->config);
+	usb_unlock_device(req->udev);
+	usb_put_dev(req->udev);
+	kfree(req);
+}
+
+/**
+ * usb_driver_set_configuration - Provide a way for drivers to change device configurations
+ * @udev: the device whose configuration is being updated
+ * @config: the configuration being chosen.
+ * Context: In process context, must be able to sleep
+ *
+ * Device interface drivers are not allowed to change device configurations.
+ * This is because changing configurations will destroy the interface the
+ * driver is bound to and create new ones; it would be like a floppy-disk
+ * driver telling the computer to replace the floppy-disk drive with a
+ * tape drive!
+ *
+ * Still, in certain specialized circumstances the need may arise.  This
+ * routine gets around the normal restrictions by using a work thread to
+ * submit the change-config request.
+ *
+ * Returns 0 if the request was succesfully queued, error code otherwise.
+ * The caller has no way to know whether the queued request will eventually
+ * succeed.
+ */
+int usb_driver_set_configuration(struct usb_device *udev, int config)
+{
+	struct set_config_request *req;
+
+	req = kmalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+	req->udev = udev;
+	req->config = config;
+	INIT_WORK(&req->work, driver_set_config_work, req);
+
+	usb_get_dev(udev);
+	if (!schedule_work(&req->work)) {
+		usb_put_dev(udev);
+		kfree(req);
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usb_driver_set_configuration);
+
 // synchronous request completion model
 EXPORT_SYMBOL(usb_control_msg);
 EXPORT_SYMBOL(usb_bulk_msg);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 26d8a5f36896..f104efa04d79 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1099,6 +1099,9 @@ extern int usb_clear_halt(struct usb_device *dev, int pipe);
 extern int usb_reset_configuration(struct usb_device *dev);
 extern int usb_set_interface(struct usb_device *dev, int ifnum, int alternate);
 
+/* this request isn't really synchronous, but it belongs with the others */
+extern int usb_driver_set_configuration(struct usb_device *udev, int config);
+
 /*
  * timeouts, in milliseconds, used for sending/receiving control messages
  * they typically complete within a few frames (msec) after they're issued
-- 
cgit v1.2.3


From a6d2bb9ff919b4685bd684620ec7a1ffa8bf2349 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 11:27:36 -0400
Subject: USB: remove struct usb_operations

All of the currently-supported USB host controller drivers use the HCD
bus-glue framework.  As part of the program for flattening out the glue
layer, this patch (as769) removes the usb_operations structure.  All
function calls now go directly to the HCD routines (slightly renamed
to remain within the "usb_" namespace).

The patch also removes usb_alloc_bus(), because it's not useful in the
HCD framework and it wasn't referenced anywhere.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c     | 49 +++++-----------------------------------------
 drivers/usb/core/hcd.h     | 32 +++++++-----------------------
 drivers/usb/core/message.c |  4 ++--
 drivers/usb/core/urb.c     | 13 +++++-------
 drivers/usb/core/usb.c     | 10 +++++-----
 include/linux/usb.h        |  4 ----
 6 files changed, 24 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index ea20a3a5a9b9..2102c4deec1e 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -731,30 +731,6 @@ static void usb_bus_init (struct usb_bus *bus)
 	kref_init(&bus->kref);
 }
 
-/**
- * usb_alloc_bus - creates a new USB host controller structure
- * @op: pointer to a struct usb_operations that this bus structure should use
- * Context: !in_interrupt()
- *
- * Creates a USB host controller bus structure with the specified 
- * usb_operations and initializes all the necessary internal objects.
- *
- * If no memory is available, NULL is returned.
- *
- * The caller should call usb_put_bus() when it is finished with the structure.
- */
-struct usb_bus *usb_alloc_bus (struct usb_operations *op)
-{
-	struct usb_bus *bus;
-
-	bus = kzalloc (sizeof *bus, GFP_KERNEL);
-	if (!bus)
-		return NULL;
-	usb_bus_init (bus);
-	bus->op = op;
-	return bus;
-}
-
 /*-------------------------------------------------------------------------*/
 
 /**
@@ -1102,7 +1078,7 @@ static void urb_unlink (struct urb *urb)
  * expects usb_submit_urb() to have sanity checked and conditioned all
  * inputs in the urb
  */
-static int hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
+int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
 {
 	int			status;
 	struct usb_hcd		*hcd = urb->dev->bus->hcpriv;
@@ -1211,7 +1187,7 @@ done:
 /*-------------------------------------------------------------------------*/
 
 /* called in any context */
-static int hcd_get_frame_number (struct usb_device *udev)
+int usb_hcd_get_frame_number (struct usb_device *udev)
 {
 	struct usb_hcd	*hcd = (struct usb_hcd *)udev->bus->hcpriv;
 	if (!HC_IS_RUNNING (hcd->state))
@@ -1253,7 +1229,7 @@ unlink1 (struct usb_hcd *hcd, struct urb *urb)
  * caller guarantees urb won't be recycled till both unlink()
  * and the urb's completion function return
  */
-static int hcd_unlink_urb (struct urb *urb, int status)
+int usb_hcd_unlink_urb (struct urb *urb, int status)
 {
 	struct usb_host_endpoint	*ep;
 	struct usb_hcd			*hcd = NULL;
@@ -1351,8 +1327,8 @@ done:
  * example:  a qh stored in ep->hcpriv, holding state related to endpoint
  * type, maxpacket size, toggle, halt status, and scheduling.
  */
-static void
-hcd_endpoint_disable (struct usb_device *udev, struct usb_host_endpoint *ep)
+void usb_hcd_endpoint_disable (struct usb_device *udev,
+		struct usb_host_endpoint *ep)
 {
 	struct usb_hcd		*hcd;
 	struct urb		*urb;
@@ -1589,20 +1565,6 @@ EXPORT_SYMBOL (usb_bus_start_enum);
 
 /*-------------------------------------------------------------------------*/
 
-/*
- * usb_hcd_operations - adapts usb_bus framework to HCD framework (bus glue)
- */
-static struct usb_operations usb_hcd_operations = {
-	.get_frame_number =	hcd_get_frame_number,
-	.submit_urb =		hcd_submit_urb,
-	.unlink_urb =		hcd_unlink_urb,
-	.buffer_alloc =		hcd_buffer_alloc,
-	.buffer_free =		hcd_buffer_free,
-	.disable =		hcd_endpoint_disable,
-};
-
-/*-------------------------------------------------------------------------*/
-
 /**
  * usb_hcd_giveback_urb - return URB from HCD to device driver
  * @hcd: host controller returning the URB
@@ -1744,7 +1706,6 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver,
 	dev_set_drvdata(dev, hcd);
 
 	usb_bus_init(&hcd->self);
-	hcd->self.op = &usb_hcd_operations;
 	hcd->self.hcpriv = hcd;
 	hcd->self.release = &hcd_release;
 	hcd->self.controller = dev;
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index fc71a08a1af4..83e229914797 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -139,28 +139,6 @@ struct hcd_timeout {	/* timeouts we allocate */
 
 /*-------------------------------------------------------------------------*/
 
-/*
- * FIXME usb_operations should vanish or become hc_driver,
- * when usb_bus and usb_hcd become the same thing.
- */
-
-struct usb_operations {
-	int (*get_frame_number) (struct usb_device *usb_dev);
-	int (*submit_urb) (struct urb *urb, gfp_t mem_flags);
-	int (*unlink_urb) (struct urb *urb, int status);
-
-	/* allocate dma-consistent buffer for URB_DMA_NOMAPPING */
-	void *(*buffer_alloc)(struct usb_bus *bus, size_t size,
-			gfp_t mem_flags,
-			dma_addr_t *dma);
-	void (*buffer_free)(struct usb_bus *bus, size_t size,
-			void *addr, dma_addr_t dma);
-
-	void (*disable)(struct usb_device *udev,
-			struct usb_host_endpoint *ep);
-};
-
-/* each driver provides one of these, and hardware init support */
 
 struct pt_regs;
 
@@ -222,7 +200,13 @@ struct hc_driver {
 		/* Needed only if port-change IRQs are level-triggered */
 };
 
-extern void usb_hcd_giveback_urb (struct usb_hcd *hcd, struct urb *urb, struct pt_regs *regs);
+extern int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags);
+extern int usb_hcd_unlink_urb (struct urb *urb, int status);
+extern void usb_hcd_giveback_urb (struct usb_hcd *hcd, struct urb *urb,
+		struct pt_regs *regs);
+extern void usb_hcd_endpoint_disable (struct usb_device *udev,
+		struct usb_host_endpoint *ep);
+extern int usb_hcd_get_frame_number (struct usb_device *udev);
 
 extern struct usb_hcd *usb_create_hcd (const struct hc_driver *driver,
 		struct device *dev, char *bus_name);
@@ -361,8 +345,6 @@ extern long usb_calc_bus_time (int speed, int is_input,
 
 /*-------------------------------------------------------------------------*/
 
-extern struct usb_bus *usb_alloc_bus (struct usb_operations *);
-
 extern void usb_set_device_state(struct usb_device *udev,
 		enum usb_device_state new_state);
 
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 333b22c68aa4..1580c81a0db7 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -984,8 +984,8 @@ void usb_disable_endpoint(struct usb_device *dev, unsigned int epaddr)
 		ep = dev->ep_in[epnum];
 		dev->ep_in[epnum] = NULL;
 	}
-	if (ep && dev->bus && dev->bus->op && dev->bus->op->disable)
-		dev->bus->op->disable(dev, ep);
+	if (ep && dev->bus)
+		usb_hcd_endpoint_disable(dev, ep);
 }
 
 /**
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 9864988377c7..576919927f53 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -221,7 +221,6 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 {
 	int			pipe, temp, max;
 	struct usb_device	*dev;
-	struct usb_operations	*op;
 	int			is_out;
 
 	if (!urb || urb->hcpriv || !urb->complete)
@@ -233,8 +232,6 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 	if (dev->bus->controller->power.power_state.event != PM_EVENT_ON
 			|| dev->state == USB_STATE_SUSPENDED)
 		return -EHOSTUNREACH;
-	if (!(op = dev->bus->op) || !op->submit_urb)
-		return -ENODEV;
 
 	urb->status = -EINPROGRESS;
 	urb->actual_length = 0;
@@ -376,7 +373,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 		urb->interval = temp;
 	}
 
-	return op->submit_urb (urb, mem_flags);
+	return usb_hcd_submit_urb (urb, mem_flags);
 }
 
 /*-------------------------------------------------------------------*/
@@ -440,9 +437,9 @@ int usb_unlink_urb(struct urb *urb)
 {
 	if (!urb)
 		return -EINVAL;
-	if (!(urb->dev && urb->dev->bus && urb->dev->bus->op))
+	if (!(urb->dev && urb->dev->bus))
 		return -ENODEV;
-	return urb->dev->bus->op->unlink_urb(urb, -ECONNRESET);
+	return usb_hcd_unlink_urb(urb, -ECONNRESET);
 }
 
 /**
@@ -468,13 +465,13 @@ int usb_unlink_urb(struct urb *urb)
 void usb_kill_urb(struct urb *urb)
 {
 	might_sleep();
-	if (!(urb && urb->dev && urb->dev->bus && urb->dev->bus->op))
+	if (!(urb && urb->dev && urb->dev->bus))
 		return;
 	spin_lock_irq(&urb->lock);
 	++urb->reject;
 	spin_unlock_irq(&urb->lock);
 
-	urb->dev->bus->op->unlink_urb(urb, -ENOENT);
+	usb_hcd_unlink_urb(urb, -ENOENT);
 	wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0);
 
 	spin_lock_irq(&urb->lock);
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 4eb98eb3804f..7ab9d29215f8 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -480,7 +480,7 @@ exit:
  */
 int usb_get_current_frame_number(struct usb_device *dev)
 {
-	return dev->bus->op->get_frame_number (dev);
+	return usb_hcd_get_frame_number (dev);
 }
 
 /**
@@ -677,9 +677,9 @@ void *usb_buffer_alloc (
 	dma_addr_t *dma
 )
 {
-	if (!dev || !dev->bus || !dev->bus->op || !dev->bus->op->buffer_alloc)
+	if (!dev || !dev->bus)
 		return NULL;
-	return dev->bus->op->buffer_alloc (dev->bus, size, mem_flags, dma);
+	return hcd_buffer_alloc (dev->bus, size, mem_flags, dma);
 }
 
 /**
@@ -700,11 +700,11 @@ void usb_buffer_free (
 	dma_addr_t dma
 )
 {
-	if (!dev || !dev->bus || !dev->bus->op || !dev->bus->op->buffer_free)
+	if (!dev || !dev->bus)
 		return;
 	if (!addr)
 		return;
-	dev->bus->op->buffer_free (dev->bus, size, addr, dma);
+	hcd_buffer_free (dev->bus, size, addr, dma);
 }
 
 /**
diff --git a/include/linux/usb.h b/include/linux/usb.h
index f104efa04d79..4709033f8fa7 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -257,8 +257,6 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size,
 
 /* ----------------------------------------------------------------------- */
 
-struct usb_operations;
-
 /* USB device number allocation bitmap */
 struct usb_devmap {
 	unsigned long devicemap[128 / (8*sizeof(unsigned long))];
@@ -279,7 +277,6 @@ struct usb_bus {
 					 * round-robin allocation */
 
 	struct usb_devmap devmap;	/* device address allocation map */
-	struct usb_operations *op;	/* Operations (specific to the HC) */
 	struct usb_device *root_hub;	/* Root hub */
 	struct list_head bus_list;	/* list of busses */
 	void *hcpriv;                   /* Host Controller private data */
@@ -1051,7 +1048,6 @@ extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags);
 extern int usb_unlink_urb(struct urb *urb);
 extern void usb_kill_urb(struct urb *urb);
 
-#define HAVE_USB_BUFFERS
 void *usb_buffer_alloc (struct usb_device *dev, size_t size,
 	gfp_t mem_flags, dma_addr_t *dma);
 void usb_buffer_free (struct usb_device *dev, size_t size,
-- 
cgit v1.2.3


From dd990f16a39d4e615c0b70a0ab50b79b32bfb16d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 11:29:56 -0400
Subject: usbcore: Add flag for whether a host controller uses DMA

This patch (as770b) introduces a new field to usb_bus: a flag
indicating whether or not the host controller uses DMA.  This serves
to encapsulate the computation.  It also means we will have only one
spot to update if the DMA API changes.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c | 8 +++++---
 include/linux/usb.h    | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 2102c4deec1e..0cc14206920a 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1152,7 +1152,7 @@ doit:
 	/* lower level hcd code should use *_dma exclusively,
 	 * unless it uses pio or talks to another transport.
 	 */
-	if (hcd->self.controller->dma_mask) {
+	if (hcd->self.uses_dma) {
 		if (usb_pipecontrol (urb->pipe)
 			&& !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
 			urb->setup_dma = dma_map_single (
@@ -1585,8 +1585,9 @@ void usb_hcd_giveback_urb (struct usb_hcd *hcd, struct urb *urb, struct pt_regs
 	at_root_hub = (urb->dev == hcd->self.root_hub);
 	urb_unlink (urb);
 
-	/* lower level hcd code should use *_dma exclusively */
-	if (hcd->self.controller->dma_mask && !at_root_hub) {
+	/* lower level hcd code should use *_dma exclusively if the
+	 * host controller does DMA */
+	if (hcd->self.uses_dma && !at_root_hub) {
 		if (usb_pipecontrol (urb->pipe)
 			&& !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
 			dma_unmap_single (hcd->self.controller, urb->setup_dma,
@@ -1710,6 +1711,7 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver,
 	hcd->self.release = &hcd_release;
 	hcd->self.controller = dev;
 	hcd->self.bus_name = bus_name;
+	hcd->self.uses_dma = (dev->dma_mask != NULL);
 
 	init_timer(&hcd->rh_timer);
 	hcd->rh_timer.function = rh_timer_func;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 4709033f8fa7..09661759621f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -269,6 +269,7 @@ struct usb_bus {
 	struct device *controller;	/* host/master side hardware */
 	int busnum;			/* Bus number (in order of reg) */
 	char *bus_name;			/* stable id (PCI slot_name etc) */
+	u8 uses_dma;			/* Does the host controller use DMA? */
 	u8 otg_port;			/* 0, or number of OTG/HNP port */
 	unsigned is_b_host:1;		/* true during some HNP roleswitches */
 	unsigned b_hnp_enable:1;	/* OTG: did A-Host enable HNP? */
-- 
cgit v1.2.3


From 1720058343fa43a1a25bfad9e62ea06e7e9743b6 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 11:32:52 -0400
Subject: usbcore: trim down usb_bus structure

As part of the ongoing program to flatten out the HCD bus-glue layer,
this patch (as771b) eliminates the hcpriv, release, and kref fields
from struct usb_bus.  hcpriv and release were not being used for
anything worthwhile, and kref has been moved into the enclosing
usb_hcd structure.

Along with those changes, the patch gets rid of usb_bus_get and
usb_bus_put, replacing them with usb_get_hcd and usb_put_hcd.

The one interesting aspect is that the dev_set_drvdata call was
removed from usb_put_hcd, where it clearly doesn't belong.  This means
the driver private data won't get reset to NULL.  It shouldn't cause
any problems, since the private data is undefined when no driver is
bound.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/buffer.c      |  4 +--
 drivers/usb/core/hcd.c         | 67 +++++++++++++++---------------------------
 drivers/usb/core/hcd.h         | 13 ++++----
 drivers/usb/core/usb.c         |  5 ++--
 drivers/usb/gadget/dummy_hcd.c |  8 ++---
 drivers/usb/host/ehci-dbg.c    |  6 ++--
 drivers/usb/host/ohci-dbg.c    |  6 ++--
 drivers/usb/mon/mon_main.c     |  6 ++--
 include/linux/usb.h            |  3 --
 9 files changed, 44 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
index f4f4ef0f377a..840442a25b61 100644
--- a/drivers/usb/core/buffer.c
+++ b/drivers/usb/core/buffer.c
@@ -104,7 +104,7 @@ void *hcd_buffer_alloc (
 	dma_addr_t		*dma
 )
 {
-	struct usb_hcd		*hcd = bus->hcpriv;
+	struct usb_hcd		*hcd = bus_to_hcd(bus);
 	int 			i;
 
 	/* some USB hosts just use PIO */
@@ -127,7 +127,7 @@ void hcd_buffer_free (
 	dma_addr_t		dma
 )
 {
-	struct usb_hcd		*hcd = bus->hcpriv;
+	struct usb_hcd		*hcd = bus_to_hcd(bus);
 	int 			i;
 
 	if (!addr)
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 0cc14206920a..9dfc812de034 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -664,31 +664,6 @@ static int usb_rh_urb_dequeue (struct usb_hcd *hcd, struct urb *urb)
 
 /*-------------------------------------------------------------------------*/
 
-/* exported only within usbcore */
-struct usb_bus *usb_bus_get(struct usb_bus *bus)
-{
-	if (bus)
-		kref_get(&bus->kref);
-	return bus;
-}
-
-static void usb_host_release(struct kref *kref)
-{
-	struct usb_bus *bus = container_of(kref, struct usb_bus, kref);
-
-	if (bus->release)
-		bus->release(bus);
-}
-
-/* exported only within usbcore */
-void usb_bus_put(struct usb_bus *bus)
-{
-	if (bus)
-		kref_put(&bus->kref, usb_host_release);
-}
-
-/*-------------------------------------------------------------------------*/
-
 static struct class *usb_host_class;
 
 int usb_host_init(void)
@@ -720,15 +695,12 @@ static void usb_bus_init (struct usb_bus *bus)
 	bus->devnum_next = 1;
 
 	bus->root_hub = NULL;
-	bus->hcpriv = NULL;
 	bus->busnum = -1;
 	bus->bandwidth_allocated = 0;
 	bus->bandwidth_int_reqs  = 0;
 	bus->bandwidth_isoc_reqs = 0;
 
 	INIT_LIST_HEAD (&bus->bus_list);
-
-	kref_init(&bus->kref);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -1081,7 +1053,7 @@ static void urb_unlink (struct urb *urb)
 int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
 {
 	int			status;
-	struct usb_hcd		*hcd = urb->dev->bus->hcpriv;
+	struct usb_hcd		*hcd = bus_to_hcd(urb->dev->bus);
 	struct usb_host_endpoint *ep;
 	unsigned long		flags;
 
@@ -1189,7 +1161,8 @@ done:
 /* called in any context */
 int usb_hcd_get_frame_number (struct usb_device *udev)
 {
-	struct usb_hcd	*hcd = (struct usb_hcd *)udev->bus->hcpriv;
+	struct usb_hcd	*hcd = bus_to_hcd(udev->bus);
+
 	if (!HC_IS_RUNNING (hcd->state))
 		return -ESHUTDOWN;
 	return hcd->driver->get_frame_number (hcd);
@@ -1262,7 +1235,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status)
 	spin_lock (&hcd_data_lock);
 
 	sys = &urb->dev->dev;
-	hcd = urb->dev->bus->hcpriv;
+	hcd = bus_to_hcd(urb->dev->bus);
 	if (hcd == NULL) {
 		retval = -ENODEV;
 		goto done;
@@ -1333,7 +1306,7 @@ void usb_hcd_endpoint_disable (struct usb_device *udev,
 	struct usb_hcd		*hcd;
 	struct urb		*urb;
 
-	hcd = udev->bus->hcpriv;
+	hcd = bus_to_hcd(udev->bus);
 
 	WARN_ON (!HC_IS_RUNNING (hcd->state) && hcd->state != HC_STATE_HALT &&
 			udev->state != USB_STATE_NOTATTACHED);
@@ -1673,14 +1646,6 @@ EXPORT_SYMBOL_GPL (usb_hc_died);
 
 /*-------------------------------------------------------------------------*/
 
-static void hcd_release (struct usb_bus *bus)
-{
-	struct usb_hcd *hcd;
-
-	hcd = container_of(bus, struct usb_hcd, self);
-	kfree(hcd);
-}
-
 /**
  * usb_create_hcd - create and initialize an HCD structure
  * @driver: HC driver that will use this hcd
@@ -1705,10 +1670,9 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver,
 		return NULL;
 	}
 	dev_set_drvdata(dev, hcd);
+	kref_init(&hcd->kref);
 
 	usb_bus_init(&hcd->self);
-	hcd->self.hcpriv = hcd;
-	hcd->self.release = &hcd_release;
 	hcd->self.controller = dev;
 	hcd->self.bus_name = bus_name;
 	hcd->self.uses_dma = (dev->dma_mask != NULL);
@@ -1725,10 +1689,25 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver,
 }
 EXPORT_SYMBOL (usb_create_hcd);
 
+static void hcd_release (struct kref *kref)
+{
+	struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);
+
+	kfree(hcd);
+}
+
+struct usb_hcd *usb_get_hcd (struct usb_hcd *hcd)
+{
+	if (hcd)
+		kref_get (&hcd->kref);
+	return hcd;
+}
+EXPORT_SYMBOL (usb_get_hcd);
+
 void usb_put_hcd (struct usb_hcd *hcd)
 {
-	dev_set_drvdata(hcd->self.controller, NULL);
-	usb_bus_put(&hcd->self);
+	if (hcd)
+		kref_put (&hcd->kref, hcd_release);
 }
 EXPORT_SYMBOL (usb_put_hcd);
 
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index 83e229914797..7a2bcba2ae61 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -55,12 +55,13 @@
 
 /*-------------------------------------------------------------------------*/
 
-struct usb_hcd {	/* usb_bus.hcpriv points to this */
+struct usb_hcd {
 
 	/*
 	 * housekeeping
 	 */
 	struct usb_bus		self;		/* hcd is-a bus */
+	struct kref		kref;		/* reference counter */
 
 	const char		*product_desc;	/* product/vendor string */
 	char			irq_descr[24];	/* driver + bus # */
@@ -129,8 +130,10 @@ static inline struct usb_bus *hcd_to_bus (struct usb_hcd *hcd)
 	return &hcd->self;
 }
 
-
-// urb.hcpriv is really hardware-specific
+static inline struct usb_hcd *bus_to_hcd (struct usb_bus *bus)
+{
+	return container_of(bus, struct usb_hcd, self);
+}
 
 struct hcd_timeout {	/* timeouts we allocate */
 	struct list_head	timeout_list;
@@ -210,6 +213,7 @@ extern int usb_hcd_get_frame_number (struct usb_device *udev);
 
 extern struct usb_hcd *usb_create_hcd (const struct hc_driver *driver,
 		struct device *dev, char *bus_name);
+extern struct usb_hcd *usb_get_hcd (struct usb_hcd *hcd);
 extern void usb_put_hcd (struct usb_hcd *hcd);
 extern int usb_add_hcd(struct usb_hcd *hcd,
 		unsigned int irqnum, unsigned long irqflags);
@@ -356,9 +360,6 @@ extern struct list_head usb_bus_list;
 extern struct mutex usb_bus_list_lock;
 extern wait_queue_head_t usb_kill_urb_queue;
 
-extern struct usb_bus *usb_bus_get (struct usb_bus *bus);
-extern void usb_bus_put (struct usb_bus *bus);
-
 extern void usb_enable_root_hub_irq (struct usb_bus *bus);
 
 extern int usb_find_interface_driver (struct usb_device *dev,
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 7ab9d29215f8..b0c0a993338f 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -169,7 +169,7 @@ static void usb_release_dev(struct device *dev)
 	udev = to_usb_device(dev);
 
 	usb_destroy_configuration(udev);
-	usb_bus_put(udev->bus);
+	usb_put_hcd(bus_to_hcd(udev->bus));
 	kfree(udev->product);
 	kfree(udev->manufacturer);
 	kfree(udev->serial);
@@ -197,8 +197,7 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1)
 	if (!dev)
 		return NULL;
 
-	bus = usb_bus_get(bus);
-	if (!bus) {
+	if (!usb_get_hcd(bus_to_hcd(bus))) {
 		kfree(dev);
 		return NULL;
 	}
diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c
index 7d1c22c34957..fdab97a27c08 100644
--- a/drivers/usb/gadget/dummy_hcd.c
+++ b/drivers/usb/gadget/dummy_hcd.c
@@ -889,11 +889,9 @@ EXPORT_SYMBOL (net2280_set_fifo_mode);
 static void
 dummy_gadget_release (struct device *dev)
 {
-#if 0		/* usb_bus_put isn't EXPORTed! */
 	struct dummy	*dum = gadget_dev_to_dummy (dev);
 
-	usb_bus_put (&dummy_to_hcd (dum)->self);
-#endif
+	usb_put_hcd (dummy_to_hcd (dum));
 }
 
 static int dummy_udc_probe (struct platform_device *pdev)
@@ -915,9 +913,7 @@ static int dummy_udc_probe (struct platform_device *pdev)
 	if (rc < 0)
 		return rc;
 
-#if 0		/* usb_bus_get isn't EXPORTed! */
-	usb_bus_get (&dummy_to_hcd (dum)->self);
-#endif
+	usb_get_hcd (dummy_to_hcd (dum));
 
 	platform_set_drvdata (pdev, dum);
 	device_create_file (&dum->gadget.dev, &dev_attr_function);
diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index 65ac9fef3a7c..215ce6d06394 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -451,7 +451,7 @@ show_async (struct class_device *class_dev, char *buf)
 	*buf = 0;
 
 	bus = class_get_devdata(class_dev);
-	hcd = bus->hcpriv;
+	hcd = bus_to_hcd(bus);
 	ehci = hcd_to_ehci (hcd);
 	next = buf;
 	size = PAGE_SIZE;
@@ -497,7 +497,7 @@ show_periodic (struct class_device *class_dev, char *buf)
 	seen_count = 0;
 
 	bus = class_get_devdata(class_dev);
-	hcd = bus->hcpriv;
+	hcd = bus_to_hcd(bus);
 	ehci = hcd_to_ehci (hcd);
 	next = buf;
 	size = PAGE_SIZE;
@@ -634,7 +634,7 @@ show_registers (struct class_device *class_dev, char *buf)
 	static char		label [] = "";
 
 	bus = class_get_devdata(class_dev);
-	hcd = bus->hcpriv;
+	hcd = bus_to_hcd(bus);
 	ehci = hcd_to_ehci (hcd);
 	next = buf;
 	size = PAGE_SIZE;
diff --git a/drivers/usb/host/ohci-dbg.c b/drivers/usb/host/ohci-dbg.c
index da52609a9290..534d07dcb824 100644
--- a/drivers/usb/host/ohci-dbg.c
+++ b/drivers/usb/host/ohci-dbg.c
@@ -477,7 +477,7 @@ show_async (struct class_device *class_dev, char *buf)
 	unsigned long		flags;
 
 	bus = class_get_devdata(class_dev);
-	hcd = bus->hcpriv;
+	hcd = bus_to_hcd(bus);
 	ohci = hcd_to_ohci(hcd);
 
 	/* display control and bulk lists together, for simplicity */
@@ -510,7 +510,7 @@ show_periodic (struct class_device *class_dev, char *buf)
 	seen_count = 0;
 
 	bus = class_get_devdata(class_dev);
-	hcd = bus->hcpriv;
+	hcd = bus_to_hcd(bus);
 	ohci = hcd_to_ohci(hcd);
 	next = buf;
 	size = PAGE_SIZE;
@@ -607,7 +607,7 @@ show_registers (struct class_device *class_dev, char *buf)
 	u32			rdata;
 
 	bus = class_get_devdata(class_dev);
-	hcd = bus->hcpriv;
+	hcd = bus_to_hcd(bus);
 	ohci = hcd_to_ohci(hcd);
 	regs = ohci->regs;
 	next = buf;
diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c
index 275a66f83058..e0ed36cdfd8b 100644
--- a/drivers/usb/mon/mon_main.c
+++ b/drivers/usb/mon/mon_main.c
@@ -265,7 +265,6 @@ static void mon_dissolve(struct mon_bus *mbus, struct usb_bus *ubus)
 	ubus->mon_bus = NULL;
 	mbus->u_bus = NULL;
 	mb();
-	// usb_bus_put(ubus);
 }
 
 /*
@@ -297,10 +296,9 @@ static void mon_bus_init(struct dentry *mondir, struct usb_bus *ubus)
 	INIT_LIST_HEAD(&mbus->r_list);
 
 	/*
-	 * This usb_bus_get here is superfluous, because we receive
-	 * a notification if usb_bus is about to be removed.
+	 * We don't need to take a reference to ubus, because we receive
+	 * a notification if the bus is about to be removed.
 	 */
-	// usb_bus_get(ubus);
 	mbus->u_bus = ubus;
 	ubus->mon_bus = mbus;
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 09661759621f..c66303285a45 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -280,7 +280,6 @@ struct usb_bus {
 	struct usb_devmap devmap;	/* device address allocation map */
 	struct usb_device *root_hub;	/* Root hub */
 	struct list_head bus_list;	/* list of busses */
-	void *hcpriv;                   /* Host Controller private data */
 
 	int bandwidth_allocated;	/* on this bus: how much of the time
 					 * reserved for periodic (intr/iso)
@@ -295,8 +294,6 @@ struct usb_bus {
 	struct dentry *usbfs_dentry;	/* usbfs dentry entry for the bus */
 
 	struct class_device *class_dev;	/* class device for this bus */
-	struct kref kref;		/* reference counting for this bus */
-	void (*release)(struct usb_bus *bus);
 
 #if defined(CONFIG_USB_MON)
 	struct mon_bus *mon_bus;	/* non-null when associated */
-- 
cgit v1.2.3


From b6956ffa595db97656d5901ca8fec77ef272d41a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 15:46:48 -0400
Subject: usbcore: store each usb_device's level in the tree

This patch (as778) adds a field to struct usb_device to store the
device's level in the USB tree.  In itself this number isn't really
important.  But the overhead is very low, and in a later patch it will
be used for preventing bogus warnings from the lockdep checker.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hub.c | 1 +
 include/linux/usb.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index f5adce049b35..78e910b2046c 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2414,6 +2414,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 		usb_set_device_state(udev, USB_STATE_POWERED);
 		udev->speed = USB_SPEED_UNKNOWN;
  		udev->bus_mA = hub->mA_per_port;
+		udev->level = hdev->level + 1;
 
 		/* set the address */
 		choose_address(udev);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index c66303285a45..df5c93eb3ce9 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -348,6 +348,7 @@ struct usb_device {
 
 	unsigned short bus_mA;		/* Current available from the bus */
 	u8 portnum;			/* Parent port number (origin 1) */
+	u8 level;			/* Number of USB hub ancestors */
 
 	int have_langid;		/* whether string_langid is valid */
 	int string_langid;		/* language ID for strings */
-- 
cgit v1.2.3


From 645daaab0b6adc35c1838df2a82f9d729fdb1767 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 15:47:02 -0400
Subject: usbcore: add autosuspend/autoresume infrastructure

This patch (as739) adds the basic infrastructure for USB autosuspend
and autoresume.  The main features are:

	PM usage counters added to struct usb_device and struct
	usb_interface, indicating whether it's okay to autosuspend
	them or they are currently in use.

	Flag added to usb_device indicating whether the current
	suspend/resume operation originated from outside or as an
	autosuspend/autoresume.

	Flag added to usb_driver indicating whether the driver
	supports autosuspend.  If not, no device bound to the driver
	will be autosuspended.

	Mutex added to usb_device for protecting PM operations.
	Unlike the device semaphore, the locking rule for the pm_mutex
	is that you must acquire the locks going _up_ the device tree.

	New routines handling autosuspend/autoresume requests for
	interfaces and devices.

	Suspend and resume requests are propagated up the device tree
	(but not outside the USB subsystem).

	work_struct added to usb_device, for carrying out delayed
	autosuspend requests.

	Autoresume added (and autosuspend prevented) during probe and
	disconnect.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c | 419 ++++++++++++++++++++++++++++++++++++++++++----
 drivers/usb/core/hub.c    |  49 ++++--
 drivers/usb/core/usb.c    |  23 +++
 drivers/usb/core/usb.h    |  14 ++
 include/linux/usb.h       |  33 ++++
 5 files changed, 493 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index a5d11461f5a9..2b2000ac05ab 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -157,12 +157,13 @@ static int usb_probe_device(struct device *dev)
 
 	udev = to_usb_device(dev);
 
-	/* FIXME: resume a suspended device */
-	if (udev->state == USB_STATE_SUSPENDED)
-		return -EHOSTUNREACH;
-
 	/* TODO: Add real matching code */
 
+	/* The device should always appear to be in use
+	 * unless the driver suports autosuspend.
+	 */
+	udev->pm_usage_cnt = !(udriver->supports_autosuspend);
+
 	error = udriver->probe(udev);
 	return error;
 }
@@ -182,6 +183,7 @@ static int usb_probe_interface(struct device *dev)
 {
 	struct usb_driver *driver = to_usb_driver(dev->driver);
 	struct usb_interface *intf;
+	struct usb_device *udev;
 	const struct usb_device_id *id;
 	int error = -ENODEV;
 
@@ -191,10 +193,7 @@ static int usb_probe_interface(struct device *dev)
 		return error;
 
 	intf = to_usb_interface(dev);
-
-	/* FIXME we'd much prefer to just resume it ... */
-	if (interface_to_usbdev(intf)->state == USB_STATE_SUSPENDED)
-		return -EHOSTUNREACH;
+	udev = interface_to_usbdev(intf);
 
 	id = usb_match_id(intf, driver->id_table);
 	if (!id)
@@ -202,18 +201,31 @@ static int usb_probe_interface(struct device *dev)
 	if (id) {
 		dev_dbg(dev, "%s - got id\n", __FUNCTION__);
 
+		error = usb_autoresume_device(udev, 1);
+		if (error)
+			return error;
+
 		/* Interface "power state" doesn't correspond to any hardware
 		 * state whatsoever.  We use it to record when it's bound to
 		 * a driver that may start I/0:  it's not frozen/quiesced.
 		 */
 		mark_active(intf);
 		intf->condition = USB_INTERFACE_BINDING;
+
+		/* The interface should always appear to be in use
+		 * unless the driver suports autosuspend.
+		 */
+		intf->pm_usage_cnt = !(driver->supports_autosuspend);
+
 		error = driver->probe(intf, id);
 		if (error) {
 			mark_quiesced(intf);
+			intf->needs_remote_wakeup = 0;
 			intf->condition = USB_INTERFACE_UNBOUND;
 		} else
 			intf->condition = USB_INTERFACE_BOUND;
+
+		usb_autosuspend_device(udev, 1);
 	}
 
 	return error;
@@ -224,9 +236,15 @@ static int usb_unbind_interface(struct device *dev)
 {
 	struct usb_driver *driver = to_usb_driver(dev->driver);
 	struct usb_interface *intf = to_usb_interface(dev);
+	struct usb_device *udev;
+	int error;
 
 	intf->condition = USB_INTERFACE_UNBINDING;
 
+	/* Autoresume for set_interface call below */
+	udev = interface_to_usbdev(intf);
+	error = usb_autoresume_device(udev, 1);
+
 	/* release all urbs for this interface */
 	usb_disable_interface(interface_to_usbdev(intf), intf);
 
@@ -237,8 +255,13 @@ static int usb_unbind_interface(struct device *dev)
 			intf->altsetting[0].desc.bInterfaceNumber,
 			0);
 	usb_set_intfdata(intf, NULL);
+
 	intf->condition = USB_INTERFACE_UNBOUND;
 	mark_quiesced(intf);
+	intf->needs_remote_wakeup = 0;
+
+	if (!error)
+		usb_autosuspend_device(udev, 1);
 
 	return 0;
 }
@@ -267,14 +290,19 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 				struct usb_interface *iface, void* priv)
 {
 	struct device *dev = &iface->dev;
+	struct usb_device *udev = interface_to_usbdev(iface);
 
 	if (dev->driver)
 		return -EBUSY;
 
 	dev->driver = &driver->drvwrap.driver;
 	usb_set_intfdata(iface, priv);
+
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
 	iface->condition = USB_INTERFACE_BOUND;
 	mark_active(iface);
+	iface->pm_usage_cnt = !(driver->supports_autosuspend);
+	mutex_unlock(&udev->pm_mutex);
 
 	/* if interface was already added, bind now; else let
 	 * the future device_add() bind it, bypassing probe()
@@ -304,6 +332,7 @@ void usb_driver_release_interface(struct usb_driver *driver,
 					struct usb_interface *iface)
 {
 	struct device *dev = &iface->dev;
+	struct usb_device *udev = interface_to_usbdev(iface);
 
 	/* this should never happen, don't release something that's not ours */
 	if (!dev->driver || dev->driver != &driver->drvwrap.driver)
@@ -321,8 +350,12 @@ void usb_driver_release_interface(struct usb_driver *driver,
 
 	dev->driver = NULL;
 	usb_set_intfdata(iface, NULL);
+
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
 	iface->condition = USB_INTERFACE_UNBOUND;
 	mark_quiesced(iface);
+	iface->needs_remote_wakeup = 0;
+	mutex_unlock(&udev->pm_mutex);
 }
 EXPORT_SYMBOL(usb_driver_release_interface);
 
@@ -751,7 +784,7 @@ EXPORT_SYMBOL_GPL_FUTURE(usb_deregister);
 
 #ifdef CONFIG_PM
 
-/* Caller has locked udev */
+/* Caller has locked udev->pm_mutex */
 static int suspend_device(struct usb_device *udev, pm_message_t msg)
 {
 	struct usb_device_driver	*udriver;
@@ -763,6 +796,7 @@ static int suspend_device(struct usb_device *udev, pm_message_t msg)
 
 	/* For devices that don't have a driver, we do a standard suspend. */
 	if (udev->dev.driver == NULL) {
+		udev->do_remote_wakeup = 0;
 		status = usb_port_suspend(udev);
 		goto done;
 	}
@@ -771,12 +805,13 @@ static int suspend_device(struct usb_device *udev, pm_message_t msg)
 	status = udriver->suspend(udev, msg);
 
 done:
+	// dev_dbg(&udev->dev, "%s: status %d\n", __FUNCTION__, status);
 	if (status == 0)
 		udev->dev.power.power_state.event = msg.event;
 	return status;
 }
 
-/* Caller has locked udev */
+/* Caller has locked udev->pm_mutex */
 static int resume_device(struct usb_device *udev)
 {
 	struct usb_device_driver	*udriver;
@@ -796,12 +831,13 @@ static int resume_device(struct usb_device *udev)
 	status = udriver->resume(udev);
 
 done:
+	// dev_dbg(&udev->dev, "%s: status %d\n", __FUNCTION__, status);
 	if (status == 0)
 		udev->dev.power.power_state.event = PM_EVENT_ON;
 	return status;
 }
 
-/* Caller has locked intf's usb_device */
+/* Caller has locked intf's usb_device's pm_mutex */
 static int suspend_interface(struct usb_interface *intf, pm_message_t msg)
 {
 	struct usb_driver	*driver;
@@ -812,31 +848,33 @@ static int suspend_interface(struct usb_interface *intf, pm_message_t msg)
 			!is_active(intf))
 		goto done;
 
-	if (intf->dev.driver == NULL)		/* This can't happen */
+	if (intf->condition == USB_INTERFACE_UNBOUND)	/* This can't happen */
 		goto done;
 	driver = to_usb_driver(intf->dev.driver);
 
 	if (driver->suspend && driver->resume) {
 		status = driver->suspend(intf, msg);
-		if (status)
+		if (status == 0)
+			mark_quiesced(intf);
+		else if (!interface_to_usbdev(intf)->auto_pm)
 			dev_err(&intf->dev, "%s error %d\n",
 					"suspend", status);
-		else
-			mark_quiesced(intf);
 	} else {
 		// FIXME else if there's no suspend method, disconnect...
+		// Not possible if auto_pm is set...
 		dev_warn(&intf->dev, "no suspend for driver %s?\n",
 				driver->name);
 		mark_quiesced(intf);
 	}
 
 done:
+	// dev_dbg(&intf->dev, "%s: status %d\n", __FUNCTION__, status);
 	if (status == 0)
 		intf->dev.power.power_state.event = msg.event;
 	return status;
 }
 
-/* Caller has locked intf's usb_device */
+/* Caller has locked intf's usb_device's pm_mutex */
 static int resume_interface(struct usb_interface *intf)
 {
 	struct usb_driver	*driver;
@@ -846,8 +884,12 @@ static int resume_interface(struct usb_interface *intf)
 			is_active(intf))
 		goto done;
 
+	/* Don't let autoresume interfere with unbinding */
+	if (intf->condition == USB_INTERFACE_UNBINDING)
+		goto done;
+
 	/* Can't resume it if it doesn't have a driver. */
-	if (intf->dev.driver == NULL) {
+	if (intf->condition == USB_INTERFACE_UNBOUND) {
 		status = -ENOTCONN;
 		goto done;
 	}
@@ -867,18 +909,88 @@ static int resume_interface(struct usb_interface *intf)
 	}
 
 done:
+	// dev_dbg(&intf->dev, "%s: status %d\n", __FUNCTION__, status);
 	if (status == 0)
 		intf->dev.power.power_state.event = PM_EVENT_ON;
 	return status;
 }
 
-/* Caller has locked udev */
+/**
+ * usb_suspend_both - suspend a USB device and its interfaces
+ * @udev: the usb_device to suspend
+ * @msg: Power Management message describing this state transition
+ *
+ * This is the central routine for suspending USB devices.  It calls the
+ * suspend methods for all the interface drivers in @udev and then calls
+ * the suspend method for @udev itself.  If an error occurs at any stage,
+ * all the interfaces which were suspended are resumed so that they remain
+ * in the same state as the device.
+ *
+ * If an autosuspend is in progress (@udev->auto_pm is set), the routine
+ * checks first to make sure that neither the device itself or any of its
+ * active interfaces is in use (pm_usage_cnt is greater than 0).  If they
+ * are, the autosuspend fails.
+ *
+ * If the suspend succeeds, the routine recursively queues an autosuspend
+ * request for @udev's parent device, thereby propagating the change up
+ * the device tree.  If all of the parent's children are now suspended,
+ * the parent will autosuspend in turn.
+ *
+ * The suspend method calls are subject to mutual exclusion under control
+ * of @udev's pm_mutex.  Many of these calls are also under the protection
+ * of @udev's device lock (including all requests originating outside the
+ * USB subsystem), but autosuspend requests generated by a child device or
+ * interface driver may not be.  Usbcore will insure that the method calls
+ * do not arrive during bind, unbind, or reset operations.  However, drivers
+ * must be prepared to handle suspend calls arriving at unpredictable times.
+ * The only way to block such calls is to do an autoresume (preventing
+ * autosuspends) while holding @udev's device lock (preventing outside
+ * suspends).
+ *
+ * The caller must hold @udev->pm_mutex.
+ *
+ * This routine can run only in process context.
+ */
 int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
 {
 	int			status = 0;
 	int			i = 0;
 	struct usb_interface	*intf;
+	struct usb_device	*parent = udev->parent;
+
+	cancel_delayed_work(&udev->autosuspend);
+	if (udev->state == USB_STATE_NOTATTACHED)
+		return 0;
+	if (udev->state == USB_STATE_SUSPENDED)
+		return 0;
 
+	udev->do_remote_wakeup = device_may_wakeup(&udev->dev);
+
+	/* For autosuspend, fail fast if anything is in use.
+	 * Also fail if any interfaces require remote wakeup but it
+	 * isn't available. */
+	if (udev->auto_pm) {
+		if (udev->pm_usage_cnt > 0)
+			return -EBUSY;
+		if (udev->actconfig) {
+			for (; i < udev->actconfig->desc.bNumInterfaces; i++) {
+				intf = udev->actconfig->interface[i];
+				if (!is_active(intf))
+					continue;
+				if (intf->pm_usage_cnt > 0)
+					return -EBUSY;
+				if (intf->needs_remote_wakeup &&
+						!udev->do_remote_wakeup) {
+					dev_dbg(&udev->dev,
+	"remote wakeup needed for autosuspend\n");
+					return -EOPNOTSUPP;
+				}
+			}
+			i = 0;
+		}
+	}
+
+	/* Suspend all the interfaces and then udev itself */
 	if (udev->actconfig) {
 		for (; i < udev->actconfig->desc.bNumInterfaces; i++) {
 			intf = udev->actconfig->interface[i];
@@ -896,40 +1008,282 @@ int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
 			intf = udev->actconfig->interface[i];
 			resume_interface(intf);
 		}
-	}
+
+	/* If the suspend succeeded, propagate it up the tree */
+	} else if (parent)
+		usb_autosuspend_device(parent, 0);
+
+	// dev_dbg(&udev->dev, "%s: status %d\n", __FUNCTION__, status);
 	return status;
 }
 
-/* Caller has locked udev */
+/**
+ * usb_resume_both - resume a USB device and its interfaces
+ * @udev: the usb_device to resume
+ *
+ * This is the central routine for resuming USB devices.  It calls the
+ * the resume method for @udev and then calls the resume methods for all
+ * the interface drivers in @udev.
+ *
+ * Before starting the resume, the routine calls itself recursively for
+ * the parent device of @udev, thereby propagating the change up the device
+ * tree and assuring that @udev will be able to resume.  If the parent is
+ * unable to resume successfully, the routine fails.
+ *
+ * The resume method calls are subject to mutual exclusion under control
+ * of @udev's pm_mutex.  Many of these calls are also under the protection
+ * of @udev's device lock (including all requests originating outside the
+ * USB subsystem), but autoresume requests generated by a child device or
+ * interface driver may not be.  Usbcore will insure that the method calls
+ * do not arrive during bind, unbind, or reset operations.  However, drivers
+ * must be prepared to handle resume calls arriving at unpredictable times.
+ * The only way to block such calls is to do an autoresume (preventing
+ * other autoresumes) while holding @udev's device lock (preventing outside
+ * resumes).
+ *
+ * The caller must hold @udev->pm_mutex.
+ *
+ * This routine can run only in process context.
+ */
 int usb_resume_both(struct usb_device *udev)
 {
-	int			status;
+	int			status = 0;
 	int			i;
 	struct usb_interface	*intf;
+	struct usb_device	*parent = udev->parent;
+
+	cancel_delayed_work(&udev->autosuspend);
+	if (udev->state == USB_STATE_NOTATTACHED)
+		return -ENODEV;
 
-	/* Can't resume if the parent is suspended */
-	if (udev->parent && udev->parent->state == USB_STATE_SUSPENDED) {
-		dev_warn(&udev->dev, "can't resume; parent is suspended\n");
-		return -EHOSTUNREACH;
+	/* Propagate the resume up the tree, if necessary */
+	if (udev->state == USB_STATE_SUSPENDED) {
+		if (parent) {
+			mutex_lock_nested(&parent->pm_mutex, parent->level);
+			parent->auto_pm = 1;
+			status = usb_resume_both(parent);
+		} else {
+
+			/* We can't progagate beyond the USB subsystem,
+			 * so if a root hub's controller is suspended
+			 * then we're stuck. */
+			if (udev->dev.parent->power.power_state.event !=
+					PM_EVENT_ON)
+				status = -EHOSTUNREACH;
+		}
+		if (status == 0 && udev->state == USB_STATE_SUSPENDED)
+			status = resume_device(udev);
+		if (parent)
+			mutex_unlock(&parent->pm_mutex);
 	}
 
-	status = resume_device(udev);
+	/* Now the parent won't suspend until we are finished */
+
 	if (status == 0 && udev->actconfig) {
 		for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) {
 			intf = udev->actconfig->interface[i];
 			resume_interface(intf);
 		}
 	}
+
+	// dev_dbg(&udev->dev, "%s: status %d\n", __FUNCTION__, status);
+	return status;
+}
+
+#ifdef CONFIG_USB_SUSPEND
+
+/**
+ * usb_autosuspend_device - delayed autosuspend of a USB device and its interfaces
+ * @udev - the usb_device to autosuspend
+ * @dec_usage_cnt - flag to decrement @udev's PM-usage counter
+ *
+ * This routine should be called when a core subsystem is finished using
+ * @udev and wants to allow it to autosuspend.  Examples would be when
+ * @udev's device file in usbfs is closed or after a configuration change.
+ *
+ * @dec_usage_cnt should be 1 if the subsystem previously incremented
+ * @udev's usage counter (such as by passing 1 to usb_autoresume_device);
+ * otherwise it should be 0.
+ *
+ * If the usage counter for @udev or any of its active interfaces is greater
+ * than 0, the autosuspend request will not be queued.  (If an interface
+ * driver does not support autosuspend then its usage counter is permanently
+ * positive.)  Likewise, if an interface driver requires remote-wakeup
+ * capability during autosuspend but remote wakeup is disabled, the
+ * autosuspend will fail.
+ *
+ * Often the caller will hold @udev's device lock, but this is not
+ * necessary.
+ *
+ * This routine can run only in process context.
+ */
+void usb_autosuspend_device(struct usb_device *udev, int dec_usage_cnt)
+{
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	udev->pm_usage_cnt -= dec_usage_cnt;
+	if (udev->pm_usage_cnt <= 0)
+		schedule_delayed_work(&udev->autosuspend,
+				USB_AUTOSUSPEND_DELAY);
+	mutex_unlock(&udev->pm_mutex);
+	// dev_dbg(&udev->dev, "%s: cnt %d\n",
+	//		__FUNCTION__, udev->pm_usage_cnt);
+}
+
+/**
+ * usb_autoresume_device - immediately autoresume a USB device and its interfaces
+ * @udev - the usb_device to autoresume
+ * @inc_usage_cnt - flag to increment @udev's PM-usage counter
+ *
+ * This routine should be called when a core subsystem wants to use @udev
+ * and needs to guarantee that it is not suspended.  In addition, the
+ * caller can prevent @udev from being autosuspended subsequently.  (Note
+ * that this will not prevent suspend events originating in the PM core.)
+ * Examples would be when @udev's device file in usbfs is opened (autosuspend
+ * should be prevented until the file is closed) or when a remote-wakeup
+ * request is received (later autosuspends should not be prevented).
+ *
+ * @inc_usage_cnt should be 1 to increment @udev's usage counter and prevent
+ * autosuspends.  This prevention will persist until the usage counter is
+ * decremented again (such as by passing 1 to usb_autosuspend_device).
+ * Otherwise @inc_usage_cnt should be 0 to leave the usage counter unchanged.
+ * Regardless, if the autoresume fails then the usage counter is not
+ * incremented.
+ *
+ * Often the caller will hold @udev's device lock, but this is not
+ * necessary (and attempting it might cause deadlock).
+ *
+ * This routine can run only in process context.
+ */
+int usb_autoresume_device(struct usb_device *udev, int inc_usage_cnt)
+{
+	int	status;
+
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	udev->pm_usage_cnt += inc_usage_cnt;
+	udev->auto_pm = 1;
+	status = usb_resume_both(udev);
+	if (status != 0)
+		udev->pm_usage_cnt -= inc_usage_cnt;
+	mutex_unlock(&udev->pm_mutex);
+	// dev_dbg(&udev->dev, "%s: status %d cnt %d\n",
+	//		__FUNCTION__, status, udev->pm_usage_cnt);
+	return status;
+}
+
+/**
+ * usb_autopm_put_interface - decrement a USB interface's PM-usage counter
+ * @intf - the usb_interface whose counter should be decremented
+ *
+ * This routine should be called by an interface driver when it is
+ * finished using @intf and wants to allow it to autosuspend.  A typical
+ * example would be a character-device driver when its device file is
+ * closed.
+ *
+ * The routine decrements @intf's usage counter.  When the counter reaches
+ * 0, a delayed autosuspend request for @intf's device is queued.  When
+ * the delay expires, if @intf->pm_usage_cnt is still <= 0 along with all
+ * the other usage counters for the sibling interfaces and @intf's
+ * usb_device, the device and all its interfaces will be autosuspended.
+ *
+ * Note that @intf->pm_usage_cnt is owned by the interface driver.  The
+ * core will not change its value other than the increment and decrement
+ * in usb_autopm_get_interface and usb_autopm_put_interface.  The driver
+ * may use this simple counter-oriented discipline or may set the value
+ * any way it likes.
+ *
+ * If the driver has set @intf->needs_remote_wakeup then autosuspend will
+ * take place only if the device's remote-wakeup facility is enabled.
+ *
+ * Suspend method calls queued by this routine can arrive at any time
+ * while @intf is resumed and its usage counter is equal to 0.  They are
+ * not protected by the usb_device's lock but only by its pm_mutex.
+ * Drivers must provide their own synchronization.
+ *
+ * This routine can run only in process context.
+ */
+void usb_autopm_put_interface(struct usb_interface *intf)
+{
+	struct usb_device	*udev = interface_to_usbdev(intf);
+
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	if (intf->condition != USB_INTERFACE_UNBOUND) {
+		if (--intf->pm_usage_cnt <= 0)
+			schedule_delayed_work(&udev->autosuspend,
+					USB_AUTOSUSPEND_DELAY);
+	}
+	mutex_unlock(&udev->pm_mutex);
+	// dev_dbg(&intf->dev, "%s: cnt %d\n",
+	//		__FUNCTION__, intf->pm_usage_cnt);
+}
+EXPORT_SYMBOL_GPL(usb_autopm_put_interface);
+
+/**
+ * usb_autopm_get_interface - increment a USB interface's PM-usage counter
+ * @intf - the usb_interface whose counter should be incremented
+ *
+ * This routine should be called by an interface driver when it wants to
+ * use @intf and needs to guarantee that it is not suspended.  In addition,
+ * the routine prevents @intf from being autosuspended subsequently.  (Note
+ * that this will not prevent suspend events originating in the PM core.)
+ * This prevention will persist until usb_autopm_put_interface() is called
+ * or @intf is unbound.  A typical example would be a character-device
+ * driver when its device file is opened.
+ *
+ * The routine increments @intf's usage counter.  So long as the counter
+ * is greater than 0, autosuspend will not be allowed for @intf or its
+ * usb_device.  When the driver is finished using @intf it should call
+ * usb_autopm_put_interface() to decrement the usage counter and queue
+ * a delayed autosuspend request (if the counter is <= 0).
+ *
+ * Note that @intf->pm_usage_cnt is owned by the interface driver.  The
+ * core will not change its value other than the increment and decrement
+ * in usb_autopm_get_interface and usb_autopm_put_interface.  The driver
+ * may use this simple counter-oriented discipline or may set the value
+ * any way it likes.
+ *
+ * Resume method calls generated by this routine can arrive at any time
+ * while @intf is suspended.  They are not protected by the usb_device's
+ * lock but only by its pm_mutex.  Drivers must provide their own
+ * synchronization.
+ *
+ * This routine can run only in process context.
+ */
+int usb_autopm_get_interface(struct usb_interface *intf)
+{
+	struct usb_device	*udev = interface_to_usbdev(intf);
+	int			status;
+
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	if (intf->condition == USB_INTERFACE_UNBOUND)
+		status = -ENODEV;
+	else {
+		++intf->pm_usage_cnt;
+		udev->auto_pm = 1;
+		status = usb_resume_both(udev);
+		if (status != 0)
+			--intf->pm_usage_cnt;
+	}
+	mutex_unlock(&udev->pm_mutex);
+	// dev_dbg(&intf->dev, "%s: status %d cnt %d\n",
+	//		__FUNCTION__, status, intf->pm_usage_cnt);
 	return status;
 }
+EXPORT_SYMBOL_GPL(usb_autopm_get_interface);
+
+#endif /* CONFIG_USB_SUSPEND */
 
 static int usb_suspend(struct device *dev, pm_message_t message)
 {
 	int	status;
 
-	if (is_usb_device(dev))
-		status = usb_suspend_both(to_usb_device(dev), message);
-	else
+	if (is_usb_device(dev)) {
+		struct usb_device *udev = to_usb_device(dev);
+
+		mutex_lock_nested(&udev->pm_mutex, udev->level);
+		udev->auto_pm = 0;
+		status = usb_suspend_both(udev, message);
+		mutex_unlock(&udev->pm_mutex);
+	} else
 		status = 0;
 	return status;
 }
@@ -939,7 +1293,12 @@ static int usb_resume(struct device *dev)
 	int	status;
 
 	if (is_usb_device(dev)) {
-		status = usb_resume_both(to_usb_device(dev));
+		struct usb_device *udev = to_usb_device(dev);
+
+		mutex_lock_nested(&udev->pm_mutex, udev->level);
+		udev->auto_pm = 0;
+		status = usb_resume_both(udev);
+		mutex_unlock(&udev->pm_mutex);
 
 		/* Rebind drivers that had no suspend method? */
 	} else
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 78e910b2046c..dee812bc6c43 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1017,19 +1017,22 @@ void usb_set_device_state(struct usb_device *udev,
 	if (udev->state == USB_STATE_NOTATTACHED)
 		;	/* do nothing */
 	else if (new_state != USB_STATE_NOTATTACHED) {
-		udev->state = new_state;
 
 		/* root hub wakeup capabilities are managed out-of-band
 		 * and may involve silicon errata ... ignore them here.
 		 */
 		if (udev->parent) {
-			if (new_state == USB_STATE_CONFIGURED)
+			if (udev->state == USB_STATE_SUSPENDED
+					|| new_state == USB_STATE_SUSPENDED)
+				;	/* No change to wakeup settings */
+			else if (new_state == USB_STATE_CONFIGURED)
 				device_init_wakeup(&udev->dev,
 					(udev->actconfig->desc.bmAttributes
 					 & USB_CONFIG_ATT_WAKEUP));
-			else if (new_state != USB_STATE_SUSPENDED)
+			else
 				device_init_wakeup(&udev->dev, 0);
 		}
+		udev->state = new_state;
 	} else
 		recursively_mark_NOTATTACHED(udev);
 	spin_unlock_irqrestore(&device_state_lock, flags);
@@ -1507,7 +1510,7 @@ static int hub_port_suspend(struct usb_hub *hub, int port1,
 	 * NOTE:  OTG devices may issue remote wakeup (or SRP) even when
 	 * we don't explicitly enable it here.
 	 */
-	if (device_may_wakeup(&udev->dev)) {
+	if (udev->do_remote_wakeup) {
 		status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
 				USB_REQ_SET_FEATURE, USB_RECIP_DEVICE,
 				USB_DEVICE_REMOTE_WAKEUP, 0,
@@ -1533,7 +1536,8 @@ static int hub_port_suspend(struct usb_hub *hub, int port1,
 				USB_CTRL_SET_TIMEOUT);
 	} else {
 		/* device has up to 10 msec to fully suspend */
-		dev_dbg(&udev->dev, "usb suspend\n");
+		dev_dbg(&udev->dev, "usb %ssuspend\n",
+				udev->auto_pm ? "auto-" : "");
 		usb_set_device_state(udev, USB_STATE_SUSPENDED);
 		msleep(10);
 	}
@@ -1573,7 +1577,8 @@ static int __usb_port_suspend (struct usb_device *udev, int port1)
 		status = hub_port_suspend(hdev_to_hub(udev->parent), port1,
 				udev);
 	else {
-		dev_dbg(&udev->dev, "usb suspend\n");
+		dev_dbg(&udev->dev, "usb %ssuspend\n",
+				udev->auto_pm ? "auto-" : "");
 		usb_set_device_state(udev, USB_STATE_SUSPENDED);
 	}
 	return status;
@@ -1687,7 +1692,8 @@ hub_port_resume(struct usb_hub *hub, int port1, struct usb_device *udev)
 
 		/* drive resume for at least 20 msec */
 		if (udev)
-			dev_dbg(&udev->dev, "RESUME\n");
+			dev_dbg(&udev->dev, "usb %sresume\n",
+					udev->auto_pm ? "auto-" : "");
 		msleep(25);
 
 #define LIVE_FLAGS	( USB_PORT_STAT_POWER \
@@ -1754,8 +1760,11 @@ int usb_port_resume(struct usb_device *udev)
 		// NOTE this fails if parent is also suspended...
 		status = hub_port_resume(hdev_to_hub(udev->parent),
 				udev->portnum, udev);
-	} else
+	} else {
+		dev_dbg(&udev->dev, "usb %sresume\n",
+				udev->auto_pm ? "auto-" : "");
 		status = finish_port_resume(udev);
+	}
 	if (status < 0)
 		dev_dbg(&udev->dev, "can't resume, status %d\n", status);
 	return status;
@@ -1765,19 +1774,23 @@ static int remote_wakeup(struct usb_device *udev)
 {
 	int	status = 0;
 
-	/* don't repeat RESUME sequence if this device
-	 * was already woken up by some other task
-	 */
+	/* All this just to avoid sending a port-resume message
+	 * to the parent hub! */
+
 	usb_lock_device(udev);
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
 	if (udev->state == USB_STATE_SUSPENDED) {
-		dev_dbg(&udev->dev, "RESUME (wakeup)\n");
+		dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-");
 		/* TRSMRCY = 10 msec */
 		msleep(10);
 		status = finish_port_resume(udev);
+		if (status == 0)
+			udev->dev.power.power_state.event = PM_EVENT_ON;
 	}
+	mutex_unlock(&udev->pm_mutex);
 
 	if (status == 0)
-		usb_resume_both(udev);
+		usb_autoresume_device(udev, 0);
 	usb_unlock_device(udev);
 	return status;
 }
@@ -1834,7 +1847,9 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
 					== PM_EVENT_ON
 #endif
 				) {
-			dev_dbg(&intf->dev, "port %d nyet suspended\n", port1);
+			if (!hdev->auto_pm)
+				dev_dbg(&intf->dev, "port %d nyet suspended\n",
+						port1);
 			return -EBUSY;
 		}
 	}
@@ -2587,7 +2602,7 @@ static void hub_events(void)
 		 * stub "device" node was never suspended.
 		 */
 		if (i)
-			usb_resume_both(hdev);
+			usb_autoresume_device(hdev, 0);
 
 		/* If this is an inactive or suspended hub, do nothing */
 		if (hub->quiescing)
@@ -2993,6 +3008,9 @@ int usb_reset_composite_device(struct usb_device *udev,
 		return -EINVAL;
 	}
 
+	/* Prevent autosuspend during the reset */
+	usb_autoresume_device(udev, 1);
+
 	if (iface && iface->condition != USB_INTERFACE_BINDING)
 		iface = NULL;
 
@@ -3034,6 +3052,7 @@ int usb_reset_composite_device(struct usb_device *udev,
 		}
 	}
 
+	usb_autosuspend_device(udev, 1);
 	return ret;
 }
 EXPORT_SYMBOL(usb_reset_composite_device);
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index b0c0a993338f..6b029cdb8671 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -168,6 +168,10 @@ static void usb_release_dev(struct device *dev)
 
 	udev = to_usb_device(dev);
 
+#ifdef	CONFIG_PM
+	cancel_delayed_work(&udev->autosuspend);
+	flush_scheduled_work();
+#endif
 	usb_destroy_configuration(udev);
 	usb_put_hcd(bus_to_hcd(udev->bus));
 	kfree(udev->product);
@@ -176,6 +180,21 @@ static void usb_release_dev(struct device *dev)
 	kfree(udev);
 }
 
+#ifdef	CONFIG_PM
+
+/* usb_autosuspend_work - callback routine to autosuspend a USB device */
+static void usb_autosuspend_work(void *_udev)
+{
+	struct usb_device	*udev = _udev;
+
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	udev->auto_pm = 1;
+	usb_suspend_both(udev, PMSG_SUSPEND);
+	mutex_unlock(&udev->pm_mutex);
+}
+
+#endif
+
 /**
  * usb_alloc_dev - usb device constructor (usbcore-internal)
  * @parent: hub to which device is connected; null to allocate a root hub
@@ -251,6 +270,10 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1)
 	dev->parent = parent;
 	INIT_LIST_HEAD(&dev->filelist);
 
+#ifdef	CONFIG_PM
+	mutex_init(&dev->pm_mutex);
+	INIT_WORK(&dev->autosuspend, usb_autosuspend_work, dev);
+#endif
 	return dev;
 }
 
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 5162cb370215..10688ad73c6d 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -49,6 +49,20 @@ static inline int usb_resume_both(struct usb_device *udev)
 
 #endif
 
+#ifdef CONFIG_USB_SUSPEND
+
+#define USB_AUTOSUSPEND_DELAY	(HZ*2)
+
+extern void usb_autosuspend_device(struct usb_device *udev, int dec_busy_cnt);
+extern int usb_autoresume_device(struct usb_device *udev, int inc_busy_cnt);
+
+#else
+
+#define usb_autosuspend_device(udev, dec_busy_cnt)	do {} while (0)
+#define usb_autoresume_device(udev, inc_busy_cnt)	0
+
+#endif
+
 extern struct bus_type usb_bus_type;
 extern struct usb_device_driver usb_generic_driver;
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index df5c93eb3ce9..0da15b0b02be 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -19,6 +19,7 @@
 #include <linux/fs.h>		/* for struct file_operations */
 #include <linux/completion.h>	/* for struct completion */
 #include <linux/sched.h>	/* for current && schedule_timeout */
+#include <linux/mutex.h>	/* for struct mutex */
 
 struct usb_device;
 struct usb_driver;
@@ -103,8 +104,12 @@ enum usb_interface_condition {
  * @condition: binding state of the interface: not bound, binding
  *	(in probe()), bound to a driver, or unbinding (in disconnect())
  * @is_active: flag set when the interface is bound and not suspended.
+ * @needs_remote_wakeup: flag set when the driver requires remote-wakeup
+ *	capability during autosuspend.
  * @dev: driver model's view of this device
  * @class_dev: driver model's class view of this device.
+ * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not
+ *	allowed unless the counter is 0.
  *
  * USB device drivers attach to interfaces on a physical device.  Each
  * interface encapsulates a single high level function, such as feeding
@@ -144,9 +149,11 @@ struct usb_interface {
 					 * bound to */
 	enum usb_interface_condition condition;		/* state of binding */
 	unsigned is_active:1;		/* the interface is not suspended */
+	unsigned needs_remote_wakeup:1;	/* driver requires remote wakeup */
 
 	struct device dev;		/* interface specific device info */
 	struct class_device *class_dev;
+	int pm_usage_cnt;		/* usage counter for autosuspend */
 };
 #define	to_usb_interface(d) container_of(d, struct usb_interface, dev)
 #define	interface_to_usbdev(intf) \
@@ -372,6 +379,15 @@ struct usb_device {
 
 	int maxchild;			/* Number of ports if hub */
 	struct usb_device *children[USB_MAXCHILDREN];
+
+#ifdef CONFIG_PM
+	struct work_struct autosuspend;	/* for delayed autosuspends */
+	struct mutex pm_mutex;		/* protects PM operations */
+	int pm_usage_cnt;		/* usage counter for autosuspend */
+
+	unsigned auto_pm:1;		/* autosuspend/resume in progress */
+	unsigned do_remote_wakeup:1;	/* remote wakeup should be enabled */
+#endif
 };
 #define	to_usb_device(d) container_of(d, struct usb_device, dev)
 
@@ -392,6 +408,17 @@ extern int usb_reset_composite_device(struct usb_device *dev,
 
 extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 
+/* USB autosuspend and autoresume */
+#ifdef CONFIG_USB_SUSPEND
+extern int usb_autopm_get_interface(struct usb_interface *intf);
+extern void usb_autopm_put_interface(struct usb_interface *intf);
+
+#else
+#define usb_autopm_get_interface(intf)		0
+#define usb_autopm_put_interface(intf)		do {} while (0)
+#endif
+
+
 /*-------------------------------------------------------------------------*/
 
 /* for drivers using iso endpoints */
@@ -593,6 +620,8 @@ struct usbdrv_wrap {
  * @drvwrap: Driver-model core structure wrapper.
  * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be
  *	added to this driver by preventing the sysfs file from being created.
+ * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend
+ *	for interfaces bound to this driver.
  *
  * USB interface drivers must provide a name, probe() and disconnect()
  * methods, and an id_table.  Other driver fields are optional.
@@ -631,6 +660,7 @@ struct usb_driver {
 	struct usb_dynids dynids;
 	struct usbdrv_wrap drvwrap;
 	unsigned int no_dynamic_id:1;
+	unsigned int supports_autosuspend:1;
 };
 #define	to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver)
 
@@ -648,6 +678,8 @@ struct usb_driver {
  * @suspend: Called when the device is going to be suspended by the system.
  * @resume: Called when the device is being resumed by the system.
  * @drvwrap: Driver-model core structure wrapper.
+ * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend
+ *	for devices bound to this driver.
  *
  * USB drivers must provide all the fields listed above except drvwrap.
  */
@@ -660,6 +692,7 @@ struct usb_device_driver {
 	int (*suspend) (struct usb_device *udev, pm_message_t message);
 	int (*resume) (struct usb_device *udev);
 	struct usbdrv_wrap drvwrap;
+	unsigned int supports_autosuspend:1;
 };
 #define	to_usb_device_driver(d) container_of(d, struct usb_device_driver, \
 		drvwrap.driver)
-- 
cgit v1.2.3