From 5e6557722e69840506eb8bc5a1edcdb4e447a917 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 6 Jul 2005 15:44:41 -0400
Subject: [PATCH] openfirmware: generate device table for userspace

This converts the usage of struct of_match to struct of_device_id,
similar to pci_device_id.  This allows a device table to be generated,
which can be parsed by depmod(8) to generate a map file for module
loading.

In order for hotplug to work with macio devices, patches to
module-init-tools and hotplug must be applied.  Those patches are
available at:

 ftp://ftp.suse.com/pub/people/jeffm/linux/macio-hotplug/

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mod_devicetable.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 9b6d05172ed4..dce53ac1625d 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -174,6 +174,17 @@ struct serio_device_id {
 	__u8 proto;
 };
 
+/*
+ * Struct used for matching a device
+ */
+struct of_device_id
+{
+	char	name[32];
+	char	type[32];
+	char	compatible[128];
+	void	*data;
+};
+
 
 /* PCMCIA */
 
-- 
cgit v1.2.3


From 40725181b74be6b0e3bdc8c05bd1e0b9873ec5cc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 6 Jul 2005 13:51:52 -0700
Subject: [CRYPTO] Add support for low-level multi-block operations

This patch adds hooks for cipher algorithms to implement multi-block
ECB/CBC operations directly.  This is expected to provide significant
performance boots to the VIA Padlock.

It could also be used for improving software implementations such as
AES where operating on multiple blocks at a time may enable certain
optimisations.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/cipher.c        | 38 ++++++++++++++++++--------------------
 crypto/internal.h      |  5 -----
 include/linux/crypto.h | 28 +++++++++++++++++++++++++++-
 3 files changed, 45 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/cipher.c b/crypto/cipher.c
index c4243345b154..54c4a560070d 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -23,14 +23,6 @@
 #include "internal.h"
 #include "scatterwalk.h"
 
-struct cipher_desc {
-	struct crypto_tfm *tfm;
-	void (*crfn)(void *ctx, u8 *dst, const u8 *src);
-	unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst,
-			     const u8 *src, unsigned int nbytes);
-	void *info;
-};
-
 static inline void xor_64(u8 *a, const u8 *b)
 {
 	((u32 *)a)[0] ^= ((u32 *)b)[0];
@@ -224,10 +216,11 @@ static int ecb_encrypt(struct crypto_tfm *tfm,
                        struct scatterlist *src, unsigned int nbytes)
 {
 	struct cipher_desc desc;
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
-	desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt;
-	desc.prfn = ecb_process;
+	desc.crfn = cipher->cia_encrypt;
+	desc.prfn = cipher->cia_encrypt_ecb ?: ecb_process;
 
 	return crypt(&desc, dst, src, nbytes);
 }
@@ -238,10 +231,11 @@ static int ecb_decrypt(struct crypto_tfm *tfm,
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
-	desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt;
-	desc.prfn = ecb_process;
+	desc.crfn = cipher->cia_decrypt;
+	desc.prfn = cipher->cia_decrypt_ecb ?: ecb_process;
 
 	return crypt(&desc, dst, src, nbytes);
 }
@@ -252,10 +246,11 @@ static int cbc_encrypt(struct crypto_tfm *tfm,
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
-	desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt;
-	desc.prfn = cbc_process_encrypt;
+	desc.crfn = cipher->cia_encrypt;
+	desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt;
 	desc.info = tfm->crt_cipher.cit_iv;
 
 	return crypt(&desc, dst, src, nbytes);
@@ -267,10 +262,11 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm,
                           unsigned int nbytes, u8 *iv)
 {
 	struct cipher_desc desc;
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
-	desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt;
-	desc.prfn = cbc_process_encrypt;
+	desc.crfn = cipher->cia_encrypt;
+	desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt;
 	desc.info = iv;
 
 	return crypt(&desc, dst, src, nbytes);
@@ -282,10 +278,11 @@ static int cbc_decrypt(struct crypto_tfm *tfm,
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
-	desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt;
-	desc.prfn = cbc_process_decrypt;
+	desc.crfn = cipher->cia_decrypt;
+	desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt;
 	desc.info = tfm->crt_cipher.cit_iv;
 
 	return crypt(&desc, dst, src, nbytes);
@@ -297,10 +294,11 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm,
                           unsigned int nbytes, u8 *iv)
 {
 	struct cipher_desc desc;
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
-	desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt;
-	desc.prfn = cbc_process_decrypt;
+	desc.crfn = cipher->cia_decrypt;
+	desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt;
 	desc.info = iv;
 
 	return crypt(&desc, dst, src, nbytes);
diff --git a/crypto/internal.h b/crypto/internal.h
index 964b9a60ca24..5ed383f7dce6 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -42,11 +42,6 @@ static inline void crypto_yield(struct crypto_tfm *tfm)
 		cond_resched();
 }
 
-static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
-{
-	return (void *)&tfm[1];
-}
-
 struct crypto_alg *crypto_alg_lookup(const char *name);
 
 /* A far more intelligent version of this is planned.  For now, just
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 387da6a3e58c..26ce01c25745 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -61,6 +61,15 @@
 #define CRYPTO_DIR_DECRYPT		0
 
 struct scatterlist;
+struct crypto_tfm;
+
+struct cipher_desc {
+	struct crypto_tfm *tfm;
+	void (*crfn)(void *ctx, u8 *dst, const u8 *src);
+	unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst,
+			     const u8 *src, unsigned int nbytes);
+	void *info;
+};
 
 /*
  * Algorithms: modular crypto algorithm implementations, managed
@@ -73,6 +82,19 @@ struct cipher_alg {
 	                  unsigned int keylen, u32 *flags);
 	void (*cia_encrypt)(void *ctx, u8 *dst, const u8 *src);
 	void (*cia_decrypt)(void *ctx, u8 *dst, const u8 *src);
+
+	unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
 };
 
 struct digest_alg {
@@ -136,7 +158,6 @@ static inline int crypto_alg_available(const char *name, u32 flags)
  * and core processing logic.  Managed via crypto_alloc_tfm() and
  * crypto_free_tfm(), as well as the various helpers below.
  */
-struct crypto_tfm;
 
 struct cipher_tfm {
 	void *cit_iv;
@@ -266,6 +287,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_digest.dia_digestsize;
 }
 
+static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
+{
+	return (void *)&tfm[1];
+}
+
 /*
  * API wrappers.
  */
-- 
cgit v1.2.3


From 95477377995aefa2ec1654a9a3777bd57ea99146 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 6 Jul 2005 13:52:09 -0700
Subject: [CRYPTO] Add alignmask for low-level cipher implementations

The VIA Padlock device requires the input and output buffers to
be aligned on 16-byte boundaries.  This patch adds the alignmask
attribute for low-level cipher implementations to indicate their
alignment requirements.

The mid-level crypt() function will copy the input/output buffers
if they are not aligned correctly before they are passed to the
low-level implementation.

Strictly speaking, some of the software implementations require
the buffers to be aligned on 4-byte boundaries as they do 32-bit
loads.  However, it is not clear whether it is better to copy
the buffers or pay the penalty for unaligned loads/stores.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/api.c           |  6 ++++++
 crypto/cipher.c        | 43 ++++++++++++++++++++++++++++++++++++-------
 crypto/scatterwalk.h   |  6 ++++++
 include/linux/crypto.h |  1 +
 4 files changed, 49 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/api.c b/crypto/api.c
index 394169a8577d..f55856b21992 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -168,6 +168,12 @@ int crypto_register_alg(struct crypto_alg *alg)
 {
 	int ret = 0;
 	struct crypto_alg *q;
+
+	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
+		return -EINVAL;
+
+	if (alg->cra_alignmask > PAGE_SIZE)
+		return -EINVAL;
 	
 	down_write(&crypto_alg_sem);
 	
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 54c4a560070d..85eb12f8e564 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -41,8 +41,10 @@ static unsigned int crypt_slow(const struct cipher_desc *desc,
 			       struct scatter_walk *in,
 			       struct scatter_walk *out, unsigned int bsize)
 {
-	u8 src[bsize];
-	u8 dst[bsize];
+	unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask;
+	u8 buffer[bsize * 2 + alignmask];
+	u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+	u8 *dst = src + bsize;
 	unsigned int n;
 
 	n = scatterwalk_copychunks(src, in, bsize, 0);
@@ -59,15 +61,24 @@ static unsigned int crypt_slow(const struct cipher_desc *desc,
 static inline unsigned int crypt_fast(const struct cipher_desc *desc,
 				      struct scatter_walk *in,
 				      struct scatter_walk *out,
-				      unsigned int nbytes)
+				      unsigned int nbytes, u8 *tmp)
 {
 	u8 *src, *dst;
 
 	src = in->data;
 	dst = scatterwalk_samebuf(in, out) ? src : out->data;
 
+	if (tmp) {
+		memcpy(tmp, in->data, nbytes);
+		src = tmp;
+		dst = tmp;
+	}
+
 	nbytes = desc->prfn(desc, dst, src, nbytes);
 
+	if (tmp)
+		memcpy(out->data, tmp, nbytes);
+
 	scatterwalk_advance(in, nbytes);
 	scatterwalk_advance(out, nbytes);
 
@@ -87,6 +98,8 @@ static int crypt(const struct cipher_desc *desc,
 	struct scatter_walk walk_in, walk_out;
 	struct crypto_tfm *tfm = desc->tfm;
 	const unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+	unsigned int alignmask = tfm->__crt_alg->cra_alignmask;
+	unsigned long buffer = 0;
 
 	if (!nbytes)
 		return 0;
@@ -100,16 +113,27 @@ static int crypt(const struct cipher_desc *desc,
 	scatterwalk_start(&walk_out, dst);
 
 	for(;;) {
-		unsigned int n;
+		unsigned int n = nbytes;
+		u8 *tmp = NULL;
+
+		if (!scatterwalk_aligned(&walk_in, alignmask) ||
+		    !scatterwalk_aligned(&walk_out, alignmask)) {
+			if (!buffer) {
+				buffer = __get_free_page(GFP_ATOMIC);
+				if (!buffer)
+					n = 0;
+			}
+			tmp = (u8 *)buffer;
+		}
 
 		scatterwalk_map(&walk_in, 0);
 		scatterwalk_map(&walk_out, 1);
 
-		n = scatterwalk_clamp(&walk_in, nbytes);
+		n = scatterwalk_clamp(&walk_in, n);
 		n = scatterwalk_clamp(&walk_out, n);
 
 		if (likely(n >= bsize))
-			n = crypt_fast(desc, &walk_in, &walk_out, n);
+			n = crypt_fast(desc, &walk_in, &walk_out, n, tmp);
 		else
 			n = crypt_slow(desc, &walk_in, &walk_out, bsize);
 
@@ -119,10 +143,15 @@ static int crypt(const struct cipher_desc *desc,
 		scatterwalk_done(&walk_out, 1, nbytes);
 
 		if (!nbytes)
-			return 0;
+			break;
 
 		crypto_yield(tfm);
 	}
+
+	if (buffer)
+		free_page(buffer);
+
+	return 0;
 }
 
 static unsigned int cbc_process_encrypt(const struct cipher_desc *desc,
diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h
index 5495bb970816..e79925c474a3 100644
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -55,6 +55,12 @@ static inline void scatterwalk_advance(struct scatter_walk *walk,
 	walk->len_this_segment -= nbytes;
 }
 
+static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk,
+					       unsigned int alignmask)
+{
+	return !(walk->offset & alignmask);
+}
+
 void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg);
 int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out);
 void scatterwalk_map(struct scatter_walk *walk, int out);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 26ce01c25745..ac9d49beecd3 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -124,6 +124,7 @@ struct crypto_alg {
 	u32 cra_flags;
 	unsigned int cra_blocksize;
 	unsigned int cra_ctxsize;
+	unsigned int cra_alignmask;
 	const char cra_name[CRYPTO_MAX_ALG_NAME];
 
 	union {
-- 
cgit v1.2.3


From fbdae9f3e7fb57c07cb0d973f113eb25da2e8ff2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 6 Jul 2005 13:53:29 -0700
Subject: [CRYPTO] Ensure cit_iv is aligned correctly

This patch ensures that cit_iv is aligned according to cra_alignmask
by allocating it as part of the tfm structure.  As a side effect the
crypto layer will also guarantee that the tfm ctx area has enough space
to be aligned by cra_alignmask.  This allows us to remove the extra
space reservation from the Padlock driver.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/api.c                 | 32 +++++++++++++++++++++++++++++---
 crypto/cipher.c              | 15 +++++++++------
 crypto/internal.h            | 28 ++++++++++++++++++++++++++++
 drivers/crypto/padlock-aes.c |  3 +--
 include/linux/crypto.h       |  5 +++++
 5 files changed, 72 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/api.c b/crypto/api.c
index 0b583d24f7fa..2d8d828c0ca2 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -125,20 +125,46 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
 	}
 }
 
+static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags)
+{
+	unsigned int len;
+
+	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
+	default:
+		BUG();
+
+	case CRYPTO_ALG_TYPE_CIPHER:
+		len = crypto_cipher_ctxsize(alg, flags);
+		break;
+		
+	case CRYPTO_ALG_TYPE_DIGEST:
+		len = crypto_digest_ctxsize(alg, flags);
+		break;
+		
+	case CRYPTO_ALG_TYPE_COMPRESS:
+		len = crypto_compress_ctxsize(alg, flags);
+		break;
+	}
+
+	return len + alg->cra_alignmask;
+}
+
 struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
 {
 	struct crypto_tfm *tfm = NULL;
 	struct crypto_alg *alg;
+	unsigned int tfm_size;
 
 	alg = crypto_alg_mod_lookup(name);
 	if (alg == NULL)
 		goto out;
-	
-	tfm = kmalloc(sizeof(*tfm) + alg->cra_ctxsize, GFP_KERNEL);
+
+	tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags);
+	tfm = kmalloc(tfm_size, GFP_KERNEL);
 	if (tfm == NULL)
 		goto out_put;
 
-	memset(tfm, 0, sizeof(*tfm) + alg->cra_ctxsize);
+	memset(tfm, 0, tfm_size);
 	
 	tfm->__crt_alg = alg;
 	
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 85eb12f8e564..d3295ce14a57 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -41,7 +41,7 @@ static unsigned int crypt_slow(const struct cipher_desc *desc,
 			       struct scatter_walk *in,
 			       struct scatter_walk *out, unsigned int bsize)
 {
-	unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask;
+	unsigned int alignmask = crypto_tfm_alg_alignmask(desc->tfm);
 	u8 buffer[bsize * 2 + alignmask];
 	u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
 	u8 *dst = src + bsize;
@@ -98,7 +98,7 @@ static int crypt(const struct cipher_desc *desc,
 	struct scatter_walk walk_in, walk_out;
 	struct crypto_tfm *tfm = desc->tfm;
 	const unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
-	unsigned int alignmask = tfm->__crt_alg->cra_alignmask;
+	unsigned int alignmask = crypto_tfm_alg_alignmask(tfm);
 	unsigned long buffer = 0;
 
 	if (!nbytes)
@@ -399,6 +399,8 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm)
 	}
 	
 	if (ops->cit_mode == CRYPTO_TFM_MODE_CBC) {
+		unsigned int align;
+		unsigned long addr;
 	    	
 	    	switch (crypto_tfm_alg_blocksize(tfm)) {
 	    	case 8:
@@ -418,9 +420,11 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm)
 	    	}
 	    	
 		ops->cit_ivsize = crypto_tfm_alg_blocksize(tfm);
-	    	ops->cit_iv = kmalloc(ops->cit_ivsize, GFP_KERNEL);
-		if (ops->cit_iv == NULL)
-			ret = -ENOMEM;
+		align = crypto_tfm_alg_alignmask(tfm) + 1;
+		addr = (unsigned long)crypto_tfm_ctx(tfm);
+		addr = ALIGN(addr, align);
+		addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align);
+		ops->cit_iv = (void *)addr;
 	}
 
 out:	
@@ -429,5 +433,4 @@ out:
 
 void crypto_exit_cipher_ops(struct crypto_tfm *tfm)
 {
-	kfree(tfm->crt_cipher.cit_iv);
 }
diff --git a/crypto/internal.h b/crypto/internal.h
index 83b1b6d6d92b..68612874b5fd 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -16,6 +16,7 @@
 #include <linux/highmem.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <asm/kmap_types.h>
 
 extern enum km_type crypto_km_types[];
@@ -61,6 +62,33 @@ static inline void crypto_init_proc(void)
 { }
 #endif
 
+static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg,
+						 int flags)
+{
+	return alg->cra_ctxsize;
+}
+
+static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg,
+						 int flags)
+{
+	unsigned int len = alg->cra_ctxsize;
+	
+	switch (flags & CRYPTO_TFM_MODE_MASK) {
+	case CRYPTO_TFM_MODE_CBC:
+		len = ALIGN(len, alg->cra_alignmask + 1);
+		len += alg->cra_blocksize;
+		break;
+	}
+
+	return len;
+}
+
+static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg,
+						   int flags)
+{
+	return alg->cra_ctxsize;
+}
+
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags);
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index d2745ff4699c..c5b58fae95f2 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -465,8 +465,7 @@ static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct aes_ctx) +
-					PADLOCK_ALIGNMENT,
+	.cra_ctxsize		=	sizeof(struct aes_ctx),
 	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index ac9d49beecd3..5e2bcc636a02 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -288,6 +288,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_digest.dia_digestsize;
 }
 
+static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_alignmask;
+}
+
 static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
 {
 	return (void *)&tfm[1];
-- 
cgit v1.2.3


From cb2c0233755429037462e16ea0d5497a0092738c Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 7 Jul 2005 17:56:03 -0700
Subject: [PATCH] export generic_drop_inode() to modules

OCFS2 wants to mark an inode which has been orphaned by another node so
that during final iput it takes the correct path through the VFS and can
pass through the OCFS2 delete_inode callback.  Since i_nlink can get out of
date with other nodes, the best way I see to accomplish this is by clearing
i_nlink on those inodes at drop_inode time.  Other than this small amount
of work, nothing different needs to happen, so I think it would be cleanest
to be able to just call generic_drop_inode at the end of the OCFS2
drop_inode callback.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c         | 4 +++-
 include/linux/fs.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 1f9a3a2b89bc..6d695037a0a3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1052,7 +1052,7 @@ static void generic_forget_inode(struct inode *inode)
  * inode when the usage count drops to zero, and
  * i_nlink is zero.
  */
-static void generic_drop_inode(struct inode *inode)
+void generic_drop_inode(struct inode *inode)
 {
 	if (!inode->i_nlink)
 		generic_delete_inode(inode);
@@ -1060,6 +1060,8 @@ static void generic_drop_inode(struct inode *inode)
 		generic_forget_inode(inode);
 }
 
+EXPORT_SYMBOL_GPL(generic_drop_inode);
+
 /*
  * Called when we're dropping the last reference
  * to an inode. 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 047bde30836a..302ec20838ca 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1435,6 +1435,7 @@ extern struct inode * igrab(struct inode *);
 extern ino_t iunique(struct super_block *, ino_t);
 extern int inode_needs_sync(struct inode *inode);
 extern void generic_delete_inode(struct inode *inode);
+extern void generic_drop_inode(struct inode *inode);
 
 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data);
-- 
cgit v1.2.3


From 79b9ce311e192e9a31fd9f3cf1ee4a4edf9e2650 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <marcelo.tosatti@cyclades.com>
Date: Thu, 7 Jul 2005 17:56:04 -0700
Subject: [PATCH] print order information when OOM killing

Dump the current allocation order when OOM killing.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/sysrq.c | 2 +-
 include/linux/swap.h | 2 +-
 mm/oom_kill.c        | 4 ++--
 mm/page_alloc.c      | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index af79805b5576..12d563c648f7 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -228,7 +228,7 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(void *ignored)
 {
-	out_of_memory(GFP_KERNEL);
+	out_of_memory(GFP_KERNEL, 0);
 }
 
 static DECLARE_WORK(moom_work, moom_callback, NULL);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2343f999e6e1..c75954f2d868 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -148,7 +148,7 @@ struct swap_list_t {
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
 /* linux/mm/oom_kill.c */
-extern void out_of_memory(unsigned int __nocast gfp_mask);
+extern void out_of_memory(unsigned int __nocast gfp_mask, int order);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 59666d905f19..e20d559edbaf 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -253,12 +253,12 @@ static struct mm_struct *oom_kill_process(struct task_struct *p)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(unsigned int __nocast gfp_mask)
+void out_of_memory(unsigned int __nocast gfp_mask, int order)
 {
 	struct mm_struct *mm = NULL;
 	task_t * p;
 
-	printk("oom-killer: gfp_mask=0x%x\n", gfp_mask);
+	printk("oom-killer: gfp_mask=0x%x, order=%d\n", gfp_mask, order);
 	/* print memory stats */
 	show_mem();
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3c9f7f881125..7fbd3ea8765c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -936,7 +936,7 @@ rebalance:
 				goto got_pg;
 		}
 
-		out_of_memory(gfp_mask);
+		out_of_memory(gfp_mask, order);
 		goto restart;
 	}
 
-- 
cgit v1.2.3


From cf36680887d6d942d2119c1ff1dfb2428b0f21f4 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 7 Jul 2005 17:56:13 -0700
Subject: [PATCH] move ioprio syscalls into syscalls.h

- Make ioprio syscalls return long, like set/getpriority syscalls.
- Move function prototypes into syscalls.h so we can pick them up in the
  32/64bit compat code.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ioprio.c              | 4 ++--
 include/linux/ioprio.h   | 3 ---
 include/linux/syscalls.h | 3 +++
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ioprio.c b/fs/ioprio.c
index 663e420636d6..97e1f088ba00 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -43,7 +43,7 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 	return 0;
 }
 
-asmlinkage int sys_ioprio_set(int which, int who, int ioprio)
+asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 {
 	int class = IOPRIO_PRIO_CLASS(ioprio);
 	int data = IOPRIO_PRIO_DATA(ioprio);
@@ -115,7 +115,7 @@ asmlinkage int sys_ioprio_set(int which, int who, int ioprio)
 	return ret;
 }
 
-asmlinkage int sys_ioprio_get(int which, int who)
+asmlinkage long sys_ioprio_get(int which, int who)
 {
 	struct task_struct *g, *p;
 	struct user_struct *user;
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 8a453a0b5e4b..88d5961f7a3f 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -34,9 +34,6 @@ enum {
  */
 #define IOPRIO_BE_NR	(8)
 
-asmlinkage int sys_ioprio_set(int, int, int);
-asmlinkage int sys_ioprio_get(int, int);
-
 enum {
 	IOPRIO_WHO_PROCESS = 1,
 	IOPRIO_WHO_PGRP,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 52830b6d94e5..425f58c8ea4a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -506,4 +506,7 @@ asmlinkage long sys_request_key(const char __user *_type,
 asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3,
 			   unsigned long arg4, unsigned long arg5);
 
+asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
+asmlinkage long sys_ioprio_get(int which, int who);
+
 #endif
-- 
cgit v1.2.3


From e00d9967e3addea86dded46deefc5daec5d52e5a Mon Sep 17 00:00:00 2001
From: Bernard Blackham <bernard@blackham.com.au>
Date: Thu, 7 Jul 2005 17:56:42 -0700
Subject: [PATCH] pm: fix u32 vs. pm_message_t confusion in cpufreq

Fix u32 vs pm_message_t confusion in cpufreq.

Signed-off-by: Bernard Blackham <bernard@blackham.com.au>
Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ppc/platforms/pmac_cpufreq.c | 2 +-
 drivers/cpufreq/cpufreq.c         | 4 ++--
 include/linux/cpufreq.h           | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ppc/platforms/pmac_cpufreq.c b/arch/ppc/platforms/pmac_cpufreq.c
index 5fdd4f607a40..c0605244edda 100644
--- a/arch/ppc/platforms/pmac_cpufreq.c
+++ b/arch/ppc/platforms/pmac_cpufreq.c
@@ -452,7 +452,7 @@ static u32 __pmac read_gpio(struct device_node *np)
 	return offset;
 }
 
-static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, u32 state)
+static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, pm_message_t pmsg)
 {
 	/* Ok, this could be made a bit smarter, but let's be robust for now. We
 	 * always force a speed change to high speed before sleep, to make sure
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index bf62dfe4976a..7a7859dd0d98 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -869,7 +869,7 @@ EXPORT_SYMBOL(cpufreq_get);
  *	cpufreq_suspend - let the low level driver prepare for suspend
  */
 
-static int cpufreq_suspend(struct sys_device * sysdev, u32 state)
+static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
 {
 	int cpu = sysdev->id;
 	unsigned int ret = 0;
@@ -897,7 +897,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, u32 state)
 	}
 
 	if (cpufreq_driver->suspend) {
-		ret = cpufreq_driver->suspend(cpu_policy, state);
+		ret = cpufreq_driver->suspend(cpu_policy, pmsg);
 		if (ret) {
 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
 					"step on CPU %u\n", cpu_policy->cpu);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 927daa86c9b3..ff7f80f48df1 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -201,7 +201,7 @@ struct cpufreq_driver {
 
 	/* optional */
 	int	(*exit)		(struct cpufreq_policy *policy);
-	int	(*suspend)	(struct cpufreq_policy *policy, u32 state);
+	int	(*suspend)	(struct cpufreq_policy *policy, pm_message_t pmsg);
 	int	(*resume)	(struct cpufreq_policy *policy);
 	struct freq_attr	**attr;
 };
-- 
cgit v1.2.3


From a39722034ae37f80a1803bf781fe3fe1b03e20bc Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Thu, 7 Jul 2005 17:56:56 -0700
Subject: [PATCH] page_uptodate locking scalability

Use a bit spin lock in the first buffer of the page to synchronise asynch
IO buffer completions, instead of the global page_uptodate_lock, which is
showing some scalabilty problems.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c                 | 25 +++++++++++++++++--------
 include/linux/buffer_head.h |  3 +++
 2 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 561e63a14966..6a25d7df89b1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -513,8 +513,8 @@ static void free_more_memory(void)
  */
 static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 {
-	static DEFINE_SPINLOCK(page_uptodate_lock);
 	unsigned long flags;
+	struct buffer_head *first;
 	struct buffer_head *tmp;
 	struct page *page;
 	int page_uptodate = 1;
@@ -536,7 +536,9 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 	 * two buffer heads end IO at almost the same time and both
 	 * decide that the page is now completely done.
 	 */
-	spin_lock_irqsave(&page_uptodate_lock, flags);
+	first = page_buffers(page);
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
 	clear_buffer_async_read(bh);
 	unlock_buffer(bh);
 	tmp = bh;
@@ -549,7 +551,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		}
 		tmp = tmp->b_this_page;
 	} while (tmp != bh);
-	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
 
 	/*
 	 * If none of the buffers had errors and they are all
@@ -561,7 +564,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 	return;
 
 still_busy:
-	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
 	return;
 }
 
@@ -572,8 +576,8 @@ still_busy:
 void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
 	char b[BDEVNAME_SIZE];
-	static DEFINE_SPINLOCK(page_uptodate_lock);
 	unsigned long flags;
+	struct buffer_head *first;
 	struct buffer_head *tmp;
 	struct page *page;
 
@@ -594,7 +598,10 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 		SetPageError(page);
 	}
 
-	spin_lock_irqsave(&page_uptodate_lock, flags);
+	first = page_buffers(page);
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+
 	clear_buffer_async_write(bh);
 	unlock_buffer(bh);
 	tmp = bh->b_this_page;
@@ -605,12 +612,14 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 		}
 		tmp = tmp->b_this_page;
 	}
-	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
 	end_page_writeback(page);
 	return;
 
 still_busy:
-	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
 	return;
 }
 
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 802c91e9b3da..90828493791f 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -19,6 +19,9 @@ enum bh_state_bits {
 	BH_Dirty,	/* Is dirty */
 	BH_Lock,	/* Is locked */
 	BH_Req,		/* Has been submitted for I/O */
+	BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise
+			  * IO completion of other buffers in the page
+			  */
 
 	BH_Mapped,	/* Has a disk mapping */
 	BH_New,		/* Disk mapping was newly created by get_block */
-- 
cgit v1.2.3


From 0db925af1db5f3dfe1691c35b39496e2baaff9c9 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 7 Jul 2005 17:56:58 -0700
Subject: [PATCH] propagate __nocast annotations

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h    |  4 ++--
 include/linux/slab.h   |  4 ++--
 include/linux/string.h |  2 +-
 mm/mempool.c           |  2 +-
 mm/slab.c              | 12 +++++++-----
 5 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 8d6bf608b199..7c7400137e97 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -12,8 +12,8 @@ struct vm_area_struct;
  * GFP bitmasks..
  */
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA	0x01
-#define __GFP_HIGHMEM	0x02
+#define __GFP_DMA	0x01u
+#define __GFP_HIGHMEM	0x02u
 
 /*
  * Action modifiers - doesn't change the zoning
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 76cf7e60216c..4c8e552471b0 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -65,7 +65,7 @@ extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 extern const char *kmem_cache_name(kmem_cache_t *);
-extern kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags);
+extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags);
 
 /* Size description struct for general caches. */
 struct cache_sizes {
@@ -105,7 +105,7 @@ extern unsigned int ksize(const void *);
 
 #ifdef CONFIG_NUMA
 extern void *kmem_cache_alloc_node(kmem_cache_t *, int flags, int node);
-extern void *kmalloc_node(size_t size, int flags, int node);
+extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node);
 #else
 static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node)
 {
diff --git a/include/linux/string.h b/include/linux/string.h
index 93994c613095..dab2652acbd8 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -88,7 +88,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
 
-extern char *kstrdup(const char *s, int gfp);
+extern char *kstrdup(const char *s, unsigned int __nocast gfp);
 
 #ifdef __cplusplus
 }
diff --git a/mm/mempool.c b/mm/mempool.c
index 9a72f7d918fa..65f2957b8d51 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -205,7 +205,7 @@ void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask)
 	void *element;
 	unsigned long flags;
 	wait_queue_t wait;
-	int gfp_temp;
+	unsigned int gfp_temp;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
diff --git a/mm/slab.c b/mm/slab.c
index e57abd45eede..c9e706db4634 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -584,7 +584,8 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep)
 	return cachep->array[smp_processor_id()];
 }
 
-static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags)
+static inline kmem_cache_t *__find_general_cachep(size_t size,
+						unsigned int __nocast gfpflags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
@@ -608,7 +609,8 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags)
 	return csizep->cs_cachep;
 }
 
-kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
+kmem_cache_t *kmem_find_general_cachep(size_t size,
+		unsigned int __nocast gfpflags)
 {
 	return __find_general_cachep(size, gfpflags);
 }
@@ -2100,7 +2102,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags)
 #if DEBUG
 static void *
 cache_alloc_debugcheck_after(kmem_cache_t *cachep,
-			unsigned long flags, void *objp, void *caller)
+			unsigned int __nocast flags, void *objp, void *caller)
 {
 	if (!objp)	
 		return objp;
@@ -2442,7 +2444,7 @@ got_slabp:
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *kmalloc_node(size_t size, int flags, int node)
+void *kmalloc_node(size_t size, unsigned int __nocast flags, int node)
 {
 	kmem_cache_t *cachep;
 
@@ -3094,7 +3096,7 @@ unsigned int ksize(const void *objp)
  * @s: the string to duplicate
  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
  */
-char *kstrdup(const char *s, int gfp)
+char *kstrdup(const char *s, unsigned int __nocast gfp)
 {
 	size_t len;
 	char *buf;
-- 
cgit v1.2.3


From 6c036527a630720063b67d9a65455e8caca2c8fa Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@lameter.com>
Date: Thu, 7 Jul 2005 17:56:59 -0700
Subject: [PATCH] mostly_read data section

Add a new section called ".data.read_mostly" for data items that are read
frequently and rarely written to like cpumaps etc.

If these maps are placed in the .data section then these frequenly read
items may end up in cachelines with data is is frequently updated.  In that
case all processors in an SMP system must needlessly reload the cachelines
again and again containing elements of those frequently used variables.

The ability to share these cachelines will allow each cpu in an SMP system
to keep local copies of those shared cachelines thereby optimizing
performance.

Signed-off-by: Alok N Kataria <alokk@calsoftinc.com>
Signed-off-by: Shobhit Dayal <shobhit@calsoftinc.com>
Signed-off-by: Christoph Lameter <christoph@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/cpu/intel.c         |  2 +-
 arch/i386/kernel/smpboot.c           | 18 +++++++++---------
 arch/i386/kernel/time.c              |  2 +-
 arch/i386/kernel/timers/timer_hpet.c |  4 ++--
 arch/i386/kernel/vmlinux.lds.S       |  3 +++
 arch/x86_64/kernel/vmlinux.lds.S     |  4 ++++
 drivers/char/random.c                |  2 +-
 fs/bio.c                             |  2 +-
 include/linux/cache.h                |  6 ++++++
 kernel/profile.c                     |  4 ++--
 lib/radix-tree.c                     |  2 +-
 11 files changed, 31 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 96a75d045835..a2c33c1a46c5 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -25,7 +25,7 @@ extern int trap_init_f00f_bug(void);
 /*
  * Alignment at which movsl is preferred for bulk memory copies.
  */
-struct movsl_mask movsl_mask;
+struct movsl_mask movsl_mask __read_mostly;
 #endif
 
 void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index d66bf489a2e9..8ac8e9fd5614 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -68,21 +68,21 @@ EXPORT_SYMBOL(smp_num_siblings);
 #endif
 
 /* Package ID of each logical CPU */
-int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID};
+int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 EXPORT_SYMBOL(phys_proc_id);
 
 /* Core ID of each logical CPU */
-int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID};
+int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 EXPORT_SYMBOL(cpu_core_id);
 
-cpumask_t cpu_sibling_map[NR_CPUS];
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_sibling_map);
 
-cpumask_t cpu_core_map[NR_CPUS];
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
 /* bitmap of online cpus */
-cpumask_t cpu_online_map;
+cpumask_t cpu_online_map __read_mostly;
 EXPORT_SYMBOL(cpu_online_map);
 
 cpumask_t cpu_callin_map;
@@ -100,7 +100,7 @@ static int __devinitdata tsc_sync_disabled;
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
 
-u8 x86_cpu_to_apicid[NR_CPUS] =
+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
 			{ [0 ... NR_CPUS-1] = 0xff };
 EXPORT_SYMBOL(x86_cpu_to_apicid);
 
@@ -550,10 +550,10 @@ extern struct {
 #ifdef CONFIG_NUMA
 
 /* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
 				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
 /* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
 EXPORT_SYMBOL(cpu_2_node);
 
 /* set up a mapping between cpu and node. */
@@ -581,7 +581,7 @@ static inline void unmap_cpu_to_node(int cpu)
 
 #endif /* CONFIG_NUMA */
 
-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 
 static void map_cpu_to_logical_apicid(void)
 {
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 2854c357377f..0ee9dee8af06 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -91,7 +91,7 @@ EXPORT_SYMBOL(rtc_lock);
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
-struct timer_opts *cur_timer = &timer_none;
+struct timer_opts *cur_timer __read_mostly = &timer_none;
 
 /*
  * This is a special lock that is owned by the CPU and holds the index
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index d766e0963ac1..ef8dac5dd33b 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -18,7 +18,7 @@
 #include "mach_timer.h"
 #include <asm/hpet.h>
 
-static unsigned long hpet_usec_quotient;	/* convert hpet clks to usec */
+static unsigned long __read_mostly hpet_usec_quotient;	/* convert hpet clks to usec */
 static unsigned long tsc_hpet_quotient;		/* convert tsc to hpet clks */
 static unsigned long hpet_last; 	/* hpet counter value at last tick*/
 static unsigned long last_tsc_low;	/* lsb 32 bits of Time Stamp Counter */
@@ -180,7 +180,7 @@ static int __init init_hpet(char* override)
 /************************************************************/
 
 /* tsc timer_opts struct */
-static struct timer_opts timer_hpet = {
+static struct timer_opts timer_hpet __read_mostly = {
 	.name = 		"hpet",
 	.mark_offset =		mark_offset_hpet,
 	.get_offset =		get_offset_hpet,
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 7e01a528a83a..761972f8cb6c 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -57,6 +57,9 @@ SECTIONS
 	*(.data.cacheline_aligned)
   }
 
+  /* rarely changed data like cpu maps */
+  . = ALIGN(32);
+  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
   _edata = .;			/* End of data section */
 
   . = ALIGN(THREAD_SIZE);	/* init_task */
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 73389f51c4e5..61c12758ca70 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -56,6 +56,10 @@ SECTIONS
   .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
 	*(.data.cacheline_aligned)
   }
+  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
+  	*(.data.read_mostly)
+  }
 
 #define VSYSCALL_ADDR (-10*1024*1024)
 #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095))
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 460b5d475edd..6b11d6b2129f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -271,7 +271,7 @@ static int random_write_wakeup_thresh = 128;
  * samples to avoid wasting CPU time and reduce lock contention.
  */
 
-static int trickle_thresh = INPUT_POOL_WORDS * 28;
+static int trickle_thresh __read_mostly = INPUT_POOL_WORDS * 28;
 
 static DEFINE_PER_CPU(int, trickle_count) = 0;
 
diff --git a/fs/bio.c b/fs/bio.c
index 3a1472acc361..ca8f7a850fe3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -52,7 +52,7 @@ struct biovec_slab {
  */
 
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] = {
+static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
 	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 4d767b93738a..2b66a36d85f0 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -13,6 +13,12 @@
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
+#ifdef CONFIG_X86
+#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+#else
+#define __read_mostly
+#endif
+
 #ifndef ____cacheline_aligned
 #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
 #endif
diff --git a/kernel/profile.c b/kernel/profile.c
index ad8cbb75ffa2..f89248e6d704 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -35,11 +35,11 @@ struct profile_hit {
 #define NR_PROFILE_GRP		(NR_PROFILE_HIT/PROFILE_GRPSZ)
 
 /* Oprofile timer tick hook */
-int (*timer_hook)(struct pt_regs *);
+int (*timer_hook)(struct pt_regs *) __read_mostly;
 
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
-static int prof_on;
+static int prof_on __read_mostly;
 static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 04d664377f2c..10bed1c8c3c3 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -58,7 +58,7 @@ struct radix_tree_path {
 #define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
 #define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
 
-static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH];
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
 
 /*
  * Radix tree node cache.
-- 
cgit v1.2.3


From 1ce88cf466f7b6078b14d67d186a3d7c19dd5609 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:24 -0700
Subject: [PATCH] namespace.c: fix race in mark_mounts_for_expiry()

This patch fixes a race found by Ram in mark_mounts_for_expiry() in
fs/namespace.c.

The bug can only be triggered with simultaneous exiting of a process having
a private namespace, and expiry of a mount from within that namespace.
It's practically impossible to trigger, and I haven't even tried.  But
still, a bug is a bug.

The race happens when put_namespace() is called by another task, while
mark_mounts_for_expiry() is between atomic_read() and get_namespace().  In
that case get_namespace() will be called on an already dead namespace with
unforeseeable results.

The solution was suggested by Al Viro, with his own words:

      Instead of screwing with atomic_read() in there, why don't we
      simply do the following:
      	a) atomic_dec_and_lock() in put_namespace()
      	b) __put_namespace() called without dropping lock
      	c) the first thing done by __put_namespace would be
      struct vfsmount *root = namespace->root;
      namespace->root = NULL;
      spin_unlock(...);
      ....
      umount_tree(root);
      ...
      	d) check in mark_... would be simply namespace && namespace->root.

      And we are all set; no screwing around with atomic_read(), no magic
      at all.  Dying namespace gets NULL ->root.
      All changes of ->root happen under spinlock.
      If under a spinlock we see non-NULL ->mnt_namespace, it won't be
      freed until we drop the lock (we will set ->mnt_namespace to NULL
      under that lock before we get to freeing namespace).
      If under a spinlock we see non-NULL ->mnt_namespace and
      ->mnt_namespace->root, we can grab a reference to namespace and be
      sure that it won't go away.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c            | 7 +++++--
 include/linux/namespace.h | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index a0d0ef1f1a48..9d17541ebafa 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -869,7 +869,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		/* don't do anything if the namespace is dead - all the
 		 * vfsmounts from it are going away anyway */
 		namespace = mnt->mnt_namespace;
-		if (!namespace || atomic_read(&namespace->count) <= 0)
+		if (!namespace || !namespace->root)
 			continue;
 		get_namespace(namespace);
 
@@ -1450,9 +1450,12 @@ void __init mnt_init(unsigned long mempages)
 
 void __put_namespace(struct namespace *namespace)
 {
+	struct vfsmount *root = namespace->root;
+	namespace->root = NULL;
+	spin_unlock(&vfsmount_lock);
 	down_write(&namespace->sem);
 	spin_lock(&vfsmount_lock);
-	umount_tree(namespace->root);
+	umount_tree(root);
 	spin_unlock(&vfsmount_lock);
 	up_write(&namespace->sem);
 	kfree(namespace);
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
index 697991b69f9b..0e5a86f13b2f 100644
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -17,7 +17,8 @@ extern void __put_namespace(struct namespace *namespace);
 
 static inline void put_namespace(struct namespace *namespace)
 {
-	if (atomic_dec_and_test(&namespace->count))
+	if (atomic_dec_and_lock(&namespace->count, &vfsmount_lock))
+		/* releases vfsmount_lock */
 		__put_namespace(namespace);
 }
 
-- 
cgit v1.2.3


From 55e700b924f9e0ba24e3a071d1097d050b05abe6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:30 -0700
Subject: [PATCH] namespace: rename mnt_fslink to mnt_expire

This patch renames vfsmount->mnt_fslink to something a little more
descriptive: vfsmount->mnt_expire.

Signed-off-by: Mike Waychison <michael.waychison@sun.com>
Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c        | 24 ++++++++++++------------
 include/linux/mount.h |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index b168dc37eaab..587eb0d707ee 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -61,7 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
 		INIT_LIST_HEAD(&mnt->mnt_list);
-		INIT_LIST_HEAD(&mnt->mnt_fslink);
+		INIT_LIST_HEAD(&mnt->mnt_expire);
 		if (name) {
 			int size = strlen(name)+1;
 			char *newname = kmalloc(size, GFP_KERNEL);
@@ -165,8 +165,8 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
 		/* stick the duplicate mount on the same expiry list
 		 * as the original if that was on one */
 		spin_lock(&vfsmount_lock);
-		if (!list_empty(&old->mnt_fslink))
-			list_add(&mnt->mnt_fslink, &old->mnt_fslink);
+		if (!list_empty(&old->mnt_expire))
+			list_add(&mnt->mnt_expire, &old->mnt_expire);
 		spin_unlock(&vfsmount_lock);
 	}
 	return mnt;
@@ -351,7 +351,7 @@ static void umount_tree(struct vfsmount *mnt)
 	while (!list_empty(&kill)) {
 		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
 		list_del_init(&mnt->mnt_list);
-		list_del_init(&mnt->mnt_fslink);
+		list_del_init(&mnt->mnt_expire);
 		if (mnt->mnt_parent == mnt) {
 			spin_unlock(&vfsmount_lock);
 		} else {
@@ -645,7 +645,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 	if (mnt) {
 		/* stop bind mounts from expiring */
 		spin_lock(&vfsmount_lock);
-		list_del_init(&mnt->mnt_fslink);
+		list_del_init(&mnt->mnt_expire);
 		spin_unlock(&vfsmount_lock);
 
 		err = graft_tree(mnt, nd);
@@ -744,7 +744,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
 
 	/* if the mount is moved, it should no longer be expire
 	 * automatically */
-	list_del_init(&old_nd.mnt->mnt_fslink);
+	list_del_init(&old_nd.mnt->mnt_expire);
 out2:
 	spin_unlock(&vfsmount_lock);
 out1:
@@ -814,7 +814,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
 	if (err == 0 && fslist) {
 		/* add to the specified expiration list */
 		spin_lock(&vfsmount_lock);
-		list_add_tail(&newmnt->mnt_fslink, fslist);
+		list_add_tail(&newmnt->mnt_expire, fslist);
 		spin_unlock(&vfsmount_lock);
 	}
 
@@ -869,7 +869,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
 		 * Someone brought it back to life whilst we didn't have any
 		 * locks held so return it to the expiration list
 		 */
-		list_add_tail(&mnt->mnt_fslink, mounts);
+		list_add_tail(&mnt->mnt_expire, mounts);
 		spin_unlock(&vfsmount_lock);
 	}
 }
@@ -896,13 +896,13 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	 * - still marked for expiry (marked on the last call here; marks are
 	 *   cleared by mntput())
 	 */
-	list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) {
+	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
 		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
 		    atomic_read(&mnt->mnt_count) != 1)
 			continue;
 
 		mntget(mnt);
-		list_move(&mnt->mnt_fslink, &graveyard);
+		list_move(&mnt->mnt_expire, &graveyard);
 	}
 
 	/*
@@ -912,8 +912,8 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	 * - dispose of the corpse
 	 */
 	while (!list_empty(&graveyard)) {
-		mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink);
-		list_del_init(&mnt->mnt_fslink);
+		mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
+		list_del_init(&mnt->mnt_expire);
 
 		/* don't do anything if the namespace is dead - all the
 		 * vfsmounts from it are going away anyway */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 8b8d3b9beefd..196d2d6de4a3 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -34,7 +34,7 @@ struct vfsmount
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	char *mnt_devname;		/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
-	struct list_head mnt_fslink;	/* link in fs-specific expiry list */
+	struct list_head mnt_expire;	/* link in fs-specific expiry list */
 	struct namespace *mnt_namespace; /* containing namespace */
 };
 
-- 
cgit v1.2.3


From 751c404b8f63e8199d5f2f8f2bcfd69b41d11caa Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:30 -0700
Subject: [PATCH] namespace: rename _mntput to mntput_no_expire

This patch renames _mntput() to something a little more descriptive:
mntput_no_expire().

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c            | 2 +-
 include/linux/mount.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index fa8df81ce8ca..1d93cb4f7c5f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -314,7 +314,7 @@ void path_release(struct nameidata *nd)
 void path_release_on_umount(struct nameidata *nd)
 {
 	dput(nd->dentry);
-	_mntput(nd->mnt);
+	mntput_no_expire(nd->mnt);
 }
 
 /*
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 196d2d6de4a3..74b4727a4e30 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -47,7 +47,7 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
 
 extern void __mntput(struct vfsmount *mnt);
 
-static inline void _mntput(struct vfsmount *mnt)
+static inline void mntput_no_expire(struct vfsmount *mnt)
 {
 	if (mnt) {
 		if (atomic_dec_and_test(&mnt->mnt_count))
@@ -59,7 +59,7 @@ static inline void mntput(struct vfsmount *mnt)
 {
 	if (mnt) {
 		mnt->mnt_expiry_mark = 0;
-		_mntput(mnt);
+		mntput_no_expire(mnt);
 	}
 }
 
-- 
cgit v1.2.3


From a6ccbbb8865101d83c2e716f08feae1da1c48584 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 7 Jul 2005 17:59:11 -0700
Subject: [PATCH] nfsd4: fix sync'ing of recovery directory

We need to fsync the recovery directory after writing to it, but we weren't
doing this correctly.  (For example, we weren't taking the i_sem when calling
->fsync().)

Just reuse the existing nfsd fsync code instead.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c     | 29 ++++++++---------------------
 fs/nfsd/vfs.c             |  2 +-
 include/linux/nfsd/nfsd.h |  1 +
 3 files changed, 10 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 095f1740f3ae..bb40083b6b7d 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -119,25 +119,12 @@ out:
 	return status;
 }
 
-static int
-nfsd4_rec_fsync(struct dentry *dentry)
+static void
+nfsd4_sync_rec_dir(void)
 {
-	struct file *filp;
-	int status = nfs_ok;
-
-	dprintk("NFSD: nfs4_fsync_rec_dir\n");
-	filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR);
-	if (IS_ERR(filp)) {
-		status = PTR_ERR(filp);
-		goto out;
-	}
-	if (filp->f_op && filp->f_op->fsync)
-		status = filp->f_op->fsync(filp, filp->f_dentry, 0);
-	fput(filp);
-out:
-	if (status)
-		printk("nfsd4: unable to sync recovery directory\n");
-	return status;
+	down(&rec_dir.dentry->d_inode->i_sem);
+	nfsd_sync_dir(rec_dir.dentry);
+	up(&rec_dir.dentry->d_inode->i_sem);
 }
 
 int
@@ -176,7 +163,7 @@ out_unlock:
 	up(&rec_dir.dentry->d_inode->i_sem);
 	if (status == 0) {
 		clp->cl_firststate = 1;
-		status = nfsd4_rec_fsync(rec_dir.dentry);
+		nfsd4_sync_rec_dir();
 	}
 	nfs4_reset_user(uid, gid);
 	dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
@@ -331,7 +318,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
 	nfs4_reset_user(uid, gid);
 	if (status == 0)
-		status = nfsd4_rec_fsync(rec_dir.dentry);
+		nfsd4_sync_rec_dir();
 	if (status)
 		printk("NFSD: Failed to remove expired client state directory"
 				" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
@@ -362,7 +349,7 @@ nfsd4_recdir_purge_old(void) {
 		return;
 	status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
 	if (status == 0)
-		status = nfsd4_rec_fsync(rec_dir.dentry);
+		nfsd4_sync_rec_dir();
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
 			" directory %s\n", rec_dir.dentry->d_name.name);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index be24ead89d94..5e0bf3917607 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -733,7 +733,7 @@ nfsd_sync(struct file *filp)
 	up(&inode->i_sem);
 }
 
-static void
+void
 nfsd_sync_dir(struct dentry *dp)
 {
 	nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 5791dfd30dd0..c2da1b62d416 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -124,6 +124,7 @@ int		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 
 int		nfsd_notify_change(struct inode *, struct iattr *);
 int		nfsd_permission(struct svc_export *, struct dentry *, int);
+void		nfsd_sync_dir(struct dentry *dp);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
-- 
cgit v1.2.3


From 7fb64cee34f5dc743f697041717cafda8a94b5ac Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 7 Jul 2005 17:59:20 -0700
Subject: [PATCH] nfsd4: seqid comments

Add some comments on the use of so_seqid, in an attempt to avoid some of the
confusion outlined in the previous patch....

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4xdr.c          | 8 ++++----
 include/linux/nfsd/state.h | 4 +++-
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5207068cde1a..1515c5b8096f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1210,10 +1210,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	save = resp->p;
 
 /*
- * Routine for encoding the result of a
- * "seqid-mutating" NFSv4 operation.  This is
- * where seqids are incremented, and the
- * replay cache is filled.
+ * Routine for encoding the result of a "seqid-mutating" NFSv4 operation.  This
+ * is where sequence id's are incremented, and the replay cache is filled.
+ * Note that we increment sequence id's here, at the last moment, so we're sure
+ * we know whether the error to be returned is a sequence id mutating error.
  */
 
 #define ENCODE_SEQID_OP_TAIL(stateowner) do {			\
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index a84a3fa99be1..2d19431f47ea 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -203,7 +203,9 @@ struct nfs4_stateowner {
 	int			so_is_open_owner; /* 1=openowner,0=lockowner */
 	u32                     so_id;
 	struct nfs4_client *    so_client;
-	u32                     so_seqid;    
+	/* after increment in ENCODE_SEQID_OP_TAIL, represents the next
+	 * sequence id expected from the client: */
+	u32                     so_seqid;
 	struct xdr_netobj       so_owner;     /* open owner name */
 	int                     so_confirmed; /* successful OPEN_CONFIRM? */
 	struct nfs4_replay	so_replay;
-- 
cgit v1.2.3


From b700949b781480819e53bdc38a53f053226dd75e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 7 Jul 2005 17:59:23 -0700
Subject: [PATCH] nfsd4: return better error on io incompatible with open mode

from RFC 3530:
"Share reservations are established by OPEN operations and by their
nature are mandatory in that when the OPEN denies READ or WRITE
operations, that denial results in such operations being rejected
with error NFS4ERR_LOCKED."

(Note that share_denied is really only a legal error for OPEN.)

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c       | 2 +-
 include/linux/nfsd/nfsd.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b96714ae3dd7..3647c942915e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1295,7 +1295,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	fp = find_file(ino);
 	if (!fp)
 		return nfs_ok;
-	ret = nfserr_share_denied;
+	ret = nfserr_locked;
 	/* Search for conflicting share reservations */
 	list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
 		if (test_bit(deny_type, &stp->st_deny_bmap) ||
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index c2da1b62d416..6d5a24f3fc6d 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -231,6 +231,7 @@ void		nfsd_lockd_shutdown(void);
 #define	nfserr_reclaim_bad	__constant_htonl(NFSERR_RECLAIM_BAD)
 #define	nfserr_badname		__constant_htonl(NFSERR_BADNAME)
 #define	nfserr_cb_path_down	__constant_htonl(NFSERR_CB_PATH_DOWN)
+#define	nfserr_locked		__constant_htonl(NFSERR_LOCKED)
 
 /* error codes for internal use */
 /* if a request fails due to kmalloc failure, it gets dropped.
-- 
cgit v1.2.3


From 4c4cd222ee329025840bc2f8cebf71d36c62440c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 7 Jul 2005 17:59:27 -0700
Subject: [PATCH] nfsd4: check lock type against openmode.

We shouldn't be allowing, e.g., write locks on files not open for read.  To
enforce this, we add a pointer from the lock stateid back to the open stateid
it came from, so that the check will continue to be correct even after the
open is upgraded or downgraded.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c        | 49 +++++++++++++++++++++++++++++++---------------
 include/linux/nfsd/state.h |  5 +++++
 2 files changed, 38 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 59b214f01b6d..b83f8fb441e1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1160,6 +1160,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	stp->st_deny_bmap = 0;
 	__set_bit(open->op_share_access, &stp->st_access_bmap);
 	__set_bit(open->op_share_deny, &stp->st_deny_bmap);
+	stp->st_openstp = NULL;
 }
 
 static void
@@ -2158,12 +2159,18 @@ out:
 	return status;
 }
 
+static inline int
+setlkflg (int type)
+{
+	return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
+		RD_STATE : WR_STATE;
+}
 
 /* 
  * Checks for sequence id mutating operations. 
  */
 static int
-nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
+nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock)
 {
 	struct nfs4_stateid *stp;
 	struct nfs4_stateowner *sop;
@@ -2201,21 +2208,31 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
 		goto check_replay;
 	}
 
-	/* for new lock stateowners:
-	 * check that the lock->v.new.open_stateid
-	 * refers to an open stateowner
-	 *
-	 * check that the lockclid (nfs4_lock->v.new.clientid) is the same
-	 * as the open_stateid->st_stateowner->so_client->clientid
-	 */
-	if (lockclid) {
+	if (lock) {
 		struct nfs4_stateowner *sop = stp->st_stateowner;
+		clientid_t *lockclid = &lock->v.new.clientid;
 		struct nfs4_client *clp = sop->so_client;
+		int lkflg = 0;
+		int status;
+
+		lkflg = setlkflg(lock->lk_type);
+
+		if (lock->lk_is_new) {
+                       if (!sop->so_is_open_owner)
+			       return nfserr_bad_stateid;
+                       if (!cmp_clid(&clp->cl_clientid, lockclid))
+			       return nfserr_bad_stateid;
+                       /* stp is the open stateid */
+                       status = nfs4_check_openmode(stp, lkflg);
+                       if (status)
+			       return status;
+               } else {
+                       /* stp is the lock stateid */
+                       status = nfs4_check_openmode(stp->st_openstp, lkflg);
+                       if (status)
+			       return status;
+               }
 
-		if (!sop->so_is_open_owner)
-			return nfserr_bad_stateid;
-		if (!cmp_clid(&clp->cl_clientid, lockclid))
-			return nfserr_bad_stateid;
 	}
 
 	if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
@@ -2642,6 +2659,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
 	stp->st_access_bmap = open_stp->st_access_bmap;
 	stp->st_deny_bmap = open_stp->st_deny_bmap;
+	stp->st_openstp = open_stp;
 
 out:
 	return stp;
@@ -2697,8 +2715,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 				        lock->lk_new_open_seqid,
 		                        &lock->lk_new_open_stateid,
 		                        CHECK_FH | OPEN_STATE,
-		                        &open_sop, &open_stp,
-					&lock->v.new.clientid);
+		                        &open_sop, &open_stp, lock);
 		if (status)
 			goto out;
 		/* create lockowner and lock stateid */
@@ -2726,7 +2743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 				       lock->lk_old_lock_seqid, 
 				       &lock->lk_old_lock_stateid, 
 				       CHECK_FH | LOCK_STATE, 
-				       &lock->lk_stateowner, &lock_stp, NULL);
+				       &lock->lk_stateowner, &lock_stp, lock);
 		if (status)
 			goto out;
 	}
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 2d19431f47ea..8bf23cf8b603 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -237,6 +237,10 @@ struct nfs4_file {
 *       st_perlockowner: (open stateid) list of lock nfs4_stateowners
 * 	st_access_bmap: used only for open stateid
 * 	st_deny_bmap: used only for open stateid
+*	st_openstp: open stateid lock stateid was derived from
+*
+* XXX: open stateids and lock stateids have diverged sufficiently that
+* we should consider defining separate structs for the two cases.
 */
 
 struct nfs4_stateid {
@@ -250,6 +254,7 @@ struct nfs4_stateid {
 	struct file                 * st_vfs_file;
 	unsigned long                 st_access_bmap;
 	unsigned long                 st_deny_bmap;
+	struct nfs4_stateid         * st_openstp;
 };
 
 /* flags for preprocess_seqid_op() */
-- 
cgit v1.2.3


From 86a76caf8705e3524e15f343f3c4806939a06dc8 Mon Sep 17 00:00:00 2001
From: Victor Fusco <victor@cetuc.puc-rio.br>
Date: Fri, 8 Jul 2005 14:57:47 -0700
Subject: [NET]: Fix sparse warnings

From: Victor Fusco <victor@cetuc.puc-rio.br>

Fix the sparse warning "implicit cast to nocast type"

Signed-off-by: Victor Fusco <victor@cetuc.puc-rio.br>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 29 ++++++++++++++++++-----------
 include/net/sock.h     | 18 +++++++++++-------
 include/net/tcp.h      |  3 ++-
 net/core/dev.c         |  2 +-
 net/core/skbuff.c      | 17 ++++++++++-------
 net/core/sock.c        | 11 +++++++----
 net/ipv4/tcp_output.c  |  2 +-
 7 files changed, 50 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 14b950413495..5d4a990d5577 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -300,20 +300,26 @@ struct sk_buff {
 #include <asm/system.h>
 
 extern void	       __kfree_skb(struct sk_buff *skb);
-extern struct sk_buff *alloc_skb(unsigned int size, int priority);
+extern struct sk_buff *alloc_skb(unsigned int size,
+				 unsigned int __nocast priority);
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
-					    unsigned int size, int priority);
+					    unsigned int size,
+					    unsigned int __nocast priority);
 extern void	       kfree_skbmem(struct sk_buff *skb);
-extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority);
-extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
-extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask);
+extern struct sk_buff *skb_clone(struct sk_buff *skb,
+				 unsigned int __nocast priority);
+extern struct sk_buff *skb_copy(const struct sk_buff *skb,
+				unsigned int __nocast priority);
+extern struct sk_buff *pskb_copy(struct sk_buff *skb,
+				 unsigned int __nocast gfp_mask);
 extern int	       pskb_expand_head(struct sk_buff *skb,
-					int nhead, int ntail, int gfp_mask);
+					int nhead, int ntail,
+					unsigned int __nocast gfp_mask);
 extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
 					    unsigned int headroom);
 extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 				       int newheadroom, int newtailroom,
-				       int priority);
+				       unsigned int __nocast priority);
 extern struct sk_buff *		skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	kfree_skb(a)
 extern void	      skb_over_panic(struct sk_buff *skb, int len,
@@ -464,7 +470,8 @@ static inline int skb_shared(const struct sk_buff *skb)
  *
  *	NULL is returned on a memory allocation failure.
  */
-static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
+static inline struct sk_buff *skb_share_check(struct sk_buff *skb,
+					      unsigned int __nocast pri)
 {
 	might_sleep_if(pri & __GFP_WAIT);
 	if (skb_shared(skb)) {
@@ -1001,7 +1008,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
  *	%NULL is returned in there is no free memory.
  */
 static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
-					      int gfp_mask)
+					      unsigned int __nocast gfp_mask)
 {
 	struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);
 	if (likely(skb))
@@ -1114,8 +1121,8 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i,
  *	If there is no free memory -ENOMEM is returned, otherwise zero
  *	is returned and the old skb data released.
  */
-extern int __skb_linearize(struct sk_buff *skb, int gfp);
-static inline int skb_linearize(struct sk_buff *skb, int gfp)
+extern int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp);
+static inline int skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp)
 {
 	return __skb_linearize(skb, gfp);
 }
diff --git a/include/net/sock.h b/include/net/sock.h
index 7b76f891ae2d..a1042d08becd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -684,16 +684,17 @@ extern void FASTCALL(release_sock(struct sock *sk));
 #define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
-extern struct sock		*sk_alloc(int family, int priority,
+extern struct sock		*sk_alloc(int family,
+					  unsigned int __nocast priority,
 					  struct proto *prot, int zero_it);
 extern void			sk_free(struct sock *sk);
 
 extern struct sk_buff		*sock_wmalloc(struct sock *sk,
 					      unsigned long size, int force,
-					      int priority);
+					      unsigned int __nocast priority);
 extern struct sk_buff		*sock_rmalloc(struct sock *sk,
 					      unsigned long size, int force,
-					      int priority);
+					      unsigned int __nocast priority);
 extern void			sock_wfree(struct sk_buff *skb);
 extern void			sock_rfree(struct sk_buff *skb);
 
@@ -708,7 +709,8 @@ extern struct sk_buff 		*sock_alloc_send_skb(struct sock *sk,
 						     unsigned long size,
 						     int noblock,
 						     int *errcode);
-extern void *sock_kmalloc(struct sock *sk, int size, int priority);
+extern void *sock_kmalloc(struct sock *sk, int size,
+			  unsigned int __nocast priority);
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
 extern void sk_send_sigurg(struct sock *sk);
 
@@ -1132,7 +1134,8 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 }
 
 static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
-						   int size, int mem, int gfp)
+						   int size, int mem,
+						   unsigned int __nocast gfp)
 {
 	struct sk_buff *skb;
 	int hdr_len;
@@ -1155,7 +1158,8 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
 }
 
 static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk,
-						  int size, int gfp)
+						  int size,
+						  unsigned int __nocast gfp)
 {
 	return sk_stream_alloc_pskb(sk, size, 0, gfp);
 }
@@ -1188,7 +1192,7 @@ static inline int sock_writeable(const struct sock *sk)
 	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2);
 }
 
-static inline int gfp_any(void)
+static inline unsigned int __nocast gfp_any(void)
 {
 	return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
 }
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4d5b12e4dc11..f4f9aba07ac2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -860,7 +860,8 @@ extern void tcp_send_probe0(struct sock *);
 extern void tcp_send_partial(struct sock *);
 extern int  tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
-extern void tcp_send_active_reset(struct sock *sk, int priority);
+extern void tcp_send_active_reset(struct sock *sk,
+                                  unsigned int __nocast priority);
 extern int  tcp_send_synack(struct sock *);
 extern void tcp_push_one(struct sock *, unsigned int mss_now);
 extern void tcp_send_ack(struct sock *sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7f5f62c65115..ff9dc029233a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1127,7 +1127,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 extern void skb_release_data(struct sk_buff *);
 
 /* Keep head the same: replace data */
-int __skb_linearize(struct sk_buff *skb, int gfp_mask)
+int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
 	unsigned int size;
 	u8 *data;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 733deee24b9f..d9f7b06fe886 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -129,7 +129,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	Buffers may only be allocated from interrupts using a @gfp_mask of
  *	%GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
 {
 	struct sk_buff *skb;
 	u8 *data;
@@ -182,7 +182,8 @@ nodata:
  *	%GFP_ATOMIC.
  */
 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
-				     unsigned int size, int gfp_mask)
+				     unsigned int size,
+				     unsigned int __nocast gfp_mask)
 {
 	struct sk_buff *skb;
 	u8 *data;
@@ -322,7 +323,7 @@ void __kfree_skb(struct sk_buff *skb)
  *	%GFP_ATOMIC.
  */
 
-struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
 	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 
@@ -460,7 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  *	header is going to be modified. Use pskb_copy() instead.
  */
 
-struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
 	int headerlen = skb->data - skb->head;
 	/*
@@ -499,7 +500,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
  *	The returned buffer has a reference count of 1.
  */
 
-struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
 	/*
 	 *	Allocate the copy buffer
@@ -557,7 +558,8 @@ out:
  *	reloaded after call to this function.
  */
 
-int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
+		     unsigned int __nocast gfp_mask)
 {
 	int i;
 	u8 *data;
@@ -647,7 +649,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
  *	only by netfilter in the cases when checksum is recalculated? --ANK
  */
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
-				int newheadroom, int newtailroom, int gfp_mask)
+				int newheadroom, int newtailroom,
+				unsigned int __nocast gfp_mask)
 {
 	/*
 	 *	Allocate the copy buffer
diff --git a/net/core/sock.c b/net/core/sock.c
index a6ec3ada7f9e..8b35ccdc2b3b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -622,7 +622,8 @@ lenout:
  *	@prot: struct proto associated with this new sock instance
  *	@zero_it: if we should zero the newly allocated sock
  */
-struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
+struct sock *sk_alloc(int family, unsigned int __nocast priority,
+		      struct proto *prot, int zero_it)
 {
 	struct sock *sk = NULL;
 	kmem_cache_t *slab = prot->slab;
@@ -750,7 +751,8 @@ unsigned long sock_i_ino(struct sock *sk)
 /*
  * Allocate a skb from the socket's send buffer.
  */
-struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
+struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
+			     unsigned int __nocast priority)
 {
 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
 		struct sk_buff * skb = alloc_skb(size, priority);
@@ -765,7 +767,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int
 /*
  * Allocate a skb from the socket's receive buffer.
  */ 
-struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
+struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
+			     unsigned int __nocast priority)
 {
 	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
 		struct sk_buff *skb = alloc_skb(size, priority);
@@ -780,7 +783,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
 /* 
  * Allocate a memory block from the socket's option memory buffer.
  */ 
-void *sock_kmalloc(struct sock *sk, int size, int priority)
+void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority)
 {
 	if ((unsigned)size <= sysctl_optmem_max &&
 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e041d057ec86..e3f8ea1bfa9c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1613,7 +1613,7 @@ void tcp_send_fin(struct sock *sk)
  * was unread data in the receive queue.  This behavior is recommended
  * by draft-ietf-tcpimpl-prob-03.txt section 3.10.  -DaveM
  */
-void tcp_send_active_reset(struct sock *sk, int priority)
+void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-- 
cgit v1.2.3


From ca9b907d140a5f249250d19f956129dbbbf84f73 Mon Sep 17 00:00:00 2001
From: David L Stevens <dlstevens@us.ibm.com>
Date: Fri, 8 Jul 2005 17:38:07 -0700
Subject: [IPV4]: multicast API "join" issues

        This patch corrects a few problems with the IP_ADD_MEMBERSHIP
socket option:

1) The existing code makes an attempt at reference counting joins when
   using the ip_mreqn/imr_ifindex interface. Joining the same group
   on the same socket is an error, whatever the API. This leads to
   unexpected results when mixing ip_mreqn by index with ip_mreqn by
   address, ip_mreq, or other API's. For example, ip_mreq followed by
   ip_mreqn of the same group will "work" while the same two reversed
   will not.
           Fixed to always return EADDRINUSE on a duplicate join and
   removed the (now unused) reference count in ip_mc_socklist.

2) The group-search list in ip_mc_join_group() is comparing a full
   ip_mreqn structure and all of it must match for it to find the
   group. This doesn't correctly match a group that was joined with
   ip_mreq or ip_mreqn with an address (with or without an index). It
   also doesn't match groups that are joined by different addresses on
   the same interface. All of these are the same multicast group,
   which is identified by group address and interface index.
           Fixed the check to correctly match groups so we don't get
   duplicate group entries on the ip_mc_socklist.

3) The old code allocates a multicast address before searching for
   duplicates requiring it to free in various error cases. This
   patch moves the allocate until after the search and
   igmp_max_memberships check, so never a need to allocate, then free
   an entry.

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h |  1 -
 net/ipv4/igmp.c      | 35 ++++++++++++-----------------------
 2 files changed, 12 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 390e760a96d3..0c31ef0b5bad 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -148,7 +148,6 @@ struct ip_sf_socklist
 struct ip_mc_socklist
 {
 	struct ip_mc_socklist	*next;
-	int			count;
 	struct ip_mreqn		multi;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ip_sf_socklist	*sflist;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1f3183168a90..111eb678cbac 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1615,9 +1615,10 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 {
 	int err;
 	u32 addr = imr->imr_multiaddr.s_addr;
-	struct ip_mc_socklist *iml, *i;
+	struct ip_mc_socklist *iml=NULL, *i;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
+	int ifindex;
 	int count = 0;
 
 	if (!MULTICAST(addr))
@@ -1633,37 +1634,30 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 		goto done;
 	}
 
-	iml = (struct ip_mc_socklist *)sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
-
 	err = -EADDRINUSE;
+	ifindex = imr->imr_ifindex;
 	for (i = inet->mc_list; i; i = i->next) {
-		if (memcmp(&i->multi, imr, sizeof(*imr)) == 0) {
-			/* New style additions are reference counted */
-			if (imr->imr_address.s_addr == 0) {
-				i->count++;
-				err = 0;
-			}
+		if (i->multi.imr_multiaddr.s_addr == addr &&
+		    i->multi.imr_ifindex == ifindex)
 			goto done;
-		}
 		count++;
 	}
 	err = -ENOBUFS;
-	if (iml == NULL || count >= sysctl_igmp_max_memberships)
+	if (count >= sysctl_igmp_max_memberships)
+		goto done;
+	iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
+	if (iml == NULL)
 		goto done;
+
 	memcpy(&iml->multi, imr, sizeof(*imr));
 	iml->next = inet->mc_list;
-	iml->count = 1;
 	iml->sflist = NULL;
 	iml->sfmode = MCAST_EXCLUDE;
 	inet->mc_list = iml;
 	ip_mc_inc_group(in_dev, addr);
-	iml = NULL;
 	err = 0;
-
 done:
 	rtnl_shunlock();
-	if (iml)
-		sock_kfree_s(sk, iml, sizeof(*iml));
 	return err;
 }
 
@@ -1704,12 +1698,6 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 			in_dev = inetdev_by_index(iml->multi.imr_ifindex);
 			if (in_dev)
 				(void) ip_mc_leave_src(sk, iml, in_dev);
-			if (--iml->count) {
-				rtnl_unlock();
-				if (in_dev)
-					in_dev_put(in_dev);
-				return 0;
-			}
 
 			*imlp = iml->next;
 
@@ -1755,7 +1743,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	err = -EADDRNOTAVAIL;
 
 	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
-		if (memcmp(&pmc->multi, mreqs, 2*sizeof(__u32)) == 0)
+		if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr
+		    && pmc->multi.imr_ifindex == imr.imr_ifindex)
 			break;
 	}
 	if (!pmc)		/* must have a prior join */
-- 
cgit v1.2.3


From d369ddd2fc00fc3f46e9052d1017cbf407e3cdf7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 10 Jul 2005 15:45:11 -0700
Subject: [SPARC64]: Add __read_mostly support.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/smp.c         | 10 +++++-----
 arch/sparc64/kernel/time.c        | 24 +++++++++---------------
 arch/sparc64/kernel/vmlinux.lds.S |  2 ++
 include/linux/cache.h             |  2 +-
 4 files changed, 17 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index e5b9c7a27789..441fc2e52ce6 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -45,8 +45,8 @@ extern void calibrate_delay(void);
 /* Please don't make this stuff initdata!!!  --DaveM */
 static unsigned char boot_cpu_id;
 
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
+cpumask_t cpu_online_map = CPU_MASK_NONE __read_mostly;
+cpumask_t phys_cpu_present_map = CPU_MASK_NONE __read_mostly;
 static cpumask_t smp_commenced_mask;
 static cpumask_t cpu_callout_map;
 
@@ -155,7 +155,7 @@ void cpu_panic(void)
 	panic("SMP bolixed\n");
 }
 
-static unsigned long current_tick_offset;
+static unsigned long current_tick_offset __read_mostly;
 
 /* This tick register synchronization scheme is taken entirely from
  * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
@@ -1193,8 +1193,8 @@ void smp_send_stop(void)
 {
 }
 
-unsigned long __per_cpu_base;
-unsigned long __per_cpu_shift;
+unsigned long __per_cpu_base __read_mostly;
+unsigned long __per_cpu_shift __read_mostly;
 
 EXPORT_SYMBOL(__per_cpu_base);
 EXPORT_SYMBOL(__per_cpu_shift);
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index b40db389f90b..362b9c26871b 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -73,7 +73,7 @@ static __initdata struct sparc64_tick_ops dummy_tick_ops = {
 	.get_tick	= dummy_get_tick,
 };
 
-struct sparc64_tick_ops *tick_ops = &dummy_tick_ops;
+struct sparc64_tick_ops *tick_ops __read_mostly = &dummy_tick_ops;
 
 #define TICK_PRIV_BIT	(1UL << 63)
 
@@ -195,7 +195,7 @@ static unsigned long tick_add_tick(unsigned long adj, unsigned long offset)
 	return new_tick;
 }
 
-static struct sparc64_tick_ops tick_operations = {
+static struct sparc64_tick_ops tick_operations __read_mostly = {
 	.init_tick	=	tick_init_tick,
 	.get_tick	=	tick_get_tick,
 	.get_compare	=	tick_get_compare,
@@ -276,7 +276,7 @@ static unsigned long stick_add_compare(unsigned long adj)
 	return new_compare;
 }
 
-static struct sparc64_tick_ops stick_operations = {
+static struct sparc64_tick_ops stick_operations __read_mostly = {
 	.init_tick	=	stick_init_tick,
 	.get_tick	=	stick_get_tick,
 	.get_compare	=	stick_get_compare,
@@ -422,7 +422,7 @@ static unsigned long hbtick_add_compare(unsigned long adj)
 	return val;
 }
 
-static struct sparc64_tick_ops hbtick_operations = {
+static struct sparc64_tick_ops hbtick_operations __read_mostly = {
 	.init_tick	=	hbtick_init_tick,
 	.get_tick	=	hbtick_get_tick,
 	.get_compare	=	hbtick_get_compare,
@@ -437,10 +437,9 @@ static struct sparc64_tick_ops hbtick_operations = {
  * NOTE: On SUN5 systems the ticker interrupt comes in using 2
  *       interrupts, one at level14 and one with softint bit 0.
  */
-unsigned long timer_tick_offset;
-unsigned long timer_tick_compare;
+unsigned long timer_tick_offset __read_mostly;
 
-static unsigned long timer_ticks_per_nsec_quotient;
+static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
 
 #define TICK_SIZE (tick_nsec / 1000)
 
@@ -464,7 +463,7 @@ static inline void timer_check_rtc(void)
 
 static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 {
-	unsigned long ticks, pstate;
+	unsigned long ticks, compare, pstate;
 
 	write_seqlock(&xtime_lock);
 
@@ -483,14 +482,14 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 				     : "=r" (pstate)
 				     : "i" (PSTATE_IE));
 
-		timer_tick_compare = tick_ops->add_compare(timer_tick_offset);
+		compare = tick_ops->add_compare(timer_tick_offset);
 		ticks = tick_ops->get_tick();
 
 		/* Restore PSTATE_IE. */
 		__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
 				     : /* no outputs */
 				     : "r" (pstate));
-	} while (time_after_eq(ticks, timer_tick_compare));
+	} while (time_after_eq(ticks, compare));
 
 	timer_check_rtc();
 
@@ -506,11 +505,6 @@ void timer_tick_interrupt(struct pt_regs *regs)
 
 	do_timer(regs);
 
-	/*
-	 * Only keep timer_tick_offset uptodate, but don't set TICK_CMPR.
-	 */
-	timer_tick_compare = tick_ops->get_compare() + timer_tick_offset;
-
 	timer_check_rtc();
 
 	write_sequnlock(&xtime_lock);
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index 382fd6798bb9..950423da8a6a 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -32,6 +32,8 @@ SECTIONS
   .data1   : { *(.data1) }
   . = ALIGN(64);
   .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+  . = ALIGN(64);
+  .data.read_mostly : { *(.data.read_mostly) }
   _edata  =  .;
   PROVIDE (edata = .);
   .fixup   : { *(.fixup) }
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 2b66a36d85f0..f6b5a46c5f82 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -13,7 +13,7 @@
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
-#ifdef CONFIG_X86
+#if defined(CONFIG_X86) || defined(CONFIG_SPARC64)
 #define __read_mostly __attribute__((__section__(".data.read_mostly")))
 #else
 #define __read_mostly
-- 
cgit v1.2.3


From f7ceba360cce9af3fbc4e5a5b1bd40b570b7021c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 10 Jul 2005 19:29:45 -0700
Subject: [SPARC64]: Add syscall auditing support.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/entry.S       | 10 +++++-----
 arch/sparc64/kernel/ptrace.c      | 32 +++++++++++++++++++++++++++++---
 include/asm-sparc64/thread_info.h |  8 +++++---
 include/linux/audit.h             |  2 +-
 init/Kconfig                      |  2 +-
 5 files changed, 41 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index 8b7ed760c50e..d781f10adc52 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -1552,7 +1552,7 @@ sys_ptrace:	add		%sp, PTREGS_OFF, %o0
 		nop
 		.align		32
 1:		ldx		[%curptr + TI_FLAGS], %l5
-		andcc		%l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0
+		andcc		%l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0
 		be,pt		%icc, rtrap
 		 clr		%l6
 		add		%sp, PTREGS_OFF, %o0
@@ -1679,7 +1679,7 @@ linux_sparc_syscall32:
 
 	srl		%i5, 0, %o5				! IEU1
 	srl		%i2, 0, %o2				! IEU0	Group
-	andcc		%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0 ! IEU0	Group
+	andcc		%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0
 	bne,pn		%icc, linux_syscall_trace32		! CTI
 	 mov		%i0, %l5				! IEU1
 	call		%l7					! CTI	Group brk forced
@@ -1702,7 +1702,7 @@ linux_sparc_syscall:
 
 	mov		%i3, %o3				! IEU1
 	mov		%i4, %o4				! IEU0	Group
-	andcc		%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0 ! IEU1	Group+1 bubble
+	andcc		%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0
 	bne,pn		%icc, linux_syscall_trace		! CTI	Group
 	 mov		%i0, %l5				! IEU0
 2:	call		%l7					! CTI	Group brk forced
@@ -1730,7 +1730,7 @@ ret_sys_call:
 1:
 	cmp		%o0, -ERESTART_RESTARTBLOCK
 	bgeu,pn		%xcc, 1f
-	 andcc		%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %l6
+	 andcc		%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6
 80:
 	/* System call success, clear Carry condition code. */
 	andn		%g3, %g2, %g3
@@ -1745,7 +1745,7 @@ ret_sys_call:
 	/* System call failure, set Carry condition code.
 	 * Also, get abs(errno) to return to the process.
 	 */
-	andcc		%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %l6	
+	andcc		%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6	
 	sub		%g0, %o0, %o0
 	or		%g3, %g2, %g3
 	stx		%o0, [%sp + PTREGS_OFF + PT_V9_I0]
diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c
index c57dc9ea731b..23ad839d113f 100644
--- a/arch/sparc64/kernel/ptrace.c
+++ b/arch/sparc64/kernel/ptrace.c
@@ -19,6 +19,8 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/security.h>
+#include <linux/seccomp.h>
+#include <linux/audit.h>
 #include <linux/signal.h>
 
 #include <asm/asi.h>
@@ -633,10 +635,22 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p)
 	/* do the secure computing check first */
 	secure_computing(regs->u_regs[UREG_G1]);
 
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
+	if (unlikely(current->audit_context) && syscall_exit_p) {
+		unsigned long tstate = regs->tstate;
+		int result = AUDITSC_SUCCESS;
+
+		if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
+			result = AUDITSC_FAILURE;
+
+		audit_syscall_exit(current, result, regs->u_regs[UREG_I0]);
+	}
+
 	if (!(current->ptrace & PT_PTRACED))
-		return;
+		goto out;
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		goto out;
+
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
 				 ? 0x80 : 0));
 
@@ -649,4 +663,16 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p)
 		send_sig(current->exit_code, current, 1);
 		current->exit_code = 0;
 	}
+
+out:
+	if (unlikely(current->audit_context) && !syscall_exit_p)
+		audit_syscall_entry(current,
+				    (test_thread_flag(TIF_32BIT) ?
+				     AUDIT_ARCH_SPARC :
+				     AUDIT_ARCH_SPARC64),
+				    regs->u_regs[UREG_G1],
+				    regs->u_regs[UREG_I0],
+				    regs->u_regs[UREG_I1],
+				    regs->u_regs[UREG_I2],
+				    regs->u_regs[UREG_I3]);
 }
diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h
index 6b2fbb89bb63..a1d25c06f92a 100644
--- a/include/asm-sparc64/thread_info.h
+++ b/include/asm-sparc64/thread_info.h
@@ -221,7 +221,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define TIF_32BIT		7	/* 32-bit binary */
 #define TIF_NEWCHILD		8	/* just-spawned child process */
 #define TIF_SECCOMP		9	/* secure computing */
-#define TIF_POLLING_NRFLAG	10
+#define TIF_SYSCALL_AUDIT	10	/* syscall auditing active */
 #define TIF_SYSCALL_SUCCESS	11
 /* NOTE: Thread flags >= 12 should be ones we have no interest
  *       in using in assembly, else we can't use the mask as
@@ -229,6 +229,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
  */
 #define TIF_ABI_PENDING		12
 #define TIF_MEMDIE		13
+#define TIF_POLLING_NRFLAG	14
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -240,9 +241,10 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define _TIF_32BIT		(1<<TIF_32BIT)
 #define _TIF_NEWCHILD		(1<<TIF_NEWCHILD)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
-#define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
-#define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_SUCCESS	(1<<TIF_SYSCALL_SUCCESS)
+#define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
+#define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 
 #define _TIF_USER_WORK_MASK	((0xff << TI_FLAG_WSAVED_SHIFT) | \
 				 (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
diff --git a/include/linux/audit.h b/include/linux/audit.h
index bf2ad3ba72eb..68aba0c02e49 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -165,7 +165,7 @@
 #define AUDIT_ARCH_SH64		(EM_SH|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_SHEL64	(EM_SH|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_SPARC	(EM_SPARC)
-#define AUDIT_ARCH_SPARC64	(EM_SPARC64|__AUDIT_ARCH_64BIT)
+#define AUDIT_ARCH_SPARC64	(EM_SPARCV9|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_V850		(EM_V850|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 
diff --git a/init/Kconfig b/init/Kconfig
index b1091d7542ce..75755ef50c89 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -174,7 +174,7 @@ config AUDIT
 
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
-	depends on AUDIT && (X86 || PPC || PPC64 || ARCH_S390 || IA64 || UML)
+	depends on AUDIT && (X86 || PPC || PPC64 || ARCH_S390 || IA64 || UML || SPARC64)
 	default y if SECURITY_SELINUX
 	help
 	  Enable low-overhead system-call auditing infrastructure that
-- 
cgit v1.2.3