From 5e6557722e69840506eb8bc5a1edcdb4e447a917 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 6 Jul 2005 15:44:41 -0400 Subject: [PATCH] openfirmware: generate device table for userspace This converts the usage of struct of_match to struct of_device_id, similar to pci_device_id. This allows a device table to be generated, which can be parsed by depmod(8) to generate a map file for module loading. In order for hotplug to work with macio devices, patches to module-init-tools and hotplug must be applied. Those patches are available at: ftp://ftp.suse.com/pub/people/jeffm/linux/macio-hotplug/ Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 9b6d05172ed4..dce53ac1625d 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -174,6 +174,17 @@ struct serio_device_id { __u8 proto; }; +/* + * Struct used for matching a device + */ +struct of_device_id +{ + char name[32]; + char type[32]; + char compatible[128]; + void *data; +}; + /* PCMCIA */ -- cgit v1.2.3 From 40725181b74be6b0e3bdc8c05bd1e0b9873ec5cc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:51:52 -0700 Subject: [CRYPTO] Add support for low-level multi-block operations This patch adds hooks for cipher algorithms to implement multi-block ECB/CBC operations directly. This is expected to provide significant performance boots to the VIA Padlock. It could also be used for improving software implementations such as AES where operating on multiple blocks at a time may enable certain optimisations. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/cipher.c | 38 ++++++++++++++++++-------------------- crypto/internal.h | 5 ----- include/linux/crypto.h | 28 +++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/crypto/cipher.c b/crypto/cipher.c index c4243345b154..54c4a560070d 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -23,14 +23,6 @@ #include "internal.h" #include "scatterwalk.h" -struct cipher_desc { - struct crypto_tfm *tfm; - void (*crfn)(void *ctx, u8 *dst, const u8 *src); - unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, - const u8 *src, unsigned int nbytes); - void *info; -}; - static inline void xor_64(u8 *a, const u8 *b) { ((u32 *)a)[0] ^= ((u32 *)b)[0]; @@ -224,10 +216,11 @@ static int ecb_encrypt(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = ecb_process; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_ecb ?: ecb_process; return crypt(&desc, dst, src, nbytes); } @@ -238,10 +231,11 @@ static int ecb_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = ecb_process; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_ecb ?: ecb_process; return crypt(&desc, dst, src, nbytes); } @@ -252,10 +246,11 @@ static int cbc_encrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = cbc_process_encrypt; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt; desc.info = tfm->crt_cipher.cit_iv; return crypt(&desc, dst, src, nbytes); @@ -267,10 +262,11 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = cbc_process_encrypt; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt; desc.info = iv; return crypt(&desc, dst, src, nbytes); @@ -282,10 +278,11 @@ static int cbc_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = cbc_process_decrypt; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt; desc.info = tfm->crt_cipher.cit_iv; return crypt(&desc, dst, src, nbytes); @@ -297,10 +294,11 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = cbc_process_decrypt; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt; desc.info = iv; return crypt(&desc, dst, src, nbytes); diff --git a/crypto/internal.h b/crypto/internal.h index 964b9a60ca24..5ed383f7dce6 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -42,11 +42,6 @@ static inline void crypto_yield(struct crypto_tfm *tfm) cond_resched(); } -static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) -{ - return (void *)&tfm[1]; -} - struct crypto_alg *crypto_alg_lookup(const char *name); /* A far more intelligent version of this is planned. For now, just diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 387da6a3e58c..26ce01c25745 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -61,6 +61,15 @@ #define CRYPTO_DIR_DECRYPT 0 struct scatterlist; +struct crypto_tfm; + +struct cipher_desc { + struct crypto_tfm *tfm; + void (*crfn)(void *ctx, u8 *dst, const u8 *src); + unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, + const u8 *src, unsigned int nbytes); + void *info; +}; /* * Algorithms: modular crypto algorithm implementations, managed @@ -73,6 +82,19 @@ struct cipher_alg { unsigned int keylen, u32 *flags); void (*cia_encrypt)(void *ctx, u8 *dst, const u8 *src); void (*cia_decrypt)(void *ctx, u8 *dst, const u8 *src); + + unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); }; struct digest_alg { @@ -136,7 +158,6 @@ static inline int crypto_alg_available(const char *name, u32 flags) * and core processing logic. Managed via crypto_alloc_tfm() and * crypto_free_tfm(), as well as the various helpers below. */ -struct crypto_tfm; struct cipher_tfm { void *cit_iv; @@ -266,6 +287,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_digest.dia_digestsize; } +static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) +{ + return (void *)&tfm[1]; +} + /* * API wrappers. */ -- cgit v1.2.3 From 95477377995aefa2ec1654a9a3777bd57ea99146 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:52:09 -0700 Subject: [CRYPTO] Add alignmask for low-level cipher implementations The VIA Padlock device requires the input and output buffers to be aligned on 16-byte boundaries. This patch adds the alignmask attribute for low-level cipher implementations to indicate their alignment requirements. The mid-level crypt() function will copy the input/output buffers if they are not aligned correctly before they are passed to the low-level implementation. Strictly speaking, some of the software implementations require the buffers to be aligned on 4-byte boundaries as they do 32-bit loads. However, it is not clear whether it is better to copy the buffers or pay the penalty for unaligned loads/stores. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 6 ++++++ crypto/cipher.c | 43 ++++++++++++++++++++++++++++++++++++------- crypto/scatterwalk.h | 6 ++++++ include/linux/crypto.h | 1 + 4 files changed, 49 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index 394169a8577d..f55856b21992 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -168,6 +168,12 @@ int crypto_register_alg(struct crypto_alg *alg) { int ret = 0; struct crypto_alg *q; + + if (alg->cra_alignmask & (alg->cra_alignmask + 1)) + return -EINVAL; + + if (alg->cra_alignmask > PAGE_SIZE) + return -EINVAL; down_write(&crypto_alg_sem); diff --git a/crypto/cipher.c b/crypto/cipher.c index 54c4a560070d..85eb12f8e564 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -41,8 +41,10 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, unsigned int bsize) { - u8 src[bsize]; - u8 dst[bsize]; + unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask; + u8 buffer[bsize * 2 + alignmask]; + u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + u8 *dst = src + bsize; unsigned int n; n = scatterwalk_copychunks(src, in, bsize, 0); @@ -59,15 +61,24 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, static inline unsigned int crypt_fast(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, - unsigned int nbytes) + unsigned int nbytes, u8 *tmp) { u8 *src, *dst; src = in->data; dst = scatterwalk_samebuf(in, out) ? src : out->data; + if (tmp) { + memcpy(tmp, in->data, nbytes); + src = tmp; + dst = tmp; + } + nbytes = desc->prfn(desc, dst, src, nbytes); + if (tmp) + memcpy(out->data, tmp, nbytes); + scatterwalk_advance(in, nbytes); scatterwalk_advance(out, nbytes); @@ -87,6 +98,8 @@ static int crypt(const struct cipher_desc *desc, struct scatter_walk walk_in, walk_out; struct crypto_tfm *tfm = desc->tfm; const unsigned int bsize = crypto_tfm_alg_blocksize(tfm); + unsigned int alignmask = tfm->__crt_alg->cra_alignmask; + unsigned long buffer = 0; if (!nbytes) return 0; @@ -100,16 +113,27 @@ static int crypt(const struct cipher_desc *desc, scatterwalk_start(&walk_out, dst); for(;;) { - unsigned int n; + unsigned int n = nbytes; + u8 *tmp = NULL; + + if (!scatterwalk_aligned(&walk_in, alignmask) || + !scatterwalk_aligned(&walk_out, alignmask)) { + if (!buffer) { + buffer = __get_free_page(GFP_ATOMIC); + if (!buffer) + n = 0; + } + tmp = (u8 *)buffer; + } scatterwalk_map(&walk_in, 0); scatterwalk_map(&walk_out, 1); - n = scatterwalk_clamp(&walk_in, nbytes); + n = scatterwalk_clamp(&walk_in, n); n = scatterwalk_clamp(&walk_out, n); if (likely(n >= bsize)) - n = crypt_fast(desc, &walk_in, &walk_out, n); + n = crypt_fast(desc, &walk_in, &walk_out, n, tmp); else n = crypt_slow(desc, &walk_in, &walk_out, bsize); @@ -119,10 +143,15 @@ static int crypt(const struct cipher_desc *desc, scatterwalk_done(&walk_out, 1, nbytes); if (!nbytes) - return 0; + break; crypto_yield(tfm); } + + if (buffer) + free_page(buffer); + + return 0; } static unsigned int cbc_process_encrypt(const struct cipher_desc *desc, diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h index 5495bb970816..e79925c474a3 100644 --- a/crypto/scatterwalk.h +++ b/crypto/scatterwalk.h @@ -55,6 +55,12 @@ static inline void scatterwalk_advance(struct scatter_walk *walk, walk->len_this_segment -= nbytes; } +static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk, + unsigned int alignmask) +{ + return !(walk->offset & alignmask); +} + void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg); int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out); void scatterwalk_map(struct scatter_walk *walk, int out); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 26ce01c25745..ac9d49beecd3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -124,6 +124,7 @@ struct crypto_alg { u32 cra_flags; unsigned int cra_blocksize; unsigned int cra_ctxsize; + unsigned int cra_alignmask; const char cra_name[CRYPTO_MAX_ALG_NAME]; union { -- cgit v1.2.3 From fbdae9f3e7fb57c07cb0d973f113eb25da2e8ff2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:53:29 -0700 Subject: [CRYPTO] Ensure cit_iv is aligned correctly This patch ensures that cit_iv is aligned according to cra_alignmask by allocating it as part of the tfm structure. As a side effect the crypto layer will also guarantee that the tfm ctx area has enough space to be aligned by cra_alignmask. This allows us to remove the extra space reservation from the Padlock driver. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 32 +++++++++++++++++++++++++++++--- crypto/cipher.c | 15 +++++++++------ crypto/internal.h | 28 ++++++++++++++++++++++++++++ drivers/crypto/padlock-aes.c | 3 +-- include/linux/crypto.h | 5 +++++ 5 files changed, 72 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index 0b583d24f7fa..2d8d828c0ca2 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -125,20 +125,46 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) } } +static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags) +{ + unsigned int len; + + switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + default: + BUG(); + + case CRYPTO_ALG_TYPE_CIPHER: + len = crypto_cipher_ctxsize(alg, flags); + break; + + case CRYPTO_ALG_TYPE_DIGEST: + len = crypto_digest_ctxsize(alg, flags); + break; + + case CRYPTO_ALG_TYPE_COMPRESS: + len = crypto_compress_ctxsize(alg, flags); + break; + } + + return len + alg->cra_alignmask; +} + struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) { struct crypto_tfm *tfm = NULL; struct crypto_alg *alg; + unsigned int tfm_size; alg = crypto_alg_mod_lookup(name); if (alg == NULL) goto out; - - tfm = kmalloc(sizeof(*tfm) + alg->cra_ctxsize, GFP_KERNEL); + + tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags); + tfm = kmalloc(tfm_size, GFP_KERNEL); if (tfm == NULL) goto out_put; - memset(tfm, 0, sizeof(*tfm) + alg->cra_ctxsize); + memset(tfm, 0, tfm_size); tfm->__crt_alg = alg; diff --git a/crypto/cipher.c b/crypto/cipher.c index 85eb12f8e564..d3295ce14a57 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -41,7 +41,7 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, unsigned int bsize) { - unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask; + unsigned int alignmask = crypto_tfm_alg_alignmask(desc->tfm); u8 buffer[bsize * 2 + alignmask]; u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); u8 *dst = src + bsize; @@ -98,7 +98,7 @@ static int crypt(const struct cipher_desc *desc, struct scatter_walk walk_in, walk_out; struct crypto_tfm *tfm = desc->tfm; const unsigned int bsize = crypto_tfm_alg_blocksize(tfm); - unsigned int alignmask = tfm->__crt_alg->cra_alignmask; + unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); unsigned long buffer = 0; if (!nbytes) @@ -399,6 +399,8 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) } if (ops->cit_mode == CRYPTO_TFM_MODE_CBC) { + unsigned int align; + unsigned long addr; switch (crypto_tfm_alg_blocksize(tfm)) { case 8: @@ -418,9 +420,11 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) } ops->cit_ivsize = crypto_tfm_alg_blocksize(tfm); - ops->cit_iv = kmalloc(ops->cit_ivsize, GFP_KERNEL); - if (ops->cit_iv == NULL) - ret = -ENOMEM; + align = crypto_tfm_alg_alignmask(tfm) + 1; + addr = (unsigned long)crypto_tfm_ctx(tfm); + addr = ALIGN(addr, align); + addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align); + ops->cit_iv = (void *)addr; } out: @@ -429,5 +433,4 @@ out: void crypto_exit_cipher_ops(struct crypto_tfm *tfm) { - kfree(tfm->crt_cipher.cit_iv); } diff --git a/crypto/internal.h b/crypto/internal.h index 83b1b6d6d92b..68612874b5fd 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -16,6 +16,7 @@ #include #include #include +#include #include extern enum km_type crypto_km_types[]; @@ -61,6 +62,33 @@ static inline void crypto_init_proc(void) { } #endif +static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg, + int flags) +{ + return alg->cra_ctxsize; +} + +static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg, + int flags) +{ + unsigned int len = alg->cra_ctxsize; + + switch (flags & CRYPTO_TFM_MODE_MASK) { + case CRYPTO_TFM_MODE_CBC: + len = ALIGN(len, alg->cra_alignmask + 1); + len += alg->cra_blocksize; + break; + } + + return len; +} + +static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg, + int flags) +{ + return alg->cra_ctxsize; +} + int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags); diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index d2745ff4699c..c5b58fae95f2 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -465,8 +465,7 @@ static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx) + - PADLOCK_ALIGNMENT, + .cra_ctxsize = sizeof(struct aes_ctx), .cra_alignmask = PADLOCK_ALIGNMENT - 1, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ac9d49beecd3..5e2bcc636a02 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -288,6 +288,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_digest.dia_digestsize; } +static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_alignmask; +} + static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) { return (void *)&tfm[1]; -- cgit v1.2.3 From cb2c0233755429037462e16ea0d5497a0092738c Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 7 Jul 2005 17:56:03 -0700 Subject: [PATCH] export generic_drop_inode() to modules OCFS2 wants to mark an inode which has been orphaned by another node so that during final iput it takes the correct path through the VFS and can pass through the OCFS2 delete_inode callback. Since i_nlink can get out of date with other nodes, the best way I see to accomplish this is by clearing i_nlink on those inodes at drop_inode time. Other than this small amount of work, nothing different needs to happen, so I think it would be cleanest to be able to just call generic_drop_inode at the end of the OCFS2 drop_inode callback. Signed-off-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 4 +++- include/linux/fs.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 1f9a3a2b89bc..6d695037a0a3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1052,7 +1052,7 @@ static void generic_forget_inode(struct inode *inode) * inode when the usage count drops to zero, and * i_nlink is zero. */ -static void generic_drop_inode(struct inode *inode) +void generic_drop_inode(struct inode *inode) { if (!inode->i_nlink) generic_delete_inode(inode); @@ -1060,6 +1060,8 @@ static void generic_drop_inode(struct inode *inode) generic_forget_inode(inode); } +EXPORT_SYMBOL_GPL(generic_drop_inode); + /* * Called when we're dropping the last reference * to an inode. diff --git a/include/linux/fs.h b/include/linux/fs.h index 047bde30836a..302ec20838ca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1435,6 +1435,7 @@ extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); +extern void generic_drop_inode(struct inode *inode); extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); -- cgit v1.2.3 From 79b9ce311e192e9a31fd9f3cf1ee4a4edf9e2650 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 7 Jul 2005 17:56:04 -0700 Subject: [PATCH] print order information when OOM killing Dump the current allocation order when OOM killing. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sysrq.c | 2 +- include/linux/swap.h | 2 +- mm/oom_kill.c | 4 ++-- mm/page_alloc.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index af79805b5576..12d563c648f7 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -228,7 +228,7 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(void *ignored) { - out_of_memory(GFP_KERNEL); + out_of_memory(GFP_KERNEL, 0); } static DECLARE_WORK(moom_work, moom_callback, NULL); diff --git a/include/linux/swap.h b/include/linux/swap.h index 2343f999e6e1..c75954f2d868 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -148,7 +148,7 @@ struct swap_list_t { #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) /* linux/mm/oom_kill.c */ -extern void out_of_memory(unsigned int __nocast gfp_mask); +extern void out_of_memory(unsigned int __nocast gfp_mask, int order); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 59666d905f19..e20d559edbaf 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -253,12 +253,12 @@ static struct mm_struct *oom_kill_process(struct task_struct *p) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -void out_of_memory(unsigned int __nocast gfp_mask) +void out_of_memory(unsigned int __nocast gfp_mask, int order) { struct mm_struct *mm = NULL; task_t * p; - printk("oom-killer: gfp_mask=0x%x\n", gfp_mask); + printk("oom-killer: gfp_mask=0x%x, order=%d\n", gfp_mask, order); /* print memory stats */ show_mem(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3c9f7f881125..7fbd3ea8765c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -936,7 +936,7 @@ rebalance: goto got_pg; } - out_of_memory(gfp_mask); + out_of_memory(gfp_mask, order); goto restart; } -- cgit v1.2.3 From cf36680887d6d942d2119c1ff1dfb2428b0f21f4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:13 -0700 Subject: [PATCH] move ioprio syscalls into syscalls.h - Make ioprio syscalls return long, like set/getpriority syscalls. - Move function prototypes into syscalls.h so we can pick them up in the 32/64bit compat code. Signed-off-by: Anton Blanchard Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ioprio.c | 4 ++-- include/linux/ioprio.h | 3 --- include/linux/syscalls.h | 3 +++ 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/ioprio.c b/fs/ioprio.c index 663e420636d6..97e1f088ba00 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -43,7 +43,7 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) return 0; } -asmlinkage int sys_ioprio_set(int which, int who, int ioprio) +asmlinkage long sys_ioprio_set(int which, int who, int ioprio) { int class = IOPRIO_PRIO_CLASS(ioprio); int data = IOPRIO_PRIO_DATA(ioprio); @@ -115,7 +115,7 @@ asmlinkage int sys_ioprio_set(int which, int who, int ioprio) return ret; } -asmlinkage int sys_ioprio_get(int which, int who) +asmlinkage long sys_ioprio_get(int which, int who) { struct task_struct *g, *p; struct user_struct *user; diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 8a453a0b5e4b..88d5961f7a3f 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -34,9 +34,6 @@ enum { */ #define IOPRIO_BE_NR (8) -asmlinkage int sys_ioprio_set(int, int, int); -asmlinkage int sys_ioprio_get(int, int); - enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 52830b6d94e5..425f58c8ea4a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -506,4 +506,7 @@ asmlinkage long sys_request_key(const char __user *_type, asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); +asmlinkage long sys_ioprio_set(int which, int who, int ioprio); +asmlinkage long sys_ioprio_get(int which, int who); + #endif -- cgit v1.2.3 From e00d9967e3addea86dded46deefc5daec5d52e5a Mon Sep 17 00:00:00 2001 From: Bernard Blackham Date: Thu, 7 Jul 2005 17:56:42 -0700 Subject: [PATCH] pm: fix u32 vs. pm_message_t confusion in cpufreq Fix u32 vs pm_message_t confusion in cpufreq. Signed-off-by: Bernard Blackham Signed-off-by: Pavel Machek Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/platforms/pmac_cpufreq.c | 2 +- drivers/cpufreq/cpufreq.c | 4 ++-- include/linux/cpufreq.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc/platforms/pmac_cpufreq.c b/arch/ppc/platforms/pmac_cpufreq.c index 5fdd4f607a40..c0605244edda 100644 --- a/arch/ppc/platforms/pmac_cpufreq.c +++ b/arch/ppc/platforms/pmac_cpufreq.c @@ -452,7 +452,7 @@ static u32 __pmac read_gpio(struct device_node *np) return offset; } -static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, u32 state) +static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, pm_message_t pmsg) { /* Ok, this could be made a bit smarter, but let's be robust for now. We * always force a speed change to high speed before sleep, to make sure diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bf62dfe4976a..7a7859dd0d98 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -869,7 +869,7 @@ EXPORT_SYMBOL(cpufreq_get); * cpufreq_suspend - let the low level driver prepare for suspend */ -static int cpufreq_suspend(struct sys_device * sysdev, u32 state) +static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg) { int cpu = sysdev->id; unsigned int ret = 0; @@ -897,7 +897,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, u32 state) } if (cpufreq_driver->suspend) { - ret = cpufreq_driver->suspend(cpu_policy, state); + ret = cpufreq_driver->suspend(cpu_policy, pmsg); if (ret) { printk(KERN_ERR "cpufreq: suspend failed in ->suspend " "step on CPU %u\n", cpu_policy->cpu); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 927daa86c9b3..ff7f80f48df1 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -201,7 +201,7 @@ struct cpufreq_driver { /* optional */ int (*exit) (struct cpufreq_policy *policy); - int (*suspend) (struct cpufreq_policy *policy, u32 state); + int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); struct freq_attr **attr; }; -- cgit v1.2.3 From a39722034ae37f80a1803bf781fe3fe1b03e20bc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 7 Jul 2005 17:56:56 -0700 Subject: [PATCH] page_uptodate locking scalability Use a bit spin lock in the first buffer of the page to synchronise asynch IO buffer completions, instead of the global page_uptodate_lock, which is showing some scalabilty problems. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 25 +++++++++++++++++-------- include/linux/buffer_head.h | 3 +++ 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 561e63a14966..6a25d7df89b1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -513,8 +513,8 @@ static void free_more_memory(void) */ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) { - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; + struct buffer_head *first; struct buffer_head *tmp; struct page *page; int page_uptodate = 1; @@ -536,7 +536,9 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) * two buffer heads end IO at almost the same time and both * decide that the page is now completely done. */ - spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -549,7 +551,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); /* * If none of the buffers had errors and they are all @@ -561,7 +564,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } @@ -572,8 +576,8 @@ still_busy: void end_buffer_async_write(struct buffer_head *bh, int uptodate) { char b[BDEVNAME_SIZE]; - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; + struct buffer_head *first; struct buffer_head *tmp; struct page *page; @@ -594,7 +598,10 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) SetPageError(page); } - spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + clear_buffer_async_write(bh); unlock_buffer(bh); tmp = bh->b_this_page; @@ -605,12 +612,14 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); end_page_writeback(page); return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 802c91e9b3da..90828493791f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -19,6 +19,9 @@ enum bh_state_bits { BH_Dirty, /* Is dirty */ BH_Lock, /* Is locked */ BH_Req, /* Has been submitted for I/O */ + BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise + * IO completion of other buffers in the page + */ BH_Mapped, /* Has a disk mapping */ BH_New, /* Disk mapping was newly created by get_block */ -- cgit v1.2.3 From 0db925af1db5f3dfe1691c35b39496e2baaff9c9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Jul 2005 17:56:58 -0700 Subject: [PATCH] propagate __nocast annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++-- include/linux/slab.h | 4 ++-- include/linux/string.h | 2 +- mm/mempool.c | 2 +- mm/slab.c | 12 +++++++----- 5 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8d6bf608b199..7c7400137e97 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -12,8 +12,8 @@ struct vm_area_struct; * GFP bitmasks.. */ /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ -#define __GFP_DMA 0x01 -#define __GFP_HIGHMEM 0x02 +#define __GFP_DMA 0x01u +#define __GFP_HIGHMEM 0x02u /* * Action modifiers - doesn't change the zoning diff --git a/include/linux/slab.h b/include/linux/slab.h index 76cf7e60216c..4c8e552471b0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -65,7 +65,7 @@ extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags); +extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -105,7 +105,7 @@ extern unsigned int ksize(const void *); #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node(kmem_cache_t *, int flags, int node); -extern void *kmalloc_node(size_t size, int flags, int node); +extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node); #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) { diff --git a/include/linux/string.h b/include/linux/string.h index 93994c613095..dab2652acbd8 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -88,7 +88,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif -extern char *kstrdup(const char *s, int gfp); +extern char *kstrdup(const char *s, unsigned int __nocast gfp); #ifdef __cplusplus } diff --git a/mm/mempool.c b/mm/mempool.c index 9a72f7d918fa..65f2957b8d51 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -205,7 +205,7 @@ void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask) void *element; unsigned long flags; wait_queue_t wait; - int gfp_temp; + unsigned int gfp_temp; might_sleep_if(gfp_mask & __GFP_WAIT); diff --git a/mm/slab.c b/mm/slab.c index e57abd45eede..c9e706db4634 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -584,7 +584,8 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep) return cachep->array[smp_processor_id()]; } -static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags) +static inline kmem_cache_t *__find_general_cachep(size_t size, + unsigned int __nocast gfpflags) { struct cache_sizes *csizep = malloc_sizes; @@ -608,7 +609,8 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags) return csizep->cs_cachep; } -kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags) +kmem_cache_t *kmem_find_general_cachep(size_t size, + unsigned int __nocast gfpflags) { return __find_general_cachep(size, gfpflags); } @@ -2100,7 +2102,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags) #if DEBUG static void * cache_alloc_debugcheck_after(kmem_cache_t *cachep, - unsigned long flags, void *objp, void *caller) + unsigned int __nocast flags, void *objp, void *caller) { if (!objp) return objp; @@ -2442,7 +2444,7 @@ got_slabp: } EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmalloc_node(size_t size, int flags, int node) +void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) { kmem_cache_t *cachep; @@ -3094,7 +3096,7 @@ unsigned int ksize(const void *objp) * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory */ -char *kstrdup(const char *s, int gfp) +char *kstrdup(const char *s, unsigned int __nocast gfp) { size_t len; char *buf; -- cgit v1.2.3 From 6c036527a630720063b67d9a65455e8caca2c8fa Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 7 Jul 2005 17:56:59 -0700 Subject: [PATCH] mostly_read data section Add a new section called ".data.read_mostly" for data items that are read frequently and rarely written to like cpumaps etc. If these maps are placed in the .data section then these frequenly read items may end up in cachelines with data is is frequently updated. In that case all processors in an SMP system must needlessly reload the cachelines again and again containing elements of those frequently used variables. The ability to share these cachelines will allow each cpu in an SMP system to keep local copies of those shared cachelines thereby optimizing performance. Signed-off-by: Alok N Kataria Signed-off-by: Shobhit Dayal Signed-off-by: Christoph Lameter Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel.c | 2 +- arch/i386/kernel/smpboot.c | 18 +++++++++--------- arch/i386/kernel/time.c | 2 +- arch/i386/kernel/timers/timer_hpet.c | 4 ++-- arch/i386/kernel/vmlinux.lds.S | 3 +++ arch/x86_64/kernel/vmlinux.lds.S | 4 ++++ drivers/char/random.c | 2 +- fs/bio.c | 2 +- include/linux/cache.h | 6 ++++++ kernel/profile.c | 4 ++-- lib/radix-tree.c | 2 +- 11 files changed, 31 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 96a75d045835..a2c33c1a46c5 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -25,7 +25,7 @@ extern int trap_init_f00f_bug(void); /* * Alignment at which movsl is preferred for bulk memory copies. */ -struct movsl_mask movsl_mask; +struct movsl_mask movsl_mask __read_mostly; #endif void __devinit early_intel_workaround(struct cpuinfo_x86 *c) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index d66bf489a2e9..8ac8e9fd5614 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -68,21 +68,21 @@ EXPORT_SYMBOL(smp_num_siblings); #endif /* Package ID of each logical CPU */ -int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(phys_proc_id); /* Core ID of each logical CPU */ -int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); -cpumask_t cpu_sibling_map[NR_CPUS]; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); -cpumask_t cpu_core_map[NR_CPUS]; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* bitmap of online cpus */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; @@ -100,7 +100,7 @@ static int __devinitdata tsc_sync_disabled; struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); -u8 x86_cpu_to_apicid[NR_CPUS] = +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0xff }; EXPORT_SYMBOL(x86_cpu_to_apicid); @@ -550,10 +550,10 @@ extern struct { #ifdef CONFIG_NUMA /* which logical CPUs are on which nodes */ -cpumask_t node_2_cpu_mask[MAX_NUMNODES] = +cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly = { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; /* which node each logical CPU is on */ -int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; +int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; EXPORT_SYMBOL(cpu_2_node); /* set up a mapping between cpu and node. */ @@ -581,7 +581,7 @@ static inline void unmap_cpu_to_node(int cpu) #endif /* CONFIG_NUMA */ -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; static void map_cpu_to_logical_apicid(void) { diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 2854c357377f..0ee9dee8af06 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -struct timer_opts *cur_timer = &timer_none; +struct timer_opts *cur_timer __read_mostly = &timer_none; /* * This is a special lock that is owned by the CPU and holds the index diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d766e0963ac1..ef8dac5dd33b 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -18,7 +18,7 @@ #include "mach_timer.h" #include -static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */ +static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */ static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ static unsigned long hpet_last; /* hpet counter value at last tick*/ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ @@ -180,7 +180,7 @@ static int __init init_hpet(char* override) /************************************************************/ /* tsc timer_opts struct */ -static struct timer_opts timer_hpet = { +static struct timer_opts timer_hpet __read_mostly = { .name = "hpet", .mark_offset = mark_offset_hpet, .get_offset = get_offset_hpet, diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 7e01a528a83a..761972f8cb6c 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -57,6 +57,9 @@ SECTIONS *(.data.cacheline_aligned) } + /* rarely changed data like cpu maps */ + . = ALIGN(32); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } _edata = .; /* End of data section */ . = ALIGN(THREAD_SIZE); /* init_task */ diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 73389f51c4e5..61c12758ca70 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -56,6 +56,10 @@ SECTIONS .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + } #define VSYSCALL_ADDR (-10*1024*1024) #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) diff --git a/drivers/char/random.c b/drivers/char/random.c index 460b5d475edd..6b11d6b2129f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -271,7 +271,7 @@ static int random_write_wakeup_thresh = 128; * samples to avoid wasting CPU time and reduce lock contention. */ -static int trickle_thresh = INPUT_POOL_WORDS * 28; +static int trickle_thresh __read_mostly = INPUT_POOL_WORDS * 28; static DEFINE_PER_CPU(int, trickle_count) = 0; diff --git a/fs/bio.c b/fs/bio.c index 3a1472acc361..ca8f7a850fe3 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -52,7 +52,7 @@ struct biovec_slab { */ #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] = { +static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV diff --git a/include/linux/cache.h b/include/linux/cache.h index 4d767b93738a..2b66a36d85f0 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -13,6 +13,12 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif +#ifdef CONFIG_X86 +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#else +#define __read_mostly +#endif + #ifndef ____cacheline_aligned #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif diff --git a/kernel/profile.c b/kernel/profile.c index ad8cbb75ffa2..f89248e6d704 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -35,11 +35,11 @@ struct profile_hit { #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) /* Oprofile timer tick hook */ -int (*timer_hook)(struct pt_regs *); +int (*timer_hook)(struct pt_regs *) __read_mostly; static atomic_t *prof_buffer; static unsigned long prof_len, prof_shift; -static int prof_on; +static int prof_on __read_mostly; static cpumask_t prof_cpu_mask = CPU_MASK_ALL; #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 04d664377f2c..10bed1c8c3c3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -58,7 +58,7 @@ struct radix_tree_path { #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) -static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH]; +static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; /* * Radix tree node cache. -- cgit v1.2.3 From 1ce88cf466f7b6078b14d67d186a3d7c19dd5609 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:24 -0700 Subject: [PATCH] namespace.c: fix race in mark_mounts_for_expiry() This patch fixes a race found by Ram in mark_mounts_for_expiry() in fs/namespace.c. The bug can only be triggered with simultaneous exiting of a process having a private namespace, and expiry of a mount from within that namespace. It's practically impossible to trigger, and I haven't even tried. But still, a bug is a bug. The race happens when put_namespace() is called by another task, while mark_mounts_for_expiry() is between atomic_read() and get_namespace(). In that case get_namespace() will be called on an already dead namespace with unforeseeable results. The solution was suggested by Al Viro, with his own words: Instead of screwing with atomic_read() in there, why don't we simply do the following: a) atomic_dec_and_lock() in put_namespace() b) __put_namespace() called without dropping lock c) the first thing done by __put_namespace would be struct vfsmount *root = namespace->root; namespace->root = NULL; spin_unlock(...); .... umount_tree(root); ... d) check in mark_... would be simply namespace && namespace->root. And we are all set; no screwing around with atomic_read(), no magic at all. Dying namespace gets NULL ->root. All changes of ->root happen under spinlock. If under a spinlock we see non-NULL ->mnt_namespace, it won't be freed until we drop the lock (we will set ->mnt_namespace to NULL under that lock before we get to freeing namespace). If under a spinlock we see non-NULL ->mnt_namespace and ->mnt_namespace->root, we can grab a reference to namespace and be sure that it won't go away. Signed-off-by: Miklos Szeredi Acked-by: Al Viro Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 7 +++++-- include/linux/namespace.h | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index a0d0ef1f1a48..9d17541ebafa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -869,7 +869,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ namespace = mnt->mnt_namespace; - if (!namespace || atomic_read(&namespace->count) <= 0) + if (!namespace || !namespace->root) continue; get_namespace(namespace); @@ -1450,9 +1450,12 @@ void __init mnt_init(unsigned long mempages) void __put_namespace(struct namespace *namespace) { + struct vfsmount *root = namespace->root; + namespace->root = NULL; + spin_unlock(&vfsmount_lock); down_write(&namespace->sem); spin_lock(&vfsmount_lock); - umount_tree(namespace->root); + umount_tree(root); spin_unlock(&vfsmount_lock); up_write(&namespace->sem); kfree(namespace); diff --git a/include/linux/namespace.h b/include/linux/namespace.h index 697991b69f9b..0e5a86f13b2f 100644 --- a/include/linux/namespace.h +++ b/include/linux/namespace.h @@ -17,7 +17,8 @@ extern void __put_namespace(struct namespace *namespace); static inline void put_namespace(struct namespace *namespace) { - if (atomic_dec_and_test(&namespace->count)) + if (atomic_dec_and_lock(&namespace->count, &vfsmount_lock)) + /* releases vfsmount_lock */ __put_namespace(namespace); } -- cgit v1.2.3 From 55e700b924f9e0ba24e3a071d1097d050b05abe6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename mnt_fslink to mnt_expire This patch renames vfsmount->mnt_fslink to something a little more descriptive: vfsmount->mnt_expire. Signed-off-by: Mike Waychison Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 24 ++++++++++++------------ include/linux/mount.h | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index b168dc37eaab..587eb0d707ee 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -61,7 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); - INIT_LIST_HEAD(&mnt->mnt_fslink); + INIT_LIST_HEAD(&mnt->mnt_expire); if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -165,8 +165,8 @@ clone_mnt(struct vfsmount *old, struct dentry *root) /* stick the duplicate mount on the same expiry list * as the original if that was on one */ spin_lock(&vfsmount_lock); - if (!list_empty(&old->mnt_fslink)) - list_add(&mnt->mnt_fslink, &old->mnt_fslink); + if (!list_empty(&old->mnt_expire)) + list_add(&mnt->mnt_expire, &old->mnt_expire); spin_unlock(&vfsmount_lock); } return mnt; @@ -351,7 +351,7 @@ static void umount_tree(struct vfsmount *mnt) while (!list_empty(&kill)) { mnt = list_entry(kill.next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); - list_del_init(&mnt->mnt_fslink); + list_del_init(&mnt->mnt_expire); if (mnt->mnt_parent == mnt) { spin_unlock(&vfsmount_lock); } else { @@ -645,7 +645,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) if (mnt) { /* stop bind mounts from expiring */ spin_lock(&vfsmount_lock); - list_del_init(&mnt->mnt_fslink); + list_del_init(&mnt->mnt_expire); spin_unlock(&vfsmount_lock); err = graft_tree(mnt, nd); @@ -744,7 +744,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) /* if the mount is moved, it should no longer be expire * automatically */ - list_del_init(&old_nd.mnt->mnt_fslink); + list_del_init(&old_nd.mnt->mnt_expire); out2: spin_unlock(&vfsmount_lock); out1: @@ -814,7 +814,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, if (err == 0 && fslist) { /* add to the specified expiration list */ spin_lock(&vfsmount_lock); - list_add_tail(&newmnt->mnt_fslink, fslist); + list_add_tail(&newmnt->mnt_expire, fslist); spin_unlock(&vfsmount_lock); } @@ -869,7 +869,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) * Someone brought it back to life whilst we didn't have any * locks held so return it to the expiration list */ - list_add_tail(&mnt->mnt_fslink, mounts); + list_add_tail(&mnt->mnt_expire, mounts); spin_unlock(&vfsmount_lock); } } @@ -896,13 +896,13 @@ void mark_mounts_for_expiry(struct list_head *mounts) * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ - list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) { + list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { if (!xchg(&mnt->mnt_expiry_mark, 1) || atomic_read(&mnt->mnt_count) != 1) continue; mntget(mnt); - list_move(&mnt->mnt_fslink, &graveyard); + list_move(&mnt->mnt_expire, &graveyard); } /* @@ -912,8 +912,8 @@ void mark_mounts_for_expiry(struct list_head *mounts) * - dispose of the corpse */ while (!list_empty(&graveyard)) { - mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink); - list_del_init(&mnt->mnt_fslink); + mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire); + list_del_init(&mnt->mnt_expire); /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 8b8d3b9beefd..196d2d6de4a3 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -34,7 +34,7 @@ struct vfsmount int mnt_expiry_mark; /* true if marked for expiry */ char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; - struct list_head mnt_fslink; /* link in fs-specific expiry list */ + struct list_head mnt_expire; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ }; -- cgit v1.2.3 From 751c404b8f63e8199d5f2f8f2bcfd69b41d11caa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename _mntput to mntput_no_expire This patch renames _mntput() to something a little more descriptive: mntput_no_expire(). Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 2 +- include/linux/mount.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index fa8df81ce8ca..1d93cb4f7c5f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -314,7 +314,7 @@ void path_release(struct nameidata *nd) void path_release_on_umount(struct nameidata *nd) { dput(nd->dentry); - _mntput(nd->mnt); + mntput_no_expire(nd->mnt); } /* diff --git a/include/linux/mount.h b/include/linux/mount.h index 196d2d6de4a3..74b4727a4e30 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -47,7 +47,7 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) extern void __mntput(struct vfsmount *mnt); -static inline void _mntput(struct vfsmount *mnt) +static inline void mntput_no_expire(struct vfsmount *mnt) { if (mnt) { if (atomic_dec_and_test(&mnt->mnt_count)) @@ -59,7 +59,7 @@ static inline void mntput(struct vfsmount *mnt) { if (mnt) { mnt->mnt_expiry_mark = 0; - _mntput(mnt); + mntput_no_expire(mnt); } } -- cgit v1.2.3 From a6ccbbb8865101d83c2e716f08feae1da1c48584 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:11 -0700 Subject: [PATCH] nfsd4: fix sync'ing of recovery directory We need to fsync the recovery directory after writing to it, but we weren't doing this correctly. (For example, we weren't taking the i_sem when calling ->fsync().) Just reuse the existing nfsd fsync code instead. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4recover.c | 29 ++++++++--------------------- fs/nfsd/vfs.c | 2 +- include/linux/nfsd/nfsd.h | 1 + 3 files changed, 10 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 095f1740f3ae..bb40083b6b7d 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -119,25 +119,12 @@ out: return status; } -static int -nfsd4_rec_fsync(struct dentry *dentry) +static void +nfsd4_sync_rec_dir(void) { - struct file *filp; - int status = nfs_ok; - - dprintk("NFSD: nfs4_fsync_rec_dir\n"); - filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR); - if (IS_ERR(filp)) { - status = PTR_ERR(filp); - goto out; - } - if (filp->f_op && filp->f_op->fsync) - status = filp->f_op->fsync(filp, filp->f_dentry, 0); - fput(filp); -out: - if (status) - printk("nfsd4: unable to sync recovery directory\n"); - return status; + down(&rec_dir.dentry->d_inode->i_sem); + nfsd_sync_dir(rec_dir.dentry); + up(&rec_dir.dentry->d_inode->i_sem); } int @@ -176,7 +163,7 @@ out_unlock: up(&rec_dir.dentry->d_inode->i_sem); if (status == 0) { clp->cl_firststate = 1; - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); } nfs4_reset_user(uid, gid); dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); @@ -331,7 +318,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); nfs4_reset_user(uid, gid); if (status == 0) - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); if (status) printk("NFSD: Failed to remove expired client state directory" " %.*s\n", HEXDIR_LEN, clp->cl_recdir); @@ -362,7 +349,7 @@ nfsd4_recdir_purge_old(void) { return; status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); if (status == 0) - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); if (status) printk("nfsd4: failed to purge old clients from recovery" " directory %s\n", rec_dir.dentry->d_name.name); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index be24ead89d94..5e0bf3917607 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -733,7 +733,7 @@ nfsd_sync(struct file *filp) up(&inode->i_sem); } -static void +void nfsd_sync_dir(struct dentry *dp) { nfsd_dosync(NULL, dp, dp->d_inode->i_fop); diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 5791dfd30dd0..c2da1b62d416 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -124,6 +124,7 @@ int nfsd_statfs(struct svc_rqst *, struct svc_fh *, int nfsd_notify_change(struct inode *, struct iattr *); int nfsd_permission(struct svc_export *, struct dentry *, int); +void nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL -- cgit v1.2.3 From 7fb64cee34f5dc743f697041717cafda8a94b5ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:20 -0700 Subject: [PATCH] nfsd4: seqid comments Add some comments on the use of so_seqid, in an attempt to avoid some of the confusion outlined in the previous patch.... Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4xdr.c | 8 ++++---- include/linux/nfsd/state.h | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5207068cde1a..1515c5b8096f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1210,10 +1210,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) save = resp->p; /* - * Routine for encoding the result of a - * "seqid-mutating" NFSv4 operation. This is - * where seqids are incremented, and the - * replay cache is filled. + * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This + * is where sequence id's are incremented, and the replay cache is filled. + * Note that we increment sequence id's here, at the last moment, so we're sure + * we know whether the error to be returned is a sequence id mutating error. */ #define ENCODE_SEQID_OP_TAIL(stateowner) do { \ diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a84a3fa99be1..2d19431f47ea 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -203,7 +203,9 @@ struct nfs4_stateowner { int so_is_open_owner; /* 1=openowner,0=lockowner */ u32 so_id; struct nfs4_client * so_client; - u32 so_seqid; + /* after increment in ENCODE_SEQID_OP_TAIL, represents the next + * sequence id expected from the client: */ + u32 so_seqid; struct xdr_netobj so_owner; /* open owner name */ int so_confirmed; /* successful OPEN_CONFIRM? */ struct nfs4_replay so_replay; -- cgit v1.2.3 From b700949b781480819e53bdc38a53f053226dd75e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:23 -0700 Subject: [PATCH] nfsd4: return better error on io incompatible with open mode from RFC 3530: "Share reservations are established by OPEN operations and by their nature are mandatory in that when the OPEN denies READ or WRITE operations, that denial results in such operations being rejected with error NFS4ERR_LOCKED." (Note that share_denied is really only a legal error for OPEN.) Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 2 +- include/linux/nfsd/nfsd.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b96714ae3dd7..3647c942915e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1295,7 +1295,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) fp = find_file(ino); if (!fp) return nfs_ok; - ret = nfserr_share_denied; + ret = nfserr_locked; /* Search for conflicting share reservations */ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { if (test_bit(deny_type, &stp->st_deny_bmap) || diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index c2da1b62d416..6d5a24f3fc6d 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -231,6 +231,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_reclaim_bad __constant_htonl(NFSERR_RECLAIM_BAD) #define nfserr_badname __constant_htonl(NFSERR_BADNAME) #define nfserr_cb_path_down __constant_htonl(NFSERR_CB_PATH_DOWN) +#define nfserr_locked __constant_htonl(NFSERR_LOCKED) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. -- cgit v1.2.3 From 4c4cd222ee329025840bc2f8cebf71d36c62440c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:27 -0700 Subject: [PATCH] nfsd4: check lock type against openmode. We shouldn't be allowing, e.g., write locks on files not open for read. To enforce this, we add a pointer from the lock stateid back to the open stateid it came from, so that the check will continue to be correct even after the open is upgraded or downgraded. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 49 +++++++++++++++++++++++++++++++--------------- include/linux/nfsd/state.h | 5 +++++ 2 files changed, 38 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 59b214f01b6d..b83f8fb441e1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1160,6 +1160,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * stp->st_deny_bmap = 0; __set_bit(open->op_share_access, &stp->st_access_bmap); __set_bit(open->op_share_deny, &stp->st_deny_bmap); + stp->st_openstp = NULL; } static void @@ -2158,12 +2159,18 @@ out: return status; } +static inline int +setlkflg (int type) +{ + return (type == NFS4_READW_LT || type == NFS4_READ_LT) ? + RD_STATE : WR_STATE; +} /* * Checks for sequence id mutating operations. */ static int -nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) +nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) { struct nfs4_stateid *stp; struct nfs4_stateowner *sop; @@ -2201,21 +2208,31 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei goto check_replay; } - /* for new lock stateowners: - * check that the lock->v.new.open_stateid - * refers to an open stateowner - * - * check that the lockclid (nfs4_lock->v.new.clientid) is the same - * as the open_stateid->st_stateowner->so_client->clientid - */ - if (lockclid) { + if (lock) { struct nfs4_stateowner *sop = stp->st_stateowner; + clientid_t *lockclid = &lock->v.new.clientid; struct nfs4_client *clp = sop->so_client; + int lkflg = 0; + int status; + + lkflg = setlkflg(lock->lk_type); + + if (lock->lk_is_new) { + if (!sop->so_is_open_owner) + return nfserr_bad_stateid; + if (!cmp_clid(&clp->cl_clientid, lockclid)) + return nfserr_bad_stateid; + /* stp is the open stateid */ + status = nfs4_check_openmode(stp, lkflg); + if (status) + return status; + } else { + /* stp is the lock stateid */ + status = nfs4_check_openmode(stp->st_openstp, lkflg); + if (status) + return status; + } - if (!sop->so_is_open_owner) - return nfserr_bad_stateid; - if (!cmp_clid(&clp->cl_clientid, lockclid)) - return nfserr_bad_stateid; } if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { @@ -2642,6 +2659,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */ stp->st_access_bmap = open_stp->st_access_bmap; stp->st_deny_bmap = open_stp->st_deny_bmap; + stp->st_openstp = open_stp; out: return stp; @@ -2697,8 +2715,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock lock->lk_new_open_seqid, &lock->lk_new_open_stateid, CHECK_FH | OPEN_STATE, - &open_sop, &open_stp, - &lock->v.new.clientid); + &open_sop, &open_stp, lock); if (status) goto out; /* create lockowner and lock stateid */ @@ -2726,7 +2743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, CHECK_FH | LOCK_STATE, - &lock->lk_stateowner, &lock_stp, NULL); + &lock->lk_stateowner, &lock_stp, lock); if (status) goto out; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 2d19431f47ea..8bf23cf8b603 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -237,6 +237,10 @@ struct nfs4_file { * st_perlockowner: (open stateid) list of lock nfs4_stateowners * st_access_bmap: used only for open stateid * st_deny_bmap: used only for open stateid +* st_openstp: open stateid lock stateid was derived from +* +* XXX: open stateids and lock stateids have diverged sufficiently that +* we should consider defining separate structs for the two cases. */ struct nfs4_stateid { @@ -250,6 +254,7 @@ struct nfs4_stateid { struct file * st_vfs_file; unsigned long st_access_bmap; unsigned long st_deny_bmap; + struct nfs4_stateid * st_openstp; }; /* flags for preprocess_seqid_op() */ -- cgit v1.2.3 From 86a76caf8705e3524e15f343f3c4806939a06dc8 Mon Sep 17 00:00:00 2001 From: Victor Fusco Date: Fri, 8 Jul 2005 14:57:47 -0700 Subject: [NET]: Fix sparse warnings From: Victor Fusco Fix the sparse warning "implicit cast to nocast type" Signed-off-by: Victor Fusco Signed-off-by: Domen Puncer Signed-off-by: David S. Miller --- include/linux/skbuff.h | 29 ++++++++++++++++++----------- include/net/sock.h | 18 +++++++++++------- include/net/tcp.h | 3 ++- net/core/dev.c | 2 +- net/core/skbuff.c | 17 ++++++++++------- net/core/sock.c | 11 +++++++---- net/ipv4/tcp_output.c | 2 +- 7 files changed, 50 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b950413495..5d4a990d5577 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -300,20 +300,26 @@ struct sk_buff { #include extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff *alloc_skb(unsigned int size, int priority); +extern struct sk_buff *alloc_skb(unsigned int size, + unsigned int __nocast priority); extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int priority); + unsigned int size, + unsigned int __nocast priority); extern void kfree_skbmem(struct sk_buff *skb); -extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority); -extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority); -extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask); +extern struct sk_buff *skb_clone(struct sk_buff *skb, + unsigned int __nocast priority); +extern struct sk_buff *skb_copy(const struct sk_buff *skb, + unsigned int __nocast priority); +extern struct sk_buff *pskb_copy(struct sk_buff *skb, + unsigned int __nocast gfp_mask); extern int pskb_expand_head(struct sk_buff *skb, - int nhead, int ntail, int gfp_mask); + int nhead, int ntail, + unsigned int __nocast gfp_mask); extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - int priority); + unsigned int __nocast priority); extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, @@ -464,7 +470,8 @@ static inline int skb_shared(const struct sk_buff *skb) * * NULL is returned on a memory allocation failure. */ -static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, + unsigned int __nocast pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_shared(skb)) { @@ -1001,7 +1008,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) * %NULL is returned in there is no free memory. */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - int gfp_mask) + unsigned int __nocast gfp_mask) { struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); if (likely(skb)) @@ -1114,8 +1121,8 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ -extern int __skb_linearize(struct sk_buff *skb, int gfp); -static inline int skb_linearize(struct sk_buff *skb, int gfp) +extern int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp); +static inline int skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp) { return __skb_linearize(skb, gfp); } diff --git a/include/net/sock.h b/include/net/sock.h index 7b76f891ae2d..a1042d08becd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -684,16 +684,17 @@ extern void FASTCALL(release_sock(struct sock *sk)); #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -extern struct sock *sk_alloc(int family, int priority, +extern struct sock *sk_alloc(int family, + unsigned int __nocast priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - int priority); + unsigned int __nocast priority); extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - int priority); + unsigned int __nocast priority); extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); @@ -708,7 +709,8 @@ extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode); -extern void *sock_kmalloc(struct sock *sk, int size, int priority); +extern void *sock_kmalloc(struct sock *sk, int size, + unsigned int __nocast priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); extern void sk_send_sigurg(struct sock *sk); @@ -1132,7 +1134,8 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) } static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, - int size, int mem, int gfp) + int size, int mem, + unsigned int __nocast gfp) { struct sk_buff *skb; int hdr_len; @@ -1155,7 +1158,8 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, } static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk, - int size, int gfp) + int size, + unsigned int __nocast gfp) { return sk_stream_alloc_pskb(sk, size, 0, gfp); } @@ -1188,7 +1192,7 @@ static inline int sock_writeable(const struct sock *sk) return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2); } -static inline int gfp_any(void) +static inline unsigned int __nocast gfp_any(void) { return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 4d5b12e4dc11..f4f9aba07ac2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -860,7 +860,8 @@ extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); -extern void tcp_send_active_reset(struct sock *sk, int priority); +extern void tcp_send_active_reset(struct sock *sk, + unsigned int __nocast priority); extern int tcp_send_synack(struct sock *); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); diff --git a/net/core/dev.c b/net/core/dev.c index 7f5f62c65115..ff9dc029233a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1127,7 +1127,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) extern void skb_release_data(struct sk_buff *); /* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, int gfp_mask) +int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) { unsigned int size; u8 *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 733deee24b9f..d9f7b06fe886 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -129,7 +129,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) +struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) { struct sk_buff *skb; u8 *data; @@ -182,7 +182,8 @@ nodata: * %GFP_ATOMIC. */ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int gfp_mask) + unsigned int size, + unsigned int __nocast gfp_mask) { struct sk_buff *skb; u8 *data; @@ -322,7 +323,7 @@ void __kfree_skb(struct sk_buff *skb) * %GFP_ATOMIC. */ -struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) +struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) { struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); @@ -460,7 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) * header is going to be modified. Use pskb_copy() instead. */ -struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) +struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask) { int headerlen = skb->data - skb->head; /* @@ -499,7 +500,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) * The returned buffer has a reference count of 1. */ -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) +struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask) { /* * Allocate the copy buffer @@ -557,7 +558,8 @@ out: * reloaded after call to this function. */ -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, + unsigned int __nocast gfp_mask) { int i; u8 *data; @@ -647,7 +649,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) * only by netfilter in the cases when checksum is recalculated? --ANK */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, int gfp_mask) + int newheadroom, int newtailroom, + unsigned int __nocast gfp_mask) { /* * Allocate the copy buffer diff --git a/net/core/sock.c b/net/core/sock.c index a6ec3ada7f9e..8b35ccdc2b3b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -622,7 +622,8 @@ lenout: * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it) +struct sock *sk_alloc(int family, unsigned int __nocast priority, + struct proto *prot, int zero_it) { struct sock *sk = NULL; kmem_cache_t *slab = prot->slab; @@ -750,7 +751,8 @@ unsigned long sock_i_ino(struct sock *sk) /* * Allocate a skb from the socket's send buffer. */ -struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) +struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, + unsigned int __nocast priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); @@ -765,7 +767,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int /* * Allocate a skb from the socket's receive buffer. */ -struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) +struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, + unsigned int __nocast priority) { if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); @@ -780,7 +783,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int /* * Allocate a memory block from the socket's option memory buffer. */ -void *sock_kmalloc(struct sock *sk, int size, int priority) +void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority) { if ((unsigned)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e041d057ec86..e3f8ea1bfa9c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1613,7 +1613,7 @@ void tcp_send_fin(struct sock *sk) * was unread data in the receive queue. This behavior is recommended * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM */ -void tcp_send_active_reset(struct sock *sk, int priority) +void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; -- cgit v1.2.3 From ca9b907d140a5f249250d19f956129dbbbf84f73 Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Fri, 8 Jul 2005 17:38:07 -0700 Subject: [IPV4]: multicast API "join" issues This patch corrects a few problems with the IP_ADD_MEMBERSHIP socket option: 1) The existing code makes an attempt at reference counting joins when using the ip_mreqn/imr_ifindex interface. Joining the same group on the same socket is an error, whatever the API. This leads to unexpected results when mixing ip_mreqn by index with ip_mreqn by address, ip_mreq, or other API's. For example, ip_mreq followed by ip_mreqn of the same group will "work" while the same two reversed will not. Fixed to always return EADDRINUSE on a duplicate join and removed the (now unused) reference count in ip_mc_socklist. 2) The group-search list in ip_mc_join_group() is comparing a full ip_mreqn structure and all of it must match for it to find the group. This doesn't correctly match a group that was joined with ip_mreq or ip_mreqn with an address (with or without an index). It also doesn't match groups that are joined by different addresses on the same interface. All of these are the same multicast group, which is identified by group address and interface index. Fixed the check to correctly match groups so we don't get duplicate group entries on the ip_mc_socklist. 3) The old code allocates a multicast address before searching for duplicates requiring it to free in various error cases. This patch moves the allocate until after the search and igmp_max_memberships check, so never a need to allocate, then free an entry. Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- include/linux/igmp.h | 1 - net/ipv4/igmp.c | 35 ++++++++++++----------------------- 2 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 390e760a96d3..0c31ef0b5bad 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -148,7 +148,6 @@ struct ip_sf_socklist struct ip_mc_socklist { struct ip_mc_socklist *next; - int count; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist *sflist; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 1f3183168a90..111eb678cbac 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1615,9 +1615,10 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { int err; u32 addr = imr->imr_multiaddr.s_addr; - struct ip_mc_socklist *iml, *i; + struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); + int ifindex; int count = 0; if (!MULTICAST(addr)) @@ -1633,37 +1634,30 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) goto done; } - iml = (struct ip_mc_socklist *)sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); - err = -EADDRINUSE; + ifindex = imr->imr_ifindex; for (i = inet->mc_list; i; i = i->next) { - if (memcmp(&i->multi, imr, sizeof(*imr)) == 0) { - /* New style additions are reference counted */ - if (imr->imr_address.s_addr == 0) { - i->count++; - err = 0; - } + if (i->multi.imr_multiaddr.s_addr == addr && + i->multi.imr_ifindex == ifindex) goto done; - } count++; } err = -ENOBUFS; - if (iml == NULL || count >= sysctl_igmp_max_memberships) + if (count >= sysctl_igmp_max_memberships) + goto done; + iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL); + if (iml == NULL) goto done; + memcpy(&iml->multi, imr, sizeof(*imr)); iml->next = inet->mc_list; - iml->count = 1; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; inet->mc_list = iml; ip_mc_inc_group(in_dev, addr); - iml = NULL; err = 0; - done: rtnl_shunlock(); - if (iml) - sock_kfree_s(sk, iml, sizeof(*iml)); return err; } @@ -1704,12 +1698,6 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) in_dev = inetdev_by_index(iml->multi.imr_ifindex); if (in_dev) (void) ip_mc_leave_src(sk, iml, in_dev); - if (--iml->count) { - rtnl_unlock(); - if (in_dev) - in_dev_put(in_dev); - return 0; - } *imlp = iml->next; @@ -1755,7 +1743,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct err = -EADDRNOTAVAIL; for (pmc=inet->mc_list; pmc; pmc=pmc->next) { - if (memcmp(&pmc->multi, mreqs, 2*sizeof(__u32)) == 0) + if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr + && pmc->multi.imr_ifindex == imr.imr_ifindex) break; } if (!pmc) /* must have a prior join */ -- cgit v1.2.3 From d369ddd2fc00fc3f46e9052d1017cbf407e3cdf7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jul 2005 15:45:11 -0700 Subject: [SPARC64]: Add __read_mostly support. Signed-off-by: David S. Miller --- arch/sparc64/kernel/smp.c | 10 +++++----- arch/sparc64/kernel/time.c | 24 +++++++++--------------- arch/sparc64/kernel/vmlinux.lds.S | 2 ++ include/linux/cache.h | 2 +- 4 files changed, 17 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index e5b9c7a27789..441fc2e52ce6 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -45,8 +45,8 @@ extern void calibrate_delay(void); /* Please don't make this stuff initdata!!! --DaveM */ static unsigned char boot_cpu_id; -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +cpumask_t cpu_online_map = CPU_MASK_NONE __read_mostly; +cpumask_t phys_cpu_present_map = CPU_MASK_NONE __read_mostly; static cpumask_t smp_commenced_mask; static cpumask_t cpu_callout_map; @@ -155,7 +155,7 @@ void cpu_panic(void) panic("SMP bolixed\n"); } -static unsigned long current_tick_offset; +static unsigned long current_tick_offset __read_mostly; /* This tick register synchronization scheme is taken entirely from * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit. @@ -1193,8 +1193,8 @@ void smp_send_stop(void) { } -unsigned long __per_cpu_base; -unsigned long __per_cpu_shift; +unsigned long __per_cpu_base __read_mostly; +unsigned long __per_cpu_shift __read_mostly; EXPORT_SYMBOL(__per_cpu_base); EXPORT_SYMBOL(__per_cpu_shift); diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index b40db389f90b..362b9c26871b 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -73,7 +73,7 @@ static __initdata struct sparc64_tick_ops dummy_tick_ops = { .get_tick = dummy_get_tick, }; -struct sparc64_tick_ops *tick_ops = &dummy_tick_ops; +struct sparc64_tick_ops *tick_ops __read_mostly = &dummy_tick_ops; #define TICK_PRIV_BIT (1UL << 63) @@ -195,7 +195,7 @@ static unsigned long tick_add_tick(unsigned long adj, unsigned long offset) return new_tick; } -static struct sparc64_tick_ops tick_operations = { +static struct sparc64_tick_ops tick_operations __read_mostly = { .init_tick = tick_init_tick, .get_tick = tick_get_tick, .get_compare = tick_get_compare, @@ -276,7 +276,7 @@ static unsigned long stick_add_compare(unsigned long adj) return new_compare; } -static struct sparc64_tick_ops stick_operations = { +static struct sparc64_tick_ops stick_operations __read_mostly = { .init_tick = stick_init_tick, .get_tick = stick_get_tick, .get_compare = stick_get_compare, @@ -422,7 +422,7 @@ static unsigned long hbtick_add_compare(unsigned long adj) return val; } -static struct sparc64_tick_ops hbtick_operations = { +static struct sparc64_tick_ops hbtick_operations __read_mostly = { .init_tick = hbtick_init_tick, .get_tick = hbtick_get_tick, .get_compare = hbtick_get_compare, @@ -437,10 +437,9 @@ static struct sparc64_tick_ops hbtick_operations = { * NOTE: On SUN5 systems the ticker interrupt comes in using 2 * interrupts, one at level14 and one with softint bit 0. */ -unsigned long timer_tick_offset; -unsigned long timer_tick_compare; +unsigned long timer_tick_offset __read_mostly; -static unsigned long timer_ticks_per_nsec_quotient; +static unsigned long timer_ticks_per_nsec_quotient __read_mostly; #define TICK_SIZE (tick_nsec / 1000) @@ -464,7 +463,7 @@ static inline void timer_check_rtc(void) static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) { - unsigned long ticks, pstate; + unsigned long ticks, compare, pstate; write_seqlock(&xtime_lock); @@ -483,14 +482,14 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) : "=r" (pstate) : "i" (PSTATE_IE)); - timer_tick_compare = tick_ops->add_compare(timer_tick_offset); + compare = tick_ops->add_compare(timer_tick_offset); ticks = tick_ops->get_tick(); /* Restore PSTATE_IE. */ __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : /* no outputs */ : "r" (pstate)); - } while (time_after_eq(ticks, timer_tick_compare)); + } while (time_after_eq(ticks, compare)); timer_check_rtc(); @@ -506,11 +505,6 @@ void timer_tick_interrupt(struct pt_regs *regs) do_timer(regs); - /* - * Only keep timer_tick_offset uptodate, but don't set TICK_CMPR. - */ - timer_tick_compare = tick_ops->get_compare() + timer_tick_offset; - timer_check_rtc(); write_sequnlock(&xtime_lock); diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 382fd6798bb9..950423da8a6a 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -32,6 +32,8 @@ SECTIONS .data1 : { *(.data1) } . = ALIGN(64); .data.cacheline_aligned : { *(.data.cacheline_aligned) } + . = ALIGN(64); + .data.read_mostly : { *(.data.read_mostly) } _edata = .; PROVIDE (edata = .); .fixup : { *(.fixup) } diff --git a/include/linux/cache.h b/include/linux/cache.h index 2b66a36d85f0..f6b5a46c5f82 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -13,7 +13,7 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif -#ifdef CONFIG_X86 +#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) #define __read_mostly __attribute__((__section__(".data.read_mostly"))) #else #define __read_mostly -- cgit v1.2.3 From f7ceba360cce9af3fbc4e5a5b1bd40b570b7021c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jul 2005 19:29:45 -0700 Subject: [SPARC64]: Add syscall auditing support. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 10 +++++----- arch/sparc64/kernel/ptrace.c | 32 +++++++++++++++++++++++++++++--- include/asm-sparc64/thread_info.h | 8 +++++--- include/linux/audit.h | 2 +- init/Kconfig | 2 +- 5 files changed, 41 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 8b7ed760c50e..d781f10adc52 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -1552,7 +1552,7 @@ sys_ptrace: add %sp, PTREGS_OFF, %o0 nop .align 32 1: ldx [%curptr + TI_FLAGS], %l5 - andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0 + andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 be,pt %icc, rtrap clr %l6 add %sp, PTREGS_OFF, %o0 @@ -1679,7 +1679,7 @@ linux_sparc_syscall32: srl %i5, 0, %o5 ! IEU1 srl %i2, 0, %o2 ! IEU0 Group - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0 ! IEU0 Group + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 bne,pn %icc, linux_syscall_trace32 ! CTI mov %i0, %l5 ! IEU1 call %l7 ! CTI Group brk forced @@ -1702,7 +1702,7 @@ linux_sparc_syscall: mov %i3, %o3 ! IEU1 mov %i4, %o4 ! IEU0 Group - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %g0 ! IEU1 Group+1 bubble + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 bne,pn %icc, linux_syscall_trace ! CTI Group mov %i0, %l5 ! IEU0 2: call %l7 ! CTI Group brk forced @@ -1730,7 +1730,7 @@ ret_sys_call: 1: cmp %o0, -ERESTART_RESTARTBLOCK bgeu,pn %xcc, 1f - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %l6 + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6 80: /* System call success, clear Carry condition code. */ andn %g3, %g2, %g3 @@ -1745,7 +1745,7 @@ ret_sys_call: /* System call failure, set Carry condition code. * Also, get abs(errno) to return to the process. */ - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP), %l6 + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6 sub %g0, %o0, %o0 or %g3, %g2, %g3 stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index c57dc9ea731b..23ad839d113f 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -633,10 +635,22 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p) /* do the secure computing check first */ secure_computing(regs->u_regs[UREG_G1]); - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + if (unlikely(current->audit_context) && syscall_exit_p) { + unsigned long tstate = regs->tstate; + int result = AUDITSC_SUCCESS; + + if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) + result = AUDITSC_FAILURE; + + audit_syscall_exit(current, result, regs->u_regs[UREG_I0]); + } + if (!(current->ptrace & PT_PTRACED)) - return; + goto out; + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + goto out; + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); @@ -649,4 +663,16 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + +out: + if (unlikely(current->audit_context) && !syscall_exit_p) + audit_syscall_entry(current, + (test_thread_flag(TIF_32BIT) ? + AUDIT_ARCH_SPARC : + AUDIT_ARCH_SPARC64), + regs->u_regs[UREG_G1], + regs->u_regs[UREG_I0], + regs->u_regs[UREG_I1], + regs->u_regs[UREG_I2], + regs->u_regs[UREG_I3]); } diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h index 6b2fbb89bb63..a1d25c06f92a 100644 --- a/include/asm-sparc64/thread_info.h +++ b/include/asm-sparc64/thread_info.h @@ -221,7 +221,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TIF_32BIT 7 /* 32-bit binary */ #define TIF_NEWCHILD 8 /* just-spawned child process */ #define TIF_SECCOMP 9 /* secure computing */ -#define TIF_POLLING_NRFLAG 10 +#define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */ #define TIF_SYSCALL_SUCCESS 11 /* NOTE: Thread flags >= 12 should be ones we have no interest * in using in assembly, else we can't use the mask as @@ -229,6 +229,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); */ #define TIF_ABI_PENDING 12 #define TIF_MEMDIE 13 +#define TIF_POLLING_NRFLAG 14 #define _TIF_SYSCALL_TRACE (1<