From 53e872681fed6a43047e71bf927f77d06f467988 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Dec 2012 13:29:52 -0500 Subject: ext4: fix deadlock in journal_unmap_buffer() We cannot wait for transaction commit in journal_unmap_buffer() because we hold page lock which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible for waiting for transaction commit to finish and try invalidation again. Since the issue can happen only for page straddling i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such page from ext4_setattr(), check the return value and wait if necessary. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1be23d9fdacb..e30b66346942 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *, extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); -extern void jbd2_journal_invalidatepage(journal_t *, +extern int jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned long); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int jbd2_journal_stop(handle_t *); -- cgit v1.2.3 From 08b60f8438879a84246d7debded31c9cb7aea6e4 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 24 Dec 2012 11:14:58 -0700 Subject: namei.h: include errno.h This solves: In file included from fs/ext3/symlink.c:20:0: include/linux/namei.h: In function 'retry_estale': include/linux/namei.h:114:19: error: 'ESTALE' undeclared (first use in this function) Signed-off-by: Stephen Warren Signed-off-by: Al Viro --- 
include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index e998c030061d..5a5ff57ceed4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -2,6 +2,7 @@ #define _LINUX_NAMEI_H #include +#include #include #include -- cgit v1.2.3 From 812089e01b9f65f90fc8fc670d8cce72a0e01fbb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 1 Dec 2012 12:37:20 -0800 Subject: PCI: Reduce Ricoh 0xe822 SD card reader base clock frequency to 50MHz Otherwise it fails like this on cards like the Transcend 16GB SDHC card: mmc0: new SDHC card at address b368 mmcblk0: mmc0:b368 SDC 15.0 GiB mmcblk0: error -110 sending status command, retrying mmcblk0: error -84 transferring data, sector 0, nr 8, cmd response 0x900, card status 0xb0 Tested on my Lenovo x200 laptop. [bhelgaas: changelog] Signed-off-by: Andy Lutomirski Signed-off-by: Bjorn Helgaas Acked-by: Chris Ball CC: Manoj Iyer CC: stable@vger.kernel.org --- drivers/pci/quirks.c | 7 +++++-- include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8f7a6344e79e..0369fb6fc1da 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2725,7 +2725,7 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) if (PCI_FUNC(dev->devfn)) return; /* - * RICOH 0xe823 SD/MMC card reader fails to recognize + * RICOH 0xe822 and 0xe823 SD/MMC card readers fail to recognize * certain types of SD/MMC cards. Lowering the SD base * clock frequency from 200Mhz to 50Mhz fixes this issue. 
* @@ -2736,7 +2736,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) * 0xf9 - Key register for 0x150 * 0xfc - key register for 0xe1 */ - if (dev->device == PCI_DEVICE_ID_RICOH_R5CE823) { + if (dev->device == PCI_DEVICE_ID_RICOH_R5CE822 || + dev->device == PCI_DEVICE_ID_RICOH_R5CE823) { pci_write_config_byte(dev, 0xf9, 0xfc); pci_write_config_byte(dev, 0x150, 0x10); pci_write_config_byte(dev, 0xf9, 0x00); @@ -2763,6 +2764,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832); #endif /*CONFIG_MMC_RICOH_MMC*/ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f8447376ddb..0eb65796bcb9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1568,6 +1568,7 @@ #define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 #define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 #define PCI_DEVICE_ID_RICOH_R5C822 0x0822 +#define PCI_DEVICE_ID_RICOH_R5CE822 0xe822 #define PCI_DEVICE_ID_RICOH_R5CE823 0xe823 #define PCI_DEVICE_ID_RICOH_R5C832 0x0832 #define PCI_DEVICE_ID_RICOH_R5C843 0x0843 -- cgit v1.2.3 From a7a88b23737095e6c18a20c5d4eef9e25ec5b829 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 2 Jan 2013 02:04:23 -0800 Subject: mempolicy: remove arg from mpol_parse_str, mpol_to_str Remove the unused argument (formerly no_context) from mpol_parse_str() and from mpol_to_str(). 
Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 2 +- include/linux/mempolicy.h | 11 ++++------- mm/mempolicy.c | 6 ++---- mm/shmem.c | 4 ++-- 4 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 448455b7fd91..ca5ce7f9f800 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1278,7 +1278,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.mm = mm; pol = get_vma_policy(task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol, 0); + mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); seq_printf(m, "%08lx %s", vma->vm_start, buffer); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 9adc270de7ef..92bc9988a180 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -165,11 +165,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, #ifdef CONFIG_TMPFS -extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context); +extern int mpol_parse_str(char *str, struct mempolicy **mpol); #endif -extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, - int no_context); +extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) @@ -296,15 +295,13 @@ static inline void check_highest_zone(int k) } #ifdef CONFIG_TMPFS -static inline int mpol_parse_str(char *str, struct mempolicy **mpol, - int no_context) +static inline int mpol_parse_str(char *str, struct mempolicy **mpol) { return 1; /* error */ } #endif -static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, - int no_context) +static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) { return 0; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 02c914cca53d..1cb200af3828 100644 --- a/mm/mempolicy.c +++ 
b/mm/mempolicy.c @@ -2612,14 +2612,13 @@ static const char * const policy_modes[] = * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option. * @str: string containing mempolicy to parse * @mpol: pointer to struct mempolicy pointer, returned on success. - * @unused: redundant argument, to be removed later. * * Format of input: * [=][:] * * On success, returns 0, else 1 */ -int mpol_parse_str(char *str, struct mempolicy **mpol, int unused) +int mpol_parse_str(char *str, struct mempolicy **mpol) { struct mempolicy *new = NULL; unsigned short mode; @@ -2747,13 +2746,12 @@ out: * @buffer: to contain formatted mempolicy string * @maxlen: length of @buffer * @pol: pointer to mempolicy to be formatted - * @unused: redundant argument, to be removed later. * * Convert a mempolicy into a string. * Returns the number of characters in buffer (if positive) * or an error (negative) */ -int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int unused) +int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) { char *p = buffer; int l; diff --git a/mm/shmem.c b/mm/shmem.c index 5c90d84c2b02..5dd56f6efdbd 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -889,7 +889,7 @@ static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) if (!mpol || mpol->mode == MPOL_DEFAULT) return; /* show nothing */ - mpol_to_str(buffer, sizeof(buffer), mpol, 1); + mpol_to_str(buffer, sizeof(buffer), mpol); seq_printf(seq, ",mpol=%s", buffer); } @@ -2463,7 +2463,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, if (!gid_valid(sbinfo->gid)) goto bad_val; } else if (!strcmp(this_char,"mpol")) { - if (mpol_parse_str(value, &sbinfo->mpol, 1)) + if (mpol_parse_str(value, &sbinfo->mpol)) goto bad_val; } else { printk(KERN_ERR "tmpfs: Bad mount option %s\n", -- cgit v1.2.3 From 42288fe366c4f1ce7522bc9f27d0bc2a81c55264 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 21 Dec 2012 23:10:25 +0000 Subject: mm: mempolicy: Convert 
shared_policy mutex to spinlock Sasha was fuzzing with trinity and reported the following problem: BUG: sleeping function called from invalid context at kernel/mutex.c:269 in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main 2 locks held by trinity-main/6361: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x1e4/0x4f0 #1: (&(&mm->page_table_lock)->rlock){+.+...}, at: [] handle_pte_fault+0x3f7/0x6a0 Pid: 6361, comm: trinity-main Tainted: G W 3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74 Call Trace: __might_sleep+0x1c3/0x1e0 mutex_lock_nested+0x29/0x50 mpol_shared_policy_lookup+0x2e/0x90 shmem_get_policy+0x2e/0x30 get_vma_policy+0x5a/0xa0 mpol_misplaced+0x41/0x1d0 handle_pte_fault+0x465/0x6a0 This was triggered by a different version of automatic NUMA balancing but in theory the current version is vulnerable to the same problem. do_numa_page -> numa_migrate_prep -> mpol_misplaced -> get_vma_policy -> shmem_get_policy It's very unlikely this will happen as shared pages are not marked pte_numa -- see the page_mapcount() check in change_pte_range() -- but it is possible. To address this, this patch restores sp->lock as originally implemented by Kosaki Motohiro. In the path where get_vma_policy() is called, it should not be calling sp_alloc() so it is not necessary to treat the PTL specially. 
Signed-off-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- mm/mempolicy.c | 68 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 92bc9988a180..0d7df39a5885 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -123,7 +123,7 @@ struct sp_node { struct shared_policy { struct rb_root root; - struct mutex mutex; + spinlock_t lock; }; void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1cb200af3828..e2df1c1fb41f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2132,7 +2132,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) */ /* lookup first element intersecting start-end */ -/* Caller holds sp->mutex */ +/* Caller holds sp->lock */ static struct sp_node * sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) { @@ -2196,13 +2196,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) if (!sp->root.rb_node) return NULL; - mutex_lock(&sp->mutex); + spin_lock(&sp->lock); sn = sp_lookup(sp, idx, idx+1); if (sn) { mpol_get(sn->policy); pol = sn->policy; } - mutex_unlock(&sp->mutex); + spin_unlock(&sp->lock); return pol; } @@ -2328,6 +2328,14 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n) sp_free(n); } +static void sp_node_init(struct sp_node *node, unsigned long start, + unsigned long end, struct mempolicy *pol) +{ + node->start = start; + node->end = end; + node->policy = pol; +} + static struct sp_node *sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol) { @@ -2344,10 +2352,7 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end, return NULL; } newpol->flags |= MPOL_F_SHARED; - - n->start = start; - n->end = end; - 
n->policy = newpol; + sp_node_init(n, start, end, newpol); return n; } @@ -2357,9 +2362,12 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, unsigned long end, struct sp_node *new) { struct sp_node *n; + struct sp_node *n_new = NULL; + struct mempolicy *mpol_new = NULL; int ret = 0; - mutex_lock(&sp->mutex); +restart: + spin_lock(&sp->lock); n = sp_lookup(sp, start, end); /* Take care of old policies in the same range. */ while (n && n->start < end) { @@ -2372,14 +2380,16 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, } else { /* Old policy spanning whole new range. */ if (n->end > end) { - struct sp_node *new2; - new2 = sp_alloc(end, n->end, n->policy); - if (!new2) { - ret = -ENOMEM; - goto out; - } + if (!n_new) + goto alloc_new; + + *mpol_new = *n->policy; + atomic_set(&mpol_new->refcnt, 1); + sp_node_init(n_new, n->end, end, mpol_new); + sp_insert(sp, n_new); n->end = start; - sp_insert(sp, new2); + n_new = NULL; + mpol_new = NULL; break; } else n->end = start; @@ -2390,9 +2400,27 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, } if (new) sp_insert(sp, new); -out: - mutex_unlock(&sp->mutex); + spin_unlock(&sp->lock); + ret = 0; + +err_out: + if (mpol_new) + mpol_put(mpol_new); + if (n_new) + kmem_cache_free(sn_cache, n_new); + return ret; + +alloc_new: + spin_unlock(&sp->lock); + ret = -ENOMEM; + n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL); + if (!n_new) + goto err_out; + mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); + if (!mpol_new) + goto err_out; + goto restart; } /** @@ -2410,7 +2438,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) int ret; sp->root = RB_ROOT; /* empty tree == default mempolicy */ - mutex_init(&sp->mutex); + spin_lock_init(&sp->lock); if (mpol) { struct vm_area_struct pvma; @@ -2476,14 +2504,14 @@ void mpol_free_shared_policy(struct shared_policy *p) if (!p->root.rb_node) return; - 
mutex_lock(&p->mutex); + spin_lock(&p->lock); next = rb_first(&p->root); while (next) { n = rb_entry(next, struct sp_node, nd); next = rb_next(&n->nd); sp_delete(p, n); } - mutex_unlock(&p->mutex); + spin_unlock(&p->lock); } #ifdef CONFIG_NUMA_BALANCING -- cgit v1.2.3 From 3d33fcc11bdd11b6949cf5c406726a094395dc4f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Jan 2013 15:12:55 +0000 Subject: UAPI: Remove empty Kbuild files Empty files can get deleted by the patch program, so remove empty Kbuild files and their links from the parent Kbuilds. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/Kbuild | 3 --- include/linux/Kbuild | 5 ----- include/linux/hdlc/Kbuild | 0 include/linux/hsi/Kbuild | 0 include/linux/raid/Kbuild | 0 include/linux/usb/Kbuild | 0 include/rdma/Kbuild | 0 include/sound/Kbuild | 0 8 files changed, 8 deletions(-) delete mode 100644 include/linux/Kbuild delete mode 100644 include/linux/hdlc/Kbuild delete mode 100644 include/linux/hsi/Kbuild delete mode 100644 include/linux/raid/Kbuild delete mode 100644 include/linux/usb/Kbuild delete mode 100644 include/rdma/Kbuild delete mode 100644 include/sound/Kbuild (limited to 'include/linux') diff --git a/include/Kbuild b/include/Kbuild index 83256b64166a..1dfd33e8d43b 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -1,8 +1,5 @@ # Top-level Makefile calls into asm-$(ARCH) # List only non-arch directories below -header-y += linux/ -header-y += sound/ -header-y += rdma/ header-y += video/ header-y += scsi/ diff --git a/include/linux/Kbuild b/include/linux/Kbuild deleted file mode 100644 index 7fe2dae251e5..000000000000 --- a/include/linux/Kbuild +++ /dev/null @@ -1,5 +0,0 @@ -header-y += dvb/ -header-y += hdlc/ -header-y += hsi/ -header-y += raid/ -header-y += usb/ diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/hsi/Kbuild b/include/linux/hsi/Kbuild deleted file mode 
100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/sound/Kbuild b/include/sound/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 -- cgit v1.2.3 From f568f6ca811fe681ecfd11c4ce78b6aa488020c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:02:05 -0800 Subject: pstore: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from the pstore filesystem. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. 
Cc: Bill Pemberton Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 14 ++++++-------- fs/pstore/ram_core.c | 9 ++++----- include/linux/pstore_ram.h | 5 ++--- 3 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index f883e7e74305..7003e5266f25 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -291,9 +291,8 @@ static void ramoops_free_przs(struct ramoops_context *cxt) kfree(cxt->przs); } -static int __devinit ramoops_init_przs(struct device *dev, - struct ramoops_context *cxt, - phys_addr_t *paddr, size_t dump_mem_sz) +static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, + phys_addr_t *paddr, size_t dump_mem_sz) { int err = -ENOMEM; int i; @@ -336,10 +335,9 @@ fail_prz: return err; } -static int __devinit ramoops_init_prz(struct device *dev, - struct ramoops_context *cxt, - struct persistent_ram_zone **prz, - phys_addr_t *paddr, size_t sz, u32 sig) +static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, + struct persistent_ram_zone **prz, + phys_addr_t *paddr, size_t sz, u32 sig) { if (!sz) return 0; @@ -367,7 +365,7 @@ static int __devinit ramoops_init_prz(struct device *dev, return 0; } -static int __devinit ramoops_probe(struct platform_device *pdev) +static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ramoops_platform_data *pdata = pdev->dev.platform_data; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index eecd2a8a84dd..0306303be372 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -390,8 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, return 0; } -static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, - u32 sig, int ecc_size) +static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, + int ecc_size) { int ret; @@ -443,9 
+443,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz) kfree(prz); } -struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, u32 sig, - int ecc_size) +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, int ecc_size) { struct persistent_ram_zone *prz; int ret = -ENOMEM; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 098d2a838296..cb6ab5feab67 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -46,9 +46,8 @@ struct persistent_ram_zone { size_t old_log_size; }; -struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, u32 sig, - int ecc_size); +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, int ecc_size); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -- cgit v1.2.3 From 0f58a01ddd5e8177255705ba15e64c3b74d67993 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:12:59 -0800 Subject: Drivers: bcma: remove __dev* attributes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit, __devexit_p, and __devexit from these drivers. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. 
Cc: Bill Pemberton Cc: "Rafał Miłecki" Signed-off-by: Greg Kroah-Hartman --- drivers/bcma/bcma_private.h | 6 +++--- drivers/bcma/driver_gmac_cmn.c | 2 +- drivers/bcma/driver_pci.c | 4 ++-- drivers/bcma/driver_pci_host.c | 13 ++++++------- drivers/bcma/host_pci.c | 8 ++++---- drivers/bcma/main.c | 2 +- include/linux/bcma/bcma_driver_gmac_cmn.h | 2 +- include/linux/bcma/bcma_driver_pci.h | 2 +- 8 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 4a2d72ec6d43..19e3fbfd5757 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -22,7 +22,7 @@ struct bcma_bus; /* main.c */ -int __devinit bcma_bus_register(struct bcma_bus *bus); +int bcma_bus_register(struct bcma_bus *bus); void bcma_bus_unregister(struct bcma_bus *bus); int __init bcma_bus_early_register(struct bcma_bus *bus, struct bcma_device *core_cc, @@ -87,8 +87,8 @@ u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address); extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc); #ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE -bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc); -void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc); +bool bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc); +void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc); #endif /* CONFIG_BCMA_DRIVER_PCI_HOSTMODE */ #ifdef CONFIG_BCMA_DRIVER_GPIO diff --git a/drivers/bcma/driver_gmac_cmn.c b/drivers/bcma/driver_gmac_cmn.c index 834225f65e8f..dcb137926d31 100644 --- a/drivers/bcma/driver_gmac_cmn.c +++ b/drivers/bcma/driver_gmac_cmn.c @@ -8,7 +8,7 @@ #include "bcma_private.h" #include -void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) +void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { mutex_init(&gc->phy_mutex); } diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index c39ee6d45850..cf7a476a519f 100644 --- 
a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -207,14 +207,14 @@ static void bcma_core_pci_config_fixup(struct bcma_drv_pci *pc) * Init. **************************************************/ -static void __devinit bcma_core_pci_clientmode_init(struct bcma_drv_pci *pc) +static void bcma_core_pci_clientmode_init(struct bcma_drv_pci *pc) { bcma_core_pci_fixcfg(pc); bcma_pcicore_serdes_workaround(pc); bcma_core_pci_config_fixup(pc); } -void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc) +void bcma_core_pci_init(struct bcma_drv_pci *pc) { if (pc->setup_done) return; diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c index e6b5c89469dc..af0c9fabee54 100644 --- a/drivers/bcma/driver_pci_host.c +++ b/drivers/bcma/driver_pci_host.c @@ -24,7 +24,7 @@ #define BCMA_PCI_SLOT_MAX 16 #define PCI_CONFIG_SPACE_SIZE 256 -bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc) +bool bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc) { struct bcma_bus *bus = pc->core->bus; u16 chipid_top; @@ -264,10 +264,9 @@ static int bcma_core_pci_hostmode_write_config(struct pci_bus *bus, } /* return cap_offset if requested capability exists in the PCI config space */ -static u8 __devinit bcma_find_pci_capability(struct bcma_drv_pci *pc, - unsigned int dev, - unsigned int func, u8 req_cap_id, - unsigned char *buf, u32 *buflen) +static u8 bcma_find_pci_capability(struct bcma_drv_pci *pc, unsigned int dev, + unsigned int func, u8 req_cap_id, + unsigned char *buf, u32 *buflen) { u8 cap_id; u8 cap_ptr = 0; @@ -334,7 +333,7 @@ static u8 __devinit bcma_find_pci_capability(struct bcma_drv_pci *pc, * Retry Status (CRS) Completion Status to software then * enable the feature. 
*/ -static void __devinit bcma_core_pci_enable_crs(struct bcma_drv_pci *pc) +static void bcma_core_pci_enable_crs(struct bcma_drv_pci *pc) { struct bcma_bus *bus = pc->core->bus; u8 cap_ptr, root_ctrl, root_cap, dev; @@ -381,7 +380,7 @@ static void __devinit bcma_core_pci_enable_crs(struct bcma_drv_pci *pc) } } -void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc) +void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc) { struct bcma_bus *bus = pc->core->bus; struct bcma_drv_pci_host *pc_host; diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index 98fdc3e014e7..fbf2759e7e4e 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -155,8 +155,8 @@ static const struct bcma_host_ops bcma_host_pci_ops = { .awrite32 = bcma_host_pci_awrite32, }; -static int __devinit bcma_host_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id) +static int bcma_host_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) { struct bcma_bus *bus; int err = -ENOMEM; @@ -226,7 +226,7 @@ err_kfree_bus: return err; } -static void __devexit bcma_host_pci_remove(struct pci_dev *dev) +static void bcma_host_pci_remove(struct pci_dev *dev) { struct bcma_bus *bus = pci_get_drvdata(dev); @@ -284,7 +284,7 @@ static struct pci_driver bcma_pci_bridge_driver = { .name = "bcma-pci-bridge", .id_table = bcma_pci_bridge_tbl, .probe = bcma_host_pci_probe, - .remove = __devexit_p(bcma_host_pci_remove), + .remove = bcma_host_pci_remove, .driver.pm = BCMA_PM_OPS, }; diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 53ba20ca17e0..4a92f647b58b 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -192,7 +192,7 @@ static void bcma_unregister_cores(struct bcma_bus *bus) platform_device_unregister(bus->drv_cc.watchdog); } -int __devinit bcma_bus_register(struct bcma_bus *bus) +int bcma_bus_register(struct bcma_bus *bus) { int err; struct bcma_device *core; diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h 
b/include/linux/bcma/bcma_driver_gmac_cmn.h index def894b83b0d..4dd1f33e36a2 100644 --- a/include/linux/bcma/bcma_driver_gmac_cmn.h +++ b/include/linux/bcma/bcma_driver_gmac_cmn.h @@ -92,7 +92,7 @@ struct bcma_drv_gmac_cmn { #define gmac_cmn_write32(gc, offset, val) bcma_write32((gc)->core, offset, val) #ifdef CONFIG_BCMA_DRIVER_GMAC_CMN -extern void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); +extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); #else static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { } #endif diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 41da581e1612..c48d98d27b77 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -214,7 +214,7 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc); +extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); extern void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend); -- cgit v1.2.3 From e389623a68622e3c9be440ab522fac1aa1ca3454 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:15:49 -0800 Subject: include: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from some include files that were previously missed. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. 
Cc: Bill Pemberton Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/parport.h | 4 ++-- include/linux/ata_platform.h | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h index 40528cb977e8..2c9f9d4336ca 100644 --- a/include/asm-generic/parport.h +++ b/include/asm-generic/parport.h @@ -10,8 +10,8 @@ * to devices on the PCI bus. */ -static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); -static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) +static int parport_pc_find_isa_ports(int autoirq, int autodma); +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) { #ifdef CONFIG_ISA return parport_pc_find_isa_ports(autoirq, autodma); diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index fe9989636b62..b9fde17f767c 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -15,12 +15,12 @@ struct pata_platform_info { unsigned int irq_flags; }; -extern int __devinit __pata_platform_probe(struct device *dev, - struct resource *io_res, - struct resource *ctl_res, - struct resource *irq_res, - unsigned int ioport_shift, - int __pio_mask); +extern int __pata_platform_probe(struct device *dev, + struct resource *io_res, + struct resource *ctl_res, + struct resource *irq_res, + unsigned int ioport_shift, + int __pio_mask); /* * Marvell SATA private data -- cgit v1.2.3 From 03f595668017f1a1fb971c02fc37140bc6e7bb1c Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:50 -0800 Subject: ipc: add sysctl to specify desired next object id Add 3 new variables and sysctls to tune them (by one "next_id" variable for messages, semaphores and shared memory respectively). This variable can be used to set desired id for next allocated IPC object. By default it's equal to -1 and old behaviour is preserved. 
If this variable is non-negative, then desired idr will be extracted from it and used as a start value to search for free IDR slot. Notes: 1) this patch doesn't guarantee that the new object will have desired id. So it's up to user space how to handle new object with wrong id. 2) After a successful id allocation attempt, "next_id" will be set back to -1 (if it was non-negative). [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 19 +++++++++++++++++++ include/linux/ipc_namespace.h | 1 + ipc/ipc_sysctl.c | 32 ++++++++++++++++++++++++++++++++ ipc/util.c | 16 ++++++++++++---- ipc/util.h | 1 + 5 files changed, 65 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 2907ba6c3607..51b953a1b149 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -38,6 +38,7 @@ show up in /proc/sys/kernel: - l2cr [ PPC only ] - modprobe ==> Documentation/debugging-modules.txt - modules_disabled +- msg_next_id [ sysv ipc ] - msgmax - msgmnb - msgmni @@ -62,7 +63,9 @@ show up in /proc/sys/kernel: - rtsig-max - rtsig-nr - sem +- sem_next_id [ sysv ipc ] - sg-big-buff [ generic SCSI device (sg) ] +- shm_next_id [ sysv ipc ] - shm_rmid_forced - shmall - shmmax [ sysv ipc ] @@ -320,6 +323,22 @@ to false. ============================================================== +msg_next_id, sem_next_id, and shm_next_id: + +These three toggles allows to specify desired id for next allocated IPC +object: message, semaphore or shared memory respectively. + +By default they are equal to -1, which means generic allocation logic. +Possible values to set are in range {0..INT_MAX}. 
+ +Notes: +1) kernel doesn't guarantee, that new object will have desired id. So, +it's up to userspace, how to handle an object with "wrong" id. +2) Toggle with non-default value will be set back to -1 by kernel after +successful IPC object allocation. + +============================================================== + nmi_watchdog: Enables/Disables the NMI watchdog on x86 systems. When the value is diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index fe771978e877..ae221a7b5092 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -24,6 +24,7 @@ struct ipc_ids { unsigned short seq_max; struct rw_semaphore rw_mutex; struct idr ipcs_idr; + int next_id; }; struct ipc_namespace { diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 00fba2bab87d..130dfece27ac 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -158,6 +158,9 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, static int zero; static int one = 1; +#ifdef CONFIG_CHECKPOINT_RESTORE +static int int_max = INT_MAX; +#endif static struct ctl_table ipc_kern_table[] = { { @@ -227,6 +230,35 @@ static struct ctl_table ipc_kern_table[] = { .extra1 = &zero, .extra2 = &one, }, +#ifdef CONFIG_CHECKPOINT_RESTORE + { + .procname = "sem_next_id", + .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, + }, + { + .procname = "msg_next_id", + .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, + }, + { + .procname = "shm_next_id", + .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, + }, 
+#endif {} }; diff --git a/ipc/util.c b/ipc/util.c index 72fd0785ac94..74e1d9c7a98a 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -122,6 +122,7 @@ void ipc_init_ids(struct ipc_ids *ids) ids->in_use = 0; ids->seq = 0; + ids->next_id = -1; { int seq_limit = INT_MAX/SEQ_MULTIPLIER; if (seq_limit > USHRT_MAX) @@ -252,6 +253,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) kuid_t euid; kgid_t egid; int id, err; + int next_id = ids->next_id; if (size > IPCMNI) size = IPCMNI; @@ -264,7 +266,8 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) rcu_read_lock(); spin_lock(&new->lock); - err = idr_get_new(&ids->ipcs_idr, new, &id); + err = idr_get_new_above(&ids->ipcs_idr, new, + (next_id < 0) ? 0 : ipcid_to_idx(next_id), &id); if (err) { spin_unlock(&new->lock); rcu_read_unlock(); @@ -277,9 +280,14 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) new->cuid = new->uid = euid; new->gid = new->cgid = egid; - new->seq = ids->seq++; - if(ids->seq > ids->seq_max) - ids->seq = 0; + if (next_id < 0) { + new->seq = ids->seq++; + if (ids->seq > ids->seq_max) + ids->seq = 0; + } else { + new->seq = ipcid_to_seqx(next_id); + ids->next_id = -1; + } new->id = ipc_buildid(id, new->seq); return id; diff --git a/ipc/util.h b/ipc/util.h index c8fe2f7631e9..a61e0ca2bffd 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -92,6 +92,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header, #define IPC_SHM_IDS 2 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) +#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) /* must be called with ids->rw_mutex acquired for writing */ int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); -- cgit v1.2.3 From f9dd87f4738c7555aca2cdf8cb2b2326cafb0cad Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:52 -0800 Subject: ipc: message queue receive cleanup Move all message related manipulation into one function msg_fill(). 
Actually, two functions because of the compat one. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 5 +++-- ipc/compat.c | 45 +++++++++++++++++++-------------------------- ipc/msg.c | 44 +++++++++++++++++++++++--------------------- 3 files changed, 45 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msg.h b/include/linux/msg.h index 7a4b9e97d29a..fc5743a554e6 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -34,7 +34,8 @@ struct msg_queue { /* Helper routines for sys_msgsnd and sys_msgrcv */ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); -extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext, - size_t msgsz, long msgtyp, int msgflg); +extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, + int msgflg, + long (*msg_fill)(void __user *, struct msg_msg *, size_t)); #endif /* _LINUX_MSG_H */ diff --git a/ipc/compat.c b/ipc/compat.c index ad9518eb26e0..eb3ea16d2d1d 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -306,6 +306,20 @@ static long do_compat_semctl(int first, int second, int third, u32 pad) return err; } +long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) +{ + struct compat_msgbuf __user *msgp = dest; + size_t msgsz; + + if (put_user(msg->m_type, &msgp->mtype)) + return -EFAULT; + + msgsz = (bufsz > msg->m_ts) ? 
msg->m_ts : bufsz; + if (store_msg(msgp->mtext, msg, msgsz)) + return -EFAULT; + return msgsz; +} + #ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC long compat_sys_semctl(int first, int second, int third, void __user *uptr) { @@ -337,10 +351,6 @@ long compat_sys_msgsnd(int first, int second, int third, void __user *uptr) long compat_sys_msgrcv(int first, int second, int msgtyp, int third, int version, void __user *uptr) { - struct compat_msgbuf __user *up; - long type; - int err; - if (first < 0) return -EINVAL; if (second < 0) @@ -348,23 +358,14 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third, if (!version) { struct compat_ipc_kludge ipck; - err = -EINVAL; if (!uptr) - goto out; - err = -EFAULT; + return -EINVAL; if (copy_from_user (&ipck, uptr, sizeof(ipck))) - goto out; + return -EFAULT; uptr = compat_ptr(ipck.msgp); msgtyp = ipck.msgtyp; } - up = uptr; - err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third); - if (err < 0) - goto out; - if (put_user(type, &up->mtype)) - err = -EFAULT; -out: - return err; + return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill); } #else long compat_sys_semctl(int semid, int semnum, int cmd, int arg) @@ -385,16 +386,8 @@ long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp, long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp, compat_ssize_t msgsz, long msgtyp, int msgflg) { - long err, mtype; - - err = do_msgrcv(msqid, &mtype, msgp->mtext, (ssize_t)msgsz, msgtyp, msgflg); - if (err < 0) - goto out; - - if (put_user(mtype, &msgp->mtype)) - err = -EFAULT; - out: - return err; + return do_msgrcv(msqid, msgp, (ssize_t)msgsz, msgtyp, msgflg, + compat_do_msg_fill); } #endif diff --git a/ipc/msg.c b/ipc/msg.c index 2f272fa76595..cefc24f46e3e 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -755,15 +755,30 @@ static inline int convert_mode(long *msgtyp, int msgflg) return SEARCH_EQUAL; } -long do_msgrcv(int msqid, long *pmtype, void __user *mtext, - size_t msgsz, long 
msgtyp, int msgflg) +static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) +{ + struct msgbuf __user *msgp = dest; + size_t msgsz; + + if (put_user(msg->m_type, &msgp->mtype)) + return -EFAULT; + + msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; + if (store_msg(msgp->mtext, msg, msgsz)) + return -EFAULT; + return msgsz; +} + +long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, + int msgflg, + long (*msg_handler)(void __user *, struct msg_msg *, size_t)) { struct msg_queue *msq; struct msg_msg *msg; int mode; struct ipc_namespace *ns; - if (msqid < 0 || (long) msgsz < 0) + if (msqid < 0 || (long) bufsz < 0) return -EINVAL; mode = convert_mode(&msgtyp, msgflg); ns = current->nsproxy->ipc_ns; @@ -804,7 +819,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext, * Found a suitable message. * Unlink it from the queue. */ - if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { + if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { msg = ERR_PTR(-E2BIG); goto out_unlock; } @@ -831,7 +846,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext, if (msgflg & MSG_NOERROR) msr_d.r_maxsize = INT_MAX; else - msr_d.r_maxsize = msgsz; + msr_d.r_maxsize = bufsz; msr_d.r_msg = ERR_PTR(-EAGAIN); current->state = TASK_INTERRUPTIBLE; msg_unlock(msq); @@ -894,29 +909,16 @@ out_unlock: if (IS_ERR(msg)) return PTR_ERR(msg); - msgsz = (msgsz > msg->m_ts) ? 
msg->m_ts : msgsz; - *pmtype = msg->m_type; - if (store_msg(mtext, msg, msgsz)) - msgsz = -EFAULT; - + bufsz = msg_handler(buf, msg, bufsz); free_msg(msg); - return msgsz; + return bufsz; } SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, long, msgtyp, int, msgflg) { - long err, mtype; - - err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); - if (err < 0) - goto out; - - if (put_user(mtype, &msgp->mtype)) - err = -EFAULT; -out: - return err; + return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); } #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 3a665531a3b7c2ad2c87903b24646be6916340e4 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:56 -0800 Subject: selftests: IPC message queue copy feature test This test can be used to check whether the kernel supports IPC message queue copy and restore features (required by CRIU project). Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 3 +- ipc/compat.c | 3 +- tools/testing/selftests/ipc/Makefile | 25 ++++ tools/testing/selftests/ipc/msgque.c | 246 +++++++++++++++++++++++++++++++++++ 4 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/ipc/Makefile create mode 100644 tools/testing/selftests/ipc/msgque.c (limited to 'include/linux') diff --git a/include/linux/msg.h b/include/linux/msg.h index fc5743a554e6..391af8d11cce 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -36,6 +36,7 @@ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, - long (*msg_fill)(void __user *, struct msg_msg *, size_t)); + long (*msg_fill)(void __user *, struct msg_msg *, + size_t)); #endif /* _LINUX_MSG_H */
diff --git a/ipc/compat.c b/ipc/compat.c index eb3ea16d2d1d..2547f29dcd1b 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -365,7 +365,8 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third, uptr = compat_ptr(ipck.msgp); msgtyp = ipck.msgtyp; } - return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill); + return do_msgrcv(first, uptr, second, msgtyp, third, + compat_do_msg_fill); } #else long compat_sys_semctl(int semid, int semnum, int cmd, int arg) diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile new file mode 100644 index 000000000000..5386fd7c43ae --- /dev/null +++ b/tools/testing/selftests/ipc/Makefile @@ -0,0 +1,25 @@ +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ifeq ($(ARCH),i386) + ARCH := X86 + CFLAGS := -DCONFIG_X86_32 -D__i386__ +endif +ifeq ($(ARCH),x86_64) + ARCH := X86 + CFLAGS := -DCONFIG_X86_64 -D__x86_64__ +endif + +CFLAGS += -I../../../../usr/include/ + +all: +ifeq ($(ARCH),X86) + gcc $(CFLAGS) msgque.c -o msgque_test +else + echo "Not an x86 target, can't build msgque selftest" +endif + +run_tests: all + ./msgque_test + +clean: + rm -fr ./msgque_test diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c new file mode 100644 index 000000000000..d66418237d21 --- /dev/null +++ b/tools/testing/selftests/ipc/msgque.c @@ -0,0 +1,246 @@ +#include +#include +#include +#include +#include +#include + +#define MAX_MSG_SIZE 32 + +struct msg1 { + int msize; + long mtype; + char mtext[MAX_MSG_SIZE]; +}; + +#define TEST_STRING "Test sysv5 msg" +#define MSG_TYPE 1 + +#define ANOTHER_TEST_STRING "Yet another test sysv5 msg" +#define ANOTHER_MSG_TYPE 26538 + +struct msgque_data { + key_t key; + int msq_id; + int qbytes; + int qnum; + int mode; + struct msg1 *messages; +}; + +int restore_queue(struct msgque_data *msgque) +{ + int fd, ret, id, i; + char buf[32]; + + fd = 
open("/proc/sys/kernel/msg_next_id", O_WRONLY); + if (fd == -1) { + printf("Failed to open /proc/sys/kernel/msg_next_id\n"); + return -errno; + } + sprintf(buf, "%d", msgque->msq_id); + + ret = write(fd, buf, strlen(buf)); + if (ret != strlen(buf)) { + printf("Failed to write to /proc/sys/kernel/msg_next_id\n"); + return -errno; + } + + id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL); + if (id == -1) { + printf("Failed to create queue\n"); + return -errno; + } + + if (id != msgque->msq_id) { + printf("Restored queue has wrong id (%d instead of %d)\n", + id, msgque->msq_id); + ret = -EFAULT; + goto destroy; + } + + for (i = 0; i < msgque->qnum; i++) { + if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype, + msgque->messages[i].msize, IPC_NOWAIT) != 0) { + printf("msgsnd failed (%m)\n"); + ret = -errno; + goto destroy; + }; + } + return 0; + +destroy: + if (msgctl(id, IPC_RMID, 0)) + printf("Failed to destroy queue: %d\n", -errno); + return ret; +} + +int check_and_destroy_queue(struct msgque_data *msgque) +{ + struct msg1 message; + int cnt = 0, ret; + + while (1) { + ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE, + 0, IPC_NOWAIT); + if (ret < 0) { + if (errno == ENOMSG) + break; + printf("Failed to read IPC message: %m\n"); + ret = -errno; + goto err; + } + if (ret != msgque->messages[cnt].msize) { + printf("Wrong message size: %d (expected %d)\n", ret, + msgque->messages[cnt].msize); + ret = -EINVAL; + goto err; + } + if (message.mtype != msgque->messages[cnt].mtype) { + printf("Wrong message type\n"); + ret = -EINVAL; + goto err; + } + if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) { + printf("Wrong message content\n"); + ret = -EINVAL; + goto err; + } + cnt++; + } + + if (cnt != msgque->qnum) { + printf("Wrong message number\n"); + ret = -EINVAL; + goto err; + } + + ret = 0; +err: + if (msgctl(msgque->msq_id, IPC_RMID, 0)) { + printf("Failed to destroy queue: %d\n", -errno); + return -errno; + } + return ret; +} + +int 
dump_queue(struct msgque_data *msgque) +{ + struct msqid64_ds ds; + int kern_id; + int i, ret; + + for (kern_id = 0; kern_id < 256; kern_id++) { + ret = msgctl(kern_id, MSG_STAT, &ds); + if (ret < 0) { + if (errno == -EINVAL) + continue; + printf("Failed to get stats for IPC queue with id %d\n", + kern_id); + return -errno; + } + + if (ret == msgque->msq_id) + break; + } + + msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum); + if (msgque->messages == NULL) { + printf("Failed to get stats for IPC queue\n"); + return -ENOMEM; + } + + msgque->qnum = ds.msg_qnum; + msgque->mode = ds.msg_perm.mode; + msgque->qbytes = ds.msg_qbytes; + + for (i = 0; i < msgque->qnum; i++) { + ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype, + MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY); + if (ret < 0) { + printf("Failed to copy IPC message: %m (%d)\n", errno); + return -errno; + } + msgque->messages[i].msize = ret; + } + return 0; +} + +int fill_msgque(struct msgque_data *msgque) +{ + struct msg1 msgbuf; + + msgbuf.mtype = MSG_TYPE; + memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING)); + if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING), + IPC_NOWAIT) != 0) { + printf("First message send failed (%m)\n"); + return -errno; + }; + + msgbuf.mtype = ANOTHER_MSG_TYPE; + memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING)); + if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING), + IPC_NOWAIT) != 0) { + printf("Second message send failed (%m)\n"); + return -errno; + }; + return 0; +} + +int main(int argc, char **argv) +{ + int msg, pid, err; + struct msgque_data msgque; + + msgque.key = ftok(argv[0], 822155650); + if (msgque.key == -1) { + printf("Can't make key\n"); + return -errno; + } + + msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666); + if (msgque.msq_id == -1) { + printf("Can't create queue\n"); + goto err_out; + } + + err = fill_msgque(&msgque); + if (err) { + printf("Failed to fill queue\n"); + goto 
err_destroy; + } + + err = dump_queue(&msgque); + if (err) { + printf("Failed to dump queue\n"); + goto err_destroy; + } + + err = check_and_destroy_queue(&msgque); + if (err) { + printf("Failed to check and destroy queue\n"); + goto err_out; + } + + err = restore_queue(&msgque); + if (err) { + printf("Failed to restore queue\n"); + goto err_destroy; + } + + err = check_and_destroy_queue(&msgque); + if (err) { + printf("Failed to test queue\n"); + goto err_out; + } + return 0; + +err_destroy: + if (msgctl(msgque.msq_id, IPC_RMID, 0)) { + printf("Failed to destroy queue: %d\n", -errno); + return -errno; + } +err_out: + return err; +} -- cgit v1.2.3 From a458431e176ddb27e8ef8b98c2a681b217337393 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 4 Jan 2013 15:35:08 -0800 Subject: mm: fix zone_watermark_ok_safe() accounting of isolated pages Commit 702d1a6e0766 ("memory-hotplug: fix kswapd looping forever problem") added an isolated pageblocks counter (nr_pageblock_isolate in struct zone) and used it to adjust free pages counter in zone_watermark_ok_safe() to prevent kswapd looping forever problem. Then later, commit 2139cbe627b8 ("cma: fix counting of isolated pages") fixed accounting of isolated pages in global free pages counter. It made the previous zone_watermark_ok_safe() fix unnecessary and potentially harmful (cause now isolated pages may be accounted twice making free pages counter incorrect). This patch removes the special isolated pageblocks counter altogether which fixes zone_watermark_ok_safe() free pages check. 
Reported-by: Tomasz Stanislawski Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Aaditya Kumar Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Marek Szyprowski Cc: Michal Nazarewicz Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 8 -------- mm/page_alloc.c | 27 --------------------------- mm/page_isolation.c | 26 ++------------------------ 3 files changed, 2 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4bec5be82cab..73b64a38b984 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -503,14 +503,6 @@ struct zone { * rarely used fields: */ const char *name; -#ifdef CONFIG_MEMORY_ISOLATION - /* - * the number of MIGRATE_ISOLATE *pageblock*. - * We need this for free page counting. Look at zone_watermark_ok_safe. - * It's protected by zone->lock - */ - int nr_pageblock_isolate; -#endif } ____cacheline_internodealigned_in_smp; typedef enum { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4ba5e37127fc..bc6cc0e913bd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -221,11 +221,6 @@ EXPORT_SYMBOL(nr_online_nodes); int page_group_by_mobility_disabled __read_mostly; -/* - * NOTE: - * Don't use set_pageblock_migratetype(page, MIGRATE_ISOLATE) directly. - * Instead, use {un}set_pageblock_isolate. 
- */ void set_pageblock_migratetype(struct page *page, int migratetype) { @@ -1655,20 +1650,6 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, return true; } -#ifdef CONFIG_MEMORY_ISOLATION -static inline unsigned long nr_zone_isolate_freepages(struct zone *zone) -{ - if (unlikely(zone->nr_pageblock_isolate)) - return zone->nr_pageblock_isolate * pageblock_nr_pages; - return 0; -} -#else -static inline unsigned long nr_zone_isolate_freepages(struct zone *zone) -{ - return 0; -} -#endif - bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, int classzone_idx, int alloc_flags) { @@ -1684,14 +1665,6 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); - /* - * If the zone has MIGRATE_ISOLATE type free pages, we should consider - * it. nr_zone_isolate_freepages is never accurate so kswapd might not - * sleep although it could do so. But this is more desirable for memory - * hotplug than sleeping which can cause a livelock in the direct - * reclaim path. 
- */ - free_pages -= nr_zone_isolate_freepages(z); return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, free_pages); } diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 9d2264ea4606..383bdbb98b04 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -8,28 +8,6 @@ #include #include "internal.h" -/* called while holding zone->lock */ -static void set_pageblock_isolate(struct page *page) -{ - if (get_pageblock_migratetype(page) == MIGRATE_ISOLATE) - return; - - set_pageblock_migratetype(page, MIGRATE_ISOLATE); - page_zone(page)->nr_pageblock_isolate++; -} - -/* called while holding zone->lock */ -static void restore_pageblock_isolate(struct page *page, int migratetype) -{ - struct zone *zone = page_zone(page); - if (WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) - return; - - BUG_ON(zone->nr_pageblock_isolate <= 0); - set_pageblock_migratetype(page, migratetype); - zone->nr_pageblock_isolate--; -} - int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages) { struct zone *zone; @@ -80,7 +58,7 @@ out: unsigned long nr_pages; int migratetype = get_pageblock_migratetype(page); - set_pageblock_isolate(page); + set_pageblock_migratetype(page, MIGRATE_ISOLATE); nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE); __mod_zone_freepage_state(zone, -nr_pages, migratetype); @@ -103,7 +81,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) goto out; nr_pages = move_freepages_block(zone, page, migratetype); __mod_zone_freepage_state(zone, nr_pages, migratetype); - restore_pageblock_isolate(page, migratetype); + set_pageblock_migratetype(page, migratetype); out: spin_unlock_irqrestore(&zone->lock, flags); } -- cgit v1.2.3 From 08c097fc3bb283299a6915a6a3795edab85979b1 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 9 Jan 2013 14:16:30 +0000 Subject: cred: Remove tgcred pointer from struct cred Commit 3a50597de863 ("KEYS: Make the session and process keyrings per-thread") 
removed the definition of the thread_group_cred structure, but left a now unused pointer in struct cred. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index abb2cd50f6b2..04421e825365 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -128,7 +128,6 @@ struct cred { struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ - struct thread_group_cred *tgcred; /* thread-group shared credentials */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ -- cgit v1.2.3