From 70a7d3cc1308a55104fbe505d76f2aca8a4cf53e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 22 Dec 2008 10:26:05 -0800 Subject: swiotlb: add hwdev to swiotlb_phys_to_bus() / swiotlb_sg_to_bus() Impact: extend functions with a (yet unused) parameter, update callsites Some architectures need it - in preparation for highmem swiotlb. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-swiotlb_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 242c3440687f..6cf8a816dc29 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -23,7 +23,7 @@ void *swiotlb_alloc(unsigned order, unsigned long nslabs) return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); } -dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr) +dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) { return paddr; } -- cgit v1.2.3 From 1da4f9894c243a9c58c505fd8f3e6cc0709a8825 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 28 Dec 2008 15:02:05 +0900 Subject: swiotlb: replace architecture-specific swiotlb.h with linux/swiotlb.h Impact: cleanup This replaces architecture-specific swiotlb.h (X86 and IA64) with linux/swiotlb.h. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/swiotlb.h | 39 +-------------------------------------- arch/x86/include/asm/swiotlb.h | 38 +------------------------------------- 2 files changed, 2 insertions(+), 75 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h index fb79423834d0..dcbaea7ce128 100644 --- a/arch/ia64/include/asm/swiotlb.h +++ b/arch/ia64/include/asm/swiotlb.h @@ -2,44 +2,7 @@ #define ASM_IA64__SWIOTLB_H #include - -/* SWIOTLB interface */ - -extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, - size_t size, int dir); -extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags); -extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_for_cpu(struct device *hwdev, - dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_for_device(struct device *hwdev, - dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev, - dma_addr_t dev_addr, - unsigned long offset, - size_t size, int dir); -extern void swiotlb_sync_single_range_for_device(struct device *hwdev, - dma_addr_t dev_addr, - unsigned long offset, - size_t size, int dir); -extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, - struct scatterlist *sg, int nelems, - int dir); -extern void swiotlb_sync_sg_for_device(struct device *hwdev, - struct scatterlist *sg, int nelems, - int dir); -extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); -extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); -extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); -extern void swiotlb_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); -extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); -extern void swiotlb_init(void); +#include extern int swiotlb_force; diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 51fb2c76ad74..b9e4e20174fb 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -1,46 +1,10 @@ #ifndef _ASM_X86_SWIOTLB_H #define _ASM_X86_SWIOTLB_H -#include +#include /* SWIOTLB interface */ -extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, - size_t size, int dir); -extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags); -extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_for_cpu(struct device *hwdev, - dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_for_device(struct device *hwdev, - dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev, - dma_addr_t dev_addr, - unsigned long offset, - size_t size, int dir); -extern void swiotlb_sync_single_range_for_device(struct device *hwdev, - dma_addr_t dev_addr, - unsigned long offset, - size_t size, int dir); -extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, - struct scatterlist *sg, int nelems, - int dir); -extern void swiotlb_sync_sg_for_device(struct device *hwdev, - struct scatterlist *sg, int nelems, - int dir); -extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); -extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); -extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); -extern void swiotlb_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); -extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); -extern void swiotlb_init(void); - extern int swiotlb_force; #ifdef CONFIG_SWIOTLB -- cgit v1.2.3 From 1c0fe6e3bda0464728c23c8d84aa47567e8b716c Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:38:59 -0800 Subject: mm: invoke oom-killer from page fault Rather than have the pagefault handler kill a process directly if it gets a VM_FAULT_OOM, have it call into the OOM killer. With increasingly sophisticated oom behaviour (cpusets, memory cgroups, oom killing throttling, oom priority adjustment or selective disabling, panic on oom, etc), it's silly to unconditionally kill the faulting process at page fault time. Create a hook for pagefault oom path to call into instead. Only converted x86 and uml so far. [akpm@linux-foundation.org: make __out_of_memory() static] [akpm@linux-foundation.org: fix comment] Signed-off-by: Nick Piggin Cc: Jeff Dike Acked-by: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/trap.c | 24 +++++-------- arch/x86/mm/fault.c | 24 ++++--------- include/linux/mm.h | 5 +++ mm/oom_kill.c | 94 +++++++++++++++++++++++++++++++++++---------------- 4 files changed, 84 insertions(+), 63 deletions(-) (limited to 'arch/x86') diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 44e490419495..7384d8accfe7 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -64,11 +64,10 @@ good_area: do { int fault; -survive: + fault = handle_mm_fault(mm, vma, address, is_write); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { - err = -ENOMEM; goto out_of_memory; } else if (fault & VM_FAULT_SIGBUS) { err = -EACCES; @@ -104,18 +103,14 @@ out: out_nosemaphore: return err; -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ out_of_memory: - if (is_global_init(current)) { - up_read(&mm->mmap_sem); - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - goto out; + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + up_read(&mm->mmap_sem); + pagefault_out_of_memory(); + return 0; } static void bad_segv(struct faultinfo fi, unsigned long ip) @@ -214,9 +209,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, si.si_addr = (void __user *)address; current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); - } else if (err == -ENOMEM) { - printk(KERN_INFO "VM: killing process %s\n", current->comm); - do_exit(SIGKILL); } else { BUG_ON(err != -EFAULT); si.si_signo = SIGSEGV; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 57ec8c86a877..9e268b6b204e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -667,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(in_atomic() || !mm)) goto bad_area_nosemaphore; -again: /* * When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -859,25 +858,14 @@ no_context: oops_end(flags, regs, sig); #endif -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ out_of_memory: + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - /* - * Re-lookup the vma - in theory the vma tree might - * have changed: - */ - goto again; - } - - printk("VM: killing process %s\n", tsk->comm); - if (error_code & PF_USER) - do_group_exit(SIGKILL); - goto no_context; + pagefault_out_of_memory(); + return; do_sigbus: up_read(&mm->mmap_sem); diff --git a/include/linux/mm.h b/include/linux/mm.h index aaa8b843be28..4a3d28c86443 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -717,6 +717,11 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +/* + * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. + */ +extern void pagefault_out_of_memory(void); + #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) extern void show_free_areas(void); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 558f9afe6e4e..c592965dab2f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -509,6 +509,69 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) spin_unlock(&zone_scan_mutex); } +/* + * Must be called with tasklist_lock held for read. + */ +static void __out_of_memory(gfp_t gfp_mask, int order) +{ + if (sysctl_oom_kill_allocating_task) { + oom_kill_process(current, gfp_mask, order, 0, NULL, + "Out of memory (oom_kill_allocating_task)"); + + } else { + unsigned long points; + struct task_struct *p; + +retry: + /* + * Rambo mode: Shoot down a process and hope it solves whatever + * issues we may have. + */ + p = select_bad_process(&points, NULL); + + if (PTR_ERR(p) == -1UL) + return; + + /* Found nothing?!?! Either we hang forever, or we panic. */ + if (!p) { + read_unlock(&tasklist_lock); + panic("Out of memory and no killable processes...\n"); + } + + if (oom_kill_process(p, gfp_mask, order, points, NULL, + "Out of memory")) + goto retry; + } +} + +/* + * pagefault handler calls into here because it is out of memory but + * doesn't know exactly how or why. + */ +void pagefault_out_of_memory(void) +{ + unsigned long freed = 0; + + blocking_notifier_call_chain(&oom_notify_list, 0, &freed); + if (freed > 0) + /* Got some memory back in the last second. */ + return; + + if (sysctl_panic_on_oom) + panic("out of memory from page fault. panic_on_oom is selected.\n"); + + read_lock(&tasklist_lock); + __out_of_memory(0, 0); /* unknown gfp_mask and order */ + read_unlock(&tasklist_lock); + + /* + * Give "p" a good chance of killing itself before we + * retry to allocate memory. + */ + if (!test_thread_flag(TIF_MEMDIE)) + schedule_timeout_uninterruptible(1); +} + /** * out_of_memory - kill the "best" process when we run out of memory * @zonelist: zonelist pointer @@ -522,8 +585,6 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) */ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { - struct task_struct *p; - unsigned long points = 0; unsigned long freed = 0; enum oom_constraint constraint; @@ -544,7 +605,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) switch (constraint) { case CONSTRAINT_MEMORY_POLICY: - oom_kill_process(current, gfp_mask, order, points, NULL, + oom_kill_process(current, gfp_mask, order, 0, NULL, "No available memory (MPOL_BIND)"); break; @@ -553,35 +614,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) panic("out of memory. panic_on_oom is selected\n"); /* Fall-through */ case CONSTRAINT_CPUSET: - if (sysctl_oom_kill_allocating_task) { - oom_kill_process(current, gfp_mask, order, points, NULL, - "Out of memory (oom_kill_allocating_task)"); - break; - } -retry: - /* - * Rambo mode: Shoot down a process and hope it solves whatever - * issues we may have. - */ - p = select_bad_process(&points, NULL); - - if (PTR_ERR(p) == -1UL) - goto out; - - /* Found nothing?!?! Either we hang forever, or we panic. */ - if (!p) { - read_unlock(&tasklist_lock); - panic("Out of memory and no killable processes...\n"); - } - - if (oom_kill_process(p, gfp_mask, order, points, NULL, - "Out of memory")) - goto retry; - + __out_of_memory(gfp_mask, order); break; } -out: read_unlock(&tasklist_lock); /* -- cgit v1.2.3 From c04fc586c1a480ba198f03ae7b6cbd7b57380b91 Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Tue, 6 Jan 2009 14:39:14 -0800 Subject: mm: show node to memory section relationship with symlinks in sysfs Show node to memory section relationship with symlinks in sysfs Add /sys/devices/system/node/nodeX/memoryY symlinks for all the memory sections located on nodeX. For example: /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 indicates that memory section 135 resides on node1. Also revises documentation to cover this change as well as updating Documentation/ABI/testing/sysfs-devices-memory to include descriptions of memory hotremove files 'phys_device', 'phys_index', and 'state' that were previously not described there. In addition to it always being a good policy to provide users with the maximum possible amount of physical location information for resources that can be hot-added and/or hot-removed, the following are some (but likely not all) of the user benefits provided by this change. Immediate: - Provides information needed to determine the specific node on which a defective DIMM is located. This will reduce system downtime when the node or defective DIMM is swapped out. - Prevents unintended onlining of a memory section that was previously offlined due to a defective DIMM. This could happen during node hot-add when the user or node hot-add assist script onlines _all_ offlined sections due to user or script inability to identify the specific memory sections located on the hot-added node. The consequences of reintroducing the defective memory could be ugly. - Provides information needed to vary the amount and distribution of memory on specific nodes for testing or debugging purposes. Future: - Will provide information needed to identify the memory sections that need to be offlined prior to physical removal of a specific node. Symlink creation during boot was tested on 2-node x86_64, 2-node ppc64, and 2-node ia64 systems. Symlink creation during physical memory hot-add tested on a 2-node x86_64 system. Signed-off-by: Gary Hade Signed-off-by: Badari Pulavarty Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-devices-memory | 51 +++++++++++- Documentation/memory-hotplug.txt | 16 +++- arch/ia64/mm/init.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 3 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 2 +- drivers/base/memory.c | 19 +++-- drivers/base/node.c | 103 +++++++++++++++++++++++++ include/linux/memory.h | 6 +- include/linux/memory_hotplug.h | 2 +- include/linux/node.h | 13 ++++ mm/memory_hotplug.c | 11 +-- 14 files changed, 209 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory index 7a16fe1e2270..9fe91c02ee40 100644 --- a/Documentation/ABI/testing/sysfs-devices-memory +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -6,7 +6,6 @@ Description: internal state of the kernel memory blocks. Files could be added or removed dynamically to represent hot-add/remove operations. - Users: hotplug memory add/remove tools https://w3.opensource.ibm.com/projects/powerpc-utils/ @@ -19,6 +18,56 @@ Description: This is useful for a user-level agent to determine identify removable sections of the memory before attempting potentially expensive hot-remove memory operation +Users: hotplug memory remove tools + https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/memory/memoryX/phys_device +Date: September 2008 +Contact: Badari Pulavarty +Description: + The file /sys/devices/system/memory/memoryX/phys_device + is read-only and is designed to show the name of physical + memory device. Implementation is currently incomplete. +What: /sys/devices/system/memory/memoryX/phys_index +Date: September 2008 +Contact: Badari Pulavarty +Description: + The file /sys/devices/system/memory/memoryX/phys_index + is read-only and contains the section ID in hexadecimal + which is equivalent to decimal X contained in the + memory section directory name. + +What: /sys/devices/system/memory/memoryX/state +Date: September 2008 +Contact: Badari Pulavarty +Description: + The file /sys/devices/system/memory/memoryX/state + is read-write. When read, it's contents show the + online/offline state of the memory section. When written, + root can toggle the the online/offline state of a removable + memory section (see removable file description above) + using the following commands. + # echo online > /sys/devices/system/memory/memoryX/state + # echo offline > /sys/devices/system/memory/memoryX/state + + For example, if /sys/devices/system/memory/memory22/removable + contains a value of 1 and + /sys/devices/system/memory/memory22/state contains the + string "online" the following command can be executed by + by root to offline that section. + # echo offline > /sys/devices/system/memory/memory22/state Users: hotplug memory remove tools https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/node/nodeX/memoryY +Date: September 2008 +Contact: Gary Hade +Description: + When CONFIG_NUMA is enabled + /sys/devices/system/node/nodeX/memoryY is a symbolic link that + points to the corresponding /sys/devices/system/memory/memoryY + memory section directory. For example, the following symbolic + link is created for memory section 9 on node0. + /sys/devices/system/node/node0/memory9 -> ../../memory/memory9 + diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 168117bd6ee8..4c2ecf537a4a 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -124,7 +124,7 @@ config options. This option can be kernel module too. -------------------------------- -3 sysfs files for memory hotplug +4 sysfs files for memory hotplug -------------------------------- All sections have their device information under /sys/devices/system/memory as @@ -138,11 +138,12 @@ For example, assume 1GiB section size. A device for a memory starting at (0x100000000 / 1Gib = 4) This device covers address range [0x100000000 ... 0x140000000) -Under each section, you can see 3 files. +Under each section, you can see 4 files. /sys/devices/system/memory/memoryXXX/phys_index /sys/devices/system/memory/memoryXXX/phys_device /sys/devices/system/memory/memoryXXX/state +/sys/devices/system/memory/memoryXXX/removable 'phys_index' : read-only and contains section id, same as XXX. 'state' : read-write @@ -150,10 +151,20 @@ Under each section, you can see 3 files. at write: user can specify "online", "offline" command 'phys_device': read-only: designed to show the name of physical memory device. This is not well implemented now. +'removable' : read-only: contains an integer value indicating + whether the memory section is removable or not + removable. A value of 1 indicates that the memory + section is removable and a value of 0 indicates that + it is not removable. NOTE: These directories/files appear after physical memory hotplug phase. +If CONFIG_NUMA is enabled the +/sys/devices/system/memory/memoryXXX memory section +directories can also be accessed via symbolic links located in +the /sys/devices/system/node/node* directories. For example: +/sys/devices/system/node/node0/memory9 -> ../../memory/memory9 -------------------------------- 4. Physical memory hot-add phase @@ -365,7 +376,6 @@ node if necessary. - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like sysctl or new control file. - showing memory section and physical device relationship. - - showing memory section and node relationship (maybe good for NUMA) - showing memory section is under ZONE_MOVABLE or not - test and make it better memory offlining. - support HugeTLB page migration and offlining. diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 054bcd9439aa..56e12903973c 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -692,7 +692,7 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdat = NODE_DATA(nid); zone = pgdat->node_zones + ZONE_NORMAL; - ret = __add_pages(zone, start_pfn, nr_pages); + ret = __add_pages(nid, zone, start_pfn, nr_pages); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 53b06ebb3f2f..f00f09a77f12 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -132,7 +132,7 @@ int arch_add_memory(int nid, u64 start, u64 size) /* this should work for most non-highmem platforms */ zone = pgdata->node_zones; - return __add_pages(zone, start_pfn, nr_pages); + return __add_pages(nid, zone, start_pfn, nr_pages); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 158b0d6d7046..f0258ca3b17e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -183,7 +183,7 @@ int arch_add_memory(int nid, u64 start, u64 size) rc = vmem_add_mapping(start, size); if (rc) return rc; - rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size)); + rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size)); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 6cbef8caeb56..3edf297c829b 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -311,7 +311,8 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdat = NODE_DATA(nid); /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(pgdat->node_zones + ZONE_NORMAL, start_pfn, nr_pages); + ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL, + start_pfn, nr_pages); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f99a6c6c432e..544d724caeee 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1079,7 +1079,7 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(zone, start_pfn, nr_pages); + return __add_pages(nid, zone, start_pfn, nr_pages); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9f7a0d24d42a..54c437e96541 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size) if (last_mapped_pfn > max_pfn_mapped) max_pfn_mapped = last_mapped_pfn; - ret = __add_pages(zone, start_pfn, nr_pages); + ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); return ret; diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 5260e9e0df48..989429cfed88 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -347,8 +347,9 @@ static inline int memory_probe_init(void) * section belongs to... */ -static int add_memory_block(unsigned long node_id, struct mem_section *section, - unsigned long state, int phys_device) +static int add_memory_block(int nid, struct mem_section *section, + unsigned long state, int phys_device, + enum mem_add_context context) { struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); int ret = 0; @@ -370,6 +371,10 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, ret = mem_create_simple_file(mem, phys_device); if (!ret) ret = mem_create_simple_file(mem, removable); + if (!ret) { + if (context == HOTPLUG) + ret = register_mem_sect_under_node(mem, nid); + } return ret; } @@ -382,7 +387,7 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, * * This could be made generic for all sysdev classes. */ -static struct memory_block *find_memory_block(struct mem_section *section) +struct memory_block *find_memory_block(struct mem_section *section) { struct kobject *kobj; struct sys_device *sysdev; @@ -411,6 +416,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, struct memory_block *mem; mem = find_memory_block(section); + unregister_mem_sect_under_nodes(mem); mem_remove_simple_file(mem, phys_index); mem_remove_simple_file(mem, state); mem_remove_simple_file(mem, phys_device); @@ -424,9 +430,9 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, * need an interface for the VM to add new memory regions, * but without onlining it. */ -int register_new_memory(struct mem_section *section) +int register_new_memory(int nid, struct mem_section *section) { - return add_memory_block(0, section, MEM_OFFLINE, 0); + return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG); } int unregister_memory_section(struct mem_section *section) @@ -458,7 +464,8 @@ int __init memory_dev_init(void) for (i = 0; i < NR_MEM_SECTIONS; i++) { if (!present_section_nr(i)) continue; - err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); + err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, + 0, BOOT); if (!ret) ret = err; } diff --git a/drivers/base/node.c b/drivers/base/node.c index 91636cd8b6c9..43fa90b837ee 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) return 0; } +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#define page_initialized(page) (page->lru.next) + +static int get_nid_for_pfn(unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid_within(pfn)) + return -1; + page = pfn_to_page(pfn); + if (!page_initialized(page)) + return -1; + return pfn_to_nid(pfn); +} + +/* register memory section under specified node if it spans that node */ +int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) +{ + unsigned long pfn, sect_start_pfn, sect_end_pfn; + + if (!mem_blk) + return -EFAULT; + if (!node_online(nid)) + return 0; + sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + int page_nid; + + page_nid = get_nid_for_pfn(pfn); + if (page_nid < 0) + continue; + if (page_nid != nid) + continue; + return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, + &mem_blk->sysdev.kobj, + kobject_name(&mem_blk->sysdev.kobj)); + } + /* mem section does not span the specified node */ + return 0; +} + +/* unregister memory section under all nodes that it spans */ +int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) +{ + nodemask_t unlinked_nodes; + unsigned long pfn, sect_start_pfn, sect_end_pfn; + + if (!mem_blk) + return -EFAULT; + nodes_clear(unlinked_nodes); + sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + unsigned int nid; + + nid = get_nid_for_pfn(pfn); + if (nid < 0) + continue; + if (!node_online(nid)) + continue; + if (node_test_and_set(nid, unlinked_nodes)) + continue; + sysfs_remove_link(&node_devices[nid].sysdev.kobj, + kobject_name(&mem_blk->sysdev.kobj)); + } + return 0; +} + +static int link_mem_sections(int nid) +{ + unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; + unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; + unsigned long pfn; + int err = 0; + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + unsigned long section_nr = pfn_to_section_nr(pfn); + struct mem_section *mem_sect; + struct memory_block *mem_blk; + int ret; + + if (!present_section_nr(section_nr)) + continue; + mem_sect = __nr_to_section(section_nr); + mem_blk = find_memory_block(mem_sect); + ret = register_mem_sect_under_node(mem_blk, nid); + if (!err) + err = ret; + + /* discard ref obtained in find_memory_block() */ + kobject_put(&mem_blk->sysdev.kobj); + } + return err; +} +#else +static int link_mem_sections(int nid) { return 0; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ + int register_one_node(int nid) { int error = 0; @@ -267,6 +367,9 @@ int register_one_node(int nid) if (cpu_to_node(cpu) == nid) register_cpu_under_node(cpu, nid); } + + /* link memory sections under this node */ + error = link_mem_sections(nid); } return error; diff --git a/include/linux/memory.h b/include/linux/memory.h index 36c82c9e6ea7..3fdc10806d31 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -79,14 +79,14 @@ static inline int memory_notify(unsigned long val, void *v) #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); -extern int register_new_memory(struct mem_section *); +extern int register_new_memory(int, struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); +extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< Date: Tue, 6 Jan 2009 14:40:39 -0800 Subject: atomic_t: unify all arch definitions The atomic_t type cannot currently be used in some header files because it would create an include loop with asm/atomic.h. Move the type definition to linux/types.h to break the loop. Signed-off-by: Matthew Wilcox Cc: Huang Ying Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/atomic.h | 9 +-------- arch/arm/include/asm/atomic.h | 3 +-- arch/avr32/include/asm/atomic.h | 2 +- arch/blackfin/include/asm/atomic.h | 4 +--- arch/cris/include/asm/atomic.h | 4 +--- arch/h8300/include/asm/atomic.h | 3 ++- arch/ia64/include/asm/atomic.h | 6 ------ arch/m68knommu/include/asm/atomic.h | 2 +- arch/mips/include/asm/atomic.h | 5 +---- arch/parisc/include/asm/atomic.h | 11 +++-------- arch/powerpc/include/asm/atomic.h | 4 +--- arch/s390/include/asm/atomic.h | 7 +------ arch/sh/include/asm/atomic.h | 7 +++---- arch/sparc/include/asm/atomic_32.h | 2 -- arch/sparc/include/asm/atomic_64.h | 3 --- arch/x86/include/asm/atomic_32.h | 10 +--------- arch/x86/include/asm/atomic_64.h | 18 ++---------------- include/asm-frv/atomic.h | 4 ---- include/asm-m32r/atomic.h | 8 +------- include/asm-m68k/atomic.h | 3 +-- include/asm-mn10300/atomic.h | 9 --------- include/asm-xtensa/atomic.h | 3 +-- include/linux/types.h | 10 ++++++++++ 23 files changed, 33 insertions(+), 104 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index ca88e54dec93..62b363584b2b 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -1,6 +1,7 @@ #ifndef _ALPHA_ATOMIC_H #define _ALPHA_ATOMIC_H +#include #include #include @@ -13,14 +14,6 @@ */ -/* - * Counter is volatile to make sure gcc doesn't try to be clever - * and move things around on us. We need to use _exactly_ the address - * the user gave us, not some alias that contains the same information. - */ -typedef struct { volatile int counter; } atomic_t; -typedef struct { volatile long counter; } atomic64_t; - #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) #define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 325f881ccb50..ee99723b3a6c 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -12,10 +12,9 @@ #define __ASM_ARM_ATOMIC_H #include +#include #include -typedef struct { volatile int counter; } atomic_t; - #define ATOMIC_INIT(i) { (i) } #ifdef __KERNEL__ diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index 7ef3862a73d0..318815107748 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -14,9 +14,9 @@ #ifndef __ASM_AVR32_ATOMIC_H #define __ASM_AVR32_ATOMIC_H +#include #include -typedef struct { volatile int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 7cf508718605..25776c19064b 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -1,6 +1,7 @@ #ifndef __ARCH_BLACKFIN_ATOMIC__ #define __ARCH_BLACKFIN_ATOMIC__ +#include #include /* local_irq_XXX() */ /* @@ -13,9 +14,6 @@ * Tony Kou (tonyko@lineo.ca) Lineo Inc. 2001 */ -typedef struct { - int counter; -} atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h index f71ea686a2ea..5718dd8902a1 100644 --- a/arch/cris/include/asm/atomic.h +++ b/arch/cris/include/asm/atomic.h @@ -4,7 +4,7 @@ #define __ASM_CRIS_ATOMIC__ #include - +#include #include #include @@ -13,8 +13,6 @@ * resource counting etc.. */ -typedef struct { volatile int counter; } atomic_t; - #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index b4cf0ea97ede..833186c8dc3b 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -1,12 +1,13 @@ #ifndef __ARCH_H8300_ATOMIC__ #define __ARCH_H8300_ATOMIC__ +#include + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ -typedef struct { int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 50c2b83fd5a0..d37292bd9875 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -17,12 +17,6 @@ #include #include -/* - * On IA-64, counter must always be volatile to ensure that that the - * memory accesses are ordered. - */ -typedef struct { volatile __s32 counter; } atomic_t; -typedef struct { volatile __s64 counter; } atomic64_t; #define ATOMIC_INIT(i) ((atomic_t) { (i) }) #define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) diff --git a/arch/m68knommu/include/asm/atomic.h b/arch/m68knommu/include/asm/atomic.h index d5632a305dae..6bb674855a3f 100644 --- a/arch/m68knommu/include/asm/atomic.h +++ b/arch/m68knommu/include/asm/atomic.h @@ -1,6 +1,7 @@ #ifndef __ARCH_M68KNOMMU_ATOMIC__ #define __ARCH_M68KNOMMU_ATOMIC__ +#include #include /* @@ -12,7 +13,6 @@ * We do not have SMP m68k systems, so we don't have to deal with that. */ -typedef struct { int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 1232be3885b0..c996c3b4d074 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -15,13 +15,12 @@ #define _ASM_ATOMIC_H #include +#include #include #include #include #include -typedef struct { volatile int counter; } atomic_t; - #define ATOMIC_INIT(i) { (i) } /* @@ -404,8 +403,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #ifdef CONFIG_64BIT -typedef struct { volatile long counter; } atomic64_t; - #define ATOMIC64_INIT(i) { (i) } /* diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 57fcc4a5ebb4..edbfe25c5fc1 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -155,14 +155,11 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) #endif -/* Note that we need not lock read accesses - aligned word writes/reads - * are atomic, so a reader never sees unconsistent values. - * - * Cache-line alignment would conflict with, for example, linux/module.h +/* + * Note that we need not lock read accesses - aligned word writes/reads + * are atomic, so a reader never sees inconsistent values. */ -typedef struct { volatile int counter; } atomic_t; - /* It's possible to reduce all atomic operations to either * __atomic_add_return, atomic_set and atomic_read (the latter * is there only for consistency). @@ -260,8 +257,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #ifdef CONFIG_64BIT -typedef struct { volatile s64 counter; } atomic64_t; - #define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) static __inline__ int diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 499be5bdd6fa..b401950f5259 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -5,7 +5,7 @@ * PowerPC atomic operations */ -typedef struct { int counter; } atomic_t; +#include #ifdef __KERNEL__ #include @@ -251,8 +251,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v) #ifdef __powerpc64__ -typedef struct { long counter; } atomic64_t; - #define ATOMIC64_INIT(i) { (i) } static __inline__ long atomic64_read(const atomic64_t *v) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 2d184655bc5d..de432f2de2d2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -2,6 +2,7 @@ #define __ARCH_S390_ATOMIC__ #include +#include /* * include/asm-s390/atomic.h @@ -23,9 +24,6 @@ * S390 uses 'Compare And Swap' for atomicity in SMP enviroment */ -typedef struct { - int counter; -} __attribute__ ((aligned (4))) atomic_t; #define ATOMIC_INIT(i) { (i) } #ifdef __KERNEL__ @@ -149,9 +147,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #undef __CS_LOOP #ifdef __s390x__ -typedef struct { - long long counter; -} __attribute__ ((aligned (8))) atomic64_t; #define ATOMIC64_INIT(i) { (i) } #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index c043ef003028..6327ffbb1992 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -7,16 +7,15 @@ * */ -typedef struct { volatile int counter; } atomic_t; +#include +#include +#include #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) #define atomic_read(v) ((v)->counter) #define atomic_set(v,i) ((v)->counter = (i)) -#include -#include - #if defined(CONFIG_GUSA_RB) #include #elif defined(CONFIG_CPU_SH4A) diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 5c944b5a8040..ce465975a6a5 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -13,8 +13,6 @@ #include -typedef struct { volatile int counter; } atomic_t; - #ifdef __KERNEL__ #define ATOMIC_INIT(i) { (i) } diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 5982c5ae7f07..a0a706492696 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -10,9 +10,6 @@ #include #include -typedef struct { volatile int counter; } atomic_t; -typedef struct { volatile __s64 counter; } atomic64_t; - #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index ad5b9f6ecddf..85b46fba4229 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -2,6 +2,7 @@ #define _ASM_X86_ATOMIC_32_H #include +#include #include #include @@ -10,15 +11,6 @@ * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } /** diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h index 279d2a731f3f..8c21731984da 100644 --- a/arch/x86/include/asm/atomic_64.h +++ b/arch/x86/include/asm/atomic_64.h @@ -1,25 +1,15 @@ #ifndef _ASM_X86_ATOMIC_64_H #define _ASM_X86_ATOMIC_64_H +#include #include #include -/* atomic_t should be 32 bit signed type */ - /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } /** @@ -191,11 +181,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) -/* An 64bit atomic type */ - -typedef struct { - long counter; -} atomic64_t; +/* The 64-bit atomic type */ #define ATOMIC64_INIT(i) { (i) } diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h index 46d696b331e7..296c35cfb207 100644 --- a/include/asm-frv/atomic.h +++ b/include/asm-frv/atomic.h @@ -35,10 +35,6 @@ #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) #define atomic_set(v, i) (((v)->counter) = (i)) diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h index 3a38ffe4a4f4..2eed30f84080 100644 --- a/include/asm-m32r/atomic.h +++ b/include/asm-m32r/atomic.h @@ -9,6 +9,7 @@ * Copyright (C) 2004 Hirokazu Takata */ +#include #include #include @@ -17,13 +18,6 @@ * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { volatile int counter; } atomic_t; - #define ATOMIC_INIT(i) { (i) } /** diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h index 4915294fea63..eb0ab9d4ee77 100644 --- a/include/asm-m68k/atomic.h +++ b/include/asm-m68k/atomic.h @@ -1,7 +1,7 @@ #ifndef __ARCH_M68K_ATOMIC__ #define __ARCH_M68K_ATOMIC__ - +#include #include /* @@ -13,7 +13,6 @@ * We do not have SMP m68k systems, so we don't have to deal with that. */ -typedef struct { int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/include/asm-mn10300/atomic.h b/include/asm-mn10300/atomic.h index 27c9690b9574..bc064825f9b1 100644 --- a/include/asm-mn10300/atomic.h +++ b/include/asm-mn10300/atomic.h @@ -20,15 +20,6 @@ * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } #ifdef __KERNEL__ diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h index b3b23540f14d..67ad67bed8c1 100644 --- a/include/asm-xtensa/atomic.h +++ b/include/asm-xtensa/atomic.h @@ -14,8 +14,7 @@ #define _XTENSA_ATOMIC_H #include - -typedef struct { volatile int counter; } atomic_t; +#include #ifdef __KERNEL__ #include diff --git a/include/linux/types.h b/include/linux/types.h index 121f349cb7ec..3b864f2d9560 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -195,6 +195,16 @@ typedef u32 phys_addr_t; typedef phys_addr_t resource_size_t; +typedef struct { + volatile int counter; +} atomic_t; + +#ifdef CONFIG_64BIT +typedef struct { + volatile long counter; +} atomic64_t; +#endif + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; -- cgit v1.2.3 From f1883f86dea84fe47a71a39fc1afccc005915ed8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Jan 2009 14:40:45 -0800 Subject: Remove remaining unwinder code Signed-off-by: Alexey Dobriyan Cc: Gabor Gombas Cc: Jan Beulich Cc: Andi Kleen Cc: Ingo Molnar , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/unwind.h | 13 --------- arch/x86/kernel/traps.c | 2 -- include/linux/module.h | 3 -- include/linux/unwind.h | 68 ------------------------------------------- init/main.c | 3 -- kernel/module.c | 15 ---------- lib/fault-inject.c | 1 - 7 files changed, 105 deletions(-) delete mode 100644 arch/x86/include/asm/unwind.h delete mode 100644 include/linux/unwind.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h deleted file mode 100644 index 8b064bd9c553..000000000000 --- a/arch/x86/include/asm/unwind.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _ASM_X86_UNWIND_H -#define _ASM_X86_UNWIND_H - -#define UNW_PC(frame) ((void)(frame), 0UL) -#define UNW_SP(frame) ((void)(frame), 0UL) -#define UNW_FP(frame) ((void)(frame), 0UL) - -static inline int arch_unw_user_mode(const void *info) -{ - return 0; -} - -#endif /* _ASM_X86_UNWIND_H */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ce6650eb64e9..c9a666cdd3db 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -51,7 +50,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/module.h b/include/linux/module.h index 3bfed013350b..03cb93d1865a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -294,9 +294,6 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; - /* The handle returned from unwind_add_table. */ - void *unwind_info; - /* Arch-specific module values */ struct mod_arch_specific arch; diff --git a/include/linux/unwind.h b/include/linux/unwind.h deleted file mode 100644 index 7760860fa170..000000000000 --- a/include/linux/unwind.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _LINUX_UNWIND_H -#define _LINUX_UNWIND_H - -/* - * Copyright (C) 2002-2006 Novell, Inc. - * Jan Beulich - * This code is released under version 2 of the GNU GPL. - * - * A simple API for unwinding kernel stacks. This is used for - * debugging and error reporting purposes. The kernel doesn't need - * full-blown stack unwinding with all the bells and whistles, so there - * is not much point in implementing the full Dwarf2 unwind API. - */ - -struct module; - -struct unwind_frame_info {}; - -static inline void unwind_init(void) {} -static inline void unwind_setup(void) {} - -#ifdef CONFIG_MODULES - -static inline void *unwind_add_table(struct module *mod, - const void *table_start, - unsigned long table_size) -{ - return NULL; -} - -static inline void unwind_remove_table(void *handle, int init_only) -{ -} - -#endif - -static inline int unwind_init_frame_info(struct unwind_frame_info *info, - struct task_struct *tsk, - const struct pt_regs *regs) -{ - return -ENOSYS; -} - -static inline int unwind_init_blocked(struct unwind_frame_info *info, - struct task_struct *tsk) -{ - return -ENOSYS; -} - -static inline int unwind_init_running(struct unwind_frame_info *info, - asmlinkage int (*cb)(struct unwind_frame_info *, - void *arg), - void *arg) -{ - return -ENOSYS; -} - -static inline int unwind(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -static inline int unwind_to_user(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -#endif /* _LINUX_UNWIND_H */ diff --git a/init/main.c b/init/main.c index 90926dadc20d..e119dd28dd7d 100644 --- a/init/main.c +++ b/init/main.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -537,7 +536,6 @@ asmlinkage void __init start_kernel(void) * Need to run as early as possible, to initialize the * lockdep hash: */ - unwind_init(); lockdep_init(); debug_objects_early_init(); cgroup_init_early(); @@ -559,7 +557,6 @@ asmlinkage void __init start_kernel(void) setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); - unwind_setup(); setup_per_cpu_areas(); setup_nr_cpu_ids(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ diff --git a/kernel/module.c b/kernel/module.c index f47cce910f25..34b56cf06615 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -1449,8 +1448,6 @@ static void free_module(struct module *mod) remove_sect_attrs(mod); mod_kobject_remove(mod); - unwind_remove_table(mod->unwind_info, 0); - /* Arch-specific cleanup. */ module_arch_cleanup(mod); @@ -1867,7 +1864,6 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; - unsigned int unwindex = 0; unsigned int num_kp, num_mcount; struct kernel_param *kp; struct module *mod; @@ -1957,9 +1953,6 @@ static noinline struct module *load_module(void __user *umod, versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); -#ifdef ARCH_UNWIND_SECTION_NAME - unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); -#endif /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -1969,8 +1962,6 @@ static noinline struct module *load_module(void __user *umod, sechdrs[symindex].sh_flags |= SHF_ALLOC; sechdrs[strindex].sh_flags |= SHF_ALLOC; #endif - if (unwindex) - sechdrs[unwindex].sh_flags |= SHF_ALLOC; /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2267,11 +2258,6 @@ static noinline struct module *load_module(void __user *umod, add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); - /* Size of section 0 is 0, so this works well if no unwind info. */ - mod->unwind_info = unwind_add_table(mod, - (void *)sechdrs[unwindex].sh_addr, - sechdrs[unwindex].sh_size); - /* Get rid of temporary copy */ vfree(hdr); @@ -2370,7 +2356,6 @@ sys_init_module(void __user *umod, mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); - unwind_remove_table(mod->unwind_info, 1); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; diff --git a/lib/fault-inject.c b/lib/fault-inject.c index a50a311554cc..f97af55bdd96 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 129415607845d4daea11ddcba706005c69dcb942 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 14:41:50 -0800 Subject: kprobes: add kprobe_insn_mutex and cleanup arch_remove_kprobe() Add kprobe_insn_mutex for protecting kprobe_insn_pages hlist, and remove kprobe_mutex from architecture dependent code. This allows us to call arch_remove_kprobe() (and free_insn_slot) while holding kprobe_mutex. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Russell King Cc: "Luck, Tony" Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/kprobes.c | 2 -- arch/ia64/kernel/kprobes.c | 8 +++++--- arch/powerpc/kernel/kprobes.c | 7 ++++--- arch/s390/kernel/kprobes.c | 7 ++++--- arch/x86/kernel/kprobes.c | 7 ++++--- include/linux/kprobes.h | 1 - kernel/kprobes.c | 25 +++++++++++++++++++++---- 7 files changed, 38 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 3f9abe0e9aff..f692efddd449 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -92,9 +92,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { - mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn, 0); - mutex_unlock(&kprobe_mutex); p->ainsn.insn = NULL; } } diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index f07688da947c..097b84d54e73 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -670,9 +670,11 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); - mutex_unlock(&kprobe_mutex); + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, + p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); + p->ainsn.insn = NULL; + } } /* * We are resuming execution after a single step fault, so the pt_regs diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index de79915452c8..989edcdf0297 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -96,9 +96,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, 0); - mutex_unlock(&kprobe_mutex); + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } } static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 569079ec4ff0..9b92856632cf 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -218,9 +218,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, 0); - mutex_unlock(&kprobe_mutex); + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } } static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 6c27679ec6aa..eead6f8f9218 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -376,9 +376,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); - mutex_unlock(&kprobe_mutex); + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); + p->ainsn.insn = NULL; + } } static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 497b1d1f7a05..b93e44ce2284 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -201,7 +201,6 @@ static inline int init_test_probes(void) } #endif /* CONFIG_KPROBES_SANITY_TEST */ -extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3afd354c46f1..29e87921437d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -69,7 +69,7 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; /* NOTE: change this value only with kprobe_mutex held */ static bool kprobe_enabled; -DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ +static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; static struct { spinlock_t lock ____cacheline_aligned_in_smp; @@ -115,6 +115,7 @@ enum kprobe_slot_state { SLOT_USED = 2, }; +static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */ static struct hlist_head kprobe_insn_pages; static int kprobe_garbage_slots; static int collect_garbage_slots(void); @@ -144,10 +145,10 @@ loop_end: } /** - * get_insn_slot() - Find a slot on an executable page for an instruction. + * __get_insn_slot() - Find a slot on an executable page for an instruction. * We allocate an executable page if there's no room on existing ones. */ -kprobe_opcode_t __kprobes *get_insn_slot(void) +static kprobe_opcode_t __kprobes *__get_insn_slot(void) { struct kprobe_insn_page *kip; struct hlist_node *pos; @@ -196,6 +197,15 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) return kip->insns; } +kprobe_opcode_t __kprobes *get_insn_slot(void) +{ + kprobe_opcode_t *ret; + mutex_lock(&kprobe_insn_mutex); + ret = __get_insn_slot(); + mutex_unlock(&kprobe_insn_mutex); + return ret; +} + /* Return 1 if all garbages are collected, otherwise 0. */ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) { @@ -226,9 +236,13 @@ static int __kprobes collect_garbage_slots(void) { struct kprobe_insn_page *kip; struct hlist_node *pos, *next; + int safety; /* Ensure no-one is preepmted on the garbages */ - if (check_safety() != 0) + mutex_unlock(&kprobe_insn_mutex); + safety = check_safety(); + mutex_lock(&kprobe_insn_mutex); + if (safety != 0) return -EAGAIN; hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { @@ -251,6 +265,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) struct kprobe_insn_page *kip; struct hlist_node *pos; + mutex_lock(&kprobe_insn_mutex); hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { if (kip->insns <= slot && slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { @@ -267,6 +282,8 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE) collect_garbage_slots(); + + mutex_unlock(&kprobe_insn_mutex); } #endif -- cgit v1.2.3 From 5d30a683888c60b8f93bef3ddc139d1a91ca58f4 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 6 Jan 2009 14:56:28 -0800 Subject: x86: introduce asm/swab.h Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/byteorder.h | 62 ++-------------------------------------- arch/x86/include/asm/swab.h | 61 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 60 deletions(-) create mode 100644 arch/x86/include/asm/swab.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa5..a9f8a814a1f7 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -22,3 +22,4 @@ unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += vm86.h unifdef-y += vsyscall.h +unifdef-y += swab.h diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h index f110ad417df3..7c49917e3d9d 100644 --- a/arch/x86/include/asm/byteorder.h +++ b/arch/x86/include/asm/byteorder.h @@ -1,65 +1,7 @@ #ifndef _ASM_X86_BYTEORDER_H #define _ASM_X86_BYTEORDER_H -#include -#include - -#define __LITTLE_ENDIAN - -static inline __attribute_const__ __u32 __arch_swab32(__u32 val) -{ -#ifdef __i386__ -# ifdef CONFIG_X86_BSWAP - asm("bswap %0" : "=r" (val) : "0" (val)); -# else - asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ - "rorl $16,%0\n\t" /* swap words */ - "xchgb %b0,%h0" /* swap higher bytes */ - : "=q" (val) - : "0" (val)); -# endif - -#else /* __i386__ */ - asm("bswapl %0" - : "=r" (val) - : "0" (val)); -#endif - return val; -} -#define __arch_swab32 __arch_swab32 - -static inline __attribute_const__ __u64 __arch_swab64(__u64 val) -{ -#ifdef __i386__ - union { - struct { - __u32 a; - __u32 b; - } s; - __u64 u; - } v; - v.u = val; -# ifdef CONFIG_X86_BSWAP - asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" - : "=r" (v.s.a), "=r" (v.s.b) - : "0" (v.s.a), "1" (v.s.b)); -# else - v.s.a = __arch_swab32(v.s.a); - v.s.b = __arch_swab32(v.s.b); - asm("xchgl %0,%1" - : "=r" (v.s.a), "=r" (v.s.b) - : "0" (v.s.a), "1" (v.s.b)); -# endif - return v.u; -#else /* __i386__ */ - asm("bswapq %0" - : "=r" (val) - : "0" (val)); - return val; -#endif -} -#define __arch_swab64 __arch_swab64 - -#include +#include +#include #endif /* _ASM_X86_BYTEORDER_H */ diff --git a/arch/x86/include/asm/swab.h b/arch/x86/include/asm/swab.h new file mode 100644 index 000000000000..306d4178ffc9 --- /dev/null +++ b/arch/x86/include/asm/swab.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_SWAB_H +#define _ASM_X86_SWAB_H + +#include +#include + +static inline __attribute_const__ __u32 __arch_swab32(__u32 val) +{ +#ifdef __i386__ +# ifdef CONFIG_X86_BSWAP + asm("bswap %0" : "=r" (val) : "0" (val)); +# else + asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ + "rorl $16,%0\n\t" /* swap words */ + "xchgb %b0,%h0" /* swap higher bytes */ + : "=q" (val) + : "0" (val)); +# endif + +#else /* __i386__ */ + asm("bswapl %0" + : "=r" (val) + : "0" (val)); +#endif + return val; +} +#define __arch_swab32 __arch_swab32 + +static inline __attribute_const__ __u64 __arch_swab64(__u64 val) +{ +#ifdef __i386__ + union { + struct { + __u32 a; + __u32 b; + } s; + __u64 u; + } v; + v.u = val; +# ifdef CONFIG_X86_BSWAP + asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" + : "=r" (v.s.a), "=r" (v.s.b) + : "0" (v.s.a), "1" (v.s.b)); +# else + v.s.a = __arch_swab32(v.s.a); + v.s.b = __arch_swab32(v.s.b); + asm("xchgl %0,%1" + : "=r" (v.s.a), "=r" (v.s.b) + : "0" (v.s.a), "1" (v.s.b)); +# endif + return v.u; +#else /* __i386__ */ + asm("bswapq %0" + : "=r" (val) + : "0" (val)); + return val; +#endif +} +#define __arch_swab64 __arch_swab64 + +#endif /* _ASM_X86_SWAB_H */ -- cgit v1.2.3