From 71a9048448de302d1e968f336de01060d02fae71 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 12 Jan 2011 16:59:35 -0800 Subject: include/linux/kernel.h: abs(): fix handling of 32-bit unsigneds on 64-bit Michal reports: In the framebuffer subsystem the abs() macro is often used as a part of the calculation of a Manhattan metric, which in turn is used as a measure of similarity between video modes. The arguments of abs() are sometimes unsigned numbers. This worked fine until commit a49c59c0 ("Make sure the value in abs() does not get truncated if it is greater than 2^32:) , which changed the definition of abs() to prevent truncation. As a result of this change, in the following piece of code: u32 a = 0, b = 1; u32 c = abs(a - b); 'c' will end up with a value of 0xffffffff instead of the expected 0x1. A problem caused by this change and visible by the end user is that framebuffer drivers relying on functions from modedb.c will fail to find high resolution video modes similar to that explicitly requested by the user if an exact match cannot be found (see e.g. Fix this by special-casing `long' types within abs(). This patch reduces x86_64 code size a bit - drivers/video/uvesafb.o shrunk by 15 bytes, presumably because it is doing abs() on 4-byte quantities, and expanding those to 8-byte longs adds code. testcase: #define oldabs(x) ({ \ long __x = (x); \ (__x < 0) ? -__x : __x; \ }) #define newabs(x) ({ \ long ret; \ if (sizeof(x) == sizeof(long)) { \ long __x = (x); \ ret = (__x < 0) ? -__x : __x; \ } else { \ int __x = (x); \ ret = (__x < 0) ? -__x : __x; \ } \ ret; \ }) typedef unsigned int u32; main() { u32 a = 0; u32 b = 1; u32 oldc = oldabs(a - b); u32 newc = newabs(a - b); printf("%u %u\n", oldc, newc); } akpm:/home/akpm> gcc t.c akpm:/home/akpm> ./a.out 4294967295 1 Reported-by: Michal Januszewski Cc: Rolf Eike Beer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d0fbc043de60..57dac7022b63 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -143,9 +143,22 @@ extern int _cond_resched(void); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -#define abs(x) ({ \ - long __x = (x); \ - (__x < 0) ? -__x : __x; \ +/* + * abs() handles unsigned and signed longs, ints, shorts and chars. For all + * input types abs() returns a signed long. + * abs() should not be used for 64-bit types (s64, u64, long long) - use abs64() + * for those. + */ +#define abs(x) ({ \ + long ret; \ + if (sizeof(x) == sizeof(long)) { \ + long __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } else { \ + int __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } \ + ret; \ }) #define abs64(x) ({ \ -- cgit v1.2.3 From 3e7d344970673c5334cf7b5bb27c8c0942b06126 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 13 Jan 2011 15:45:56 -0800 Subject: mm: vmscan: reclaim order-0 and use compaction instead of lumpy reclaim Lumpy reclaim is disruptive. It reclaims a large number of pages and ignores the age of the pages it reclaims. This can incur significant stalls and potentially increase the number of major faults. Compaction has reached the point where it is considered reasonably stable (meaning it has passed a lot of testing) and is a potential candidate for displacing lumpy reclaim. This patch introduces an alternative to lumpy reclaim whe compaction is available called reclaim/compaction. The basic operation is very simple - instead of selecting a contiguous range of pages to reclaim, a number of order-0 pages are reclaimed and then compaction is later by either kswapd (compact_zone_order()) or direct compaction (__alloc_pages_direct_compact()). [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: use conventional task_struct naming] Signed-off-by: Mel Gorman Cc: Andrea Arcangeli Cc: KOSAKI Motohiro Cc: Rik van Riel Acked-by: Johannes Weiner Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 14 +++++++ include/linux/kernel.h | 7 ++++ mm/compaction.c | 89 ++++++++++++++++++++++++--------------- mm/migrate.c | 17 ++++++++ mm/page_alloc.c | 16 +++++++ mm/vmscan.c | 102 ++++++++++++++++++++++++++++++++++++++------- 6 files changed, 196 insertions(+), 49 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 5ac51552d908..2592883d862d 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,6 +22,9 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); +extern unsigned long compaction_suitable(struct zone *zone, int order); +extern unsigned long compact_zone_order(struct zone *zone, int order, + gfp_t gfp_mask); /* Do not skip compaction more than 64 times */ #define COMPACT_MAX_DEFER_SHIFT 6 @@ -59,6 +62,17 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, return COMPACT_CONTINUE; } +static inline unsigned long compaction_suitable(struct zone *zone, int order) +{ + return COMPACT_SKIPPED; +} + +static inline unsigned long compact_zone_order(struct zone *zone, int order, + gfp_t gfp_mask) +{ + return 0; +} + static inline void defer_compaction(struct zone *zone) { } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 57dac7022b63..5a9d9059520b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -600,6 +600,13 @@ struct sysinfo { #define NUMA_BUILD 0 #endif +/* This helps us avoid #ifdef CONFIG_COMPACTION */ +#ifdef CONFIG_COMPACTION +#define COMPACTION_BUILD 1 +#else +#define COMPACTION_BUILD 0 +#endif + /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD diff --git a/mm/compaction.c b/mm/compaction.c index 20011a850fef..8fe917ec7c11 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -384,10 +384,62 @@ static int compact_finished(struct zone *zone, return COMPACT_CONTINUE; } +/* + * compaction_suitable: Is this suitable to run compaction on this zone now? + * Returns + * COMPACT_SKIPPED - If there are too few free pages for compaction + * COMPACT_PARTIAL - If the allocation would succeed without compaction + * COMPACT_CONTINUE - If compaction should run now + */ +unsigned long compaction_suitable(struct zone *zone, int order) +{ + int fragindex; + unsigned long watermark; + + /* + * Watermarks for order-0 must be met for compaction. Note the 2UL. + * This is because during migration, copies of pages need to be + * allocated and for a short time, the footprint is higher + */ + watermark = low_wmark_pages(zone) + (2UL << order); + if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) + return COMPACT_SKIPPED; + + /* + * fragmentation index determines if allocation failures are due to + * low memory or external fragmentation + * + * index of -1 implies allocations might succeed dependingon watermarks + * index towards 0 implies failure is due to lack of memory + * index towards 1000 implies failure is due to fragmentation + * + * Only compact if a failure would be due to fragmentation. + */ + fragindex = fragmentation_index(zone, order); + if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold) + return COMPACT_SKIPPED; + + if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) + return COMPACT_PARTIAL; + + return COMPACT_CONTINUE; +} + static int compact_zone(struct zone *zone, struct compact_control *cc) { int ret; + ret = compaction_suitable(zone, cc->order); + switch (ret) { + case COMPACT_PARTIAL: + case COMPACT_SKIPPED: + /* Compaction is likely to fail */ + return ret; + case COMPACT_CONTINUE: + /* Fall through to compaction */ + ; + } + /* Setup to move all movable pages to the end of the zone */ cc->migrate_pfn = zone->zone_start_pfn; cc->free_pfn = cc->migrate_pfn + zone->spanned_pages; @@ -429,7 +481,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) return ret; } -static unsigned long compact_zone_order(struct zone *zone, +unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask) { struct compact_control cc = { @@ -462,7 +514,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; int may_perform_io = gfp_mask & __GFP_IO; - unsigned long watermark; struct zoneref *z; struct zone *zone; int rc = COMPACT_SKIPPED; @@ -480,43 +531,13 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, /* Compact each zone in the list */ for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, nodemask) { - int fragindex; int status; - /* - * Watermarks for order-0 must be met for compaction. Note - * the 2UL. This is because during migration, copies of - * pages need to be allocated and for a short time, the - * footprint is higher - */ - watermark = low_wmark_pages(zone) + (2UL << order); - if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) - continue; - - /* - * fragmentation index determines if allocation failures are - * due to low memory or external fragmentation - * - * index of -1 implies allocations might succeed depending - * on watermarks - * index towards 0 implies failure is due to lack of memory - * index towards 1000 implies failure is due to fragmentation - * - * Only compact if a failure would be due to fragmentation. - */ - fragindex = fragmentation_index(zone, order); - if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold) - continue; - - if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) { - rc = COMPACT_PARTIAL; - break; - } - status = compact_zone_order(zone, order, gfp_mask); rc = max(status, rc); - if (zone_watermark_ok(zone, order, watermark, 0, 0)) + /* If a normal allocation would succeed, stop compacting */ + if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) break; } diff --git a/mm/migrate.c b/mm/migrate.c index 6ae8a66a7045..94875b265928 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -639,6 +639,23 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, if (!trylock_page(page)) { if (!force) goto move_newpage; + + /* + * It's not safe for direct compaction to call lock_page. + * For example, during page readahead pages are added locked + * to the LRU. Later, when the IO completes the pages are + * marked uptodate and unlocked. However, the queueing + * could be merging multiple pages for one bio (e.g. + * mpage_readpages). If an allocation happens for the + * second or third page, the process can end up locking + * the same page twice and deadlocking. Rather than + * trying to be clever about what pages can be locked, + * avoid the use of lock_page for direct compaction + * altogether. + */ + if (current->flags & PF_MEMALLOC) + goto move_newpage; + lock_page(page); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 22a1bb7723e4..03a66a31bfcd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1815,12 +1815,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, int migratetype, unsigned long *did_some_progress) { struct page *page; + struct task_struct *tsk = current; if (!order || compaction_deferred(preferred_zone)) return NULL; + tsk->flags |= PF_MEMALLOC; *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, nodemask); + tsk->flags &= ~PF_MEMALLOC; if (*did_some_progress != COMPACT_SKIPPED) { /* Page migration frees to the PCP lists but we want merging */ @@ -2121,6 +2124,19 @@ rebalance: /* Wait for some write requests to complete then retry */ wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); goto rebalance; + } else { + /* + * High-order allocations do not necessarily loop after + * direct reclaim and reclaim/compaction depends on compaction + * being called after reclaim so call directly if necessary + */ + page = __alloc_pages_direct_compact(gfp_mask, order, + zonelist, high_zoneidx, + nodemask, + alloc_flags, preferred_zone, + migratetype, &did_some_progress); + if (page) + goto got_pg; } nopage: diff --git a/mm/vmscan.c b/mm/vmscan.c index 3464312bde07..10ebd74a423c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -59,12 +60,15 @@ * LUMPY_MODE_CONTIGRECLAIM: For high-order allocations, take a reference * page from the LRU and reclaim all pages within a * naturally aligned range + * LUMPY_MODE_COMPACTION: For high-order allocations, reclaim a number of + * order-0 pages and then compact the zone */ typedef unsigned __bitwise__ lumpy_mode; #define LUMPY_MODE_SINGLE ((__force lumpy_mode)0x01u) #define LUMPY_MODE_ASYNC ((__force lumpy_mode)0x02u) #define LUMPY_MODE_SYNC ((__force lumpy_mode)0x04u) #define LUMPY_MODE_CONTIGRECLAIM ((__force lumpy_mode)0x08u) +#define LUMPY_MODE_COMPACTION ((__force lumpy_mode)0x10u) struct scan_control { /* Incremented by the number of inactive pages that were scanned */ @@ -286,18 +290,20 @@ static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc, lumpy_mode syncmode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC; /* - * Some reclaim have alredy been failed. No worth to try synchronous - * lumpy reclaim. + * Initially assume we are entering either lumpy reclaim or + * reclaim/compaction.Depending on the order, we will either set the + * sync mode or just reclaim order-0 pages later. */ - if (sync && sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE) - return; + if (COMPACTION_BUILD) + sc->lumpy_reclaim_mode = LUMPY_MODE_COMPACTION; + else + sc->lumpy_reclaim_mode = LUMPY_MODE_CONTIGRECLAIM; /* - * If we need a large contiguous chunk of memory, or have - * trouble getting a small set of contiguous pages, we - * will reclaim both active and inactive pages. + * Avoid using lumpy reclaim or reclaim/compaction if possible by + * restricting when its set to either costly allocations or when + * under memory pressure */ - sc->lumpy_reclaim_mode = LUMPY_MODE_CONTIGRECLAIM; if (sc->order > PAGE_ALLOC_COSTLY_ORDER) sc->lumpy_reclaim_mode |= syncmode; else if (sc->order && priority < DEF_PRIORITY - 2) @@ -1385,8 +1391,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, if (scanning_global_lru(sc)) { nr_taken = isolate_pages_global(nr_to_scan, &page_list, &nr_scanned, sc->order, - sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE ? - ISOLATE_INACTIVE : ISOLATE_BOTH, + sc->lumpy_reclaim_mode & LUMPY_MODE_CONTIGRECLAIM ? + ISOLATE_BOTH : ISOLATE_INACTIVE, zone, 0, file); zone->pages_scanned += nr_scanned; if (current_is_kswapd()) @@ -1398,8 +1404,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, } else { nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list, &nr_scanned, sc->order, - sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE ? - ISOLATE_INACTIVE : ISOLATE_BOTH, + sc->lumpy_reclaim_mode & LUMPY_MODE_CONTIGRECLAIM ? + ISOLATE_BOTH : ISOLATE_INACTIVE, zone, sc->mem_cgroup, 0, file); /* @@ -1814,6 +1820,57 @@ out: } } +/* + * Reclaim/compaction depends on a number of pages being freed. To avoid + * disruption to the system, a small number of order-0 pages continue to be + * rotated and reclaimed in the normal fashion. However, by the time we get + * back to the allocator and call try_to_compact_zone(), we ensure that + * there are enough free pages for it to be likely successful + */ +static inline bool should_continue_reclaim(struct zone *zone, + unsigned long nr_reclaimed, + unsigned long nr_scanned, + struct scan_control *sc) +{ + unsigned long pages_for_compaction; + unsigned long inactive_lru_pages; + + /* If not in reclaim/compaction mode, stop */ + if (!(sc->lumpy_reclaim_mode & LUMPY_MODE_COMPACTION)) + return false; + + /* + * If we failed to reclaim and have scanned the full list, stop. + * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far + * faster but obviously would be less likely to succeed + * allocation. If this is desirable, use GFP_REPEAT to decide + * if both reclaimed and scanned should be checked or just + * reclaimed + */ + if (!nr_reclaimed && !nr_scanned) + return false; + + /* + * If we have not reclaimed enough pages for compaction and the + * inactive lists are large enough, continue reclaiming + */ + pages_for_compaction = (2UL << sc->order); + inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + if (sc->nr_reclaimed < pages_for_compaction && + inactive_lru_pages > pages_for_compaction) + return true; + + /* If compaction would go ahead or the allocation would succeed, stop */ + switch (compaction_suitable(zone, sc->order)) { + case COMPACT_PARTIAL: + case COMPACT_CONTINUE: + return false; + default: + return true; + } +} + /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ @@ -1823,9 +1880,12 @@ static void shrink_zone(int priority, struct zone *zone, unsigned long nr[NR_LRU_LISTS]; unsigned long nr_to_scan; enum lru_list l; - unsigned long nr_reclaimed = sc->nr_reclaimed; + unsigned long nr_reclaimed; unsigned long nr_to_reclaim = sc->nr_to_reclaim; + unsigned long nr_scanned = sc->nr_scanned; +restart: + nr_reclaimed = 0; get_scan_count(zone, sc, nr, priority); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -1851,8 +1911,7 @@ static void shrink_zone(int priority, struct zone *zone, if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) break; } - - sc->nr_reclaimed = nr_reclaimed; + sc->nr_reclaimed += nr_reclaimed; /* * Even if we did not try to evict anon pages at all, we want to @@ -1861,6 +1920,11 @@ static void shrink_zone(int priority, struct zone *zone, if (inactive_anon_is_low(zone, sc)) shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); + /* reclaim/compaction might need reclaim to continue */ + if (should_continue_reclaim(zone, nr_reclaimed, + sc->nr_scanned - nr_scanned, sc)) + goto restart; + throttle_vm_writeout(sc->gfp_mask); } @@ -2307,6 +2371,14 @@ loop_again: total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) sc.may_writepage = 1; + /* + * Compact the zone for higher orders to reduce + * latencies for higher-order allocations that + * would ordinarily call try_to_compact_pages() + */ + if (sc.order > PAGE_ALLOC_COSTLY_ORDER) + compact_zone_order(zone, sc.order, sc.gfp_mask); + if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), end_zone, 0)) { all_zones_ok = 0; -- cgit v1.2.3 From 2ce802f62ba32a7d95748ac92bf351f76affb6ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Jan 2011 12:06:35 +0100 Subject: lockdep: Move early boot local IRQ enable/disable status to init/main.c During early boot, local IRQ is disabled until IRQ subsystem is properly initialized. During this time, no one should enable local IRQ and some operations which usually are not allowed with IRQ disabled, e.g. operations which might sleep or require communications with other processors, are allowed. lockdep tracked this with early_boot_irqs_off/on() callbacks. As other subsystems need this information too, move it to init/main.c and make it generally available. While at it, toggle the boolean to early_boot_irqs_disabled instead of enabled so that it can be initialized with %false and %true indicates the exceptional condition. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Acked-by: Pekka Enberg Cc: Linus Torvalds LKML-Reference: <20110120110635.GB6036@htj.dyndns.org> Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 2 +- include/linux/kernel.h | 2 ++ include/linux/lockdep.h | 8 -------- init/main.c | 13 +++++++++++-- kernel/lockdep.c | 18 +----------------- kernel/trace/trace_irqsoff.c | 8 -------- 6 files changed, 15 insertions(+), 36 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7e8d3bc80af6..50542efe45fb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1194,7 +1194,7 @@ asmlinkage void __init xen_start_kernel(void) per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; local_irq_disable(); - early_boot_irqs_off(); + early_boot_irqs_disabled = true; memblock_init(); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5a9d9059520b..d07d8057e440 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -243,6 +243,8 @@ extern int test_taint(unsigned flag); extern unsigned long get_taint(void); extern int root_mountflags; +extern bool early_boot_irqs_disabled; + /* Values used for system_state */ extern enum system_states { SYSTEM_BOOTING, diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 71c09b26c759..f638fd78d106 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -436,16 +436,8 @@ do { \ #endif /* CONFIG_LOCKDEP */ #ifdef CONFIG_TRACE_IRQFLAGS -extern void early_boot_irqs_off(void); -extern void early_boot_irqs_on(void); extern void print_irqtrace_events(struct task_struct *curr); #else -static inline void early_boot_irqs_off(void) -{ -} -static inline void early_boot_irqs_on(void) -{ -} static inline void print_irqtrace_events(struct task_struct *curr) { } diff --git a/init/main.c b/init/main.c index 00799c1d4628..33c37c379e96 100644 --- a/init/main.c +++ b/init/main.c @@ -96,6 +96,15 @@ static inline void mark_rodata_ro(void) { } extern void tc_init(void); #endif +/* + * Debug helper: via this flag we know that we are in 'early bootup code' + * where only the boot processor is running with IRQ disabled. This means + * two things - IRQ must not be enabled before the flag is cleared and some + * operations which are not allowed with IRQ disabled are allowed while the + * flag is set. + */ +bool early_boot_irqs_disabled __read_mostly; + enum system_states system_state __read_mostly; EXPORT_SYMBOL(system_state); @@ -554,7 +563,7 @@ asmlinkage void __init start_kernel(void) cgroup_init_early(); local_irq_disable(); - early_boot_irqs_off(); + early_boot_irqs_disabled = true; /* * Interrupts are still disabled. Do necessary setups, then @@ -621,7 +630,7 @@ asmlinkage void __init start_kernel(void) if (!irqs_disabled()) printk(KERN_CRIT "start_kernel(): bug: interrupts were " "enabled early\n"); - early_boot_irqs_on(); + early_boot_irqs_disabled = false; local_irq_enable(); /* Interrupts are enabled now so all GFP allocations are safe. */ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 42ba65dff7d9..0d2058da80f5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2291,22 +2291,6 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) return 1; } -/* - * Debugging helper: via this flag we know that we are in - * 'early bootup code', and will warn about any invalid irqs-on event: - */ -static int early_boot_irqs_enabled; - -void early_boot_irqs_off(void) -{ - early_boot_irqs_enabled = 0; -} - -void early_boot_irqs_on(void) -{ - early_boot_irqs_enabled = 1; -} - /* * Hardirqs will be enabled: */ @@ -2319,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip) if (unlikely(!debug_locks || current->lockdep_recursion)) return; - if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled))) + if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) return; if (unlikely(curr->hardirqs_enabled)) { diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5cf8c602b880..92b6e1e12d98 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) * Stubs: */ -void early_boot_irqs_off(void) -{ -} - -void early_boot_irqs_on(void) -{ -} - void trace_softirqs_on(unsigned long ip) { } -- cgit v1.2.3 From 7ef88ad561457c0346355dfd1f53e503ddfde719 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 24 Jan 2011 14:45:10 -0600 Subject: BUILD_BUG_ON: make it handle more cases BUILD_BUG_ON used to use the optimizer to do code elimination or fail at link time; it was changed to first the size of a negative array (a nicer compile time error), then (in 8c87df457cb58fe75b9b893007917cf8095660a0) to a bitfield. This forced us to change some non-constant cases to MAYBE_BUILD_BUG_ON(); as Jan points out in that commit, it didn't work as intended anyway. bitfields: needs a literal constant at parse time, and can't be put under "if (__builtin_constant_p(x))" for example. negative array: can handle anything, but if the compiler can't tell it's a constant, silently has no effect. link time: breaks link if the compiler can't determine the value, but the linker output is not usually as informative as a compiler error. If we use the negative-array-size method *and* the link time trick, we get the ability to use BUILD_BUG_ON() under __builtin_constant_p() branches, and maximal ability for the compiler to detect errors at build time. We also document it thoroughly. Signed-off-by: Rusty Russell Cc: Jan Beulich Acked-by: Hollis Blanchard --- include/linux/kernel.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d07d8057e440..864712f3653d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -575,12 +575,6 @@ struct sysinfo { char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ }; -/* Force a compilation error if condition is true */ -#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) - -/* Force a compilation error if condition is constant and true */ -#define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) - /* Force a compilation error if a constant expression is not a power of 2 */ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) @@ -592,6 +586,33 @@ struct sysinfo { #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @cond: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + * + * The implementation uses gcc's reluctance to create a negative array, but + * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments + * to inline functions). So as a fallback we use the optimizer; if it can't + * prove the condition is false, it will cause a link error on the undefined + * "__build_bug_on_failed". This error message can be harder to track down + * though, hence the two different methods. + */ +#ifndef __OPTIMIZE__ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int __build_bug_on_failed; +#define BUILD_BUG_ON(condition) \ + do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) __build_bug_on_failed = 1; \ + } while(0) +#endif +#define MAYBE_BUILD_BUG_ON(condition) BUILD_BUG_ON(condition) + /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) -- cgit v1.2.3 From 1765e3a4933ea0870fabd755feffc5473c4363ce Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 24 Jan 2011 14:45:10 -0600 Subject: Remove MAYBE_BUILD_BUG_ON Now BUILD_BUG_ON() can handle optimizable constants, we don't need MAYBE_BUILD_BUG_ON any more. Signed-off-by: Rusty Russell --- include/linux/gfp.h | 2 +- include/linux/kernel.h | 1 - include/linux/kmemcheck.h | 2 +- include/linux/virtio_config.h | 5 ++++- 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a3b148a91874..0b84c61607e8 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -249,7 +249,7 @@ static inline enum zone_type gfp_zone(gfp_t flags) ((1 << ZONES_SHIFT) - 1); if (__builtin_constant_p(bit)) - MAYBE_BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); + BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); else { #ifdef CONFIG_DEBUG_VM BUG_ON((GFP_ZONE_BAD >> bit) & 1); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 864712f3653d..e2f4d6af2125 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -611,7 +611,6 @@ extern int __build_bug_on_failed; if (condition) __build_bug_on_failed = 1; \ } while(0) #endif -#define MAYBE_BUILD_BUG_ON(condition) BUILD_BUG_ON(condition) /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 08d7dc4ddf40..39f8453239f7 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -76,7 +76,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); \ _n = (long) &((ptr)->name##_end) \ - (long) &((ptr)->name##_begin); \ - MAYBE_BUILD_BUG_ON(_n < 0); \ + BUILD_BUG_ON(_n < 0); \ \ kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ } while (0) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 0093dd7c1d6f..800617b4ddd5 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -109,7 +109,10 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, unsigned int fbit) { /* Did you forget to fix assumptions on max features? */ - MAYBE_BUILD_BUG_ON(fbit >= 32); + if (__builtin_constant_p(fbit)) + BUILD_BUG_ON(fbit >= 32); + else + BUG_ON(fbit >= 32); if (fbit < VIRTIO_TRANSPORT_F_START) virtio_check_driver_offered_feature(vdev, fbit); -- cgit v1.2.3 From ffbbf2da9e578dc7b7ae4f945412c4b74f54b20e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 31 Jan 2011 09:29:14 -0800 Subject: kernel.h: fix kernel-doc warning Fix kernel-doc warning in kernel.h from commit 7ef88ad56145 ("BUILD_BUG_ON: make it handle more cases"): Warning(include/linux/kernel.h:605): No description found for parameter 'condition' Warning(include/linux/kernel.h:605): Excess function parameter 'cond' description in 'BUILD_BUG_ON' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e2f4d6af2125..2fe6e84894a4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -588,7 +588,7 @@ struct sysinfo { /** * BUILD_BUG_ON - break compile if a condition is true. - * @cond: the condition which the compiler should know is false. + * @condition: the condition which the compiler should know is false. * * If you have some code which relies on certain constants being equal, or * other compile-time-evaluated condition, you should use BUILD_BUG_ON to -- cgit v1.2.3