diff options
| author | Paul Mundt <lethal@linux-sh.org> | 2011-01-13 09:06:28 +0300 |
|---|---|---|
| committer | Paul Mundt <lethal@linux-sh.org> | 2011-01-13 09:06:28 +0300 |
| commit | f43dc23d5ea91fca257be02138a255f02d98e806 (patch) | |
| tree | b29722f6e965316e90ac97abf79923ced250dc21 /lib | |
| parent | f8e53553f452dcbf67cb89c8cba63a1cd6eb4cc0 (diff) | |
| parent | 4162cf64973df51fc885825bc9ca4d055891c49f (diff) | |
| download | linux-f43dc23d5ea91fca257be02138a255f02d98e806.tar.xz | |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into common/serial-rework
Conflicts:
arch/sh/kernel/cpu/sh2/setup-sh7619.c
arch/sh/kernel/cpu/sh2a/setup-mxg.c
arch/sh/kernel/cpu/sh2a/setup-sh7201.c
arch/sh/kernel/cpu/sh2a/setup-sh7203.c
arch/sh/kernel/cpu/sh2a/setup-sh7206.c
arch/sh/kernel/cpu/sh3/setup-sh7705.c
arch/sh/kernel/cpu/sh3/setup-sh770x.c
arch/sh/kernel/cpu/sh3/setup-sh7710.c
arch/sh/kernel/cpu/sh3/setup-sh7720.c
arch/sh/kernel/cpu/sh4/setup-sh4-202.c
arch/sh/kernel/cpu/sh4/setup-sh7750.c
arch/sh/kernel/cpu/sh4/setup-sh7760.c
arch/sh/kernel/cpu/sh4a/setup-sh7343.c
arch/sh/kernel/cpu/sh4a/setup-sh7366.c
arch/sh/kernel/cpu/sh4a/setup-sh7722.c
arch/sh/kernel/cpu/sh4a/setup-sh7723.c
arch/sh/kernel/cpu/sh4a/setup-sh7724.c
arch/sh/kernel/cpu/sh4a/setup-sh7763.c
arch/sh/kernel/cpu/sh4a/setup-sh7770.c
arch/sh/kernel/cpu/sh4a/setup-sh7780.c
arch/sh/kernel/cpu/sh4a/setup-sh7785.c
arch/sh/kernel/cpu/sh4a/setup-sh7786.c
arch/sh/kernel/cpu/sh4a/setup-shx3.c
arch/sh/kernel/cpu/sh5/setup-sh5.c
drivers/serial/sh-sci.c
drivers/serial/sh-sci.h
include/linux/serial_sci.h
Diffstat (limited to 'lib')
86 files changed, 7260 insertions, 1936 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index bb1326d3839c..3116aa631af6 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -7,6 +7,9 @@ config BINARY_PRINTF menu "Library routines" +config RAID6_PQ + tristate + config BITREVERSE tristate @@ -117,6 +120,10 @@ config DECOMPRESS_BZIP2 config DECOMPRESS_LZMA tristate +config DECOMPRESS_LZO + select LZO_DECOMPRESS + tristate + # # Generic allocator support is selected if needed # @@ -156,6 +163,9 @@ config TEXTSEARCH_BM config TEXTSEARCH_FSM tristate +config BTREE + boolean + config HAS_IOMEM boolean depends on !NO_IOMEM @@ -174,9 +184,6 @@ config HAS_DMA config CHECK_SIGNATURE bool -config HAVE_LMB - boolean - config CPUMASK_OFFSTACK bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS help @@ -200,4 +207,10 @@ config NLATTR config GENERIC_ATOMIC64 bool +config LRU_CACHE + tristate + +config AVERAGE + bool + endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 23067ab1a73c..2d05adb98401 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -50,6 +50,14 @@ config MAGIC_SYSRQ keys are documented in <file:Documentation/sysrq.txt>. Don't say Y unless you really know what this hack does. +config STRIP_ASM_SYMS + bool "Strip assembler-generated symbols during link" + default n + help + Strip internal assembler-generated symbols during a link (symbols + that look like '.Lxxx') so they don't pollute the output of + get_wchan() and suchlike. + config UNUSED_SYMBOLS bool "Enable unused/obsolete exported symbols" default y if X86 @@ -68,7 +76,6 @@ config UNUSED_SYMBOLS config DEBUG_FS bool "Debug Filesystem" - depends on SYSFS help debugfs is a virtual file system that kernel developers use to put debugging files into. Enable this option to be able to read and @@ -95,9 +102,10 @@ config HEADERS_CHECK config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" - depends on UNDEFINED + depends on UNDEFINED || (BLACKFIN) + default y # This option is on purpose disabled for now. - # It will be enabled when we are down to a resonable number + # It will be enabled when we are down to a reasonable number # of section mismatch warnings (< 10 for an allyesconfig build) help The section mismatch analysis checks if there are illegal @@ -143,28 +151,34 @@ config DEBUG_SHIRQ Drivers ought to be able to handle interrupts coming in at those points; some don't and need to be caught. -config DETECT_SOFTLOCKUP - bool "Detect Soft Lockups" +config LOCKUP_DETECTOR + bool "Detect Hard and Soft Lockups" depends on DEBUG_KERNEL && !S390 - default y help - Say Y here to enable the kernel to detect "soft lockups", - which are bugs that cause the kernel to loop in kernel + Say Y here to enable the kernel to act as a watchdog to detect + hard and soft lockups. + + Softlockups are bugs that cause the kernel to loop in kernel mode for more than 60 seconds, without giving other tasks a - chance to run. + chance to run. The current stack trace is displayed upon + detection and the system will stay locked up. - When a soft-lockup is detected, the kernel will print the - current stack trace (which you should report), but the - system will stay locked up. This feature has negligible - overhead. + Hardlockups are bugs that cause the CPU to loop in kernel mode + for more than 60 seconds, without letting other interrupts have a + chance to run. The current stack trace is displayed upon detection + and the system will stay locked up. - (Note that "hard lockups" are separate type of bugs that - can be detected via the NMI-watchdog, on platforms that - support it.) + The overhead should be minimal. A periodic hrtimer runs to + generate interrupts and kick the watchdog task every 10-12 seconds. + An NMI is generated every 60 seconds or so to check for hardlockups. + +config HARDLOCKUP_DETECTOR + def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ + !ARCH_HAS_NMI_WATCHDOG config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel @@ -181,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE int - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR range 0 1 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC @@ -290,6 +304,28 @@ config DEBUG_OBJECTS_TIMERS timer routines to track the life time of timer objects and validate the timer operations. +config DEBUG_OBJECTS_WORK + bool "Debug work objects" + depends on DEBUG_OBJECTS + help + If you say Y here, additional code will be inserted into the + work queue routines to track the life time of work objects and + validate the work operations. + +config DEBUG_OBJECTS_RCU_HEAD + bool "Debug RCU callbacks objects" + depends on DEBUG_OBJECTS && PREEMPT + help + Enable this to turn on debugging of RCU list heads (call_rcu() usage). + +config DEBUG_OBJECTS_PERCPU_COUNTER + bool "Debug percpu counter objects" + depends on DEBUG_OBJECTS + help + If you say Y here, additional code will be inserted into the + percpu counter routines to track the life time of percpu counter + objects and validate the percpu counter operations. + config DEBUG_OBJECTS_ENABLE_DEFAULT int "debug_objects bootup default value (0-1)" range 0 1 @@ -326,7 +362,7 @@ config SLUB_DEBUG_ON config SLUB_STATS default n bool "Enable SLUB performance statistics" - depends on SLUB && SLUB_DEBUG && SYSFS + depends on SLUB && SYSFS help SLUB statistics are useful to debug SLUBs allocation behavior in order find ways to optimize the allocator. This should never be @@ -338,13 +374,13 @@ config SLUB_STATS config DEBUG_KMEMLEAK bool "Kernel memory leak detector" - depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \ - !MEMORY_HOTPLUG - select DEBUG_SLAB if SLAB - select SLUB_DEBUG if SLUB + depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \ + (X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE) + select DEBUG_FS if SYSFS select STACKTRACE if STACKTRACE_SUPPORT select KALLSYMS + select CRC32 help Say Y here if you want to enable the memory leak detector. The memory allocation/freeing is traced in a way @@ -355,9 +391,24 @@ config DEBUG_KMEMLEAK allocations. See Documentation/kmemleak.txt for more details. + Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances + of finding leaks due to the slab objects poisoning. + In order to access the kmemleak file, debugfs needs to be mounted (usually at /sys/kernel/debug). +config DEBUG_KMEMLEAK_EARLY_LOG_SIZE + int "Maximum kmemleak early log entries" + depends on DEBUG_KMEMLEAK + range 200 40000 + default 400 + help + Kmemleak must track all the memory allocations to avoid + reporting false positives. Since memory may be allocated or + freed before kmemleak is initialised, an early log buffer is + used to store these actions. If kmemleak reports "early log + buffer exceeded", please increase this value. + config DEBUG_KMEMLEAK_TEST tristate "Simple test for the kernel memory leak detector" depends on DEBUG_KMEMLEAK @@ -368,9 +419,16 @@ config DEBUG_KMEMLEAK_TEST If unsure, say N. +config DEBUG_KMEMLEAK_DEFAULT_OFF + bool "Default kmemleak to off" + depends on DEBUG_KMEMLEAK + help + Say Y here to disable kmemleak by default. It can then be enabled + on the command line via kmemleak=on. + config DEBUG_PREEMPT bool "Debug preemptible kernel" - depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) + depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT default y help If you say Y here then the kernel will use a debug variant of the @@ -412,6 +470,15 @@ config DEBUG_MUTEXES This feature allows mutex semantics violations to be detected and reported. +config BKL + bool "Big Kernel Lock" if (SMP || PREEMPT) + default y + help + This is the traditional lock that is used in old code instead + of proper locking. All drivers that use the BKL should depend + on this symbol. + Say Y here unless you are working on removing the BKL. + config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -433,6 +500,7 @@ config PROVE_LOCKING select DEBUG_SPINLOCK select DEBUG_MUTEXES select DEBUG_LOCK_ALLOC + select TRACE_IRQFLAGS default n help This feature enables the kernel to prove that all locking @@ -468,11 +536,52 @@ config PROVE_LOCKING For more details, see Documentation/lockdep-design.txt. +config PROVE_RCU + bool "RCU debugging: prove RCU correctness" + depends on PROVE_LOCKING + default n + help + This feature enables lockdep extensions that check for correct + use of RCU APIs. This is currently under development. Say Y + if you want to debug RCU usage or help work on the PROVE_RCU + feature. + + Say N if you are unsure. + +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. + + Say Y to allow multiple RCU-lockdep warnings per boot. + + Say N if you are unsure. + +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. This annotation will cause sparse + to flag any non-RCU used of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + + Say N if you are unsure. + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL @@ -489,6 +598,14 @@ config LOCK_STAT For more details, see Documentation/lockstat.txt + This also enables lock events required by "perf lock", + subcommand of perf. + If you want to use "perf lock", you also need to turn on + CONFIG_EVENT_TRACING. + + CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. + (CONFIG_LOCKDEP defines "acquire" and "release" events.) + config DEBUG_LOCKDEP bool "Lock dependency engine debugging" depends on DEBUG_KERNEL && LOCKDEP @@ -498,11 +615,10 @@ config DEBUG_LOCKDEP of more runtime overhead. config TRACE_IRQFLAGS - depends on DEBUG_KERNEL bool - default y - depends on TRACE_IRQFLAGS_SUPPORT - depends on PROVE_LOCKING + help + Enables hooks to interrupt enabling and disabling for + either tracing or lock debugging. config DEBUG_SPINLOCK_SLEEP bool "Spinlock debugging: sleep-inside-spinlock checking" @@ -545,7 +661,7 @@ config DEBUG_BUGVERBOSE depends on BUG depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 - default !EMBEDDED + default y help Say Y here to make BUG() panics output the file name and line number of the BUG call as well as the EIP and oops trace. This aids @@ -564,6 +680,19 @@ config DEBUG_INFO If unsure, say N. +config DEBUG_INFO_REDUCED + bool "Reduce debugging information" + depends on DEBUG_INFO + help + If you say Y here gcc is instructed to generate less debugging + information for structure types. This means that tools that + need full debugging information (like kgdb or systemtap) won't + be happy. But if you merely need debugging information to + resolve line numbers there is no loss. Advantage is that + build directory object sizes shrink dramatically over a full + DEBUG_INFO build and compile times are reduced too. + Only works with newer gcc versions. + config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL @@ -620,6 +749,15 @@ config DEBUG_LIST If unsure, say N. +config TEST_LIST_SORT + bool "Linked list sorting test" + depends on DEBUG_KERNEL + help + Enable this to turn on 'list_sort()' function test. This test is + executed only once during system boot, so affects only boot time. + + If unsure, say N. + config DEBUG_SG bool "Debug SG table operations" depends on DEBUG_KERNEL @@ -640,6 +778,21 @@ config DEBUG_NOTIFIERS This is a relatively cheap check but if you care about maximum performance, say N. +config DEBUG_CREDENTIALS + bool "Debug credential management" + depends on DEBUG_KERNEL + help + Enable this to turn on some debug checking for credential + management. The additional code keeps track of the number of + pointers from task_structs to any given cred struct, and checks to + see that this number never exceeds the usage count of the cred + struct. + + Furthermore, if SELinux is enabled, this also checks that the + security pointer in the cred struct is never seen to be invalid. + + If unsure, say N. + # # Select this config option from the architecture Kconfig, if it # it is preferred to always offer frame pointers as a config @@ -712,17 +865,53 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" - depends on CLASSIC_RCU || TREE_RCU - default n + depends on TREE_RCU || TREE_PREEMPT_RCU + default y help This option causes RCU to printk information on which CPUs are delaying the current grace period, but only when the grace period extends for excessive time periods. - Say Y if you want RCU to perform such checks. + Say N if you want to disable such checks. + + Say Y if you are unsure. + +config RCU_CPU_STALL_TIMEOUT + int "RCU CPU stall timeout in seconds" + depends on RCU_CPU_STALL_DETECTOR + range 3 300 + default 60 + help + If a given RCU grace period extends more than the specified + number of seconds, a CPU stall warning is printed. If the + RCU grace period persists, additional CPU stall warnings are + printed at more widely spaced intervals. + +config RCU_CPU_STALL_DETECTOR_RUNNABLE + bool "RCU CPU stall checking starts automatically at boot" + depends on RCU_CPU_STALL_DETECTOR + default y + help + If set, start checking for RCU CPU stalls immediately on + boot. Otherwise, RCU CPU stall checking must be manually + enabled. + + Say Y if you are unsure. + + Say N if you wish to suppress RCU CPU stall checking during boot. + +config RCU_CPU_STALL_VERBOSE + bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" + depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU + default y + help + This option causes RCU to printk detailed per-task information + for any tasks that are stalling the current RCU grace period. Say N if you are unsure. + Say Y if you want to enable such checks. + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL @@ -777,10 +966,24 @@ config DEBUG_BLOCK_EXT_DEVT Say N if you are unsure. +config DEBUG_FORCE_WEAK_PER_CPU + bool "Force weak per-cpu definitions" + depends on DEBUG_KERNEL + help + s390 and alpha require percpu variables in modules to be + defined weak to work around addressing range issue which + puts the following two restrictions on percpu variable + definitions. + + 1. percpu symbols must be unique whether static or not + 2. percpu variables can't be defined inside a function + + To ensure that generic code follows the above rules, this + option forces all percpu variables to be defined as weak. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" - depends on DEBUG_KERNEL - depends on KPROBES + depends on DEBUG_FS depends on BLOCK default n help @@ -791,7 +994,19 @@ config LKDTM called lkdtm. Documentation on how to use the module can be found in - drivers/misc/lkdtm.c + Documentation/fault-injection/provoke-crashes.txt + +config CPU_NOTIFIER_ERROR_INJECT + tristate "CPU notifier error injection module" + depends on HOTPLUG_CPU && DEBUG_KERNEL + help + This option provides a kernel module that can be used to test + the error handling of the cpu notifiers + + To compile this code as a module, choose M here: the module will + be called cpu-notifier-error-inject. + + If unsure, say N. config FAULT_INJECTION bool "Fault-injection framework" @@ -820,7 +1035,7 @@ config FAIL_MAKE_REQUEST Provide fault-injection capability for disk IO. config FAIL_IO_TIMEOUT - bool "Faul-injection capability for faking disk interrupts" + bool "Fault-injection capability for faking disk interrupts" depends on FAULT_INJECTION && BLOCK help Provide fault-injection capability on end IO handling. This @@ -841,26 +1056,29 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !PPC && !S390 + select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE help Provide stacktrace filter for fault-injection capabilities config LATENCYTOP bool "Latency measuring infrastructure" - select FRAME_POINTER if !MIPS && !PPC && !S390 + depends on HAVE_LATENCYTOP_SUPPORT + depends on DEBUG_KERNEL + depends on STACKTRACE_SUPPORT + depends on PROC_FS + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL select STACKTRACE select SCHEDSTATS select SCHED_DEBUG - depends on HAVE_LATENCYTOP_SUPPORT help Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. config SYSCTL_SYSCALL_CHECK bool "Sysctl checks" - depends on SYSCTL_SYSCALL + depends on SYSCTL ---help--- sys_sysctl uses binary paths that have been found challenging to properly maintain and use. This enables checks that help @@ -934,10 +1152,10 @@ config DYNAMIC_DEBUG Usage: - Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file, + Dynamic debugging is controlled via the 'dynamic_debug/control' file, which is contained in the 'debugfs' filesystem. Thus, the debugfs filesystem must first be mounted before making use of this feature. - We refer the control file as: <debugfs>/dynamic_debug/ddebug. This + We refer the control file as: <debugfs>/dynamic_debug/control. This file contains a list of the debug statements that can be enabled. The format for each line of the file is: @@ -952,7 +1170,7 @@ config DYNAMIC_DEBUG From a live system: - nullarbor:~ # cat <debugfs>/dynamic_debug/ddebug + nullarbor:~ # cat <debugfs>/dynamic_debug/control # filename:lineno [module]function flags format fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" @@ -962,23 +1180,23 @@ config DYNAMIC_DEBUG // enable the message at line 1603 of file svcsock.c nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control // enable all the messages in file svcsock.c nullarbor:~ # echo -n 'file svcsock.c +p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control // enable all the messages in the NFS server module nullarbor:~ # echo -n 'module nfsd +p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control // enable all 12 messages in the function svc_process() nullarbor:~ # echo -n 'func svc_process +p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control // disable all 12 messages in the function svc_process() nullarbor:~ # echo -n 'func svc_process -p' > - <debugfs>/dynamic_debug/ddebug + <debugfs>/dynamic_debug/control See Documentation/dynamic-debug-howto.txt for additional information. @@ -993,6 +1211,26 @@ config DMA_API_DEBUG This option causes a performance degredation. Use only if you want to debug device drivers. If unsure, say N. +config ATOMIC64_SELFTEST + bool "Perform an atomic64_t self-test at boot" + help + Enable this option to test the atomic64_t functions at boot. + + If unsure, say N. + +config ASYNC_RAID6_TEST + tristate "Self test for hardware accelerated raid6 recovery" + depends on ASYNC_RAID6_RECOV + select ASYNC_MEMCPY + ---help--- + This is a one-shot self test that permutes through the + recovery of all the possible two disk failure scenarios for a + N-disk array. Recovery is performed with the asynchronous + raid6 recovery routines, and will optionally use an offload + engine if one is available. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 9b5d1d7f2ef7..43cb93fa2651 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB bool menuconfig KGDB - bool "KGDB: kernel debugging with remote gdb" + bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB depends on DEBUG_KERNEL && EXPERIMENTAL help @@ -57,4 +57,26 @@ config KGDB_TESTS_BOOT_STRING information about other strings you could use beyond the default of V1F100. +config KGDB_LOW_LEVEL_TRAP + bool "KGDB: Allow debugging with traps in notifiers" + depends on X86 || MIPS + default n + help + This will add an extra call back to kgdb for the breakpoint + exception handler on which will will allow kgdb to step + through a notify handler. + +config KGDB_KDB + bool "KGDB_KDB: include kdb frontend for kgdb" + default n + help + KDB frontend for kernel + +config KDB_KEYBOARD + bool "KGDB_KDB: keyboard as input device" + depends on VT && KGDB_KDB + default n + help + KDB can use a PS/2 type keyboard for an input device + endif # KGDB diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck index 603c81b66549..846e039a86b4 100644 --- a/lib/Kconfig.kmemcheck +++ b/lib/Kconfig.kmemcheck @@ -1,6 +1,8 @@ config HAVE_ARCH_KMEMCHECK bool +if HAVE_ARCH_KMEMCHECK + menuconfig KMEMCHECK bool "kmemcheck: trap use of uninitialized memory" depends on DEBUG_KERNEL @@ -89,3 +91,4 @@ config KMEMCHECK_BITOPS_OK accesses where not all the bits are initialized at the same time. This may also hide some real bugs. +endif diff --git a/lib/Makefile b/lib/Makefile index b6d1857bbf08..d7b6e30a3a1e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,11 +8,11 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o dump_stack.o \ + rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o flex_array.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o + string_helpers.o gcd.o lcm.o list_sort.o uuid.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -39,8 +39,12 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o + +CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o + obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o +obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o @@ -65,10 +69,12 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_RAID6_PQ) += raid6/ lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o +lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o @@ -80,23 +86,28 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o +obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o -obj-$(CONFIG_HAVE_LMB) += lmb.o - obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o obj-$(CONFIG_NLATTR) += nlattr.o +obj-$(CONFIG_LRU_CACHE) += lru_cache.o + obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o +obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o + +obj-$(CONFIG_AVERAGE) += average.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/argv_split.c b/lib/argv_split.c index 5205a8dae5bc..4b1b083f219c 100644 --- a/lib/argv_split.c +++ b/lib/argv_split.c @@ -4,17 +4,10 @@ #include <linux/kernel.h> #include <linux/ctype.h> +#include <linux/string.h> #include <linux/slab.h> #include <linux/module.h> -static const char *skip_sep(const char *cp) -{ - while (*cp && isspace(*cp)) - cp++; - - return cp; -} - static const char *skip_arg(const char *cp) { while (*cp && !isspace(*cp)) @@ -28,7 +21,7 @@ static int count_argc(const char *str) int count = 0; while (*str) { - str = skip_sep(str); + str = skip_spaces(str); if (*str) { count++; str = skip_arg(str); @@ -82,7 +75,7 @@ char **argv_split(gfp_t gfp, const char *str, int *argcp) argvp = argv; while (*str) { - str = skip_sep(str); + str = skip_spaces(str); if (*str) { const char *p = str; diff --git a/lib/atomic64.c b/lib/atomic64.c index c5e725562416..a21c12bc727c 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -13,6 +13,7 @@ #include <linux/cache.h> #include <linux/spinlock.h> #include <linux/init.h> +#include <linux/module.h> #include <asm/atomic.h> /* @@ -52,6 +53,7 @@ long long atomic64_read(const atomic64_t *v) spin_unlock_irqrestore(lock, flags); return val; } +EXPORT_SYMBOL(atomic64_read); void atomic64_set(atomic64_t *v, long long i) { @@ -62,6 +64,7 @@ void atomic64_set(atomic64_t *v, long long i) v->counter = i; spin_unlock_irqrestore(lock, flags); } +EXPORT_SYMBOL(atomic64_set); void atomic64_add(long long a, atomic64_t *v) { @@ -72,6 +75,7 @@ void atomic64_add(long long a, atomic64_t *v) v->counter += a; spin_unlock_irqrestore(lock, flags); } +EXPORT_SYMBOL(atomic64_add); long long atomic64_add_return(long long a, atomic64_t *v) { @@ -84,6 +88,7 @@ long long atomic64_add_return(long long a, atomic64_t *v) spin_unlock_irqrestore(lock, flags); return val; } +EXPORT_SYMBOL(atomic64_add_return); void atomic64_sub(long long a, atomic64_t *v) { @@ -94,6 +99,7 @@ void atomic64_sub(long long a, atomic64_t *v) v->counter -= a; spin_unlock_irqrestore(lock, flags); } +EXPORT_SYMBOL(atomic64_sub); long long atomic64_sub_return(long long a, atomic64_t *v) { @@ -106,6 +112,7 @@ long long atomic64_sub_return(long long a, atomic64_t *v) spin_unlock_irqrestore(lock, flags); return val; } +EXPORT_SYMBOL(atomic64_sub_return); long long atomic64_dec_if_positive(atomic64_t *v) { @@ -120,6 +127,7 @@ long long atomic64_dec_if_positive(atomic64_t *v) spin_unlock_irqrestore(lock, flags); return val; } +EXPORT_SYMBOL(atomic64_dec_if_positive); long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) { @@ -134,6 +142,7 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) spin_unlock_irqrestore(lock, flags); return val; } +EXPORT_SYMBOL(atomic64_cmpxchg); long long atomic64_xchg(atomic64_t *v, long long new) { @@ -147,21 +156,23 @@ long long atomic64_xchg(atomic64_t *v, long long new) spin_unlock_irqrestore(lock, flags); return val; } +EXPORT_SYMBOL(atomic64_xchg); int atomic64_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; spinlock_t *lock = lock_addr(v); - int ret = 1; + int ret = 0; spin_lock_irqsave(lock, flags); if (v->counter != u) { v->counter += a; - ret = 0; + ret = 1; } spin_unlock_irqrestore(lock, flags); return ret; } +EXPORT_SYMBOL(atomic64_add_unless); static int init_atomic64_lock(void) { diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c new file mode 100644 index 000000000000..44524cc8c32a --- /dev/null +++ b/lib/atomic64_test.c @@ -0,0 +1,166 @@ +/* + * Testsuite for atomic64_t functions + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/atomic.h> + +#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) +static __init int test_atomic64(void) +{ + long long v0 = 0xaaa31337c001d00dLL; + long long v1 = 0xdeadbeefdeafcafeLL; + long long v2 = 0xfaceabadf00df001LL; + long long onestwos = 0x1111111122222222LL; + long long one = 1LL; + + atomic64_t v = ATOMIC64_INIT(v0); + long long r = v0; + BUG_ON(v.counter != r); + + atomic64_set(&v, v1); + r = v1; + BUG_ON(v.counter != r); + BUG_ON(atomic64_read(&v) != r); + + INIT(v0); + atomic64_add(onestwos, &v); + r += onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_add(-one, &v); + r += -one; + BUG_ON(v.counter != r); + + INIT(v0); + r += onestwos; + BUG_ON(atomic64_add_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r += -one; + BUG_ON(atomic64_add_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(onestwos, &v); + r -= onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(-one, &v); + r -= -one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= onestwos; + BUG_ON(atomic64_sub_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r -= -one; + BUG_ON(atomic64_sub_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_inc(&v); + r += one; + BUG_ON(v.counter != r); + + INIT(v0); + r += one; + BUG_ON(atomic64_inc_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_dec(&v); + r -= one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= one; + BUG_ON(atomic64_dec_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_xchg(&v, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_add_unless(&v, one, v0)); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(!atomic64_add_unless(&v, one, v1)); + r += one; + BUG_ON(v.counter != r); + +#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \ + defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM) + INIT(onestwos); + BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); + r -= one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_dec_if_positive(&v) != -one); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); + BUG_ON(v.counter != r); +#else +#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above +#endif + + INIT(onestwos); + BUG_ON(!atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_inc_not_zero(&v)); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(!atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + +#ifdef CONFIG_X86 + printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n", +#ifdef CONFIG_X86_64 + "x86-64", +#elif defined(CONFIG_X86_CMPXCHG64) + "i586+", +#else + "i386+", +#endif + boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", + boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); +#else + printk(KERN_INFO "atomic64 test passed\n"); +#endif + + return 0; +} + +core_initcall(test_atomic64); diff --git a/lib/average.c b/lib/average.c new file mode 100644 index 000000000000..5576c2841496 --- /dev/null +++ b/lib/average.c @@ -0,0 +1,61 @@ +/* + * lib/average.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/module.h> +#include <linux/average.h> +#include <linux/bug.h> +#include <linux/log2.h> + +/** + * DOC: Exponentially Weighted Moving Average (EWMA) + * + * These are generic functions for calculating Exponentially Weighted Moving + * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled + * up internal representation of the average value to prevent rounding errors. + * The factor for scaling up and the exponential weight (or decay rate) have to + * be specified thru the init fuction. The structure should not be accessed + * directly but only thru the helper functions. + */ + +/** + * ewma_init() - Initialize EWMA parameters + * @avg: Average structure + * @factor: Factor to use for the scaled up internal value. The maximum value + * of averages can be ULONG_MAX/(factor*weight). For performance reasons + * factor has to be a power of 2. + * @weight: Exponential weight, or decay rate. This defines how fast the + * influence of older values decreases. For performance reasons weight has + * to be a power of 2. + * + * Initialize the EWMA parameters for a given struct ewma @avg. + */ +void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight) +{ + WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor)); + + avg->weight = ilog2(weight); + avg->factor = ilog2(factor); + avg->internal = 0; +} +EXPORT_SYMBOL(ewma_init); + +/** + * ewma_add() - Exponentially weighted moving average (EWMA) + * @avg: Average structure + * @val: Current value + * + * Add a sample to the average. + */ +struct ewma *ewma_add(struct ewma *avg, unsigned long val) +{ + avg->internal = avg->internal ? + (((avg->internal << avg->weight) - avg->internal) + + (val << avg->factor)) >> avg->weight : + (val << avg->factor); + return avg; +} +EXPORT_SYMBOL(ewma_add); diff --git a/lib/bitmap.c b/lib/bitmap.c index 35a1f7ff4149..741fae905ae3 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -179,14 +179,16 @@ void __bitmap_shift_left(unsigned long *dst, } EXPORT_SYMBOL(__bitmap_shift_left); -void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits) { int k; int nr = BITS_TO_LONGS(bits); + unsigned long result = 0; for (k = 0; k < nr; k++) - dst[k] = bitmap1[k] & bitmap2[k]; + result |= (dst[k] = bitmap1[k] & bitmap2[k]); + return result != 0; } EXPORT_SYMBOL(__bitmap_and); @@ -212,14 +214,16 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_xor); -void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits) { int k; int nr = BITS_TO_LONGS(bits); + unsigned long result = 0; for (k = 0; k < nr; k++) - dst[k] = bitmap1[k] & ~bitmap2[k]; + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + return result != 0; } EXPORT_SYMBOL(__bitmap_andnot); @@ -267,6 +271,87 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) } EXPORT_SYMBOL(__bitmap_weight); +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) + +void bitmap_set(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_set >= 0) { + *p |= mask_to_set; + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + *p |= mask_to_set; + } +} +EXPORT_SYMBOL(bitmap_set); + +void bitmap_clear(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} +EXPORT_SYMBOL(bitmap_clear); + +/* + * bitmap_find_next_zero_area - find a contiguous aligned zero area + * @map: The address to base the search on + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + * @nr: The number of zeroed bits we're looking for + * @align_mask: Alignment mask for zero area + * + * The @align_mask should be one less than a power of 2; the effect is that + * the bit offset of all zero areas this function finds is multiples of that + * power of 2. A @align_mask of 0 means no alignment is required. + */ +unsigned long bitmap_find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask) +{ + unsigned long index, end, i; +again: + index = find_next_zero_bit(map, size, start); + + /* Align allocation */ + index = __ALIGN_MASK(index, align_mask); + + end = index + nr; + if (end > size) + return end; + i = find_next_bit(map, end, index); + if (i < end) { + start = i + 1; + goto again; + } + return index; +} +EXPORT_SYMBOL(bitmap_find_next_zero_area); + /* * Bitmap printing & parsing functions: first version by Bill Irwin, * second version by Paul Jackson, third by Joe Korty. @@ -274,7 +359,6 @@ EXPORT_SYMBOL(__bitmap_weight); #define CHUNKSZ 32 #define nbits_to_hold_value(val) fls(val) -#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10)) #define BASEDEC 10 /* fancier cpuset lists input in decimal */ /** @@ -381,7 +465,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen, if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1)) return -EOVERFLOW; - chunk = (chunk << 4) | unhex(c); + chunk = (chunk << 4) | hex_to_bin(c); ndigits++; totaldigits++; } if (ndigits == 0) @@ -402,7 +486,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen, EXPORT_SYMBOL(__bitmap_parse); /** - * bitmap_parse_user() + * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap * * @ubuf: pointer to user buffer containing string. * @ulen: buffer size in bytes. If string is smaller than this @@ -534,7 +618,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) EXPORT_SYMBOL(bitmap_parselist); /** - * bitmap_pos_to_ord(buf, pos, bits) + * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap * @pos: a bit position in @buf (0 <= @pos < @bits) * @bits: number of valid bit positions in @buf @@ -570,7 +654,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) } /** - * bitmap_ord_to_pos(buf, ord, bits) + * bitmap_ord_to_pos - find position of n-th set bit in bitmap * @buf: pointer to bitmap * @ord: ordinal bit position (n-th set bit, n >= 0) * @bits: number of valid bit positions in @buf @@ -648,10 +732,9 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src, bitmap_zero(dst, bits); w = bitmap_weight(new, bits); - for (oldbit = find_first_bit(src, bits); - oldbit < bits; - oldbit = find_next_bit(src, bits, oldbit + 1)) { + for_each_set_bit(oldbit, src, bits) { int n = bitmap_pos_to_ord(old, oldbit, bits); + if (n < 0 || w == 0) set_bit(oldbit, dst); /* identity map */ else @@ -818,9 +901,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig, */ m = 0; - for (n = find_first_bit(relmap, bits); - n < bits; - n = find_next_bit(relmap, bits, n + 1)) { + for_each_set_bit(n, relmap, bits) { /* m == bitmap_pos_to_ord(relmap, n, bits) */ if (test_bit(m, orig)) set_bit(n, dst); @@ -849,9 +930,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, return; bitmap_zero(dst, bits); - for (oldbit = find_first_bit(orig, bits); - oldbit < bits; - oldbit = find_next_bit(orig, bits, oldbit + 1)) + for_each_set_bit(oldbit, orig, bits) set_bit(oldbit % sz, dst); } EXPORT_SYMBOL(bitmap_fold); diff --git a/lib/btree.c b/lib/btree.c new file mode 100644 index 000000000000..c9c6f0351526 --- /dev/null +++ b/lib/btree.c @@ -0,0 +1,798 @@ +/* + * lib/btree.c - Simple In-memory B+Tree + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org> + * Bits and pieces stolen from Peter Zijlstra's code, which is + * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com> + * GPLv2 + * + * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch + * + * A relatively simple B+Tree implementation. I have written it as a learning + * excercise to understand how B+Trees work. Turned out to be useful as well. + * + * B+Trees can be used similar to Linux radix trees (which don't have anything + * in common with textbook radix trees, beware). Prerequisite for them working + * well is that access to a random tree node is much faster than a large number + * of operations within each node. + * + * Disks have fulfilled the prerequisite for a long time. More recently DRAM + * has gained similar properties, as memory access times, when measured in cpu + * cycles, have increased. Cacheline sizes have increased as well, which also + * helps B+Trees. + * + * Compared to radix trees, B+Trees are more efficient when dealing with a + * sparsely populated address space. Between 25% and 50% of the memory is + * occupied with valid pointers. When densely populated, radix trees contain + * ~98% pointers - hard to beat. Very sparse radix trees contain only ~2% + * pointers. + * + * This particular implementation stores pointers identified by a long value. + * Storing NULL pointers is illegal, lookup will return NULL when no entry + * was found. + * + * A tricks was used that is not commonly found in textbooks. The lowest + * values are to the right, not to the left. All used slots within a node + * are on the left, all unused slots contain NUL values. Most operations + * simply loop once over all slots and terminate on the first NUL. + */ + +#include <linux/btree.h> +#include <linux/cache.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/module.h> + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define NODESIZE MAX(L1_CACHE_BYTES, 128) + +struct btree_geo { + int keylen; + int no_pairs; + int no_longs; +}; + +struct btree_geo btree_geo32 = { + .keylen = 1, + .no_pairs = NODESIZE / sizeof(long) / 2, + .no_longs = NODESIZE / sizeof(long) / 2, +}; +EXPORT_SYMBOL_GPL(btree_geo32); + +#define LONG_PER_U64 (64 / BITS_PER_LONG) +struct btree_geo btree_geo64 = { + .keylen = LONG_PER_U64, + .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64), + .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)), +}; +EXPORT_SYMBOL_GPL(btree_geo64); + +struct btree_geo btree_geo128 = { + .keylen = 2 * LONG_PER_U64, + .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64), + .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)), +}; +EXPORT_SYMBOL_GPL(btree_geo128); + +static struct kmem_cache *btree_cachep; + +void *btree_alloc(gfp_t gfp_mask, void *pool_data) +{ + return kmem_cache_alloc(btree_cachep, gfp_mask); +} +EXPORT_SYMBOL_GPL(btree_alloc); + +void btree_free(void *element, void *pool_data) +{ + kmem_cache_free(btree_cachep, element); +} +EXPORT_SYMBOL_GPL(btree_free); + +static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp) +{ + unsigned long *node; + + node = mempool_alloc(head->mempool, gfp); + if (likely(node)) + memset(node, 0, NODESIZE); + return node; +} + +static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + if (l1[i] < l2[i]) + return -1; + if (l1[i] > l2[i]) + return 1; + } + return 0; +} + +static unsigned long *longcpy(unsigned long *dest, const unsigned long *src, + size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + dest[i] = src[i]; + return dest; +} + +static unsigned long *longset(unsigned long *s, unsigned long c, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + s[i] = c; + return s; +} + +static void dec_key(struct btree_geo *geo, unsigned long *key) +{ + unsigned long val; + int i; + + for (i = geo->keylen - 1; i >= 0; i--) { + val = key[i]; + key[i] = val - 1; + if (val) + break; + } +} + +static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n) +{ + return &node[n * geo->keylen]; +} + +static void *bval(struct btree_geo *geo, unsigned long *node, int n) +{ + return (void *)node[geo->no_longs + n]; +} + +static void setkey(struct btree_geo *geo, unsigned long *node, int n, + unsigned long *key) +{ + longcpy(bkey(geo, node, n), key, geo->keylen); +} + +static void setval(struct btree_geo *geo, unsigned long *node, int n, + void *val) +{ + node[geo->no_longs + n] = (unsigned long) val; +} + +static void clearpair(struct btree_geo *geo, unsigned long *node, int n) +{ + longset(bkey(geo, node, n), 0, geo->keylen); + node[geo->no_longs + n] = 0; +} + +static inline void __btree_init(struct btree_head *head) +{ + head->node = NULL; + head->height = 0; +} + +void btree_init_mempool(struct btree_head *head, mempool_t *mempool) +{ + __btree_init(head); + head->mempool = mempool; +} +EXPORT_SYMBOL_GPL(btree_init_mempool); + +int btree_init(struct btree_head *head) +{ + __btree_init(head); + head->mempool = mempool_create(0, btree_alloc, btree_free, NULL); + if (!head->mempool) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL_GPL(btree_init); + +void btree_destroy(struct btree_head *head) +{ + mempool_destroy(head->mempool); + head->mempool = NULL; +} +EXPORT_SYMBOL_GPL(btree_destroy); + +void *btree_last(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + int height = head->height; + unsigned long *node = head->node; + + if (height == 0) + return NULL; + + for ( ; height > 1; height--) + node = bval(geo, node, 0); + + longcpy(key, bkey(geo, node, 0), geo->keylen); + return bval(geo, node, 0); +} +EXPORT_SYMBOL_GPL(btree_last); + +static int keycmp(struct btree_geo *geo, unsigned long *node, int pos, + unsigned long *key) +{ + return longcmp(bkey(geo, node, pos), key, geo->keylen); +} + +static int keyzero(struct btree_geo *geo, unsigned long *key) +{ + int i; + + for (i = 0; i < geo->keylen; i++) + if (key[i]) + return 0; + + return 1; +} + +void *btree_lookup(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + int i, height = head->height; + unsigned long *node = head->node; + + if (height == 0) + return NULL; + + for ( ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + return NULL; + node = bval(geo, node, i); + if (!node) + return NULL; + } + + if (!node) + return NULL; + + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) == 0) + return bval(geo, node, i); + return NULL; +} +EXPORT_SYMBOL_GPL(btree_lookup); + +int btree_update(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val) +{ + int i, height = head->height; + unsigned long *node = head->node; + + if (height == 0) + return -ENOENT; + + for ( ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + return -ENOENT; + node = bval(geo, node, i); + if (!node) + return -ENOENT; + } + + if (!node) + return -ENOENT; + + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) == 0) { + setval(geo, node, i, val); + return 0; + } + return -ENOENT; +} +EXPORT_SYMBOL_GPL(btree_update); + +/* + * Usually this function is quite similar to normal lookup. But the key of + * a parent node may be smaller than the smallest key of all its siblings. + * In such a case we cannot just return NULL, as we have only proven that no + * key smaller than __key, but larger than this parent key exists. + * So we set __key to the parent key and retry. We have to use the smallest + * such parent key, which is the last parent key we encountered. + */ +void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, + unsigned long *__key) +{ + int i, height; + unsigned long *node, *oldnode; + unsigned long *retry_key = NULL, key[geo->keylen]; + + if (keyzero(geo, __key)) + return NULL; + + if (head->height == 0) + return NULL; +retry: + longcpy(key, __key, geo->keylen); + dec_key(geo, key); + + node = head->node; + for (height = head->height ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + goto miss; + oldnode = node; + node = bval(geo, node, i); + if (!node) + goto miss; + retry_key = bkey(geo, oldnode, i); + } + + if (!node) + goto miss; + + for (i = 0; i < geo->no_pairs; i++) { + if (keycmp(geo, node, i, key) <= 0) { + if (bval(geo, node, i)) { + longcpy(__key, bkey(geo, node, i), geo->keylen); + return bval(geo, node, i); + } else + goto miss; + } + } +miss: + if (retry_key) { + __key = retry_key; + retry_key = NULL; + goto retry; + } + return NULL; +} + +static int getpos(struct btree_geo *geo, unsigned long *node, + unsigned long *key) +{ + int i; + + for (i = 0; i < geo->no_pairs; i++) { + if (keycmp(geo, node, i, key) <= 0) + break; + } + return i; +} + +static int getfill(struct btree_geo *geo, unsigned long *node, int start) +{ + int i; + + for (i = start; i < geo->no_pairs; i++) + if (!bval(geo, node, i)) + break; + return i; +} + +/* + * locate the correct leaf node in the btree + */ +static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level) +{ + unsigned long *node = head->node; + int i, height; + + for (height = head->height; height > level; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + + if ((i == geo->no_pairs) || !bval(geo, node, i)) { + /* right-most key is too large, update it */ + /* FIXME: If the right-most key on higher levels is + * always zero, this wouldn't be necessary. */ + i--; + setkey(geo, node, i, key); + } + BUG_ON(i < 0); + node = bval(geo, node, i); + } + BUG_ON(!node); + return node; +} + +static int btree_grow(struct btree_head *head, struct btree_geo *geo, + gfp_t gfp) +{ + unsigned long *node; + int fill; + + node = btree_node_alloc(head, gfp); + if (!node) + return -ENOMEM; + if (head->node) { + fill = getfill(geo, head->node, 0); + setkey(geo, node, 0, bkey(geo, head->node, fill - 1)); + setval(geo, node, 0, head->node); + } + head->node = node; + head->height++; + return 0; +} + +static void btree_shrink(struct btree_head *head, struct btree_geo *geo) +{ + unsigned long *node; + int fill; + + if (head->height <= 1) + return; + + node = head->node; + fill = getfill(geo, node, 0); + BUG_ON(fill > 1); + head->node = bval(geo, node, 0); + head->height--; + mempool_free(node, head->mempool); +} + +static int btree_insert_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val, int level, + gfp_t gfp) +{ + unsigned long *node; + int i, pos, fill, err; + + BUG_ON(!val); + if (head->height < level) { + err = btree_grow(head, geo, gfp); + if (err) + return err; + } + +retry: + node = find_level(head, geo, key, level); + pos = getpos(geo, node, key); + fill = getfill(geo, node, pos); + /* two identical keys are not allowed */ + BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0); + + if (fill == geo->no_pairs) { + /* need to split node */ + unsigned long *new; + + new = btree_node_alloc(head, gfp); + if (!new) + return -ENOMEM; + err = btree_insert_level(head, geo, + bkey(geo, node, fill / 2 - 1), + new, level + 1, gfp); + if (err) { + mempool_free(new, head->mempool); + return err; + } + for (i = 0; i < fill / 2; i++) { + setkey(geo, new, i, bkey(geo, node, i)); + setval(geo, new, i, bval(geo, node, i)); + setkey(geo, node, i, bkey(geo, node, i + fill / 2)); + setval(geo, node, i, bval(geo, node, i + fill / 2)); + clearpair(geo, node, i + fill / 2); + } + if (fill & 1) { + setkey(geo, node, i, bkey(geo, node, fill - 1)); + setval(geo, node, i, bval(geo, node, fill - 1)); + clearpair(geo, node, fill - 1); + } + goto retry; + } + BUG_ON(fill >= geo->no_pairs); + + /* shift and insert */ + for (i = fill; i > pos; i--) { + setkey(geo, node, i, bkey(geo, node, i - 1)); + setval(geo, node, i, bval(geo, node, i - 1)); + } + setkey(geo, node, pos, key); + setval(geo, node, pos, val); + + return 0; +} + +int btree_insert(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val, gfp_t gfp) +{ + return btree_insert_level(head, geo, key, val, 1, gfp); +} +EXPORT_SYMBOL_GPL(btree_insert); + +static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level); +static void merge(struct btree_head *head, struct btree_geo *geo, int level, + unsigned long *left, int lfill, + unsigned long *right, int rfill, + unsigned long *parent, int lpos) +{ + int i; + + for (i = 0; i < rfill; i++) { + /* Move all keys to the left */ + setkey(geo, left, lfill + i, bkey(geo, right, i)); + setval(geo, left, lfill + i, bval(geo, right, i)); + } + /* Exchange left and right child in parent */ + setval(geo, parent, lpos, right); + setval(geo, parent, lpos + 1, left); + /* Remove left (formerly right) child from parent */ + btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1); + mempool_free(right, head->mempool); +} + +static void rebalance(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level, unsigned long *child, int fill) +{ + unsigned long *parent, *left = NULL, *right = NULL; + int i, no_left, no_right; + + if (fill == 0) { + /* Because we don't steal entries from a neigbour, this case + * can happen. Parent node contains a single child, this + * node, so merging with a sibling never happens. + */ + btree_remove_level(head, geo, key, level + 1); + mempool_free(child, head->mempool); + return; + } + + parent = find_level(head, geo, key, level + 1); + i = getpos(geo, parent, key); + BUG_ON(bval(geo, parent, i) != child); + + if (i > 0) { + left = bval(geo, parent, i - 1); + no_left = getfill(geo, left, 0); + if (fill + no_left <= geo->no_pairs) { + merge(head, geo, level, + left, no_left, + child, fill, + parent, i - 1); + return; + } + } + if (i + 1 < getfill(geo, parent, i)) { + right = bval(geo, parent, i + 1); + no_right = getfill(geo, right, 0); + if (fill + no_right <= geo->no_pairs) { + merge(head, geo, level, + child, fill, + right, no_right, + parent, i); + return; + } + } + /* + * We could also try to steal one entry from the left or right + * neighbor. By not doing so we changed the invariant from + * "all nodes are at least half full" to "no two neighboring + * nodes can be merged". Which means that the average fill of + * all nodes is still half or better. + */ +} + +static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level) +{ + unsigned long *node; + int i, pos, fill; + void *ret; + + if (level > head->height) { + /* we recursed all the way up */ + head->height = 0; + head->node = NULL; + return NULL; + } + + node = find_level(head, geo, key, level); + pos = getpos(geo, node, key); + fill = getfill(geo, node, pos); + if ((level == 1) && (keycmp(geo, node, pos, key) != 0)) + return NULL; + ret = bval(geo, node, pos); + + /* remove and shift */ + for (i = pos; i < fill - 1; i++) { + setkey(geo, node, i, bkey(geo, node, i + 1)); + setval(geo, node, i, bval(geo, node, i + 1)); + } + clearpair(geo, node, fill - 1); + + if (fill - 1 < geo->no_pairs / 2) { + if (level < head->height) + rebalance(head, geo, key, level, node, fill - 1); + else if (fill - 1 == 1) + btree_shrink(head, geo); + } + + return ret; +} + +void *btree_remove(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + if (head->height == 0) + return NULL; + + return btree_remove_level(head, geo, key, 1); +} +EXPORT_SYMBOL_GPL(btree_remove); + +int btree_merge(struct btree_head *target, struct btree_head *victim, + struct btree_geo *geo, gfp_t gfp) +{ + unsigned long key[geo->keylen]; + unsigned long dup[geo->keylen]; + void *val; + int err; + + BUG_ON(target == victim); + + if (!(target->node)) { + /* target is empty, just copy fields over */ + target->node = victim->node; + target->height = victim->height; + __btree_init(victim); + return 0; + } + + /* TODO: This needs some optimizations. Currently we do three tree + * walks to remove a single object from the victim. + */ + for (;;) { + if (!btree_last(victim, geo, key)) + break; + val = btree_lookup(victim, geo, key); + err = btree_insert(target, geo, key, val, gfp); + if (err) + return err; + /* We must make a copy of the key, as the original will get + * mangled inside btree_remove. */ + longcpy(dup, key, geo->keylen); + btree_remove(victim, geo, dup); + } + return 0; +} +EXPORT_SYMBOL_GPL(btree_merge); + +static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo, + unsigned long *node, unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, size_t index, + void *func2), + void *func2, int reap, int height, size_t count) +{ + int i; + unsigned long *child; + + for (i = 0; i < geo->no_pairs; i++) { + child = bval(geo, node, i); + if (!child) + break; + if (height > 1) + count = __btree_for_each(head, geo, child, opaque, + func, func2, reap, height - 1, count); + else + func(child, opaque, bkey(geo, node, i), count++, + func2); + } + if (reap) + mempool_free(node, head->mempool); + return count; +} + +static void empty(void *elem, unsigned long opaque, unsigned long *key, + size_t index, void *func2) +{ +} + +void visitorl(void *elem, unsigned long opaque, unsigned long *key, + size_t index, void *__func) +{ + visitorl_t func = __func; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitorl); + +void visitor32(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor32_t func = __func; + u32 *key = (void *)__key; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitor32); + +void visitor64(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor64_t func = __func; + u64 *key = (void *)__key; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitor64); + +void visitor128(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor128_t func = __func; + u64 *key = (void *)__key; + + func(elem, opaque, key[0], key[1], index); +} +EXPORT_SYMBOL_GPL(visitor128); + +size_t btree_visitor(struct btree_head *head, struct btree_geo *geo, + unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, + size_t index, void *func2), + void *func2) +{ + size_t count = 0; + + if (!func2) + func = empty; + if (head->node) + count = __btree_for_each(head, geo, head->node, opaque, func, + func2, 0, head->height, 0); + return count; +} +EXPORT_SYMBOL_GPL(btree_visitor); + +size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo, + unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, + size_t index, void *func2), + void *func2) +{ + size_t count = 0; + + if (!func2) + func = empty; + if (head->node) + count = __btree_for_each(head, geo, head->node, opaque, func, + func2, 1, head->height, 0); + __btree_init(head); + return count; +} +EXPORT_SYMBOL_GPL(btree_grim_visitor); + +static int __init btree_module_init(void) +{ + btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0, + SLAB_HWCACHE_ALIGN, NULL); + return 0; +} + +static void __exit btree_module_exit(void) +{ + kmem_cache_destroy(btree_cachep); +} + +/* If core code starts using btree, initialization should happen even earlier */ +module_init(btree_module_init); +module_exit(btree_module_exit); + +MODULE_AUTHOR("Joern Engel <joern@logfs.org>"); +MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); +MODULE_LICENSE("GPL"); diff --git a/lib/bug.c b/lib/bug.c index 300e41afbf97..19552096d16b 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) return NULL; } -int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *mod) +void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { char *secstrings; unsigned int i; @@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * could potentially lead to deadlock and thus be counter-productive. */ list_add(&mod->bug_list, &module_bug_list); - - return 0; } void module_bug_cleanup(struct module *mod) @@ -136,8 +134,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) bug = find_bug(bugaddr); - printk(KERN_EMERG "------------[ cut here ]------------\n"); - file = NULL; line = 0; warning = 0; @@ -156,19 +152,25 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (warning) { /* this is a WARN_ON rather than BUG/BUG_ON */ + printk(KERN_WARNING "------------[ cut here ]------------\n"); + if (file) - printk(KERN_ERR "Badness at %s:%u\n", + printk(KERN_WARNING "WARNING: at %s:%u\n", file, line); else - printk(KERN_ERR "Badness at %p " + printk(KERN_WARNING "WARNING: at %p " "[verbose debug info unavailable]\n", (void *)bugaddr); + print_modules(); show_regs(regs); - add_taint(TAINT_WARN); + print_oops_end_marker(); + add_taint(BUG_GET_TAINT(bug)); return BUG_TRAP_TYPE_WARN; } + printk(KERN_EMERG "------------[ cut here ]------------\n"); + if (file) printk(KERN_CRIT "kernel BUG at %s:%u!\n", file, line); diff --git a/lib/checksum.c b/lib/checksum.c index 12e5a1c91cda..097508732f34 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -37,7 +37,8 @@ #include <asm/byteorder.h> -static inline unsigned short from32to16(unsigned long x) +#ifndef do_csum +static inline unsigned short from32to16(unsigned int x) { /* add up 16-bit and 16-bit for 16+c bit */ x = (x & 0xffff) + (x >> 16); @@ -49,13 +50,17 @@ static inline unsigned short from32to16(unsigned long x) static unsigned int do_csum(const unsigned char *buff, int len) { int odd, count; - unsigned long result = 0; + unsigned int result = 0; if (len <= 0) goto out; odd = 1 & (unsigned long) buff; if (odd) { +#ifdef __LITTLE_ENDIAN + result += (*buff << 8); +#else result = *buff; +#endif len--; buff++; } @@ -69,9 +74,9 @@ static unsigned int do_csum(const unsigned char *buff, int len) } count >>= 1; /* nr of 32-bit words.. */ if (count) { - unsigned long carry = 0; + unsigned int carry = 0; do { - unsigned long w = *(unsigned long *) buff; + unsigned int w = *(unsigned int *) buff; count--; buff += 4; result += carry; @@ -87,13 +92,18 @@ static unsigned int do_csum(const unsigned char *buff, int len) } } if (len & 1) +#ifdef __LITTLE_ENDIAN + result += *buff; +#else result += (*buff << 8); +#endif result = from32to16(result); if (odd) result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); out: return result; } +#endif /* * This is a version of ip_compute_csum() optimized for IP headers, diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c new file mode 100644 index 000000000000..4dc20321b0d5 --- /dev/null +++ b/lib/cpu-notifier-error-inject.c @@ -0,0 +1,63 @@ +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/module.h> +#include <linux/notifier.h> + +static int priority; +static int cpu_up_prepare_error; +static int cpu_down_prepare_error; + +module_param(priority, int, 0); +MODULE_PARM_DESC(priority, "specify cpu notifier priority"); + +module_param(cpu_up_prepare_error, int, 0644); +MODULE_PARM_DESC(cpu_up_prepare_error, + "specify error code to inject CPU_UP_PREPARE action"); + +module_param(cpu_down_prepare_error, int, 0644); +MODULE_PARM_DESC(cpu_down_prepare_error, + "specify error code to inject CPU_DOWN_PREPARE action"); + +static int err_inject_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int err = 0; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = cpu_up_prepare_error; + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + err = cpu_down_prepare_error; + break; + } + if (err) + printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err); + + return notifier_from_errno(err); +} + +static struct notifier_block err_inject_cpu_notifier = { + .notifier_call = err_inject_cpu_callback, +}; + +static int err_inject_init(void) +{ + err_inject_cpu_notifier.priority = priority; + + return register_hotcpu_notifier(&err_inject_cpu_notifier); +} + +static void err_inject_exit(void) +{ + unregister_hotcpu_notifier(&err_inject_cpu_notifier); +} + +module_init(err_inject_init); +module_exit(err_inject_exit); + +MODULE_DESCRIPTION("CPU notifier error injection module"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); diff --git a/lib/cpumask.c b/lib/cpumask.c index 7bb4142a502f..05d6aca7fc19 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -1,3 +1,4 @@ +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/cpumask.h> diff --git a/lib/crc32.c b/lib/crc32.c index 49d1c9e3ce38..4855995fcde9 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -25,16 +25,19 @@ #include <linux/module.h> #include <linux/compiler.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/init.h> #include <asm/atomic.h> #include "crc32defs.h" #if CRC_LE_BITS == 8 -#define tole(x) __constant_cpu_to_le32(x) -#define tobe(x) __constant_cpu_to_be32(x) +# define tole(x) __constant_cpu_to_le32(x) #else -#define tole(x) (x) -#define tobe(x) (x) +# define tole(x) (x) +#endif + +#if CRC_BE_BITS == 8 +# define tobe(x) __constant_cpu_to_be32(x) +#else +# define tobe(x) (x) #endif #include "crc32table.h" @@ -42,6 +45,54 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Ethernet CRC32 calculations"); MODULE_LICENSE("GPL"); +#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8 + +static inline u32 +crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) +{ +# ifdef __LITTLE_ENDIAN +# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8) +# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \ + tab[2][(crc >> 8) & 255] ^ \ + tab[1][(crc >> 16) & 255] ^ \ + tab[0][(crc >> 24) & 255] +# else +# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8) +# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \ + tab[1][(crc >> 8) & 255] ^ \ + tab[2][(crc >> 16) & 255] ^ \ + tab[3][(crc >> 24) & 255] +# endif + const u32 *b; + size_t rem_len; + + /* Align it */ + if (unlikely((long)buf & 3 && len)) { + do { + DO_CRC(*buf++); + } while ((--len) && ((long)buf)&3); + } + rem_len = len & 3; + /* load data 32 bits wide, xor data 32 bits wide. */ + len = len >> 2; + b = (const u32 *)buf; + for (--b; len; --len) { + crc ^= *++b; /* use pre increment for speed */ + DO_CRC4; + } + len = rem_len; + /* And the last few bytes */ + if (len) { + u8 *p = (u8 *)(b + 1) - 1; + do { + DO_CRC(*++p); /* use pre increment for speed */ + } while (--len); + } + return crc; +#undef DO_CRC +#undef DO_CRC4 +} +#endif /** * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for @@ -72,52 +123,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) { # if CRC_LE_BITS == 8 - const u32 *b =(u32 *)p; - const u32 *tab = crc32table_le; - -# ifdef __LITTLE_ENDIAN -# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8) -# else -# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8) -# endif + const u32 (*tab)[] = crc32table_le; crc = __cpu_to_le32(crc); - /* Align it */ - if(unlikely(((long)b)&3 && len)){ - do { - u8 *p = (u8 *)b; - DO_CRC(*p++); - b = (void *)p; - } while ((--len) && ((long)b)&3 ); - } - if(likely(len >= 4)){ - /* load data 32 bits wide, xor data 32 bits wide. */ - size_t save_len = len & 3; - len = len >> 2; - --b; /* use pre increment below(*++b) for speed */ - do { - crc ^= *++b; - DO_CRC(0); - DO_CRC(0); - DO_CRC(0); - DO_CRC(0); - } while (--len); - b++; /* point to next byte(s) */ - len = save_len; - } - /* And the last few bytes */ - if(len){ - do { - u8 *p = (u8 *)b; - DO_CRC(*p++); - b = (void *)p; - } while (--len); - } - + crc = crc32_body(crc, p, len, tab); return __le32_to_cpu(crc); -#undef ENDIAN_SHIFT -#undef DO_CRC - # elif CRC_LE_BITS == 4 while (len--) { crc ^= *p++; @@ -170,51 +180,11 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { # if CRC_BE_BITS == 8 - const u32 *b =(u32 *)p; - const u32 *tab = crc32table_be; - -# ifdef __LITTLE_ENDIAN -# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8) -# else -# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8) -# endif + const u32 (*tab)[] = crc32table_be; crc = __cpu_to_be32(crc); - /* Align it */ - if(unlikely(((long)b)&3 && len)){ - do { - u8 *p = (u8 *)b; - DO_CRC(*p++); - b = (u32 *)p; - } while ((--len) && ((long)b)&3 ); - } - if(likely(len >= 4)){ - /* load data 32 bits wide, xor data 32 bits wide. */ - size_t save_len = len & 3; - len = len >> 2; - --b; /* use pre increment below(*++b) for speed */ - do { - crc ^= *++b; - DO_CRC(0); - DO_CRC(0); - DO_CRC(0); - DO_CRC(0); - } while (--len); - b++; /* point to next byte(s) */ - len = save_len; - } - /* And the last few bytes */ - if(len){ - do { - u8 *p = (u8 *)b; - DO_CRC(*p++); - b = (void *)p; - } while (--len); - } + crc = crc32_body(crc, p, len, tab); return __be32_to_cpu(crc); -#undef ENDIAN_SHIFT -#undef DO_CRC - # elif CRC_BE_BITS == 4 while (len--) { crc ^= *p++ << 24; diff --git a/lib/ctype.c b/lib/ctype.c index d02ace14a322..26baa620e95b 100644 --- a/lib/ctype.c +++ b/lib/ctype.c @@ -7,30 +7,30 @@ #include <linux/ctype.h> #include <linux/module.h> -unsigned char _ctype[] = { -_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ -_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ -_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ -_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ -_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ -_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ -_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ -_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ -_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ -_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ -_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ -_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ -_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ -_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ +const unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ EXPORT_SYMBOL(_ctype); diff --git a/lib/debug_locks.c b/lib/debug_locks.c index bc3b11731b9c..b1c177307677 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -8,7 +8,6 @@ * * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> */ -#include <linux/kernel.h> #include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/module.h> @@ -23,6 +22,7 @@ * shut up after that. */ int debug_locks = 1; +EXPORT_SYMBOL_GPL(debug_locks); /* * The locking-testsuite uses <debug_locks_silent> to get a @@ -38,7 +38,6 @@ int debug_locks_off(void) { if (__debug_locks_off()) { if (!debug_locks_silent) { - oops_in_progress = 1; console_verbose(); return 1; } diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2755a3bd16a1..deebcc57d4e6 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -9,8 +9,10 @@ */ #include <linux/debugobjects.h> #include <linux/interrupt.h> +#include <linux/sched.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <linux/slab.h> #include <linux/hash.h> #define ODEBUG_HASH_BITS 14 @@ -25,14 +27,14 @@ struct debug_bucket { struct hlist_head list; - spinlock_t lock; + raw_spinlock_t lock; }; static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; -static DEFINE_SPINLOCK(pool_lock); +static DEFINE_RAW_SPINLOCK(pool_lock); static HLIST_HEAD(obj_pool); @@ -95,10 +97,10 @@ static int fill_pool(void) if (!new) return obj_pool_free; - spin_lock_irqsave(&pool_lock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); obj_pool_free++; - spin_unlock_irqrestore(&pool_lock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); } return obj_pool_free; } @@ -132,13 +134,14 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) { struct debug_obj *obj = NULL; - spin_lock(&pool_lock); + raw_spin_lock(&pool_lock); if (obj_pool.first) { obj = hlist_entry(obj_pool.first, typeof(*obj), node); obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; + obj->astate = 0; hlist_del(&obj->node); hlist_add_head(&obj->node, &b->list); @@ -151,7 +154,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) if (obj_pool_free < obj_pool_min_free) obj_pool_min_free = obj_pool_free; } - spin_unlock(&pool_lock); + raw_spin_unlock(&pool_lock); return obj; } @@ -164,7 +167,7 @@ static void free_obj_work(struct work_struct *work) struct debug_obj *obj; unsigned long flags; - spin_lock_irqsave(&pool_lock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); while (obj_pool_free > ODEBUG_POOL_SIZE) { obj = hlist_entry(obj_pool.first, typeof(*obj), node); hlist_del(&obj->node); @@ -173,11 +176,11 @@ static void free_obj_work(struct work_struct *work) * We release pool_lock across kmem_cache_free() to * avoid contention on pool_lock. */ - spin_unlock_irqrestore(&pool_lock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); kmem_cache_free(obj_cache, obj); - spin_lock_irqsave(&pool_lock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); } - spin_unlock_irqrestore(&pool_lock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); } /* @@ -189,7 +192,7 @@ static void free_object(struct debug_obj *obj) unsigned long flags; int sched = 0; - spin_lock_irqsave(&pool_lock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); /* * schedule work when the pool is filled and the cache is * initialized: @@ -199,7 +202,7 @@ static void free_object(struct debug_obj *obj) hlist_add_head(&obj->node, &obj_pool); obj_pool_free++; obj_pool_used--; - spin_unlock_irqrestore(&pool_lock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); if (sched) schedule_work(&debug_obj_work); } @@ -220,9 +223,9 @@ static void debug_objects_oom(void) printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n"); for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { - spin_lock_irqsave(&db->lock, flags); + raw_spin_lock_irqsave(&db->lock, flags); hlist_move_list(&db->list, &freelist); - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { @@ -250,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg) if (limit < 5 && obj->descr != descr_test) { limit++; - WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, - obj_states[obj->state], obj->descr->name); + WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " + "object type: %s\n", + msg, obj_states[obj->state], obj->astate, + obj->descr->name); } debug_objects_warnings++; } @@ -302,14 +307,14 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (!obj) { obj = alloc_object(addr, db, descr); if (!obj) { debug_objects_enabled = 0; - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); debug_objects_oom(); return; } @@ -326,7 +331,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "init"); state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_init, addr, state); return; @@ -337,7 +342,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) break; } - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); } /** @@ -384,7 +389,7 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (obj) { @@ -397,7 +402,7 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "activate"); state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_activate, addr, state); return; @@ -407,11 +412,11 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) default: break; } - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); return; } - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); /* * This happens when a static object is activated. We * let the type specific code decide whether this is @@ -437,7 +442,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (obj) { @@ -445,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: case ODEBUG_STATE_ACTIVE: - obj->state = ODEBUG_STATE_INACTIVE; + if (!obj->astate) + obj->state = ODEBUG_STATE_INACTIVE; + else + debug_print_object(obj, "deactivate"); break; case ODEBUG_STATE_DESTROYED: @@ -462,7 +470,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) debug_print_object(&o, "deactivate"); } - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); } /** @@ -482,7 +490,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (!obj) @@ -497,7 +505,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "destroy"); state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_destroy, addr, state); return; @@ -508,7 +516,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr) break; } out_unlock: - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); } /** @@ -528,7 +536,7 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (!obj) @@ -538,17 +546,64 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "free"); state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_free, addr, state); return; default: hlist_del(&obj->node); - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); free_object(obj); return; } out_unlock: - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); +} + +/** + * debug_object_active_state - debug checks object usage state machine + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + * @expect: expected state + * @next: state to move to if expected state is found + */ +void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + if (obj->astate == expect) + obj->astate = next; + else + debug_print_object(obj, "active_state"); + break; + + default: + debug_print_object(obj, "active_state"); + break; + } + } else { + struct debug_obj o = { .object = addr, + .state = ODEBUG_STATE_NOTAVAILABLE, + .descr = descr }; + + debug_print_object(&o, "active_state"); + } + + raw_spin_unlock_irqrestore(&db->lock, flags); } #ifdef CONFIG_DEBUG_OBJECTS_FREE @@ -574,7 +629,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size) repeat: cnt = 0; - spin_lock_irqsave(&db->lock, flags); + raw_spin_lock_irqsave(&db->lock, flags); hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { cnt++; oaddr = (unsigned long) obj->object; @@ -586,7 +641,7 @@ repeat: debug_print_object(obj, "free"); descr = obj->descr; state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_free, (void *) oaddr, state); goto repeat; @@ -596,7 +651,7 @@ repeat: break; } } - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { @@ -772,7 +827,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state) } } -static int +static int __init check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) { struct debug_bucket *db; @@ -782,7 +837,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (!obj && state != ODEBUG_STATE_NONE) { @@ -806,7 +861,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) } res = 0; out: - spin_unlock_irqrestore(&db->lock, flags); + raw_spin_unlock_irqrestore(&db->lock, flags); if (res) debug_objects_enabled = 0; return res; @@ -906,7 +961,7 @@ void __init debug_objects_early_init(void) int i; for (i = 0; i < ODEBUG_HASH_SIZE; i++) - spin_lock_init(&obj_hash[i].lock); + raw_spin_lock_init(&obj_hash[i].lock); for (i = 0; i < ODEBUG_POOL_SIZE; i++) hlist_add_head(&obj_static_pool[i].node, &obj_pool); @@ -915,7 +970,7 @@ void __init debug_objects_early_init(void) /* * Convert the statically allocated objects to dynamic ones: */ -static int debug_objects_replace_static_objects(void) +static int __init debug_objects_replace_static_objects(void) { struct debug_bucket *db = obj_hash; struct hlist_node *node, *tmp; diff --git a/lib/decompress.c b/lib/decompress.c index d2842f571674..a7606815541f 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -9,6 +9,7 @@ #include <linux/decompress/bunzip2.h> #include <linux/decompress/unlzma.h> #include <linux/decompress/inflate.h> +#include <linux/decompress/unlzo.h> #include <linux/types.h> #include <linux/string.h> @@ -22,6 +23,9 @@ #ifndef CONFIG_DECOMPRESS_LZMA # define unlzma NULL #endif +#ifndef CONFIG_DECOMPRESS_LZO +# define unlzo NULL +#endif static const struct compress_format { unsigned char magic[2]; @@ -32,6 +36,7 @@ static const struct compress_format { { {037, 0236}, "gzip", gunzip }, { {0x42, 0x5a}, "bzip2", bunzip2 }, { {0x5d, 0x00}, "lzma", unlzma }, + { {0x89, 0x4c}, "lzo", unlzo }, { {0, 0}, NULL, NULL } }; diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 708e2a86d87b..81c8bb1cc6aa 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -45,12 +45,14 @@ */ -#ifndef STATIC +#ifdef STATIC +#define PREBOOT +#else #include <linux/decompress/bunzip2.h> -#endif /* !STATIC */ +#include <linux/slab.h> +#endif /* STATIC */ #include <linux/decompress/mm.h> -#include <linux/slab.h> #ifndef INT_MAX #define INT_MAX 0x7fffffff @@ -105,6 +107,8 @@ struct bunzip_data { unsigned char selectors[32768]; /* nSelectors = 15 bits */ struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ int io_error; /* non-zero if we have IO error */ + int byteCount[256]; + unsigned char symToByte[256], mtfSymbol[256]; }; @@ -156,14 +160,16 @@ static int INIT get_next_block(struct bunzip_data *bd) int *base = NULL; int *limit = NULL; int dbufCount, nextSym, dbufSize, groupCount, selector, - i, j, k, t, runPos, symCount, symTotal, nSelectors, - byteCount[256]; - unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; + i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount; + unsigned char uc, *symToByte, *mtfSymbol, *selectors; unsigned int *dbuf, origPtr; dbuf = bd->dbuf; dbufSize = bd->dbufSize; selectors = bd->selectors; + byteCount = bd->byteCount; + symToByte = bd->symToByte; + mtfSymbol = bd->mtfSymbol; /* Read in header signature and CRC, then validate signature. (last block signature means CRC is for whole file, return now) */ @@ -297,7 +303,7 @@ static int INIT get_next_block(struct bunzip_data *bd) again when using them (during symbol decoding).*/ base = hufGroup->base-1; limit = hufGroup->limit-1; - /* Calculate permute[]. Concurently, initialize + /* Calculate permute[]. Concurrently, initialize * temp[] and limit[]. */ pp = 0; for (i = minLen; i <= maxLen; i++) { @@ -635,6 +641,8 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, /* Allocate bunzip_data. Most fields initialize to zero. */ bd = *bdp = malloc(i); + if (!bd) + return RETVAL_OUT_OF_MEMORY; memset(bd, 0, sizeof(struct bunzip_data)); /* Setup input buffer */ bd->inbuf = inbuf; @@ -662,6 +670,8 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, bd->dbufSize = 100000*(i-BZh0); bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); + if (!bd->dbuf) + return RETVAL_OUT_OF_MEMORY; return RETVAL_OK; } @@ -681,12 +691,10 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, set_error_fn(error_fn); if (flush) outbuf = malloc(BZIP2_IOBUF_SIZE); - else - len -= 4; /* Uncompressed size hack active in pre-boot - environment */ + if (!outbuf) { error("Could not allocate output bufer"); - return -1; + return RETVAL_OUT_OF_MEMORY; } if (buf) inbuf = buf; @@ -694,6 +702,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, inbuf = malloc(BZIP2_IOBUF_SIZE); if (!inbuf) { error("Could not allocate input bufer"); + i = RETVAL_OUT_OF_MEMORY; goto exit_0; } i = start_bunzip(&bd, inbuf, len, fill); @@ -720,11 +729,14 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { error("Compressed file ends unexpectedly"); } + if (!bd) + goto exit_1; if (bd->dbuf) large_free(bd->dbuf); if (pos) *pos = bd->inbufPos; free(bd); +exit_1: if (!buf) free(inbuf); exit_0: @@ -733,4 +745,14 @@ exit_0: return i; } -#define decompress bunzip2 +#ifdef PREBOOT +STATIC int INIT decompress(unsigned char *buf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *outbuf, + int *pos, + void(*error_fn)(char *x)) +{ + return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn); +} +#endif diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index e36b296fc9f8..fc686c7a0a0d 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -19,13 +19,18 @@ #include "zlib_inflate/inflate.h" #include "zlib_inflate/infutil.h" +#include <linux/slab.h> #endif /* STATIC */ #include <linux/decompress/mm.h> -#include <linux/slab.h> -#define INBUF_LEN (16*1024) +#define GZIP_IOBUF_SIZE (16*1024) + +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} /* Included from initramfs et al code */ STATIC int INIT gunzip(unsigned char *buf, int len, @@ -55,7 +60,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, if (buf) zbuf = buf; else { - zbuf = malloc(INBUF_LEN); + zbuf = malloc(GZIP_IOBUF_SIZE); len = 0; } if (!zbuf) { @@ -76,8 +81,11 @@ STATIC int INIT gunzip(unsigned char *buf, int len, goto gunzip_nomem4; } + if (!fill) + fill = nofill; + if (len == 0) - len = fill(zbuf, INBUF_LEN); + len = fill(zbuf, GZIP_IOBUF_SIZE); /* verify the gzip header */ if (len < 10 || @@ -113,7 +121,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, while (rc == Z_OK) { if (strm->avail_in == 0) { /* TODO: handle case where both pos and fill are set */ - len = fill(zbuf, INBUF_LEN); + len = fill(zbuf, GZIP_IOBUF_SIZE); if (len < 0) { rc = -1; error("read error"); diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 32123a1340e6..ca82fde81c8f 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -29,12 +29,14 @@ *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef STATIC +#ifdef STATIC +#define PREBOOT +#else #include <linux/decompress/unlzma.h> +#include <linux/slab.h> #endif /* STATIC */ #include <linux/decompress/mm.h> -#include <linux/slab.h> #define MIN(a, b) (((a) < (b)) ? (a) : (b)) @@ -80,6 +82,11 @@ struct rc { #define RC_MODEL_TOTAL_BITS 11 +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + /* Called twice: once at startup and once in rc_normalize() */ static void INIT rc_read(struct rc *rc) { @@ -95,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc, int (*fill)(void*, unsigned int), char *buffer, int buffer_size) { - rc->fill = fill; + if (fill) + rc->fill = fill; + else + rc->fill = nofill; rc->buffer = (uint8_t *)buffer; rc->buffer_size = buffer_size; rc->buffer_end = rc->buffer + rc->buffer_size; @@ -543,9 +553,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len, int ret = -1; set_error_fn(error_fn); - if (!flush) - in_len -= 4; /* Uncompressed size hack active in pre-boot - environment */ + if (buf) inbuf = buf; else @@ -645,4 +653,15 @@ exit_0: return ret; } -#define decompress unlzma +#ifdef PREBOOT +STATIC int INIT decompress(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error_fn)(char *x) + ) +{ + return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn); +} +#endif diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c new file mode 100644 index 000000000000..bcb3a4bd68ff --- /dev/null +++ b/lib/decompress_unlzo.c @@ -0,0 +1,217 @@ +/* + * LZO decompressor for the Linux kernel. Code borrowed from the lzo + * implementation by Markus Franz Xaver Johannes Oberhumer. + * + * Linux kernel adaptation: + * Copyright (C) 2009 + * Albin Tonnerre, Free Electrons <albin.tonnerre@free-electrons.com> + * + * Original code: + * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + * All Rights Reserved. + * + * lzop and the LZO library are free software; you can redistribute them + * and/or modify them under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Markus F.X.J. Oberhumer + * <markus@oberhumer.com> + * http://www.oberhumer.com/opensource/lzop/ + */ + +#ifdef STATIC +#include "lzo/lzo1x_decompress.c" +#else +#include <linux/slab.h> +#include <linux/decompress/unlzo.h> +#endif + +#include <linux/types.h> +#include <linux/lzo.h> +#include <linux/decompress/mm.h> + +#include <linux/compiler.h> +#include <asm/unaligned.h> + +static const unsigned char lzop_magic[] = { + 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a }; + +#define LZO_BLOCK_SIZE (256*1024l) +#define HEADER_HAS_FILTER 0x00000800L + +STATIC inline int INIT parse_header(u8 *input, u8 *skip) +{ + int l; + u8 *parse = input; + u8 level = 0; + u16 version; + + /* read magic: 9 first bits */ + for (l = 0; l < 9; l++) { + if (*parse++ != lzop_magic[l]) + return 0; + } + /* get version (2bytes), skip library version (2), + * 'need to be extracted' version (2) and + * method (1) */ + version = get_unaligned_be16(parse); + parse += 7; + if (version >= 0x0940) + level = *parse++; + if (get_unaligned_be32(parse) & HEADER_HAS_FILTER) + parse += 8; /* flags + filter info */ + else + parse += 4; /* flags */ + + /* skip mode and mtime_low */ + parse += 8; + if (version >= 0x0940) + parse += 4; /* skip mtime_high */ + + l = *parse++; + /* don't care about the file name, and skip checksum */ + parse += l + 4; + + *skip = parse - input; + return 1; +} + +STATIC inline int INIT unlzo(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error_fn) (char *x)) +{ + u8 skip = 0, r = 0; + u32 src_len, dst_len; + size_t tmp; + u8 *in_buf, *in_buf_save, *out_buf; + int ret = -1; + + set_error_fn(error_fn); + + if (output) { + out_buf = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit; + } else { + out_buf = malloc(LZO_BLOCK_SIZE); + if (!out_buf) { + error("Could not allocate output buffer"); + goto exit; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided, don't know what to do"); + goto exit_1; + } else if (input) { + in_buf = input; + } else if (!fill || !posp) { + error("NULL input pointer and missing position pointer or fill function"); + goto exit_1; + } else { + in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE)); + if (!in_buf) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + in_buf_save = in_buf; + + if (posp) + *posp = 0; + + if (fill) + fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + + if (!parse_header(input, &skip)) { + error("invalid header"); + goto exit_2; + } + in_buf += skip; + + if (posp) + *posp = skip; + + for (;;) { + /* read uncompressed block size */ + dst_len = get_unaligned_be32(in_buf); + in_buf += 4; + + /* exit if last block */ + if (dst_len == 0) { + if (posp) + *posp += 4; + break; + } + + if (dst_len > LZO_BLOCK_SIZE) { + error("dest len longer than block size"); + goto exit_2; + } + + /* read compressed block size, and skip block checksum info */ + src_len = get_unaligned_be32(in_buf); + in_buf += 8; + + if (src_len <= 0 || src_len > dst_len) { + error("file corrupted"); + goto exit_2; + } + + /* decompress */ + tmp = dst_len; + + /* When the input data is not compressed at all, + * lzo1x_decompress_safe will fail, so call memcpy() + * instead */ + if (unlikely(dst_len == src_len)) + memcpy(out_buf, in_buf, src_len); + else { + r = lzo1x_decompress_safe((u8 *) in_buf, src_len, + out_buf, &tmp); + + if (r != LZO_E_OK || dst_len != tmp) { + error("Compressed data violation"); + goto exit_2; + } + } + + if (flush) + flush(out_buf, dst_len); + if (output) + out_buf += dst_len; + if (posp) + *posp += src_len + 12; + if (fill) { + in_buf = in_buf_save; + fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + } else + in_buf += src_len; + } + + ret = 0; +exit_2: + if (!input) + free(in_buf); +exit_1: + if (!output) + free(out_buf); +exit: + return ret; +} + +#define decompress unlzo diff --git a/lib/devres.c b/lib/devres.c index 72c8909006da..6efddf53b90c 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -1,5 +1,6 @@ #include <linux/pci.h> #include <linux/io.h> +#include <linux/gfp.h> #include <linux/module.h> void devm_ioremap_release(struct device *dev, void *res) @@ -327,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all); * @pdev: PCI device to map IO resources for * @mask: Mask of BARs to unmap and release * - * Unamp and release regions specified by @mask. + * Unmap and release regions specified by @mask. */ void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) { diff --git a/lib/div64.c b/lib/div64.c index a111eb8de9cf..5b4919191778 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -77,26 +77,58 @@ s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) EXPORT_SYMBOL(div_s64_rem); #endif -/* 64bit divisor, dividend and result. dynamic precision */ +/** + * div64_u64 - unsigned 64bit divide with 64bit divisor + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * + * This implementation is a modified version of the algorithm proposed + * by the book 'Hacker's Delight'. The original source and full proof + * can be found here and is available for use without restriction. + * + * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c' + */ #ifndef div64_u64 u64 div64_u64(u64 dividend, u64 divisor) { - u32 high, d; + u32 high = divisor >> 32; + u64 quot; - high = divisor >> 32; - if (high) { - unsigned int shift = fls(high); + if (high == 0) { + quot = div_u64(dividend, divisor); + } else { + int n = 1 + fls(high); + quot = div_u64(dividend >> n, divisor >> n); - d = divisor >> shift; - dividend >>= shift; - } else - d = divisor; + if (quot != 0) + quot--; + if ((dividend - quot * divisor) >= divisor) + quot++; + } - return div_u64(dividend, d); + return quot; } EXPORT_SYMBOL(div64_u64); #endif +/** + * div64_s64 - signed 64bit divide with 64bit divisor + * @dividend: 64bit dividend + * @divisor: 64bit divisor + */ +#ifndef div64_s64 +s64 div64_s64(s64 dividend, s64 divisor) +{ + s64 quot, t; + + quot = div64_u64(abs64(dividend), abs64(divisor)); + t = (dividend ^ divisor) >> 63; + + return (quot ^ t) - t; +} +EXPORT_SYMBOL(div64_s64); +#endif + #endif /* BITS_PER_LONG == 32 */ /* diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 3b93129a968c..4bfb0471f106 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -156,9 +156,13 @@ static bool driver_filter(struct device *dev) return true; /* driver filter on and initialized */ - if (current_driver && dev->driver == current_driver) + if (current_driver && dev && dev->driver == current_driver) return true; + /* driver filter on, but we can't filter on a NULL device... */ + if (!dev) + return false; + if (current_driver || !current_driver_name[0]) return false; @@ -183,17 +187,17 @@ static bool driver_filter(struct device *dev) return ret; } -#define err_printk(dev, entry, format, arg...) do { \ - error_count += 1; \ - if (driver_filter(dev) && \ - (show_all_errors || show_num_errors > 0)) { \ - WARN(1, "%s %s: " format, \ - dev_driver_string(dev), \ - dev_name(dev) , ## arg); \ - dump_entry_trace(entry); \ - } \ - if (!show_all_errors && show_num_errors > 0) \ - show_num_errors -= 1; \ +#define err_printk(dev, entry, format, arg...) do { \ + error_count += 1; \ + if (driver_filter(dev) && \ + (show_all_errors || show_num_errors > 0)) { \ + WARN(1, "%s %s: " format, \ + dev ? dev_driver_string(dev) : "NULL", \ + dev ? dev_name(dev) : "NULL", ## arg); \ + dump_entry_trace(entry); \ + } \ + if (!show_all_errors && show_num_errors > 0) \ + show_num_errors -= 1; \ } while (0); /* @@ -255,7 +259,7 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, * times. Without a hardware IOMMU this results in the * same device addresses being put into the dma-debug * hash multiple times too. This can result in false - * positives being reported. Therfore we implement a + * positives being reported. Therefore we implement a * best-fit algorithm here which returns the entry from * the hash which fits best to the reference value * instead of the first-fit. @@ -566,7 +570,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf, * Now parse out the first token and use it as the name for the * driver to filter for. */ - for (i = 0; i < NAME_MAX_LEN; ++i) { + for (i = 0; i < NAME_MAX_LEN - 1; ++i) { current_driver_name[i] = buf[i]; if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) break; @@ -583,9 +587,10 @@ out_unlock: return count; } -const struct file_operations filter_fops = { +static const struct file_operations filter_fops = { .read = filter_read, .write = filter_write, + .llseek = default_llseek, }; static int dma_debug_fs_init(void) @@ -666,12 +671,13 @@ static int device_dma_allocations(struct device *dev) return count; } -static int dma_debug_device_change(struct notifier_block *nb, - unsigned long action, void *data) +static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; int count; + if (global_disable) + return 0; switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: @@ -693,6 +699,9 @@ void dma_debug_add_bus(struct bus_type *bus) { struct notifier_block *nb; + if (global_disable) + return; + nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); if (nb == NULL) { pr_err("dma_debug_add_bus: out of memory\n"); @@ -716,7 +725,7 @@ void dma_debug_init(u32 num_entries) for (i = 0; i < HASH_SIZE; ++i) { INIT_LIST_HEAD(&dma_entry_hash[i].list); - dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&dma_entry_hash[i].lock); } if (dma_debug_fs_init() != 0) { @@ -815,9 +824,11 @@ static void check_unmap(struct dma_debug_entry *ref) err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=%p] [cpu free address=%p]", + "[cpu alloc address=0x%016llx] " + "[cpu free address=0x%016llx]", ref->dev_addr, ref->size, - (void *)entry->paddr, (void *)ref->paddr); + (unsigned long long)entry->paddr, + (unsigned long long)ref->paddr); } if (ref->sg_call_ents && ref->type == dma_debug_sg && @@ -856,22 +867,21 @@ static void check_for_stack(struct device *dev, void *addr) "stack [addr=%p]\n", addr); } -static inline bool overlap(void *addr, u64 size, void *start, void *end) +static inline bool overlap(void *addr, unsigned long len, void *start, void *end) { - void *addr2 = (char *)addr + size; + unsigned long a1 = (unsigned long)addr; + unsigned long b1 = a1 + len; + unsigned long a2 = (unsigned long)start; + unsigned long b2 = (unsigned long)end; - return ((addr >= start && addr < end) || - (addr2 >= start && addr2 < end) || - ((addr < start) && (addr2 >= end))); + return !(b1 <= a2 || a1 >= b2); } -static void check_for_illegal_area(struct device *dev, void *addr, u64 size) +static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) { - if (overlap(addr, size, _text, _etext) || - overlap(addr, size, __start_rodata, __end_rodata)) - err_printk(dev, NULL, "DMA-API: device driver maps " - "memory from kernel text or rodata " - "[addr=%p] [size=%llu]\n", addr, size); + if (overlap(addr, len, _text, _etext) || + overlap(addr, len, __start_rodata, __end_rodata)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); } static void check_sync(struct device *dev, @@ -904,6 +914,9 @@ static void check_sync(struct device *dev, ref->size); } + if (entry->direction == DMA_BIDIRECTIONAL) + goto out; + if (ref->direction != entry->direction) { err_printk(dev, entry, "DMA-API: device driver syncs " "DMA memory with different direction " @@ -914,9 +927,6 @@ static void check_sync(struct device *dev, dir2name[ref->direction]); } - if (entry->direction == DMA_BIDIRECTIONAL) - goto out; - if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && !(ref->direction == DMA_TO_DEVICE)) err_printk(dev, entry, "DMA-API: device driver syncs " @@ -939,7 +949,6 @@ static void check_sync(struct device *dev, out: put_hash_bucket(bucket, &flags); - } void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, @@ -969,7 +978,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->type = dma_debug_single; if (!PageHighMem(page)) { - void *addr = ((char *)page_address(page)) + offset; + void *addr = page_address(page) + offset; + check_for_stack(dev, addr); check_for_illegal_area(dev, addr, size); } diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 833139ce1e22..b335acb43be2 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -21,22 +21,16 @@ #include <linux/list.h> #include <linux/sysctl.h> #include <linux/ctype.h> +#include <linux/string.h> #include <linux/uaccess.h> #include <linux/dynamic_debug.h> #include <linux/debugfs.h> +#include <linux/slab.h> +#include <linux/jump_label.h> extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; -/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which - * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They - * use independent hash functions, to reduce the chance of false positives. - */ -long long dynamic_debug_enabled; -EXPORT_SYMBOL_GPL(dynamic_debug_enabled); -long long dynamic_debug_enabled2; -EXPORT_SYMBOL_GPL(dynamic_debug_enabled2); - struct ddebug_table { struct list_head link; char *mod_name; @@ -86,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, } /* - * must be called with ddebug_lock held - */ - -static int disabled_hash(char hash, bool first_table) -{ - struct ddebug_table *dt; - char table_hash_value; - - list_for_each_entry(dt, &ddebug_tables, link) { - if (first_table) - table_hash_value = dt->ddebugs->primary_hash; - else - table_hash_value = dt->ddebugs->secondary_hash; - if (dt->num_enabled && (hash == table_hash_value)) - return 0; - } - return 1; -} - -/* * Search the tables for _ddebug's which match the given * `query' and apply the `flags' and `mask' to them. Tells * the user which ddebug's were changed, or whether none @@ -164,22 +138,13 @@ static void ddebug_change(const struct ddebug_query *query, if (!newflags) dt->num_enabled--; - else if (!dp-flags) + else if (!dp->flags) dt->num_enabled++; dp->flags = newflags; - if (newflags) { - dynamic_debug_enabled |= - (1LL << dp->primary_hash); - dynamic_debug_enabled2 |= - (1LL << dp->secondary_hash); - } else { - if (disabled_hash(dp->primary_hash, true)) - dynamic_debug_enabled &= - ~(1LL << dp->primary_hash); - if (disabled_hash(dp->secondary_hash, false)) - dynamic_debug_enabled2 &= - ~(1LL << dp->secondary_hash); - } + if (newflags) + dp->enabled = 1; + else + dp->enabled = 0; if (verbose) printk(KERN_INFO "ddebug: changed %s:%d [%s]%s %s\n", @@ -209,8 +174,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) char *end; /* Skip leading whitespace */ - while (*buf && isspace(*buf)) - buf++; + buf = skip_spaces(buf); if (!*buf) break; /* oh, it was trailing whitespace */ @@ -428,6 +392,40 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, return 0; } +static int ddebug_exec_query(char *query_string) +{ + unsigned int flags = 0, mask = 0; + struct ddebug_query query; +#define MAXWORDS 9 + int nwords; + char *words[MAXWORDS]; + + nwords = ddebug_tokenize(query_string, words, MAXWORDS); + if (nwords <= 0) + return -EINVAL; + if (ddebug_parse_query(words, nwords-1, &query)) + return -EINVAL; + if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) + return -EINVAL; + + /* actually go and implement the change */ + ddebug_change(&query, flags, mask); + return 0; +} + +static __initdata char ddebug_setup_string[1024]; +static __init int ddebug_setup_query(char *str) +{ + if (strlen(str) >= 1024) { + pr_warning("ddebug boot param string too large\n"); + return 0; + } + strcpy(ddebug_setup_string, str); + return 1; +} + +__setup("ddebug_query=", ddebug_setup_query); + /* * File_ops->write method for <debugfs>/dynamic_debug/conrol. Gathers the * command text from userspace, parses and executes it. @@ -435,12 +433,8 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { - unsigned int flags = 0, mask = 0; - struct ddebug_query query; -#define MAXWORDS 9 - int nwords; - char *words[MAXWORDS]; char tmpbuf[256]; + int ret; if (len == 0) return 0; @@ -454,16 +448,9 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, printk(KERN_INFO "%s: read %d bytes from userspace\n", __func__, (int)len); - nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); - if (nwords < 0) - return -EINVAL; - if (ddebug_parse_query(words, nwords-1, &query)) - return -EINVAL; - if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) - return -EINVAL; - - /* actually go and implement the change */ - ddebug_change(&query, flags, mask); + ret = ddebug_exec_query(tmpbuf); + if (ret) + return ret; *offp += len; return len; @@ -691,7 +678,7 @@ static void ddebug_table_free(struct ddebug_table *dt) * Called in response to a module being unloaded. Removes * any ddebug_table's which point at the module. */ -int ddebug_remove_module(char *mod_name) +int ddebug_remove_module(const char *mod_name) { struct ddebug_table *dt, *nextdt; int ret = -ENOENT; @@ -724,13 +711,14 @@ static void ddebug_remove_all_tables(void) mutex_unlock(&ddebug_lock); } -static int __init dynamic_debug_init(void) +static __initdata int ddebug_init_success; + +static int __init dynamic_debug_init_debugfs(void) { struct dentry *dir, *file; - struct _ddebug *iter, *iter_start; - const char *modname = NULL; - int ret = 0; - int n = 0; + + if (!ddebug_init_success) + return -ENODEV; dir = debugfs_create_dir("dynamic_debug", NULL); if (!dir) @@ -741,6 +729,16 @@ static int __init dynamic_debug_init(void) debugfs_remove(dir); return -ENOMEM; } + return 0; +} + +static int __init dynamic_debug_init(void) +{ + struct _ddebug *iter, *iter_start; + const char *modname = NULL; + int ret = 0; + int n = 0; + if (__start___verbose != __stop___verbose) { iter = __start___verbose; modname = iter->modname; @@ -758,12 +756,26 @@ static int __init dynamic_debug_init(void) } ret = ddebug_add_module(iter_start, n, modname); } + + /* ddebug_query boot param got passed -> set it up */ + if (ddebug_setup_string[0] != '\0') { + ret = ddebug_exec_query(ddebug_setup_string); + if (ret) + pr_warning("Invalid ddebug boot param %s", + ddebug_setup_string); + else + pr_info("ddebug initialized with string %s", + ddebug_setup_string); + } + out_free: - if (ret) { + if (ret) ddebug_remove_all_tables(); - debugfs_remove(dir); - debugfs_remove(file); - } + else + ddebug_init_success = 1; return 0; } -module_init(dynamic_debug_init); +/* Allow early initialization for boot messages via boot param */ +arch_initcall(dynamic_debug_init); +/* Debugfs setup must be done later */ +module_init(dynamic_debug_init_debugfs); diff --git a/lib/fault-inject.c b/lib/fault-inject.c index f97af55bdd96..7e65af70635e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -1,6 +1,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/random.h> +#include <linux/sched.h> #include <linux/stat.h> #include <linux/types.h> #include <linux/fs.h> diff --git a/lib/flex_array.c b/lib/flex_array.c new file mode 100644 index 000000000000..77a6fea7481e --- /dev/null +++ b/lib/flex_array.c @@ -0,0 +1,350 @@ +/* + * Flexible array managed in PAGE_SIZE parts + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2009 + * + * Author: Dave Hansen <dave@linux.vnet.ibm.com> + */ + +#include <linux/flex_array.h> +#include <linux/slab.h> +#include <linux/stddef.h> + +struct flex_array_part { + char elements[FLEX_ARRAY_PART_SIZE]; +}; + +/* + * If a user requests an allocation which is small + * enough, we may simply use the space in the + * flex_array->parts[] array to store the user + * data. + */ +static inline int elements_fit_in_base(struct flex_array *fa) +{ + int data_size = fa->element_size * fa->total_nr_elements; + if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) + return 1; + return 0; +} + +/** + * flex_array_alloc - allocate a new flexible array + * @element_size: the size of individual elements in the array + * @total: total number of elements that this should hold + * @flags: page allocation flags to use for base array + * + * Note: all locking must be provided by the caller. + * + * @total is used to size internal structures. If the user ever + * accesses any array indexes >=@total, it will produce errors. + * + * The maximum number of elements is defined as: the number of + * elements that can be stored in a page times the number of + * page pointers that we can fit in the base structure or (using + * integer math): + * + * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *) + * + * Here's a table showing example capacities. Note that the maximum + * index that the get/put() functions is just nr_objects-1. This + * basically means that you get 4MB of storage on 32-bit and 2MB on + * 64-bit. + * + * + * Element size | Objects | Objects | + * PAGE_SIZE=4k | 32-bit | 64-bit | + * ---------------------------------| + * 1 bytes | 4186112 | 2093056 | + * 2 bytes | 2093056 | 1046528 | + * 3 bytes | 1395030 | 697515 | + * 4 bytes | 1046528 | 523264 | + * 32 bytes | 130816 | 65408 | + * 33 bytes | 126728 | 63364 | + * 2048 bytes | 2044 | 1022 | + * 2049 bytes | 1022 | 511 | + * void * | 1046528 | 261632 | + * + * Since 64-bit pointers are twice the size, we lose half the + * capacity in the base structure. Also note that no effort is made + * to efficiently pack objects across page boundaries. + */ +struct flex_array *flex_array_alloc(int element_size, unsigned int total, + gfp_t flags) +{ + struct flex_array *ret; + int max_size = FLEX_ARRAY_NR_BASE_PTRS * + FLEX_ARRAY_ELEMENTS_PER_PART(element_size); + + /* max_size will end up 0 if element_size > PAGE_SIZE */ + if (total > max_size) + return NULL; + ret = kzalloc(sizeof(struct flex_array), flags); + if (!ret) + return NULL; + ret->element_size = element_size; + ret->total_nr_elements = total; + if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) + memset(&ret->parts[0], FLEX_ARRAY_FREE, + FLEX_ARRAY_BASE_BYTES_LEFT); + return ret; +} + +static int fa_element_to_part_nr(struct flex_array *fa, + unsigned int element_nr) +{ + return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); +} + +/** + * flex_array_free_parts - just free the second-level pages + * @fa: the flex array from which to free parts + * + * This is to be used in cases where the base 'struct flex_array' + * has been statically allocated and should not be free. + */ +void flex_array_free_parts(struct flex_array *fa) +{ + int part_nr; + + if (elements_fit_in_base(fa)) + return; + for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) + kfree(fa->parts[part_nr]); +} + +void flex_array_free(struct flex_array *fa) +{ + flex_array_free_parts(fa); + kfree(fa); +} + +static unsigned int index_inside_part(struct flex_array *fa, + unsigned int element_nr) +{ + unsigned int part_offset; + + part_offset = element_nr % + FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); + return part_offset * fa->element_size; +} + +static struct flex_array_part * +__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) +{ + struct flex_array_part *part = fa->parts[part_nr]; + if (!part) { + part = kmalloc(sizeof(struct flex_array_part), flags); + if (!part) + return NULL; + if (!(flags & __GFP_ZERO)) + memset(part, FLEX_ARRAY_FREE, + sizeof(struct flex_array_part)); + fa->parts[part_nr] = part; + } + return part; +} + +/** + * flex_array_put - copy data into the array at @element_nr + * @fa: the flex array to copy data into + * @element_nr: index of the position in which to insert + * the new element. + * @src: address of data to copy into the array + * @flags: page allocation flags to use for array expansion + * + * + * Note that this *copies* the contents of @src into + * the array. If you are trying to store an array of + * pointers, make sure to pass in &ptr instead of ptr. + * You may instead wish to use the flex_array_put_ptr() + * helper function. + * + * Locking must be provided by the caller. + */ +int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, + gfp_t flags) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + void *dst; + + if (element_nr >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else { + part = __fa_get_part(fa, part_nr, flags); + if (!part) + return -ENOMEM; + } + dst = &part->elements[index_inside_part(fa, element_nr)]; + memcpy(dst, src, fa->element_size); + return 0; +} + +/** + * flex_array_clear - clear element in array at @element_nr + * @fa: the flex array of the element. + * @element_nr: index of the position to clear. + * + * Locking must be provided by the caller. + */ +int flex_array_clear(struct flex_array *fa, unsigned int element_nr) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + void *dst; + + if (element_nr >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else { + part = fa->parts[part_nr]; + if (!part) + return -EINVAL; + } + dst = &part->elements[index_inside_part(fa, element_nr)]; + memset(dst, FLEX_ARRAY_FREE, fa->element_size); + return 0; +} + +/** + * flex_array_prealloc - guarantee that array space exists + * @fa: the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @end: index of last (inclusive) element for which space is allocated + * @flags: page allocation flags + * + * This will guarantee that no future calls to flex_array_put() + * will allocate memory. It can be used if you are expecting to + * be holding a lock or in some atomic context while writing + * data into the array. + * + * Locking must be provided by the caller. + */ +int flex_array_prealloc(struct flex_array *fa, unsigned int start, + unsigned int end, gfp_t flags) +{ + int start_part; + int end_part; + int part_nr; + struct flex_array_part *part; + + if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + return 0; + start_part = fa_element_to_part_nr(fa, start); + end_part = fa_element_to_part_nr(fa, end); + for (part_nr = start_part; part_nr <= end_part; part_nr++) { + part = __fa_get_part(fa, part_nr, flags); + if (!part) + return -ENOMEM; + } + return 0; +} + +/** + * flex_array_get - pull data back out of the array + * @fa: the flex array from which to extract data + * @element_nr: index of the element to fetch from the array + * + * Returns a pointer to the data at index @element_nr. Note + * that this is a copy of the data that was passed in. If you + * are using this to store pointers, you'll get back &ptr. You + * may instead wish to use the flex_array_get_ptr helper. + * + * Locking must be provided by the caller. + */ +void *flex_array_get(struct flex_array *fa, unsigned int element_nr) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + + if (element_nr >= fa->total_nr_elements) + return NULL; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else { + part = fa->parts[part_nr]; + if (!part) + return NULL; + } + return &part->elements[index_inside_part(fa, element_nr)]; +} + +/** + * flex_array_get_ptr - pull a ptr back out of the array + * @fa: the flex array from which to extract data + * @element_nr: index of the element to fetch from the array + * + * Returns the pointer placed in the flex array at element_nr using + * flex_array_put_ptr(). This function should not be called if the + * element in question was not set using the _put_ptr() helper. + */ +void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) +{ + void **tmp; + + tmp = flex_array_get(fa, element_nr); + if (!tmp) + return NULL; + + return *tmp; +} + +static int part_is_free(struct flex_array_part *part) +{ + int i; + + for (i = 0; i < sizeof(struct flex_array_part); i++) + if (part->elements[i] != FLEX_ARRAY_FREE) + return 0; + return 1; +} + +/** + * flex_array_shrink - free unused second-level pages + * @fa: the flex array to shrink + * + * Frees all second-level pages that consist solely of unused + * elements. Returns the number of pages freed. + * + * Locking must be provided by the caller. + */ +int flex_array_shrink(struct flex_array *fa) +{ + struct flex_array_part *part; + int part_nr; + int ret = 0; + + if (elements_fit_in_base(fa)) + return ret; + for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { + part = fa->parts[part_nr]; + if (!part) + continue; + if (part_is_free(part)) { + fa->parts[part_nr] = NULL; + kfree(part); + ret++; + } + } + return ret; +} diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index bea5d97df991..85d0e412a04f 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -7,8 +7,8 @@ #define LE_TABLE_SIZE (1 << CRC_LE_BITS) #define BE_TABLE_SIZE (1 << CRC_BE_BITS) -static uint32_t crc32table_le[LE_TABLE_SIZE]; -static uint32_t crc32table_be[BE_TABLE_SIZE]; +static uint32_t crc32table_le[4][LE_TABLE_SIZE]; +static uint32_t crc32table_be[4][BE_TABLE_SIZE]; /** * crc32init_le() - allocate and initialize LE table data @@ -22,12 +22,19 @@ static void crc32init_le(void) unsigned i, j; uint32_t crc = 1; - crc32table_le[0] = 0; + crc32table_le[0][0] = 0; for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) - crc32table_le[i + j] = crc ^ crc32table_le[j]; + crc32table_le[0][i + j] = crc ^ crc32table_le[0][j]; + } + for (i = 0; i < LE_TABLE_SIZE; i++) { + crc = crc32table_le[0][i]; + for (j = 1; j < 4; j++) { + crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); + crc32table_le[j][i] = crc; + } } } @@ -39,25 +46,35 @@ static void crc32init_be(void) unsigned i, j; uint32_t crc = 0x80000000; - crc32table_be[0] = 0; + crc32table_be[0][0] = 0; for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); for (j = 0; j < i; j++) - crc32table_be[i + j] = crc ^ crc32table_be[j]; + crc32table_be[0][i + j] = crc ^ crc32table_be[0][j]; + } + for (i = 0; i < BE_TABLE_SIZE; i++) { + crc = crc32table_be[0][i]; + for (j = 1; j < 4; j++) { + crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); + crc32table_be[j][i] = crc; + } } } -static void output_table(uint32_t table[], int len, char *trans) +static void output_table(uint32_t table[4][256], int len, char *trans) { - int i; + int i, j; - for (i = 0; i < len - 1; i++) { - if (i % ENTRIES_PER_LINE == 0) - printf("\n"); - printf("%s(0x%8.8xL), ", trans, table[i]); + for (j = 0 ; j < 4; j++) { + printf("{"); + for (i = 0; i < len - 1; i++) { + if (i % ENTRIES_PER_LINE == 0) + printf("\n"); + printf("%s(0x%8.8xL), ", trans, table[j][i]); + } + printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]); } - printf("%s(0x%8.8xL)\n", trans, table[len - 1]); } int main(int argc, char** argv) @@ -66,14 +83,14 @@ int main(int argc, char** argv) if (CRC_LE_BITS > 1) { crc32init_le(); - printf("static const u32 crc32table_le[] = {"); + printf("static const u32 crc32table_le[4][256] = {"); output_table(crc32table_le, LE_TABLE_SIZE, "tole"); printf("};\n"); } if (CRC_BE_BITS > 1) { crc32init_be(); - printf("static const u32 crc32table_be[] = {"); + printf("static const u32 crc32table_be[4][256] = {"); output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); printf("};\n"); } diff --git a/lib/genalloc.c b/lib/genalloc.c index eed2bdb865e7..1923f1490e72 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -10,7 +10,9 @@ * Version 2. See the file COPYING for more details. */ +#include <linux/slab.h> #include <linux/module.h> +#include <linux/bitmap.h> #include <linux/genalloc.h> @@ -114,7 +116,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) struct gen_pool_chunk *chunk; unsigned long addr, flags; int order = pool->min_alloc_order; - int nbits, bit, start_bit, end_bit; + int nbits, start_bit, end_bit; if (size == 0) return 0; @@ -126,32 +128,21 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); end_bit = (chunk->end_addr - chunk->start_addr) >> order; - end_bit -= nbits + 1; spin_lock_irqsave(&chunk->lock, flags); - bit = -1; - while (bit + 1 < end_bit) { - bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); - if (bit >= end_bit) - break; - - start_bit = bit; - if (nbits > 1) { - bit = find_next_bit(chunk->bits, bit + nbits, - bit + 1); - if (bit - start_bit < nbits) - continue; - } - - addr = chunk->start_addr + - ((unsigned long)start_bit << order); - while (nbits--) - __set_bit(start_bit++, chunk->bits); + start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, + nbits, 0); + if (start_bit >= end_bit) { spin_unlock_irqrestore(&chunk->lock, flags); - read_unlock(&pool->lock); - return addr; + continue; } + + addr = chunk->start_addr + ((unsigned long)start_bit << order); + + bitmap_set(chunk->bits, start_bit, nbits); spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; } read_unlock(&pool->lock); return 0; diff --git a/lib/hexdump.c b/lib/hexdump.c index 39af2560f765..b66b2bd67952 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -16,6 +16,40 @@ const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); /** + * hex_to_bin - convert a hex digit to its real value + * @ch: ascii character represents hex digit + * + * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad + * input. + */ +int hex_to_bin(char ch) +{ + if ((ch >= '0') && (ch <= '9')) + return ch - '0'; + ch = tolower(ch); + if ((ch >= 'a') && (ch <= 'f')) + return ch - 'a' + 10; + return -1; +} +EXPORT_SYMBOL(hex_to_bin); + +/** + * hex2bin - convert an ascii hexadecimal string to its binary representation + * @dst: binary result + * @src: ascii hexadecimal string + * @count: result length + */ +void hex2bin(u8 *dst, const char *src, size_t count) +{ + while (count--) { + *dst = hex_to_bin(*src++) << 4; + *dst += hex_to_bin(*src++); + dst++; + } +} +EXPORT_SYMBOL(hex2bin); + +/** * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory * @buf: data blob to dump * @len: number of bytes in the @buf @@ -34,7 +68,7 @@ EXPORT_SYMBOL(hex_asc); * * E.g.: * hex_dump_to_buffer(frame->data, frame->len, 16, 1, - * linebuf, sizeof(linebuf), 1); + * linebuf, sizeof(linebuf), true); * * example output buffer: * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO @@ -65,8 +99,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); ascii_column = 17 * ngroups + 2; break; } @@ -77,7 +111,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); + "%s%8.8x", j ? " " : "", *(ptr4 + j)); ascii_column = 9 * ngroups + 2; break; } @@ -88,7 +122,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); + "%s%4.4x", j ? " " : "", *(ptr2 + j)); ascii_column = 5 * ngroups + 2; break; } @@ -111,9 +145,10 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) - linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] - : '.'; + for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.'; + } nil: linebuf[lx++] = '\0'; } @@ -143,7 +178,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer); * * E.g.: * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS, - * 16, 1, frame->data, frame->len, 1); + * 16, 1, frame->data, frame->len, true); * * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode: * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO @@ -151,12 +186,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer); * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~. */ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, - int rowsize, int groupsize, - const void *buf, size_t len, bool ascii) + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) { const u8 *ptr = buf; int i, linelen, remaining = len; - unsigned char linebuf[200]; + unsigned char linebuf[32 * 3 + 2 + 32 + 1]; if (rowsize != 16 && rowsize != 32) rowsize = 16; @@ -164,13 +199,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, for (i = 0; i < len; i += rowsize) { linelen = min(remaining, rowsize); remaining -= rowsize; + hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, - linebuf, sizeof(linebuf), ascii); + linebuf, sizeof(linebuf), ascii); switch (prefix_type) { case DUMP_PREFIX_ADDRESS: - printk("%s%s%*p: %s\n", level, prefix_str, - (int)(2 * sizeof(void *)), ptr + i, linebuf); + printk("%s%s%p: %s\n", + level, prefix_str, ptr + i, linebuf); break; case DUMP_PREFIX_OFFSET: printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); @@ -196,9 +232,9 @@ EXPORT_SYMBOL(print_hex_dump); * rowsize of 16, groupsize of 1, and ASCII output included. */ void print_hex_dump_bytes(const char *prefix_str, int prefix_type, - const void *buf, size_t len) + const void *buf, size_t len) { print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, - buf, len, 1); + buf, len, true); } EXPORT_SYMBOL(print_hex_dump_bytes); diff --git a/lib/hweight.c b/lib/hweight.c index 389424ecb129..3c79d50814cf 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,37 +9,45 @@ * The Hamming Weight of a number is the total number of bits set in it. */ -unsigned int hweight32(unsigned int w) +unsigned int __sw_hweight32(unsigned int w) { +#ifdef ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x55555555; + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); + w = (w + (w >> 4)) & 0x0f0f0f0f; + return (w * 0x01010101) >> 24; +#else unsigned int res = w - ((w >> 1) & 0x55555555); res = (res & 0x33333333) + ((res >> 2) & 0x33333333); res = (res + (res >> 4)) & 0x0F0F0F0F; res = res + (res >> 8); return (res + (res >> 16)) & 0x000000FF; +#endif } -EXPORT_SYMBOL(hweight32); +EXPORT_SYMBOL(__sw_hweight32); -unsigned int hweight16(unsigned int w) +unsigned int __sw_hweight16(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x5555); res = (res & 0x3333) + ((res >> 2) & 0x3333); res = (res + (res >> 4)) & 0x0F0F; return (res + (res >> 8)) & 0x00FF; } -EXPORT_SYMBOL(hweight16); +EXPORT_SYMBOL(__sw_hweight16); -unsigned int hweight8(unsigned int w) +unsigned int __sw_hweight8(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x55); res = (res & 0x33) + ((res >> 2) & 0x33); return (res + (res >> 4)) & 0x0F; } -EXPORT_SYMBOL(hweight8); +EXPORT_SYMBOL(__sw_hweight8); -unsigned long hweight64(__u64 w) +unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 - return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); + return __sw_hweight32((unsigned int)(w >> 32)) + + __sw_hweight32((unsigned int)w); #elif BITS_PER_LONG == 64 #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x5555555555555555ul; @@ -56,4 +64,4 @@ unsigned long hweight64(__u64 w) #endif #endif } -EXPORT_SYMBOL(hweight64); +EXPORT_SYMBOL(__sw_hweight64); diff --git a/lib/idr.c b/lib/idr.c index 80ca9aca038b..e15502e8b21e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -106,16 +106,17 @@ static void idr_mark_full(struct idr_layer **pa, int id) } /** - * idr_pre_get - reserver resources for idr allocation + * idr_pre_get - reserve resources for idr allocation * @idp: idr handle * @gfp_mask: memory allocation flags * - * This function should be called prior to locking and calling the - * idr_get_new* functions. It preallocates enough memory to satisfy - * the worst possible allocation. + * This function should be called prior to calling the idr_get_new* functions. + * It preallocates enough memory to satisfy the worst possible allocation. The + * caller should pass in GFP_KERNEL if possible. This of course requires that + * no spinning locks be held. * - * If the system is REALLY out of memory this function returns 0, - * otherwise 1. + * If the system is REALLY out of memory this function returns %0, + * otherwise %1. */ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) { @@ -156,10 +157,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; /* if already at the top layer, we need to grow */ - if (!(p = pa[l])) { + if (id >= 1 << (idp->layers * IDR_BITS)) { *starting_id = id; return IDR_NEED_TO_GROW; } + p = pa[l]; + BUG_ON(!p); /* If we need to go up one layer, continue the * loop; otherwise, restart from the top. @@ -281,18 +284,20 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) /** * idr_get_new_above - allocate new idr entry above or equal to a start id * @idp: idr handle - * @ptr: pointer you want associated with the ide - * @start_id: id to start search at + * @ptr: pointer you want associated with the id + * @starting_id: id to start search at * @id: pointer to the allocated handle * * This is the allocate id function. It should be called with any * required locks. * - * If memory is required, it will return -EAGAIN, you should unlock - * and go back to the idr_pre_get() call. If the idr is full, it will - * return -ENOSPC. + * If allocation from IDR's private freelist fails, idr_get_new_above() will + * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill + * IDR's preallocation and then retry the idr_get_new_above() call. + * + * If the idr is full idr_get_new_above() will return %-ENOSPC. * - * @id returns a value in the range @starting_id ... 0x7fffffff + * @id returns a value in the range @starting_id ... %0x7fffffff */ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) { @@ -313,17 +318,16 @@ EXPORT_SYMBOL(idr_get_new_above); /** * idr_get_new - allocate new idr entry * @idp: idr handle - * @ptr: pointer you want associated with the ide + * @ptr: pointer you want associated with the id * @id: pointer to the allocated handle * - * This is the allocate id function. It should be called with any - * required locks. + * If allocation from IDR's private freelist fails, idr_get_new_above() will + * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill + * IDR's preallocation and then retry the idr_get_new_above() call. * - * If memory is required, it will return -EAGAIN, you should unlock - * and go back to the idr_pre_get() call. If the idr is full, it will - * return -ENOSPC. + * If the idr is full idr_get_new_above() will return %-ENOSPC. * - * @id returns a value in the range 0 ... 0x7fffffff + * @id returns a value in the range %0 ... %0x7fffffff */ int idr_get_new(struct idr *idp, void *ptr, int *id) { @@ -386,7 +390,7 @@ static void sub_remove(struct idr *idp, int shift, int id) } /** - * idr_remove - remove the given id and free it's slot + * idr_remove - remove the given id and free its slot * @idp: idr handle * @id: unique key */ @@ -435,7 +439,7 @@ EXPORT_SYMBOL(idr_remove); * function will remove all id mappings and leave all idp_layers * unused. * - * A typical clean-up sequence for objects stored in an idr tree, will + * A typical clean-up sequence for objects stored in an idr tree will * use idr_for_each() to free all objects, if necessay, then * idr_remove_all() to remove all ids, and idr_destroy() to free * up the cached idr_layers. @@ -443,6 +447,7 @@ EXPORT_SYMBOL(idr_remove); void idr_remove_all(struct idr *idp) { int n, id, max; + int bt_mask; struct idr_layer *p; struct idr_layer *pa[MAX_LEVEL]; struct idr_layer **paa = &pa[0]; @@ -460,8 +465,10 @@ void idr_remove_all(struct idr *idp) p = p->ary[(id >> n) & IDR_MASK]; } + bt_mask = id; id += 1 << n; - while (n < fls(id)) { + /* Get the highest bit that the above add changed from 0->1. */ + while (n < fls(id ^ bt_mask)) { if (p) free_layer(p); n += IDR_BITS; @@ -474,7 +481,7 @@ EXPORT_SYMBOL(idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree - * idp: idr handle + * @idp: idr handle */ void idr_destroy(struct idr *idp) { @@ -502,7 +509,7 @@ void *idr_find(struct idr *idp, int id) int n; struct idr_layer *p; - p = rcu_dereference(idp->top); + p = rcu_dereference_raw(idp->top); if (!p) return NULL; n = (p->layer+1) * IDR_BITS; @@ -517,7 +524,7 @@ void *idr_find(struct idr *idp, int id) while (n > 0 && p) { n -= IDR_BITS; BUG_ON(n != p->layer*IDR_BITS); - p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); } return((void *)p); } @@ -537,7 +544,7 @@ EXPORT_SYMBOL(idr_find); * not allowed. * * We check the return of @fn each time. If it returns anything other - * than 0, we break out and return that value. + * than %0, we break out and return that value. * * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). */ @@ -550,7 +557,7 @@ int idr_for_each(struct idr *idp, struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; - p = rcu_dereference(idp->top); + p = rcu_dereference_raw(idp->top); max = 1 << n; id = 0; @@ -558,7 +565,7 @@ int idr_for_each(struct idr *idp, while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; - p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); } if (p) { @@ -581,10 +588,11 @@ EXPORT_SYMBOL(idr_for_each); /** * idr_get_next - lookup next object of id to given id. * @idp: idr handle - * @id: pointer to lookup key + * @nextidp: pointer to lookup key * * Returns pointer to registered object with id, which is next number to - * given id. + * given id. After being looked up, *@nextidp will be updated for the next + * iteration. */ void *idr_get_next(struct idr *idp, int *nextidp) @@ -597,7 +605,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) /* find first ent */ n = idp->layers * IDR_BITS; max = 1 << n; - p = rcu_dereference(idp->top); + p = rcu_dereference_raw(idp->top); if (!p) return NULL; @@ -605,7 +613,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; - p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); } if (p) { @@ -621,7 +629,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) } return NULL; } - +EXPORT_SYMBOL(idr_get_next); /** @@ -631,8 +639,8 @@ void *idr_get_next(struct idr *idp, int *nextidp) * @id: lookup key * * Replace the pointer registered with an id and return the old value. - * A -ENOENT return indicates that @id was not found. - * A -EINVAL return indicates that @id was not within valid constraints. + * A %-ENOENT return indicates that @id was not found. + * A %-EINVAL return indicates that @id was not within valid constraints. * * The caller must serialize with writers. */ @@ -690,10 +698,11 @@ void idr_init(struct idr *idp) EXPORT_SYMBOL(idr_init); -/* +/** + * DOC: IDA description * IDA - IDR based ID allocator * - * this is id allocator without id -> pointer translation. Memory + * This is id allocator without id -> pointer translation. Memory * usage is much lower than full blown idr because each id only * occupies a bit. ida uses a custom leaf node which contains * IDA_BITMAP_BITS slots. @@ -726,8 +735,8 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap) * following function. It preallocates enough memory to satisfy the * worst possible allocation. * - * If the system is REALLY out of memory this function returns 0, - * otherwise 1. + * If the system is REALLY out of memory this function returns %0, + * otherwise %1. */ int ida_pre_get(struct ida *ida, gfp_t gfp_mask) { @@ -753,17 +762,17 @@ EXPORT_SYMBOL(ida_pre_get); /** * ida_get_new_above - allocate new ID above or equal to a start id * @ida: ida handle - * @staring_id: id to start search at + * @starting_id: id to start search at * @p_id: pointer to the allocated handle * * Allocate new ID above or equal to @ida. It should be called with * any required locks. * - * If memory is required, it will return -EAGAIN, you should unlock + * If memory is required, it will return %-EAGAIN, you should unlock * and go back to the ida_pre_get() call. If the ida is full, it will - * return -ENOSPC. + * return %-ENOSPC. * - * @p_id returns a value in the range @starting_id ... 0x7fffffff. + * @p_id returns a value in the range @starting_id ... %0x7fffffff. */ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) { @@ -845,11 +854,11 @@ EXPORT_SYMBOL(ida_get_new_above); * * Allocate new ID. It should be called with any required locks. * - * If memory is required, it will return -EAGAIN, you should unlock + * If memory is required, it will return %-EAGAIN, you should unlock * and go back to the idr_pre_get() call. If the idr is full, it will - * return -ENOSPC. + * return %-ENOSPC. * - * @id returns a value in the range 0 ... 0x7fffffff. + * @id returns a value in the range %0 ... %0x7fffffff. */ int ida_get_new(struct ida *ida, int *p_id) { @@ -907,7 +916,7 @@ EXPORT_SYMBOL(ida_remove); /** * ida_destroy - release all cached layers within an ida tree - * ida: ida handle + * @ida: ida handle */ void ida_destroy(struct ida *ida) { diff --git a/lib/inflate.c b/lib/inflate.c index 1a8e8a978128..013a76193481 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -7,7 +7,7 @@ * Adapted for booting Linux by Hannu Savolainen 1993 * based on gzip-1.0.3 * - * Nicolas Pitre <nico@cam.org>, 1999/04/14 : + * Nicolas Pitre <nico@fluxnic.net>, 1999/04/14 : * Little mods for all variable to reside either into rodata or bss segments * by marking constant variables with 'const' and initializing all the others * at run-time only. This allows for the kernel uncompressor to run @@ -103,6 +103,9 @@ the two sets of lengths. */ #include <linux/compiler.h> +#ifdef NO_INFLATE_MALLOC +#include <linux/slab.h> +#endif #ifdef RCSID static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #"; diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 75dbda03f4fb..da053313ee5c 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -3,41 +3,7 @@ */ #include <linux/module.h> -#include <linux/bitops.h> - -static unsigned long find_next_zero_area(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask) -{ - unsigned long index, end, i; -again: - index = find_next_zero_bit(map, size, start); - - /* Align allocation */ - index = (index + align_mask) & ~align_mask; - - end = index + nr; - if (end >= size) - return -1; - for (i = index; i < end; i++) { - if (test_bit(i, map)) { - start = i+1; - goto again; - } - } - return index; -} - -void iommu_area_reserve(unsigned long *map, unsigned long i, int len) -{ - unsigned long end = i + len; - while (i < end) { - __set_bit(i, map); - i++; - } -} +#include <linux/bitmap.h> int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, @@ -55,36 +21,20 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long align_mask) { unsigned long index; + + /* We don't want the last of the limit */ + size -= 1; again: - index = find_next_zero_area(map, size, start, nr, align_mask); - if (index != -1) { + index = bitmap_find_next_zero_area(map, size, start, nr, align_mask); + if (index < size) { if (iommu_is_span_boundary(index, nr, shift, boundary_size)) { /* we could do more effectively */ start = index + 1; goto again; } - iommu_area_reserve(map, index, nr); + bitmap_set(map, index, nr); + return index; } - return index; + return -1; } EXPORT_SYMBOL(iommu_area_alloc); - -void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr) -{ - unsigned long end = start + nr; - - while (start < end) { - __clear_bit(start, map); - start++; - } -} -EXPORT_SYMBOL(iommu_area_free); - -unsigned long iommu_num_pages(unsigned long addr, unsigned long len, - unsigned long io_page_size) -{ - unsigned long size = (addr & (io_page_size - 1)) + len; - - return DIV_ROUND_UP(size, io_page_size); -} -EXPORT_SYMBOL(iommu_num_pages); diff --git a/lib/ioremap.c b/lib/ioremap.c index 14c6078f17a2..5730ecd3eb66 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -13,10 +13,10 @@ #include <asm/pgtable.h> static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pte_t *pte; - unsigned long pfn; + u64 pfn; pfn = phys_addr >> PAGE_SHIFT; pte = pte_alloc_kernel(pmd, addr); @@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, } static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pmd_t *pmd; unsigned long next; @@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, } static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; @@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, } int ioremap_page_range(unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pgd_t *pgd; unsigned long start; diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index f1ed2fe76c65..bd2bea963364 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -12,34 +12,47 @@ #include <linux/sched.h> -/** - * is_single_threaded - Determine if a thread group is single-threaded or not - * @p: A task in the thread group in question - * - * This returns true if the thread group to which a task belongs is single - * threaded, false if it is not. +/* + * Returns true if the task does not share ->mm with another thread/process. */ -bool is_single_threaded(struct task_struct *p) +bool current_is_single_threaded(void) { - struct task_struct *g, *t; - struct mm_struct *mm = p->mm; + struct task_struct *task = current; + struct mm_struct *mm = task->mm; + struct task_struct *p, *t; + bool ret; - if (atomic_read(&p->signal->count) != 1) - goto no; + if (atomic_read(&task->signal->live) != 1) + return false; - if (atomic_read(&p->mm->mm_users) != 1) { - read_lock(&tasklist_lock); - do_each_thread(g, t) { - if (t->mm == mm && t != p) - goto no_unlock; - } while_each_thread(g, t); - read_unlock(&tasklist_lock); - } + if (atomic_read(&mm->mm_users) == 1) + return true; - return true; + ret = false; + rcu_read_lock(); + for_each_process(p) { + if (unlikely(p->flags & PF_KTHREAD)) + continue; + if (unlikely(p == task->group_leader)) + continue; + + t = p; + do { + if (unlikely(t->mm == mm)) + goto found; + if (likely(t->mm)) + break; + /* + * t->mm == NULL. Make sure next_thread/next_task + * will see other CLONE_VM tasks which might be + * forked before exiting. + */ + smp_rmb(); + } while_each_thread(p, t); + } + ret = true; +found: + rcu_read_unlock(); -no_unlock: - read_unlock(&tasklist_lock); -no: - return false; + return ret; } diff --git a/lib/kasprintf.c b/lib/kasprintf.c index c5ff1fd10030..9c4233b23783 100644 --- a/lib/kasprintf.c +++ b/lib/kasprintf.c @@ -6,6 +6,7 @@ #include <stdarg.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029e3525..b135d04aa48a 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -5,10 +5,13 @@ * relegated to obsolescence, but used by various less * important (or lazy) subsystems. */ -#include <linux/smp_lock.h> #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/semaphore.h> +#include <linux/smp_lock.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/bkl.h> /* * The 'big kernel lock' @@ -20,7 +23,7 @@ * * Don't use in new code. */ -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag); +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag); /* @@ -33,12 +36,12 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag); * If it successfully gets the lock, it should increment * the preemption count like any spinlock does. * - * (This works on UP too - _raw_spin_trylock will never + * (This works on UP too - do_raw_spin_trylock will never * return false in that case) */ int __lockfunc __reacquire_kernel_lock(void) { - while (!_raw_spin_trylock(&kernel_flag)) { + while (!do_raw_spin_trylock(&kernel_flag)) { if (need_resched()) return -EAGAIN; cpu_relax(); @@ -49,27 +52,27 @@ int __lockfunc __reacquire_kernel_lock(void) void __lockfunc __release_kernel_lock(void) { - _raw_spin_unlock(&kernel_flag); + do_raw_spin_unlock(&kernel_flag); preempt_enable_no_resched(); } /* * These are the BKL spinlocks - we try to be polite about preemption. * If SMP is not on (ie UP preemption), this all goes away because the - * _raw_spin_trylock() will always succeed. + * do_raw_spin_trylock() will always succeed. */ #ifdef CONFIG_PREEMPT static inline void __lock_kernel(void) { preempt_disable(); - if (unlikely(!_raw_spin_trylock(&kernel_flag))) { + if (unlikely(!do_raw_spin_trylock(&kernel_flag))) { /* * If preemption was disabled even before this * was called, there's nothing we can be polite * about - just spin. */ if (preempt_count() > 1) { - _raw_spin_lock(&kernel_flag); + do_raw_spin_lock(&kernel_flag); return; } @@ -79,10 +82,10 @@ static inline void __lock_kernel(void) */ do { preempt_enable(); - while (spin_is_locked(&kernel_flag)) + while (raw_spin_is_locked(&kernel_flag)) cpu_relax(); preempt_disable(); - } while (!_raw_spin_trylock(&kernel_flag)); + } while (!do_raw_spin_trylock(&kernel_flag)); } } @@ -93,7 +96,7 @@ static inline void __lock_kernel(void) */ static inline void __lock_kernel(void) { - _raw_spin_lock(&kernel_flag); + do_raw_spin_lock(&kernel_flag); } #endif @@ -103,7 +106,7 @@ static inline void __unlock_kernel(void) * the BKL is not covered by lockdep, so we open-code the * unlocking sequence (and thus avoid the dep-chain ops): */ - _raw_spin_unlock(&kernel_flag); + do_raw_spin_unlock(&kernel_flag); preempt_enable(); } @@ -113,21 +116,28 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc lock_kernel(void) +void __lockfunc _lock_kernel(const char *func, const char *file, int line) { - int depth = current->lock_depth+1; - if (likely(!depth)) + int depth = current->lock_depth + 1; + + trace_lock_kernel(func, file, line); + + if (likely(!depth)) { + might_sleep(); __lock_kernel(); + } current->lock_depth = depth; } -void __lockfunc unlock_kernel(void) +void __lockfunc _unlock_kernel(const char *func, const char *file, int line) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); + + trace_unlock_kernel(func, file, line); } -EXPORT_SYMBOL(lock_kernel); -EXPORT_SYMBOL(unlock_kernel); +EXPORT_SYMBOL(_lock_kernel); +EXPORT_SYMBOL(_unlock_kernel); diff --git a/lib/kobject.c b/lib/kobject.c index b512b746d2af..82dc34c095c2 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -struct sysfs_ops kobj_sysfs_ops = { +const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; @@ -746,17 +746,56 @@ void kset_unregister(struct kset *k) */ struct kobject *kset_find_obj(struct kset *kset, const char *name) { + return kset_find_obj_hinted(kset, name, NULL); +} + +/** + * kset_find_obj_hinted - search for object in kset given a predecessor hint. + * @kset: kset we're looking in. + * @name: object's name. + * @hint: hint to possible object's predecessor. + * + * Check the hint's next object and if it is a match return it directly, + * otherwise, fall back to the behavior of kset_find_obj(). Either way + * a reference for the returned object is held and the reference on the + * hinted object is released. + */ +struct kobject *kset_find_obj_hinted(struct kset *kset, const char *name, + struct kobject *hint) +{ struct kobject *k; struct kobject *ret = NULL; spin_lock(&kset->list_lock); + + if (!hint) + goto slow_search; + + /* end of list detection */ + if (hint->entry.next == kset->list.next) + goto slow_search; + + k = container_of(hint->entry.next, struct kobject, entry); + if (!kobject_name(k) || strcmp(kobject_name(k), name)) + goto slow_search; + + ret = kobject_get(k); + goto unlock_exit; + +slow_search: list_for_each_entry(k, &kset->list, entry) { if (kobject_name(k) && !strcmp(kobject_name(k), name)) { ret = kobject_get(k); break; } } + +unlock_exit: spin_unlock(&kset->list_lock); + + if (hint) + kobject_put(hint); + return ret; } @@ -789,7 +828,7 @@ static struct kobj_type kset_ktype = { * If the kset was not able to be created, NULL will be returned. */ static struct kset *kset_create(const char *name, - struct kset_uevent_ops *uevent_ops, + const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; @@ -832,7 +871,7 @@ static struct kset *kset_create(const char *name, * If the kset was not able to be created, NULL will be returned. */ struct kset *kset_create_and_add(const char *name, - struct kset_uevent_ops *uevent_ops, + const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; @@ -850,6 +889,121 @@ struct kset *kset_create_and_add(const char *name, } EXPORT_SYMBOL_GPL(kset_create_and_add); + +static DEFINE_SPINLOCK(kobj_ns_type_lock); +static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) +{ + enum kobj_ns_type type = ops->type; + int error; + + spin_lock(&kobj_ns_type_lock); + + error = -EINVAL; + if (type >= KOBJ_NS_TYPES) + goto out; + + error = -EINVAL; + if (type <= KOBJ_NS_TYPE_NONE) + goto out; + + error = -EBUSY; + if (kobj_ns_ops_tbl[type]) + goto out; + + error = 0; + kobj_ns_ops_tbl[type] = ops; + +out: + spin_unlock(&kobj_ns_type_lock); + return error; +} + +int kobj_ns_type_registered(enum kobj_ns_type type) +{ + int registered = 0; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES)) + registered = kobj_ns_ops_tbl[type] != NULL; + spin_unlock(&kobj_ns_type_lock); + + return registered; +} + +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) +{ + const struct kobj_ns_type_operations *ops = NULL; + + if (parent && parent->ktype->child_ns_type) + ops = parent->ktype->child_ns_type(parent); + + return ops; +} + +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) +{ + return kobj_child_ns_ops(kobj->parent); +} + + +const void *kobj_ns_current(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->current_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_initial(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->initial_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +/* + * kobj_ns_exit - invalidate a namespace tag + * + * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) + * @ns: the actual namespace being invalidated + * + * This is called when a tag is no longer valid. For instance, + * when a network namespace exits, it uses this helper to + * make sure no sb's sysfs_info points to the now-invalidated + * netns. + */ +void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +{ + sysfs_exit_ns(type, ns); +} + + EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 920a3ca6e259..70af0a7f97c0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -18,18 +18,25 @@ #include <linux/string.h> #include <linux/kobject.h> #include <linux/module.h> - +#include <linux/slab.h> +#include <linux/user_namespace.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <net/sock.h> +#include <net/net_namespace.h> u64 uevent_seqnum; char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; static DEFINE_SPINLOCK(sequence_lock); -#if defined(CONFIG_NET) -static struct sock *uevent_sock; +#ifdef CONFIG_NET +struct uevent_sock { + struct list_head list; + struct sock *sk; +}; +static LIST_HEAD(uevent_sock_list); +static DEFINE_MUTEX(uevent_sock_mutex); #endif /* the strings here must match the enum in include/linux/kobject.h */ @@ -76,6 +83,39 @@ out: return ret; } +#ifdef CONFIG_NET +static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct kobject *kobj = data; + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *sock_ns, *ns; + ns = kobj->ktype->namespace(kobj); + sock_ns = ops->netlink_ns(dsk); + return sock_ns != ns; + } + + return 0; +} +#endif + +static int kobj_usermode_filter(struct kobject *kobj) +{ + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *init_ns, *ns; + ns = kobj->ktype->namespace(kobj); + init_ns = ops->initial_ns(); + return ns != init_ns; + } + + return 0; +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -83,7 +123,7 @@ out: * @kobj: struct kobject that the action is happening to * @envp_ext: pointer to environmental data * - * Returns 0 if kobject_uevent() is completed with success or the + * Returns 0 if kobject_uevent_env() is completed with success or the * corresponding error when it fails. */ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, @@ -95,10 +135,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, const char *subsystem; struct kobject *top_kobj; struct kset *kset; - struct kset_uevent_ops *uevent_ops; + const struct kset_uevent_ops *uevent_ops; u64 seq; int i = 0; int retval = 0; +#ifdef CONFIG_NET + struct uevent_sock *ue_sk; +#endif pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -210,7 +253,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #if defined(CONFIG_NET) /* send netlink message */ - if (uevent_sock) { + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + struct sock *uevent_sock = ue_sk->sk; struct sk_buff *skb; size_t len; @@ -232,18 +277,21 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } NETLINK_CB(skb).dst_group = 1; - retval = netlink_broadcast(uevent_sock, skb, 0, 1, - GFP_KERNEL); + retval = netlink_broadcast_filtered(uevent_sock, skb, + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS) retval = 0; } else retval = -ENOMEM; } + mutex_unlock(&uevent_sock_mutex); #endif /* call uevent_helper, usually only enabled during early boot */ - if (uevent_helper[0]) { + if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { char *argv [3]; argv [0] = uevent_helper; @@ -269,7 +317,7 @@ exit: EXPORT_SYMBOL_GPL(kobject_uevent_env); /** - * kobject_uevent - notify userspace by ending an uevent + * kobject_uevent - notify userspace by sending an uevent * * @action: action that is happening * @kobj: struct kobject that the action is happening to @@ -319,18 +367,59 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) -static int __init kobject_uevent_init(void) +static int uevent_net_init(struct net *net) { - uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, - 1, NULL, NULL, THIS_MODULE); - if (!uevent_sock) { + struct uevent_sock *ue_sk; + + ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); + if (!ue_sk) + return -ENOMEM; + + ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, + 1, NULL, NULL, THIS_MODULE); + if (!ue_sk->sk) { printk(KERN_ERR "kobject_uevent: unable to create netlink socket!\n"); + kfree(ue_sk); return -ENODEV; } - netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + mutex_lock(&uevent_sock_mutex); + list_add_tail(&ue_sk->list, &uevent_sock_list); + mutex_unlock(&uevent_sock_mutex); return 0; } +static void uevent_net_exit(struct net *net) +{ + struct uevent_sock *ue_sk; + + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + if (sock_net(ue_sk->sk) == net) + goto found; + } + mutex_unlock(&uevent_sock_mutex); + return; + +found: + list_del(&ue_sk->list); + mutex_unlock(&uevent_sock_mutex); + + netlink_kernel_release(ue_sk->sk); + kfree(ue_sk); +} + +static struct pernet_operations uevent_net_ops = { + .init = uevent_net_init, + .exit = uevent_net_exit, +}; + +static int __init kobject_uevent_init(void) +{ + netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + return register_pernet_subsys(&uevent_net_ops); +} + + postcore_initcall(kobject_uevent_init); #endif diff --git a/lib/kref.c b/lib/kref.c index 9ecd6e865610..3efb882b11db 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -13,17 +13,7 @@ #include <linux/kref.h> #include <linux/module.h> - -/** - * kref_set - initialize object and set refcount to requested number. - * @kref: object in question. - * @num: initial reference counter - */ -void kref_set(struct kref *kref, int num) -{ - atomic_set(&kref->refcount, num); - smp_mb(); -} +#include <linux/slab.h> /** * kref_init - initialize object. @@ -31,7 +21,8 @@ void kref_set(struct kref *kref, int num) */ void kref_init(struct kref *kref) { - kref_set(kref, 1); + atomic_set(&kref->refcount, 1); + smp_mb(); } /** @@ -71,7 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) return 0; } -EXPORT_SYMBOL(kref_set); + +/** + * kref_sub - subtract a number of refcounts for object. + * @kref: object. + * @count: Number of recounts to subtract. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. + * + * Subtract @count from the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. + */ +int kref_sub(struct kref *kref, unsigned int count, + void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + WARN_ON(release == (void (*)(struct kref *))kfree); + + if (atomic_sub_and_test((int) count, &kref->refcount)) { + release(kref); + return 1; + } + return 0; +} + EXPORT_SYMBOL(kref_init); EXPORT_SYMBOL(kref_get); EXPORT_SYMBOL(kref_put); +EXPORT_SYMBOL(kref_sub); diff --git a/lib/lcm.c b/lib/lcm.c new file mode 100644 index 000000000000..157cd88a6ffc --- /dev/null +++ b/lib/lcm.c @@ -0,0 +1,15 @@ +#include <linux/kernel.h> +#include <linux/gcd.h> +#include <linux/module.h> + +/* Lowest common multiple */ +unsigned long lcm(unsigned long a, unsigned long b) +{ + if (a && b) + return (a * b) / gcd(a, b); + else if (b) + return b; + + return a; +} +EXPORT_SYMBOL_GPL(lcm); diff --git a/lib/list_debug.c b/lib/list_debug.c index 1a39f4e3ae1f..344c710d16ca 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add); */ void list_del(struct list_head *entry) { + WARN(entry->next == LIST_POISON1, + "list_del corruption, next is LIST_POISON1 (%p)\n", + LIST_POISON1); + WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2, + "list_del corruption, prev is LIST_POISON2 (%p)\n", + LIST_POISON2); WARN(entry->prev->next != entry, "list_del corruption. prev->next should be %p, " "but was %p\n", entry, entry->prev->next); diff --git a/lib/list_sort.c b/lib/list_sort.c new file mode 100644 index 000000000000..d7325c6b103f --- /dev/null +++ b/lib/list_sort.c @@ -0,0 +1,291 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/list_sort.h> +#include <linux/slab.h> +#include <linux/list.h> + +#define MAX_LIST_LENGTH_BITS 20 + +/* + * Returns a list organized in an intermediate format suited + * to chaining of merge() calls: null-terminated, no reserved or + * sentinel head node, "prev" links not maintained. + */ +static struct list_head *merge(void *priv, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b), + struct list_head *a, struct list_head *b) +{ + struct list_head head, *tail = &head; + + while (a && b) { + /* if equal, take 'a' -- important for sort stability */ + if ((*cmp)(priv, a, b) <= 0) { + tail->next = a; + a = a->next; + } else { + tail->next = b; + b = b->next; + } + tail = tail->next; + } + tail->next = a?:b; + return head.next; +} + +/* + * Combine final list merge with restoration of standard doubly-linked + * list structure. This approach duplicates code from merge(), but + * runs faster than the tidier alternatives of either a separate final + * prev-link restoration pass, or maintaining the prev links + * throughout. + */ +static void merge_and_restore_back_links(void *priv, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b), + struct list_head *head, + struct list_head *a, struct list_head *b) +{ + struct list_head *tail = head; + + while (a && b) { + /* if equal, take 'a' -- important for sort stability */ + if ((*cmp)(priv, a, b) <= 0) { + tail->next = a; + a->prev = tail; + a = a->next; + } else { + tail->next = b; + b->prev = tail; + b = b->next; + } + tail = tail->next; + } + tail->next = a ? : b; + + do { + /* + * In worst cases this loop may run many iterations. + * Continue callbacks to the client even though no + * element comparison is needed, so the client's cmp() + * routine can invoke cond_resched() periodically. + */ + (*cmp)(priv, tail->next, tail->next); + + tail->next->prev = tail; + tail = tail->next; + } while (tail->next); + + tail->next = head; + head->prev = tail; +} + +/** + * list_sort - sort a list + * @priv: private data, opaque to list_sort(), passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * This function implements "merge sort", which has O(nlog(n)) + * complexity. + * + * The comparison function @cmp must return a negative value if @a + * should sort before @b, and a positive value if @a should sort after + * @b. If @a and @b are equivalent, and their original relative + * ordering is to be preserved, @cmp must return 0. + */ +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)) +{ + struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists + -- last slot is a sentinel */ + int lev; /* index into part[] */ + int max_lev = 0; + struct list_head *list; + + if (list_empty(head)) + return; + + memset(part, 0, sizeof(part)); + + head->prev->next = NULL; + list = head->next; + + while (list) { + struct list_head *cur = list; + list = list->next; + cur->next = NULL; + + for (lev = 0; part[lev]; lev++) { + cur = merge(priv, cmp, part[lev], cur); + part[lev] = NULL; + } + if (lev > max_lev) { + if (unlikely(lev >= ARRAY_SIZE(part)-1)) { + printk_once(KERN_DEBUG "list passed to" + " list_sort() too long for" + " efficiency\n"); + lev--; + } + max_lev = lev; + } + part[lev] = cur; + } + + for (lev = 0; lev < max_lev; lev++) + if (part[lev]) + list = merge(priv, cmp, part[lev], list); + + merge_and_restore_back_links(priv, cmp, head, part[max_lev], list); +} +EXPORT_SYMBOL(list_sort); + +#ifdef CONFIG_TEST_LIST_SORT + +#include <linux/random.h> + +/* + * The pattern of set bits in the list length determines which cases + * are hit in list_sort(). + */ +#define TEST_LIST_LEN (512+128+2) /* not including head */ + +#define TEST_POISON1 0xDEADBEEF +#define TEST_POISON2 0xA324354C + +struct debug_el { + unsigned int poison1; + struct list_head list; + unsigned int poison2; + int value; + unsigned serial; +}; + +/* Array, containing pointers to all elements in the test list */ +static struct debug_el **elts __initdata; + +static int __init check(struct debug_el *ela, struct debug_el *elb) +{ + if (ela->serial >= TEST_LIST_LEN) { + printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", + ela->serial); + return -EINVAL; + } + if (elb->serial >= TEST_LIST_LEN) { + printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", + elb->serial); + return -EINVAL; + } + if (elts[ela->serial] != ela || elts[elb->serial] != elb) { + printk(KERN_ERR "list_sort_test: error: phantom element\n"); + return -EINVAL; + } + if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { + printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", + ela->poison1, ela->poison2); + return -EINVAL; + } + if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { + printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", + elb->poison1, elb->poison2); + return -EINVAL; + } + return 0; +} + +static int __init cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct debug_el *ela, *elb; + + ela = container_of(a, struct debug_el, list); + elb = container_of(b, struct debug_el, list); + + check(ela, elb); + return ela->value - elb->value; +} + +static int __init list_sort_test(void) +{ + int i, count = 1, err = -EINVAL; + struct debug_el *el; + struct list_head *cur, *tmp; + LIST_HEAD(head); + + printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n"); + + elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL); + if (!elts) { + printk(KERN_ERR "list_sort_test: error: cannot allocate " + "memory\n"); + goto exit; + } + + for (i = 0; i < TEST_LIST_LEN; i++) { + el = kmalloc(sizeof(*el), GFP_KERNEL); + if (!el) { + printk(KERN_ERR "list_sort_test: error: cannot " + "allocate memory\n"); + goto exit; + } + /* force some equivalencies */ + el->value = random32() % (TEST_LIST_LEN/3); + el->serial = i; + el->poison1 = TEST_POISON1; + el->poison2 = TEST_POISON2; + elts[i] = el; + list_add_tail(&el->list, &head); + } + + list_sort(NULL, &head, cmp); + + for (cur = head.next; cur->next != &head; cur = cur->next) { + struct debug_el *el1; + int cmp_result; + + if (cur->next->prev != cur) { + printk(KERN_ERR "list_sort_test: error: list is " + "corrupted\n"); + goto exit; + } + + cmp_result = cmp(NULL, cur, cur->next); + if (cmp_result > 0) { + printk(KERN_ERR "list_sort_test: error: list is not " + "sorted\n"); + goto exit; + } + + el = container_of(cur, struct debug_el, list); + el1 = container_of(cur->next, struct debug_el, list); + if (cmp_result == 0 && el->serial >= el1->serial) { + printk(KERN_ERR "list_sort_test: error: order of " + "equivalent elements not preserved\n"); + goto exit; + } + + if (check(el, el1)) { + printk(KERN_ERR "list_sort_test: error: element check " + "failed\n"); + goto exit; + } + count++; + } + + if (count != TEST_LIST_LEN) { + printk(KERN_ERR "list_sort_test: error: bad list length %d", + count); + goto exit; + } + + err = 0; +exit: + kfree(elts); + list_for_each_safe(cur, tmp, &head) { + list_del(cur); + kfree(container_of(cur, struct debug_el, list)); + } + return err; +} +module_init(list_sort_test); +#endif /* CONFIG_TEST_LIST_SORT */ diff --git a/lib/lmb.c b/lib/lmb.c deleted file mode 100644 index e4a6482d8b26..000000000000 --- a/lib/lmb.c +++ /dev/null @@ -1,527 +0,0 @@ -/* - * Procedures for maintaining information about logical memory blocks. - * - * Peter Bergner, IBM Corp. June 2001. - * Copyright (C) 2001 Peter Bergner. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <linux/lmb.h> - -#define LMB_ALLOC_ANYWHERE 0 - -struct lmb lmb; - -static int lmb_debug; - -static int __init early_lmb(char *p) -{ - if (p && strstr(p, "debug")) - lmb_debug = 1; - return 0; -} -early_param("lmb", early_lmb); - -static void lmb_dump(struct lmb_region *region, char *name) -{ - unsigned long long base, size; - int i; - - pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); - - for (i = 0; i < region->cnt; i++) { - base = region->region[i].base; - size = region->region[i].size; - - pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", - name, i, base, base + size - 1, size); - } -} - -void lmb_dump_all(void) -{ - if (!lmb_debug) - return; - - pr_info("LMB configuration:\n"); - pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size); - pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size); - - lmb_dump(&lmb.memory, "memory"); - lmb_dump(&lmb.reserved, "reserved"); -} - -static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2, - u64 size2) -{ - return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); -} - -static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) -{ - if (base2 == base1 + size1) - return 1; - else if (base1 == base2 + size2) - return -1; - - return 0; -} - -static long lmb_regions_adjacent(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) -{ - u64 base1 = rgn->region[r1].base; - u64 size1 = rgn->region[r1].size; - u64 base2 = rgn->region[r2].base; - u64 size2 = rgn->region[r2].size; - - return lmb_addrs_adjacent(base1, size1, base2, size2); -} - -static void lmb_remove_region(struct lmb_region *rgn, unsigned long r) -{ - unsigned long i; - - for (i = r; i < rgn->cnt - 1; i++) { - rgn->region[i].base = rgn->region[i + 1].base; - rgn->region[i].size = rgn->region[i + 1].size; - } - rgn->cnt--; -} - -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void lmb_coalesce_regions(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) -{ - rgn->region[r1].size += rgn->region[r2].size; - lmb_remove_region(rgn, r2); -} - -void __init lmb_init(void) -{ - /* Create a dummy zero size LMB which will get coalesced away later. - * This simplifies the lmb_add() code below... - */ - lmb.memory.region[0].base = 0; - lmb.memory.region[0].size = 0; - lmb.memory.cnt = 1; - - /* Ditto. */ - lmb.reserved.region[0].base = 0; - lmb.reserved.region[0].size = 0; - lmb.reserved.cnt = 1; -} - -void __init lmb_analyze(void) -{ - int i; - - lmb.memory.size = 0; - - for (i = 0; i < lmb.memory.cnt; i++) - lmb.memory.size += lmb.memory.region[i].size; -} - -static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) -{ - unsigned long coalesced = 0; - long adjacent, i; - - if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { - rgn->region[0].base = base; - rgn->region[0].size = size; - return 0; - } - - /* First try and coalesce this LMB with another. */ - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; - - if ((rgnbase == base) && (rgnsize == size)) - /* Already have this region, so we're done */ - return 0; - - adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize); - if (adjacent > 0) { - rgn->region[i].base -= size; - rgn->region[i].size += size; - coalesced++; - break; - } else if (adjacent < 0) { - rgn->region[i].size += size; - coalesced++; - break; - } - } - - if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) { - lmb_coalesce_regions(rgn, i, i+1); - coalesced++; - } - - if (coalesced) - return coalesced; - if (rgn->cnt >= MAX_LMB_REGIONS) - return -1; - - /* Couldn't coalesce the LMB, so add it to the sorted table. */ - for (i = rgn->cnt - 1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; - } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; - break; - } - } - - if (base < rgn->region[0].base) { - rgn->region[0].base = base; - rgn->region[0].size = size; - } - rgn->cnt++; - - return 0; -} - -long lmb_add(u64 base, u64 size) -{ - struct lmb_region *_rgn = &lmb.memory; - - /* On pSeries LPAR systems, the first LMB is our RMO region. */ - if (base == 0) - lmb.rmo_size = size; - - return lmb_add_region(_rgn, base, size); - -} - -long lmb_remove(u64 base, u64 size) -{ - struct lmb_region *rgn = &(lmb.memory); - u64 rgnbegin, rgnend; - u64 end = base + size; - int i; - - rgnbegin = rgnend = 0; /* supress gcc warnings */ - - /* Find the region where (base, size) belongs to */ - for (i=0; i < rgn->cnt; i++) { - rgnbegin = rgn->region[i].base; - rgnend = rgnbegin + rgn->region[i].size; - - if ((rgnbegin <= base) && (end <= rgnend)) - break; - } - - /* Didn't find the region */ - if (i == rgn->cnt) - return -1; - - /* Check to see if we are removing entire region */ - if ((rgnbegin == base) && (rgnend == end)) { - lmb_remove_region(rgn, i); - return 0; - } - - /* Check to see if region is matching at the front */ - if (rgnbegin == base) { - rgn->region[i].base = end; - rgn->region[i].size -= size; - return 0; - } - - /* Check to see if the region is matching at the end */ - if (rgnend == end) { - rgn->region[i].size -= size; - return 0; - } - - /* - * We need to split the entry - adjust the current one to the - * beginging of the hole and add the region after hole. - */ - rgn->region[i].size = base - rgn->region[i].base; - return lmb_add_region(rgn, end, rgnend - end); -} - -long __init lmb_reserve(u64 base, u64 size) -{ - struct lmb_region *_rgn = &lmb.reserved; - - BUG_ON(0 == size); - - return lmb_add_region(_rgn, base, size); -} - -long __init lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size) -{ - unsigned long i; - - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; - if (lmb_addrs_overlap(base, size, rgnbase, rgnsize)) - break; - } - - return (i < rgn->cnt) ? i : -1; -} - -static u64 lmb_align_down(u64 addr, u64 size) -{ - return addr & ~(size - 1); -} - -static u64 lmb_align_up(u64 addr, u64 size) -{ - return (addr + (size - 1)) & ~(size - 1); -} - -static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end, - u64 size, u64 align) -{ - u64 base, res_base; - long j; - - base = lmb_align_down((end - size), align); - while (start <= base) { - j = lmb_overlaps_region(&lmb.reserved, base, size); - if (j < 0) { - /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, size) < 0) - base = ~(u64)0; - return base; - } - res_base = lmb.reserved.region[j].base; - if (res_base < size) - break; - base = lmb_align_down(res_base - size, align); - } - - return ~(u64)0; -} - -static u64 __init lmb_alloc_nid_region(struct lmb_property *mp, - u64 (*nid_range)(u64, u64, int *), - u64 size, u64 align, int nid) -{ - u64 start, end; - - start = mp->base; - end = start + mp->size; - - start = lmb_align_up(start, align); - while (start < end) { - u64 this_end; - int this_nid; - - this_end = nid_range(start, end, &this_nid); - if (this_nid == nid) { - u64 ret = lmb_alloc_nid_unreserved(start, this_end, - size, align); - if (ret != ~(u64)0) - return ret; - } - start = this_end; - } - - return ~(u64)0; -} - -u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64 start, u64 end, int *nid)) -{ - struct lmb_region *mem = &lmb.memory; - int i; - - BUG_ON(0 == size); - - size = lmb_align_up(size, align); - - for (i = 0; i < mem->cnt; i++) { - u64 ret = lmb_alloc_nid_region(&mem->region[i], - nid_range, - size, align, nid); - if (ret != ~(u64)0) - return ret; - } - - return lmb_alloc(size, align); -} - -u64 __init lmb_alloc(u64 size, u64 align) -{ - return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); -} - -u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr) -{ - u64 alloc; - - alloc = __lmb_alloc_base(size, align, max_addr); - - if (alloc == 0) - panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", - (unsigned long long) size, (unsigned long long) max_addr); - - return alloc; -} - -u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr) -{ - long i, j; - u64 base = 0; - u64 res_base; - - BUG_ON(0 == size); - - size = lmb_align_up(size, align); - - /* On some platforms, make sure we allocate lowmem */ - /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */ - if (max_addr == LMB_ALLOC_ANYWHERE) - max_addr = LMB_REAL_LIMIT; - - for (i = lmb.memory.cnt - 1; i >= 0; i--) { - u64 lmbbase = lmb.memory.region[i].base; - u64 lmbsize = lmb.memory.region[i].size; - - if (lmbsize < size) - continue; - if (max_addr == LMB_ALLOC_ANYWHERE) - base = lmb_align_down(lmbbase + lmbsize - size, align); - else if (lmbbase < max_addr) { - base = min(lmbbase + lmbsize, max_addr); - base = lmb_align_down(base - size, align); - } else - continue; - - while (base && lmbbase <= base) { - j = lmb_overlaps_region(&lmb.reserved, base, size); - if (j < 0) { - /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, size) < 0) - return 0; - return base; - } - res_base = lmb.reserved.region[j].base; - if (res_base < size) - break; - base = lmb_align_down(res_base - size, align); - } - } - return 0; -} - -/* You must call lmb_analyze() before this. */ -u64 __init lmb_phys_mem_size(void) -{ - return lmb.memory.size; -} - -u64 __init lmb_end_of_DRAM(void) -{ - int idx = lmb.memory.cnt - 1; - - return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); -} - -/* You must call lmb_analyze() after this. */ -void __init lmb_enforce_memory_limit(u64 memory_limit) -{ - unsigned long i; - u64 limit; - struct lmb_property *p; - - if (!memory_limit) - return; - - /* Truncate the lmb regions to satisfy the memory limit. */ - limit = memory_limit; - for (i = 0; i < lmb.memory.cnt; i++) { - if (limit > lmb.memory.region[i].size) { - limit -= lmb.memory.region[i].size; - continue; - } - - lmb.memory.region[i].size = limit; - lmb.memory.cnt = i + 1; - break; - } - - if (lmb.memory.region[0].size < lmb.rmo_size) - lmb.rmo_size = lmb.memory.region[0].size; - - memory_limit = lmb_end_of_DRAM(); - - /* And truncate any reserves above the limit also. */ - for (i = 0; i < lmb.reserved.cnt; i++) { - p = &lmb.reserved.region[i]; - - if (p->base > memory_limit) - p->size = 0; - else if ((p->base + p->size) > memory_limit) - p->size = memory_limit - p->base; - - if (p->size == 0) { - lmb_remove_region(&lmb.reserved, i); - i--; - } - } -} - -int __init lmb_is_reserved(u64 addr) -{ - int i; - - for (i = 0; i < lmb.reserved.cnt; i++) { - u64 upper = lmb.reserved.region[i].base + - lmb.reserved.region[i].size - 1; - if ((addr >= lmb.reserved.region[i].base) && (addr <= upper)) - return 1; - } - return 0; -} - -/* - * Given a <base, len>, find which memory regions belong to this range. - * Adjust the request and return a contiguous chunk. - */ -int lmb_find(struct lmb_property *res) -{ - int i; - u64 rstart, rend; - - rstart = res->base; - rend = rstart + res->size - 1; - - for (i = 0; i < lmb.memory.cnt; i++) { - u64 start = lmb.memory.region[i].base; - u64 end = start + lmb.memory.region[i].size - 1; - - if (start > rend) - return -1; - - if ((end >= rstart) && (start < rend)) { - /* adjust the request */ - if (rstart < start) - rstart = start; - if (rend > end) - rend = end; - res->base = rstart; - res->size = rend - rstart + 1; - return 0; - } - } - return -1; -} diff --git a/lib/lru_cache.c b/lib/lru_cache.c new file mode 100644 index 000000000000..270de9d31b8c --- /dev/null +++ b/lib/lru_cache.c @@ -0,0 +1,560 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>. + Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/slab.h> +#include <linux/string.h> /* for memset */ +#include <linux/seq_file.h> /* for seq_printf */ +#include <linux/lru_cache.h> + +MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " + "Lars Ellenberg <lars@linbit.com>"); +MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); +MODULE_LICENSE("GPL"); + +/* this is developers aid only. + * it catches concurrent access (lack of locking on the users part) */ +#define PARANOIA_ENTRY() do { \ + BUG_ON(!lc); \ + BUG_ON(!lc->nr_elements); \ + BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ +} while (0) + +#define RETURN(x...) do { \ + clear_bit(__LC_PARANOIA, &lc->flags); \ + smp_mb__after_clear_bit(); return x ; } while (0) + +/* BUG() if e is not one of the elements tracked by lc */ +#define PARANOIA_LC_ELEMENT(lc, e) do { \ + struct lru_cache *lc_ = (lc); \ + struct lc_element *e_ = (e); \ + unsigned i = e_->lc_index; \ + BUG_ON(i >= lc_->nr_elements); \ + BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/** + * lc_create - prepares to track objects in an active set + * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @e_count: number of elements allowed to be active simultaneously + * @e_size: size of the tracked objects + * @e_off: offset to the &struct lc_element member in a tracked object + * + * Returns a pointer to a newly initialized struct lru_cache on success, + * or NULL on (allocation) failure. + */ +struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned e_count, size_t e_size, size_t e_off) +{ + struct hlist_head *slot = NULL; + struct lc_element **element = NULL; + struct lru_cache *lc; + struct lc_element *e; + unsigned cache_obj_size = kmem_cache_size(cache); + unsigned i; + + WARN_ON(cache_obj_size < e_size); + if (cache_obj_size < e_size) + return NULL; + + /* e_count too big; would probably fail the allocation below anyways. + * for typical use cases, e_count should be few thousand at most. */ + if (e_count > LC_MAX_ACTIVE) + return NULL; + + slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); + if (!slot) + goto out_fail; + element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); + if (!element) + goto out_fail; + + lc = kzalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) + goto out_fail; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + + lc->name = name; + lc->element_size = e_size; + lc->element_off = e_off; + lc->nr_elements = e_count; + lc->new_number = LC_FREE; + lc->lc_cache = cache; + lc->lc_element = element; + lc->lc_slot = slot; + + /* preallocate all objects */ + for (i = 0; i < e_count; i++) { + void *p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) + break; + memset(p, 0, lc->element_size); + e = p + e_off; + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + element[i] = e; + } + if (i == e_count) + return lc; + + /* else: could not allocate all elements, give up */ + for (i--; i; i--) { + void *p = element[i]; + kmem_cache_free(cache, p - e_off); + } + kfree(lc); +out_fail: + kfree(element); + kfree(slot); + return NULL; +} + +void lc_free_by_index(struct lru_cache *lc, unsigned i) +{ + void *p = lc->lc_element[i]; + WARN_ON(!p); + if (p) { + p -= lc->element_off; + kmem_cache_free(lc->lc_cache, p); + } +} + +/** + * lc_destroy - frees memory allocated by lc_create() + * @lc: the lru cache to destroy + */ +void lc_destroy(struct lru_cache *lc) +{ + unsigned i; + if (!lc) + return; + for (i = 0; i < lc->nr_elements; i++) + lc_free_by_index(lc, i); + kfree(lc->lc_element); + kfree(lc->lc_slot); + kfree(lc); +} + +/** + * lc_reset - does a full reset for @lc and the hash table slots. + * @lc: the lru cache to operate on + * + * It is roughly the equivalent of re-allocating a fresh lru_cache object, + * basically a short cut to lc_destroy(lc); lc = lc_create(...); + */ +void lc_reset(struct lru_cache *lc) +{ + unsigned i; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + lc->used = 0; + lc->hits = 0; + lc->misses = 0; + lc->starving = 0; + lc->dirty = 0; + lc->changed = 0; + lc->flags = 0; + lc->changing_element = NULL; + lc->new_number = LC_FREE; + memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); + + for (i = 0; i < lc->nr_elements; i++) { + struct lc_element *e = lc->lc_element[i]; + void *p = e; + p -= lc->element_off; + memset(p, 0, lc->element_size); + /* re-init it */ + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + } +} + +/** + * lc_seq_printf_stats - print stats about @lc into @seq + * @seq: the seq_file to print into + * @lc: the lru cache to print statistics of + */ +size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) +{ + /* NOTE: + * total calls to lc_get are + * (starving + hits + misses) + * misses include "dirty" count (update from an other thread in + * progress) and "changed", when this in fact lead to an successful + * update of the cache. + */ + return seq_printf(seq, "\t%s: used:%u/%u " + "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + lc->name, lc->used, lc->nr_elements, + lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); +} + +static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) +{ + return lc->lc_slot + (enr % lc->nr_elements); +} + + +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +{ + struct hlist_node *n; + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + if (e->lc_number == enr) + return e; + } + return NULL; +} + +/* returned element will be "recycled" immediately */ +static struct lc_element *lc_evict(struct lru_cache *lc) +{ + struct list_head *n; + struct lc_element *e; + + if (list_empty(&lc->lru)) + return NULL; + + n = lc->lru.prev; + e = list_entry(n, struct lc_element, list); + + PARANOIA_LC_ELEMENT(lc, e); + + list_del(&e->list); + hlist_del(&e->colision); + return e; +} + +/** + * lc_del - removes an element from the cache + * @lc: The lru_cache object + * @e: The element to remove + * + * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, + * sets @e->enr to %LC_FREE. + */ +void lc_del(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt); + + e->lc_number = LC_FREE; + hlist_del_init(&e->colision); + list_move(&e->list, &lc->free); + RETURN(); +} + +static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +{ + struct list_head *n; + + if (list_empty(&lc->free)) + return lc_evict(lc); + + n = lc->free.next; + list_del(n); + return list_entry(n, struct lc_element, list); +} + +static int lc_unused_element_available(struct lru_cache *lc) +{ + if (!list_empty(&lc->free)) + return 1; /* something on the free list */ + if (!list_empty(&lc->lru)) + return 1; /* something to evict */ + + return 0; +} + + +/** + * lc_get - get element by label, maybe change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * In case the requested number is not present, it needs to be added to the + * cache. Therefore it is possible that an other element becomes evicted from + * the cache. In either case, the user is notified so he is able to e.g. keep + * a persistent log of the cache changes, and therefore the objects in use. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * and a changing transaction is still pending (@lc was marked %LC_DIRTY). + * Or no unused or free element could be recycled (@lc will be marked as + * %LC_STARVING, blocking further lc_get() operations). + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + * + * pointer to an UNUSED element with some different element number, + * where that different number may also be %LC_FREE. + * + * In this case, the cache is marked %LC_DIRTY (blocking further changes), + * and the returned element pointer is removed from the lru list and + * hash collision chains. The user now should do whatever housekeeping + * is necessary. + * Then he must call lc_changed(lc,element_pointer), to finish + * the change. + * + * NOTE: The user needs to check the lc_number on EACH use, so he recognizes + * any cache set change. + */ +struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + RETURN(e); + } + + ++lc->misses; + + /* In case there is nothing available and we can not kick out + * the LRU element, we have to wait ... + */ + if (!lc_unused_element_available(lc)) { + __set_bit(__LC_STARVING, &lc->flags); + RETURN(NULL); + } + + /* it was not present in the active set. + * we are going to recycle an unused (or even "free") element. + * user may need to commit a transaction to record that change. + * we serialize on flags & TF_DIRTY */ + if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { + ++lc->dirty; + RETURN(NULL); + } + + e = lc_get_unused_element(lc); + BUG_ON(!e); + + clear_bit(__LC_STARVING, &lc->flags); + BUG_ON(++e->refcnt != 1); + lc->used++; + + lc->changing_element = e; + lc->new_number = enr; + + RETURN(e); +} + +/* similar to lc_get, + * but only gets a new reference on an existing element. + * you either get the requested element, or NULL. + * will be consolidated into one function. + */ +struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + } + RETURN(e); +} + +/** + * lc_changed - tell @lc that the change has been recorded + * @lc: the lru cache to operate on + * @e: the element pending label change + */ +void lc_changed(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + BUG_ON(e != lc->changing_element); + PARANOIA_LC_ELEMENT(lc, e); + ++lc->changed; + e->lc_number = lc->new_number; + list_add(&e->list, &lc->in_use); + hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); + lc->changing_element = NULL; + lc->new_number = LC_FREE; + clear_bit(__LC_DIRTY, &lc->flags); + smp_mb__after_clear_bit(); + RETURN(); +} + + +/** + * lc_put - give up refcnt of @e + * @lc: the lru cache to operate on + * @e: the element to put + * + * If refcnt reaches zero, the element is moved to the lru list, + * and a %LC_STARVING (if set) is cleared. + * Returns the new (post-decrement) refcnt. + */ +unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt == 0); + BUG_ON(e == lc->changing_element); + if (--e->refcnt == 0) { + /* move it to the front of LRU. */ + list_move(&e->list, &lc->lru); + lc->used--; + clear_bit(__LC_STARVING, &lc->flags); + smp_mb__after_clear_bit(); + } + RETURN(e->refcnt); +} + +/** + * lc_element_by_index + * @lc: the lru cache to operate on + * @i: the index of the element to return + */ +struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) +{ + BUG_ON(i >= lc->nr_elements); + BUG_ON(lc->lc_element[i] == NULL); + BUG_ON(lc->lc_element[i]->lc_index != i); + return lc->lc_element[i]; +} + +/** + * lc_index_of + * @lc: the lru cache to operate on + * @e: the element to query for its index position in lc->element + */ +unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_LC_ELEMENT(lc, e); + return e->lc_index; +} + +/** + * lc_set - associate index with label + * @lc: the lru cache to operate on + * @enr: the label to set + * @index: the element index to associate label with. + * + * Used to initialize the active set to some previously recorded state. + */ +void lc_set(struct lru_cache *lc, unsigned int enr, int index) +{ + struct lc_element *e; + + if (index < 0 || index >= lc->nr_elements) + return; + + e = lc_element_by_index(lc, index); + e->lc_number = enr; + + hlist_del_init(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); +} + +/** + * lc_dump - Dump a complete LRU cache to seq in textual form. + * @lc: the lru cache to operate on + * @seq: the &struct seq_file pointer to seq_printf into + * @utext: user supplied "heading" or other info + * @detail: function pointer the user may provide to dump further details + * of the object the lc_element is embedded in. + */ +void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, + void (*detail) (struct seq_file *, struct lc_element *)) +{ + unsigned int nr_elements = lc->nr_elements; + struct lc_element *e; + int i; + + seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); + for (i = 0; i < nr_elements; i++) { + e = lc_element_by_index(lc, i); + if (e->lc_number == LC_FREE) { + seq_printf(seq, "\t%2d: FREE\n", i); + } else { + seq_printf(seq, "\t%2d: %4u %4u ", i, + e->lc_number, e->refcnt); + detail(seq, e); + } + } +} + +EXPORT_SYMBOL(lc_create); +EXPORT_SYMBOL(lc_reset); +EXPORT_SYMBOL(lc_destroy); +EXPORT_SYMBOL(lc_set); +EXPORT_SYMBOL(lc_del); +EXPORT_SYMBOL(lc_try_get); +EXPORT_SYMBOL(lc_find); +EXPORT_SYMBOL(lc_get); +EXPORT_SYMBOL(lc_put); +EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_element_by_index); +EXPORT_SYMBOL(lc_index_of); +EXPORT_SYMBOL(lc_seq_printf_stats); +EXPORT_SYMBOL(lc_seq_dump_details); diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c index 5dc6b29c1575..f2fd09850223 100644 --- a/lib/lzo/lzo1x_decompress.c +++ b/lib/lzo/lzo1x_decompress.c @@ -11,11 +11,13 @@ * Richard Purdie <rpurdie@openedhand.com> */ +#ifndef STATIC #include <linux/module.h> #include <linux/kernel.h> -#include <linux/lzo.h> -#include <asm/byteorder.h> +#endif + #include <asm/unaligned.h> +#include <linux/lzo.h> #include "lzodefs.h" #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) @@ -244,9 +246,10 @@ lookbehind_overrun: *out_len = op - out; return LZO_E_LOOKBEHIND_OVERRUN; } - +#ifndef STATIC EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X Decompressor"); +#endif diff --git a/lib/nlattr.c b/lib/nlattr.c index c4706eb98d3d..00e8a02681a6 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -15,7 +15,7 @@ #include <linux/types.h> #include <net/netlink.h> -static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { +static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), @@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { [NLA_NESTED] = NLA_HDRLEN, }; -static int validate_nla(struct nlattr *nla, int maxtype, +static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy) { const struct nla_policy *pt; @@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype, * * Returns 0 on success or a negative error code. */ -int nla_validate(struct nlattr *head, int len, int maxtype, +int nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy) { - struct nlattr *nla; + const struct nlattr *nla; int rem, err; nla_for_each_attr(nla, head, len, rem) { @@ -173,10 +173,10 @@ nla_policy_len(const struct nla_policy *p, int n) * * Returns 0 on success or a negative error code. */ -int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - const struct nla_policy *policy) +int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, + int len, const struct nla_policy *policy) { - struct nlattr *nla; + const struct nlattr *nla; int rem, err; memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); @@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, goto errout; } - tb[type] = nla; + tb[type] = (struct nlattr *)nla; } } @@ -212,14 +212,14 @@ errout: * * Returns the first attribute in the stream matching the specified type. */ -struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) +struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) { - struct nlattr *nla; + const struct nlattr *nla; int rem; nla_for_each_attr(nla, head, len, rem) if (nla_type(nla) == attrtype) - return nla; + return (struct nlattr *)nla; return NULL; } diff --git a/lib/parser.c b/lib/parser.c index b00d02059a5f..6e89eca5cca0 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -56,13 +56,16 @@ static int match_one(char *s, const char *p, substring_t args[]) args[argc].from = s; switch (*p++) { - case 's': - if (strlen(s) == 0) + case 's': { + size_t str_len = strlen(s); + + if (str_len == 0) return 0; - else if (len == -1 || len > strlen(s)) - len = strlen(s); + if (len == -1 || len > str_len) + len = str_len; args[argc].to = s + len; break; + } case 'd': simple_strtol(s, &args[argc].to, 0); goto num; @@ -125,12 +128,13 @@ static int match_number(substring_t *s, int *result, int base) char *endp; char *buf; int ret; + size_t len = s->to - s->from; - buf = kmalloc(s->to - s->from + 1, GFP_KERNEL); + buf = kmalloc(len + 1, GFP_KERNEL); if (!buf) return -ENOMEM; - memcpy(buf, s->from, s->to - s->from); - buf[s->to - s->from] = '\0'; + memcpy(buf, s->from, len); + buf[len] = '\0'; *result = simple_strtol(buf, &endp, base); ret = 0; if (endp == buf) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index aeaa6d734447..28f2c33c6b53 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -8,10 +8,53 @@ #include <linux/init.h> #include <linux/cpu.h> #include <linux/module.h> +#include <linux/debugobjects.h> static LIST_HEAD(percpu_counters); static DEFINE_MUTEX(percpu_counters_lock); +#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER + +static struct debug_obj_descr percpu_counter_debug_descr; + +static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state) +{ + struct percpu_counter *fbc = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + percpu_counter_destroy(fbc); + debug_object_free(fbc, &percpu_counter_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr percpu_counter_debug_descr = { + .name = "percpu_counter", + .fixup_free = percpu_counter_fixup_free, +}; + +static inline void debug_percpu_counter_activate(struct percpu_counter *fbc) +{ + debug_object_init(fbc, &percpu_counter_debug_descr); + debug_object_activate(fbc, &percpu_counter_debug_descr); +} + +static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) +{ + debug_object_deactivate(fbc, &percpu_counter_debug_descr); + debug_object_free(fbc, &percpu_counter_debug_descr); +} + +#else /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */ +static inline void debug_percpu_counter_activate(struct percpu_counter *fbc) +{ } +static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) +{ } +#endif /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */ + void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { int cpu; @@ -29,20 +72,18 @@ EXPORT_SYMBOL(percpu_counter_set); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; - s32 *pcount; - int cpu = get_cpu(); - pcount = per_cpu_ptr(fbc->counters, cpu); - count = *pcount + amount; + preempt_disable(); + count = __this_cpu_read(*fbc->counters) + amount; if (count >= batch || count <= -batch) { spin_lock(&fbc->lock); fbc->count += count; - *pcount = 0; + __this_cpu_write(*fbc->counters, 0); spin_unlock(&fbc->lock); } else { - *pcount = count; + __this_cpu_write(*fbc->counters, count); } - put_cpu(); + preempt_enable(); } EXPORT_SYMBOL(__percpu_counter_add); @@ -75,7 +116,11 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, fbc->counters = alloc_percpu(s32); if (!fbc->counters) return -ENOMEM; + + debug_percpu_counter_activate(fbc); + #ifdef CONFIG_HOTPLUG_CPU + INIT_LIST_HEAD(&fbc->list); mutex_lock(&percpu_counters_lock); list_add(&fbc->list, &percpu_counters); mutex_unlock(&percpu_counters_lock); @@ -89,6 +134,8 @@ void percpu_counter_destroy(struct percpu_counter *fbc) if (!fbc->counters) return; + debug_percpu_counter_deactivate(fbc); + #ifdef CONFIG_HOTPLUG_CPU mutex_lock(&percpu_counters_lock); list_del(&fbc->list); @@ -137,6 +184,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, return NOTIFY_OK; } +/* + * Compare counter against given value. + * Return 1 if greater, 0 if equal and -1 if less + */ +int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + s64 count; + + count = percpu_counter_read(fbc); + /* Check to see if rough count will be sufficient for comparison */ + if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (count > rhs) + return 1; + else + return -1; + } + /* Need to use precise count */ + count = percpu_counter_sum(fbc); + if (count > rhs) + return 1; + else if (count < rhs) + return -1; + else + return 0; +} +EXPORT_SYMBOL(percpu_counter_compare); + static int __init percpu_counter_startup(void) { compute_batch_value(); diff --git a/lib/plist.c b/lib/plist.c index d6c64a824e1d..1471988d9190 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -54,9 +54,11 @@ static void plist_check_list(struct list_head *top) static void plist_check_head(struct plist_head *head) { - WARN_ON(!head->lock); - if (head->lock) - WARN_ON_SMP(!spin_is_locked(head->lock)); + WARN_ON(!head->rawlock && !head->spinlock); + if (head->rawlock) + WARN_ON_SMP(!raw_spin_is_locked(head->rawlock)); + if (head->spinlock) + WARN_ON_SMP(!spin_is_locked(head->spinlock)); plist_check_list(&head->prio_list); plist_check_list(&head->node_list); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 23abbd93cae1..5086bb962b4d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -28,7 +28,6 @@ #include <linux/slab.h> #include <linux/notifier.h> #include <linux/cpu.h> -#include <linux/gfp.h> #include <linux/string.h> #include <linux/bitops.h> #include <linux/rcupdate.h> @@ -50,7 +49,7 @@ struct radix_tree_node { unsigned int height; /* Height from the bottom */ unsigned int count; struct rcu_head rcu_head; - void *slots[RADIX_TREE_MAP_SIZE]; + void __rcu *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; @@ -83,6 +82,16 @@ struct radix_tree_preload { }; static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; +static inline void *ptr_to_indirect(void *ptr) +{ + return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); +} + +static inline void *indirect_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); +} + static inline gfp_t root_gfp_mask(struct radix_tree_root *root) { return root->gfp_mask & __GFP_BITS_MASK; @@ -175,14 +184,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); + int i; /* * must only free zeroed nodes into the slab. radix_tree_shrink * can leave us with a non-NULL entry in the first slot, so clear * that here to make sure. */ - tag_clear(node, 0, 0); - tag_clear(node, 1, 0); + for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) + tag_clear(node, i, 0); + node->slots[0] = NULL; node->count = 0; @@ -200,6 +211,9 @@ radix_tree_node_free(struct radix_tree_node *node) * ensure that the addition of a single element in the tree cannot fail. On * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. + * + * To make use of this facility, the radix tree must be initialised without + * __GFP_WAIT being passed to INIT_RADIX_TREE(). */ int radix_tree_preload(gfp_t gfp_mask) { @@ -261,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) return -ENOMEM; /* Increase the height. */ - node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); + node->slots[0] = indirect_to_ptr(root->rnode); /* Propagate the aggregated tag info into the new root */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { @@ -272,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) newheight = root->height+1; node->height = newheight; node->count = 1; - node = radix_tree_ptr_to_indirect(node); + node = ptr_to_indirect(node); rcu_assign_pointer(root->rnode, node); root->height = newheight; } while (height > root->height); @@ -305,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root, return error; } - slot = radix_tree_indirect_to_ptr(root->rnode); + slot = indirect_to_ptr(root->rnode); height = root->height; shift = (height-1) * RADIX_TREE_MAP_SHIFT; @@ -321,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root, rcu_assign_pointer(node->slots[offset], slot); node->count++; } else - rcu_assign_pointer(root->rnode, - radix_tree_ptr_to_indirect(slot)); + rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); } /* Go a level down */ @@ -361,7 +374,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, unsigned int height, shift; struct radix_tree_node *node, **slot; - node = rcu_dereference(root->rnode); + node = rcu_dereference_raw(root->rnode); if (node == NULL) return NULL; @@ -370,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, return NULL; return is_slot ? (void *)&root->rnode : node; } - node = radix_tree_indirect_to_ptr(node); + node = indirect_to_ptr(node); height = node->height; if (index > radix_tree_maxindex(height)) @@ -381,7 +394,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, do { slot = (struct radix_tree_node **) (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); - node = rcu_dereference(*slot); + node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; @@ -389,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, height--; } while (height > 0); - return is_slot ? (void *)slot:node; + return is_slot ? (void *)slot : indirect_to_ptr(node); } /** @@ -451,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, height = root->height; BUG_ON(index > radix_tree_maxindex(height)); - slot = radix_tree_indirect_to_ptr(root->rnode); + slot = indirect_to_ptr(root->rnode); shift = (height - 1) * RADIX_TREE_MAP_SHIFT; while (height > 0) { @@ -505,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, shift = (height - 1) * RADIX_TREE_MAP_SHIFT; pathp->node = NULL; - slot = radix_tree_indirect_to_ptr(root->rnode); + slot = indirect_to_ptr(root->rnode); while (height > 0) { int offset; @@ -543,7 +556,6 @@ out: } EXPORT_SYMBOL(radix_tree_tag_clear); -#ifndef __KERNEL__ /* Only the test harness uses this at present */ /** * radix_tree_tag_get - get a tag on a radix tree node * @root: radix tree root @@ -554,6 +566,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear); * * 0: tag not present or not set * 1: tag set + * + * Note that the return value of this function may not be relied on, even if + * the RCU lock is held, unless tag modification and node deletion are excluded + * from concurrency. */ int radix_tree_tag_get(struct radix_tree_root *root, unsigned long index, unsigned int tag) @@ -566,13 +582,13 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (!root_tag_get(root, tag)) return 0; - node = rcu_dereference(root->rnode); + node = rcu_dereference_raw(root->rnode); if (node == NULL) return 0; if (!radix_tree_is_indirect_ptr(node)) return (index == 0); - node = radix_tree_indirect_to_ptr(node); + node = indirect_to_ptr(node); height = node->height; if (index > radix_tree_maxindex(height)) @@ -594,19 +610,142 @@ int radix_tree_tag_get(struct radix_tree_root *root, */ if (!tag_get(node, tag, offset)) saw_unset_tag = 1; - if (height == 1) { - int ret = tag_get(node, tag, offset); - - BUG_ON(ret && saw_unset_tag); - return !!ret; - } - node = rcu_dereference(node->slots[offset]); + if (height == 1) + return !!tag_get(node, tag, offset); + node = rcu_dereference_raw(node->slots[offset]); shift -= RADIX_TREE_MAP_SHIFT; height--; } } EXPORT_SYMBOL(radix_tree_tag_get); -#endif + +/** + * radix_tree_range_tag_if_tagged - for each item in given range set given + * tag if item has another tag set + * @root: radix tree root + * @first_indexp: pointer to a starting index of a range to scan + * @last_index: last index of a range to scan + * @nr_to_tag: maximum number items to tag + * @iftag: tag index to test + * @settag: tag index to set if tested tag is set + * + * This function scans range of radix tree from first_index to last_index + * (inclusive). For each item in the range if iftag is set, the function sets + * also settag. The function stops either after tagging nr_to_tag items or + * after reaching last_index. + * + * The tags must be set from the leaf level only and propagated back up the + * path to the root. We must do this so that we resolve the full path before + * setting any tags on intermediate nodes. If we set tags as we descend, then + * we can get to the leaf node and find that the index that has the iftag + * set is outside the range we are scanning. This reults in dangling tags and + * can lead to problems with later tag operations (e.g. livelocks on lookups). + * + * The function returns number of leaves where the tag was set and sets + * *first_indexp to the first unscanned index. + * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must + * be prepared to handle that. + */ +unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, + unsigned long *first_indexp, unsigned long last_index, + unsigned long nr_to_tag, + unsigned int iftag, unsigned int settag) +{ + unsigned int height = root->height; + struct radix_tree_path path[height]; + struct radix_tree_path *pathp = path; + struct radix_tree_node *slot; + unsigned int shift; + unsigned long tagged = 0; + unsigned long index = *first_indexp; + + last_index = min(last_index, radix_tree_maxindex(height)); + if (index > last_index) + return 0; + if (!nr_to_tag) + return 0; + if (!root_tag_get(root, iftag)) { + *first_indexp = last_index + 1; + return 0; + } + if (height == 0) { + *first_indexp = last_index + 1; + root_tag_set(root, settag); + return 1; + } + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + slot = indirect_to_ptr(root->rnode); + + /* + * we fill the path from (root->height - 2) to 0, leaving the index at + * (root->height - 1) as a terminator. Zero the node in the terminator + * so that we can use this to end walk loops back up the path. + */ + path[height - 1].node = NULL; + + for (;;) { + int offset; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + if (!slot->slots[offset]) + goto next; + if (!tag_get(slot, iftag, offset)) + goto next; + if (height > 1) { + /* Go down one level */ + height--; + shift -= RADIX_TREE_MAP_SHIFT; + path[height - 1].node = slot; + path[height - 1].offset = offset; + slot = slot->slots[offset]; + continue; + } + + /* tag the leaf */ + tagged++; + tag_set(slot, settag, offset); + + /* walk back up the path tagging interior nodes */ + pathp = &path[0]; + while (pathp->node) { + /* stop if we find a node with the tag already set */ + if (tag_get(pathp->node, settag, pathp->offset)) + break; + tag_set(pathp->node, settag, pathp->offset); + pathp++; + } + +next: + /* Go to next item at level determined by 'shift' */ + index = ((index >> shift) + 1) << shift; + /* Overflow can happen when last_index is ~0UL... */ + if (index > last_index || !index) + break; + if (tagged >= nr_to_tag) + break; + while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { + /* + * We've fully scanned this node. Go up. Because + * last_index is guaranteed to be in the tree, what + * we do below cannot wander astray. + */ + slot = path[height - 1].node; + height++; + shift += RADIX_TREE_MAP_SHIFT; + } + } + /* + * The iftag must have been set somewhere because otherwise + * we would return immediated at the beginning of the function + */ + root_tag_set(root, settag); + *first_indexp = index; + + return tagged; +} +EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); + /** * radix_tree_next_hole - find the next hole (not-present entry) @@ -656,7 +795,7 @@ EXPORT_SYMBOL(radix_tree_next_hole); * * Returns: the index of the hole if found, otherwise returns an index * outside of the set specified (in which case 'index - return >= max_scan' - * will be true). In rare cases of wrap-around, LONG_MAX will be returned. + * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. * * radix_tree_next_hole may be called under rcu_read_lock. However, like * radix_tree_gang_lookup, this will not atomically search a snapshot of @@ -674,7 +813,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, if (!radix_tree_lookup(root, index)) break; index--; - if (index == LONG_MAX) + if (index == ULONG_MAX) break; } @@ -710,7 +849,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, } shift -= RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference(slot->slots[i]); + slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) goto out; } @@ -757,7 +896,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long cur_index = first_index; unsigned int ret; - node = rcu_dereference(root->rnode); + node = rcu_dereference_raw(root->rnode); if (!node) return 0; @@ -767,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, results[0] = node; return 1; } - node = radix_tree_indirect_to_ptr(node); + node = indirect_to_ptr(node); max_index = radix_tree_maxindex(node->height); @@ -786,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, slot = *(((void ***)results)[ret + i]); if (!slot) continue; - results[ret + nr_found] = rcu_dereference(slot); + results[ret + nr_found] = + indirect_to_ptr(rcu_dereference_raw(slot)); nr_found++; } ret += nr_found; @@ -825,7 +965,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, unsigned long cur_index = first_index; unsigned int ret; - node = rcu_dereference(root->rnode); + node = rcu_dereference_raw(root->rnode); if (!node) return 0; @@ -835,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, results[0] = (void **)&root->rnode; return 1; } - node = radix_tree_indirect_to_ptr(node); + node = indirect_to_ptr(node); max_index = radix_tree_maxindex(node->height); @@ -914,7 +1054,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, } } shift -= RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference(slot->slots[i]); + slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) break; } @@ -950,7 +1090,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, if (!root_tag_get(root, tag)) return 0; - node = rcu_dereference(root->rnode); + node = rcu_dereference_raw(root->rnode); if (!node) return 0; @@ -960,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, results[0] = node; return 1; } - node = radix_tree_indirect_to_ptr(node); + node = indirect_to_ptr(node); max_index = radix_tree_maxindex(node->height); @@ -979,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, slot = *(((void ***)results)[ret + i]); if (!slot) continue; - results[ret + nr_found] = rcu_dereference(slot); + results[ret + nr_found] = + indirect_to_ptr(rcu_dereference_raw(slot)); nr_found++; } ret += nr_found; @@ -1019,7 +1160,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, if (!root_tag_get(root, tag)) return 0; - node = rcu_dereference(root->rnode); + node = rcu_dereference_raw(root->rnode); if (!node) return 0; @@ -1029,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, results[0] = (void **)&root->rnode; return 1; } - node = radix_tree_indirect_to_ptr(node); + node = indirect_to_ptr(node); max_index = radix_tree_maxindex(node->height); @@ -1065,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) void *newptr; BUG_ON(!radix_tree_is_indirect_ptr(to_free)); - to_free = radix_tree_indirect_to_ptr(to_free); + to_free = indirect_to_ptr(to_free); /* * The candidate node has more than one child, or its child @@ -1078,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) /* * We don't need rcu_assign_pointer(), since we are simply - * moving the node from one part of the tree to another. If - * it was safe to dereference the old pointer to it + * moving the node from one part of the tree to another: if it + * was safe to dereference the old pointer to it * (to_free->slots[0]), it will be safe to dereference the new - * one (root->rnode). + * one (root->rnode) as far as dependent read barriers go. */ newptr = to_free->slots[0]; if (root->height > 1) - newptr = radix_tree_ptr_to_indirect(newptr); + newptr = ptr_to_indirect(newptr); root->rnode = newptr; root->height--; + + /* + * We have a dilemma here. The node's slot[0] must not be + * NULLed in case there are concurrent lookups expecting to + * find the item. However if this was a bottom-level node, + * then it may be subject to the slot pointer being visible + * to callers dereferencing it. If item corresponding to + * slot[0] is subsequently deleted, these callers would expect + * their slot to become empty sooner or later. + * + * For example, lockless pagecache will look up a slot, deref + * the page pointer, and if the page is 0 refcount it means it + * was concurrently deleted from pagecache so try the deref + * again. Fortunately there is already a requirement for logic + * to retry the entire slot lookup -- the indirect pointer + * problem (replacing direct root node with an indirect pointer + * also results in a stale slot). So tag the slot as indirect + * to force callers to retry. + */ + if (root->height == 0) + *((unsigned long *)&to_free->slots[0]) |= + RADIX_TREE_INDIRECT_PTR; + radix_tree_node_free(to_free); } } @@ -1124,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) root->rnode = NULL; goto out; } - slot = radix_tree_indirect_to_ptr(slot); + slot = indirect_to_ptr(slot); shift = (height - 1) * RADIX_TREE_MAP_SHIFT; pathp->node = NULL; @@ -1166,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) radix_tree_node_free(to_free); if (pathp->node->count) { - if (pathp->node == - radix_tree_indirect_to_ptr(root->rnode)) + if (pathp->node == indirect_to_ptr(root->rnode)) radix_tree_shrink(root); goto out; } diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore new file mode 100644 index 000000000000..162becacf97c --- /dev/null +++ b/lib/raid6/.gitignore @@ -0,0 +1,4 @@ +mktables +altivec*.c +int*.c +tables.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile new file mode 100644 index 000000000000..8a38102770f3 --- /dev/null +++ b/lib/raid6/Makefile @@ -0,0 +1,75 @@ +obj-$(CONFIG_RAID6_PQ) += raid6_pq.o + +raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ + int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ + altivec8.o mmx.o sse1.o sse2.o +hostprogs-y += mktables + +quiet_cmd_unroll = UNROLL $@ + cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ + < $< > $@ || ( rm -f $@ && exit 1 ) + +ifeq ($(CONFIG_ALTIVEC),y) +altivec_flags := -maltivec -mabi=altivec +endif + +targets += int1.c +$(obj)/int1.c: UNROLL := 1 +$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int2.c +$(obj)/int2.c: UNROLL := 2 +$(obj)/int2.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int4.c +$(obj)/int4.c: UNROLL := 4 +$(obj)/int4.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int8.c +$(obj)/int8.c: UNROLL := 8 +$(obj)/int8.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int16.c +$(obj)/int16.c: UNROLL := 16 +$(obj)/int16.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int32.c +$(obj)/int32.c: UNROLL := 32 +$(obj)/int32.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_altivec1.o += $(altivec_flags) +targets += altivec1.c +$(obj)/altivec1.c: UNROLL := 1 +$(obj)/altivec1.c: $(src)/altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_altivec2.o += $(altivec_flags) +targets += altivec2.c +$(obj)/altivec2.c: UNROLL := 2 +$(obj)/altivec2.c: $(src)/altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_altivec4.o += $(altivec_flags) +targets += altivec4.c +$(obj)/altivec4.c: UNROLL := 4 +$(obj)/altivec4.c: $(src)/altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_altivec8.o += $(altivec_flags) +targets += altivec8.c +$(obj)/altivec8.c: UNROLL := 8 +$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +quiet_cmd_mktable = TABLE $@ + cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) + +targets += tables.c +$(obj)/tables.c: $(obj)/mktables FORCE + $(call if_changed,mktable) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c new file mode 100644 index 000000000000..b595f560bee7 --- /dev/null +++ b/lib/raid6/algos.c @@ -0,0 +1,154 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/algos.c + * + * Algorithm list and algorithm selection for RAID-6 + */ + +#include <linux/raid/pq.h> +#ifndef __KERNEL__ +#include <sys/mman.h> +#include <stdio.h> +#else +#include <linux/gfp.h> +#if !RAID6_USE_EMPTY_ZERO_PAGE +/* In .bss so it's zeroed */ +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +EXPORT_SYMBOL(raid6_empty_zero_page); +#endif +#endif + +struct raid6_calls raid6_call; +EXPORT_SYMBOL_GPL(raid6_call); + +const struct raid6_calls * const raid6_algos[] = { + &raid6_intx1, + &raid6_intx2, + &raid6_intx4, + &raid6_intx8, +#if defined(__ia64__) + &raid6_intx16, + &raid6_intx32, +#endif +#if defined(__i386__) && !defined(__arch_um__) + &raid6_mmxx1, + &raid6_mmxx2, + &raid6_sse1x1, + &raid6_sse1x2, + &raid6_sse2x1, + &raid6_sse2x2, +#endif +#if defined(__x86_64__) && !defined(__arch_um__) + &raid6_sse2x1, + &raid6_sse2x2, + &raid6_sse2x4, +#endif +#ifdef CONFIG_ALTIVEC + &raid6_altivec1, + &raid6_altivec2, + &raid6_altivec4, + &raid6_altivec8, +#endif + NULL +}; + +#ifdef __KERNEL__ +#define RAID6_TIME_JIFFIES_LG2 4 +#else +/* Need more time to be stable in userspace */ +#define RAID6_TIME_JIFFIES_LG2 9 +#define time_before(x, y) ((x) < (y)) +#endif + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as convenient data set to abuse */ + +int __init raid6_select_algo(void) +{ + const struct raid6_calls * const * algo; + const struct raid6_calls * best; + char *syndromes; + void *dptrs[(65536/PAGE_SIZE)+2]; + int i, disks; + unsigned long perf, bestperf; + int bestprefer; + unsigned long j0, j1; + + disks = (65536/PAGE_SIZE)+2; + for ( i = 0 ; i < disks-2 ; i++ ) { + dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; + } + + /* Normal code - use a 2-page allocation to avoid D$ conflict */ + syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + + if ( !syndromes ) { + printk("raid6: Yikes! No memory available.\n"); + return -ENOMEM; + } + + dptrs[disks-2] = syndromes; + dptrs[disks-1] = syndromes + PAGE_SIZE; + + bestperf = 0; bestprefer = 0; best = NULL; + + for ( algo = raid6_algos ; *algo ; algo++ ) { + if ( !(*algo)->valid || (*algo)->valid() ) { + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ( (j1 = jiffies) == j0 ) + cpu_relax(); + while (time_before(jiffies, + j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { + (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); + perf++; + } + preempt_enable(); + + if ( (*algo)->prefer > bestprefer || + ((*algo)->prefer == bestprefer && + perf > bestperf) ) { + best = *algo; + bestprefer = best->prefer; + bestperf = perf; + } + printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + } + } + + if (best) { + printk("raid6: using algorithm %s (%ld MB/s)\n", + best->name, + (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + raid6_call = *best; + } else + printk("raid6: Yikes! No algorithm found!\n"); + + free_pages((unsigned long)syndromes, 1); + + return best ? 0 : -EINVAL; +} + +static void raid6_exit(void) +{ + do { } while (0); +} + +subsys_initcall(raid6_select_algo); +module_exit(raid6_exit); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAID6 Q-syndrome calculations"); diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc new file mode 100644 index 000000000000..2654d5c854be --- /dev/null +++ b/lib/raid6/altivec.uc @@ -0,0 +1,130 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6altivec$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + * + * <benh> hpa: in process, + * you can just "steal" the vec unit with enable_kernel_altivec() (but + * bracked this with preempt_disable/enable or in a lock) + */ + +#include <linux/raid/pq.h> + +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +# include <asm/system.h> +# include <asm/cputable.h> +#endif + +/* + * This is the C data type to use. We use a vector of + * signed char so vec_cmpgt() will generate the right + * instruction. + */ + +typedef vector signed char unative_t; + +#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) +#define NSIZE sizeof(unative_t) + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + return vec_add(v,v); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t zv = NBYTES(0); + + /* vec_cmpgt returns a vector bool char; thus the need for the cast */ + return (unative_t)vec_cmpgt(zv, v); +} + + +/* This is noinline to make damned sure that gcc doesn't move any of the + Altivec code around the enable/disable code */ +static void noinline +raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + unative_t x1d = NBYTES(0x1d); + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ = vec_xor(wp$$, wd$$); + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ = vec_and(w2$$, x1d); + w1$$ = vec_xor(w1$$, w2$$); + wq$$ = vec_xor(w1$$, wd$$); + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + preempt_disable(); + enable_kernel_altivec(); + + raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); + + preempt_enable(); +} + +int raid6_have_altivec(void); +#if $# == 1 +int raid6_have_altivec(void) +{ + /* This assumes either all CPUs have Altivec or none does */ +# ifdef __KERNEL__ + return cpu_has_feature(CPU_FTR_ALTIVEC); +# else + return 1; +# endif +} +#endif + +const struct raid6_calls raid6_altivec$# = { + raid6_altivec$#_gen_syndrome, + raid6_have_altivec, + "altivecx$#", + 0 +}; + +#endif /* CONFIG_ALTIVEC */ diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc new file mode 100644 index 000000000000..d1e276a14fab --- /dev/null +++ b/lib/raid6/int.uc @@ -0,0 +1,117 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6int$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + */ + +#include <linux/raid/pq.h> + +/* + * This is the C data type to use + */ + +/* Change this from BITS_PER_LONG if there is something better... */ +#if BITS_PER_LONG == 64 +# define NBYTES(x) ((x) * 0x0101010101010101UL) +# define NSIZE 8 +# define NSHIFT 3 +# define NSTRING "64" +typedef u64 unative_t; +#else +# define NBYTES(x) ((x) * 0x01010101U) +# define NSIZE 4 +# define NSHIFT 2 +# define NSTRING "32" +typedef u32 unative_t; +#endif + + + +/* + * IA-64 wants insane amounts of unrolling. On other architectures that + * is just a waste of space. + */ +#if ($# <= 8) || defined(__ia64__) + + +/* + * These sub-operations are separate inlines since they can sometimes be + * specially optimized using architecture-specific hacks. + */ + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + unative_t vv; + + vv = (v << 1) & NBYTES(0xfe); + return vv; +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t vv; + + vv = v & NBYTES(0x80); + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ + return vv; +} + + +static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ ^= wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + w1$$ ^= w2$$; + wq$$ = w1$$ ^ wd$$; + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +const struct raid6_calls raid6_intx$# = { + raid6_int$#_gen_syndrome, + NULL, /* always valid */ + "int" NSTRING "x$#", + 0 +}; + +#endif diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c new file mode 100644 index 000000000000..3b1500843bba --- /dev/null +++ b/lib/raid6/mktables.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * mktables.c + * + * Make RAID-6 tables. This is a host user space program to be run at + * compile time. + */ + +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <stdlib.h> +#include <time.h> + +static uint8_t gfmul(uint8_t a, uint8_t b) +{ + uint8_t v = 0; + + while (b) { + if (b & 1) + v ^= a; + a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); + b >>= 1; + } + + return v; +} + +static uint8_t gfpow(uint8_t a, int b) +{ + uint8_t v = 1; + + b %= 255; + if (b < 0) + b += 255; + + while (b) { + if (b & 1) + v = gfmul(v, a); + a = gfmul(a, a); + b >>= 1; + } + + return v; +} + +int main(int argc, char *argv[]) +{ + int i, j, k; + uint8_t v; + uint8_t exptbl[256], invtbl[256]; + + printf("#include <linux/raid/pq.h>\n"); + + /* Compute multiplication table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfmul[256][256] =\n" + "{\n"); + for (i = 0; i < 256; i++) { + printf("\t{\n"); + for (j = 0; j < 256; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, j + k), + (k == 7) ? '\n' : ' '); + } + printf("\t},\n"); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfmul);\n"); + printf("#endif\n"); + + /* Compute power-of-2 table (exponent) */ + v = 1; + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfexp[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) { + exptbl[i + j] = v; + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); + v = gfmul(v, 2); + if (v == 1) + v = 0; /* For entry 255, not a real entry */ + } + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfexp);\n"); + printf("#endif\n"); + + /* Compute inverse table x^-1 == x^254 */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfinv[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) { + invtbl[i + j] = v = gfpow(i + j, 254); + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); + } + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfinv);\n"); + printf("#endif\n"); + + /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfexi[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) + printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], + (j == 7) ? '\n' : ' '); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfexi);\n"); + printf("#endif\n"); + + return 0; +} diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c new file mode 100644 index 000000000000..279347f23094 --- /dev/null +++ b/lib/raid6/mmx.c @@ -0,0 +1,142 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/mmx.c + * + * MMX implementation of RAID-6 syndrome functions + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +/* Shared with raid6/sse1.c */ +const struct raid6_mmx_constants { + u64 x1d; +} raid6_mmx_constants = { + 0x1d1d1d1d1d1d1d1dULL, +}; + +static int raid6_have_mmx(void) +{ + /* Not really "boot_cpu" but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX); +} + +/* + * Plain MMX implementation + */ +static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 8 ) { + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + } + asm volatile("movq %%mm2,%0" : "=m" (p[d])); + asm volatile("pxor %mm2,%mm2"); + asm volatile("movq %%mm4,%0" : "=m" (q[d])); + asm volatile("pxor %mm4,%mm4"); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx1 = { + raid6_mmx1_gen_syndrome, + raid6_have_mmx, + "mmxx1", + 0 +}; + +/* + * Unrolled-by-2 MMX implementation + */ +static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %mm3,%mm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("pcmpgtb %mm6,%mm7"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("paddb %mm6,%mm6"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pand %mm0,%mm7"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); + asm volatile("pxor %mm5,%mm2"); + asm volatile("pxor %mm7,%mm3"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm7,%mm7"); + } + asm volatile("movq %%mm2,%0" : "=m" (p[d])); + asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); + asm volatile("movq %%mm4,%0" : "=m" (q[d])); + asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx2 = { + raid6_mmx2_gen_syndrome, + raid6_have_mmx, + "mmxx2", + 0 +}; + +#endif diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c new file mode 100644 index 000000000000..8590d19cf522 --- /dev/null +++ b/lib/raid6/recov.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/recov.c + * + * RAID-6 data recovery in dual failure mode. In single failure mode, + * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct + * the syndrome.) + */ + +#include <linux/raid/pq.h> + +/* Recover two failed data blocks. */ +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + void **ptrs) +{ + u8 *p, *q, *dp, *dq; + u8 px, qx, db; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; + + /* Now do it... */ + while ( bytes-- ) { + px = *p ^ *dp; + qx = qmul[*q ^ *dq]; + *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ + *dp++ = db ^ px; /* Reconstructed A */ + p++; q++; + } +} +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +/* Recover failure of one data block plus the P block */ +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + /* Now do it... */ + while ( bytes-- ) { + *p++ ^= *dq = qmul[*q ^ *dq]; + q++; dq++; + } +} +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +#ifndef __KERNEL__ +/* Testing only */ + +/* Recover two failed blocks. */ +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) +{ + if ( faila > failb ) { + int tmp = faila; + faila = failb; + failb = tmp; + } + + if ( failb == disks-1 ) { + if ( faila == disks-2 ) { + /* P+Q failure. Just rebuild the syndrome. */ + raid6_call.gen_syndrome(disks, bytes, ptrs); + } else { + /* data+Q failure. Reconstruct data from P, + then rebuild syndrome. */ + /* NOT IMPLEMENTED - equivalent to RAID-5 */ + } + } else { + if ( failb == disks-2 ) { + /* data+P failure. */ + raid6_datap_recov(disks, bytes, faila, ptrs); + } else { + /* data+data failure. */ + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + } + } +} + +#endif diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c new file mode 100644 index 000000000000..10dd91948c07 --- /dev/null +++ b/lib/raid6/sse1.c @@ -0,0 +1,162 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/sse1.c + * + * SSE-1/MMXEXT implementation of RAID-6 syndrome functions + * + * This is really an MMX implementation, but it requires SSE-1 or + * AMD MMXEXT for prefetch support and a few other features. The + * support for nontemporal memory accesses is enough to make this + * worthwhile as a separate implementation. + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +/* Defined in raid6/mmx.c */ +extern const struct raid6_mmx_constants { + u64 x1d; +} raid6_mmx_constants; + +static int raid6_have_sse1_or_mmxext(void) +{ + /* Not really boot_cpu but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX) && + (boot_cpu_has(X86_FEATURE_XMM) || + boot_cpu_has(X86_FEATURE_MMXEXT)); +} + +/* + * Plain SSE1 implementation + */ +static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 8 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); + for ( z = z0-2 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); + } + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + + asm volatile("movntq %%mm2,%0" : "=m" (p[d])); + asm volatile("movntq %%mm4,%0" : "=m" (q[d])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x1 = { + raid6_sse11_gen_syndrome, + raid6_have_sse1_or_mmxext, + "sse1x1", + 1 /* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE1 implementation + */ +static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + + /* We uniformly assume a single prefetch covers at least 16 bytes */ + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %mm3,%mm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("pcmpgtb %mm6,%mm7"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("paddb %mm6,%mm6"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pand %mm0,%mm7"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); + asm volatile("pxor %mm5,%mm2"); + asm volatile("pxor %mm7,%mm3"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm7,%mm7"); + } + asm volatile("movntq %%mm2,%0" : "=m" (p[d])); + asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); + asm volatile("movntq %%mm4,%0" : "=m" (q[d])); + asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); + } + + asm volatile("sfence" : :: "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x2 = { + raid6_sse12_gen_syndrome, + raid6_have_sse1_or_mmxext, + "sse1x2", + 1 /* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c new file mode 100644 index 000000000000..bc2d57daa589 --- /dev/null +++ b/lib/raid6/sse2.c @@ -0,0 +1,262 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/sse2.c + * + * SSE-2 implementation of RAID-6 syndrome functions + * + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +static const struct raid6_sse_constants { + u64 x1d[2]; +} raid6_sse_constants __attribute__((aligned(16))) = { + { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, +}; + +static int raid6_have_sse2(void) +{ + /* Not really boot_cpu but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX) && + boot_cpu_has(X86_FEATURE_FXSR) && + boot_cpu_has(X86_FEATURE_XMM) && + boot_cpu_has(X86_FEATURE_XMM2); +} + +/* + * Plain SSE2 implementation + */ +static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); + for ( z = z0-2 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm6,%xmm2"); + asm volatile("pxor %xmm6,%xmm4"); + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); + } + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm6,%xmm2"); + asm volatile("pxor %xmm6,%xmm4"); + + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("pxor %xmm2,%xmm2"); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("pxor %xmm4,%xmm4"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x1 = { + raid6_sse21_gen_syndrome, + raid6_have_sse2, + "sse2x1", + 1 /* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE2 implementation + */ +static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ + + /* We uniformly assume a single prefetch covers at least 32 bytes */ + for ( d = 0 ; d < bytes ; d += 32 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ + asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x2 = { + raid6_sse22_gen_syndrome, + raid6_have_sse2, + "sse2x2", + 1 /* Has cache hints */ +}; + +#endif + +#if defined(__x86_64__) && !defined(__arch_um__) + +/* + * Unrolled-by-4 SSE2 implementation + */ +static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm2,%xmm2"); /* P[0] */ + asm volatile("pxor %xmm3,%xmm3"); /* P[1] */ + asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */ + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */ + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ + asm volatile("pxor %xmm10,%xmm10"); /* P[2] */ + asm volatile("pxor %xmm11,%xmm11"); /* P[3] */ + asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */ + asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */ + asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */ + asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 64 ) { + for ( z = z0 ; z >= 0 ; z-- ) { + /* The second prefetch seems to improve performance... */ + asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); + asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); + asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm13,%xmm10"); + asm volatile("pxor %xmm15,%xmm11"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("pxor %xmm2,%xmm2"); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("pxor %xmm3,%xmm3"); + asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); + asm volatile("pxor %xmm10,%xmm10"); + asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); + asm volatile("pxor %xmm11,%xmm11"); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("pxor %xmm4,%xmm4"); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("pxor %xmm6,%xmm6"); + asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); + asm volatile("pxor %xmm12,%xmm12"); + asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); + asm volatile("pxor %xmm14,%xmm14"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x4 = { + raid6_sse24_gen_syndrome, + raid6_have_sse2, + "sse2x4", + 1 /* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile new file mode 100644 index 000000000000..aa651697b6dc --- /dev/null +++ b/lib/raid6/test/Makefile @@ -0,0 +1,72 @@ +# +# This is a simple Makefile to test some of the RAID-6 code +# from userspace. +# + +CC = gcc +OPTFLAGS = -O2 # Adjust as desired +CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) +LD = ld +AWK = awk -f +AR = ar +RANLIB = ranlib + +.c.o: + $(CC) $(CFLAGS) -c -o $@ $< + +%.c: ../%.c + cp -f $< $@ + +%.uc: ../%.uc + cp -f $< $@ + +all: raid6.a raid6test + +raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ + altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \ + tables.o + rm -f $@ + $(AR) cq $@ $^ + $(RANLIB) $@ + +raid6test: test.c raid6.a + $(CC) $(CFLAGS) -o raid6test $^ + +altivec1.c: altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@ + +altivec2.c: altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < altivec.uc > $@ + +altivec4.c: altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < altivec.uc > $@ + +altivec8.c: altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@ + +int1.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < int.uc > $@ + +int2.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < int.uc > $@ + +int4.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < int.uc > $@ + +int8.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < int.uc > $@ + +int16.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=16 < int.uc > $@ + +int32.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=32 < int.uc > $@ + +tables.c: mktables + ./mktables > tables.c + +clean: + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test + +spotless: clean + rm -f *~ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c new file mode 100644 index 000000000000..7a930318b17d --- /dev/null +++ b/lib/raid6/test/test.c @@ -0,0 +1,124 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6test.c + * + * Test RAID-6 recovery with various algorithms + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <linux/raid/pq.h> + +#define NDISKS 16 /* Including P and Q */ + +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +struct raid6_calls raid6_call; + +char *dataptrs[NDISKS]; +char data[NDISKS][PAGE_SIZE]; +char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; + +static void makedata(void) +{ + int i, j; + + for (i = 0; i < NDISKS; i++) { + for (j = 0; j < PAGE_SIZE; j++) + data[i][j] = rand(); + + dataptrs[i] = data[i]; + } +} + +static char disk_type(int d) +{ + switch (d) { + case NDISKS-2: + return 'P'; + case NDISKS-1: + return 'Q'; + default: + return 'D'; + } +} + +static int test_disks(int i, int j) +{ + int erra, errb; + + memset(recovi, 0xf0, PAGE_SIZE); + memset(recovj, 0xba, PAGE_SIZE); + + dataptrs[i] = recovi; + dataptrs[j] = recovj; + + raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); + + erra = memcmp(data[i], recovi, PAGE_SIZE); + errb = memcmp(data[j], recovj, PAGE_SIZE); + + if (i < NDISKS-2 && j == NDISKS-1) { + /* We don't implement the DQ failure scenario, since it's + equivalent to a RAID-5 failure (XOR, then recompute Q) */ + erra = errb = 0; + } else { + printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", + raid6_call.name, + i, disk_type(i), + j, disk_type(j), + (!erra && !errb) ? "OK" : + !erra ? "ERRB" : + !errb ? "ERRA" : "ERRAB"); + } + + dataptrs[i] = data[i]; + dataptrs[j] = data[j]; + + return erra || errb; +} + +int main(int argc, char *argv[]) +{ + const struct raid6_calls *const *algo; + int i, j; + int err = 0; + + makedata(); + + for (algo = raid6_algos; *algo; algo++) { + if (!(*algo)->valid || (*algo)->valid()) { + raid6_call = **algo; + + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } + printf("\n"); + } + + printf("\n"); + /* Pick the best algorithm test */ + raid6_select_algo(); + + if (err) + printf("\n*** ERRORS FOUND ***\n"); + + return err; +} diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk new file mode 100644 index 000000000000..c6aa03631df8 --- /dev/null +++ b/lib/raid6/unroll.awk @@ -0,0 +1,20 @@ + +# This filter requires one command line option of form -vN=n +# where n must be a decimal number. +# +# Repeat each input line containing $$ n times, replacing $$ with 0...n-1. +# Replace each $# with n, and each $* with a single $. + +BEGIN { + n = N + 0 +} +{ + if (/\$\$/) { rep = n } else { rep = 1 } + for (i = 0; i < rep; ++i) { + tmp = $0 + gsub(/\$\$/, i, tmp) + gsub(/\$\#/, n, tmp) + gsub(/\$\*/, "$", tmp) + print tmp + } +} diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h new file mode 100644 index 000000000000..cb2a8c91c886 --- /dev/null +++ b/lib/raid6/x86.h @@ -0,0 +1,61 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/x86.h + * + * Definitions common to x86 and x86-64 RAID-6 code only + */ + +#ifndef LINUX_RAID_RAID6X86_H +#define LINUX_RAID_RAID6X86_H + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#ifdef __KERNEL__ /* Real code */ + +#include <asm/i387.h> + +#else /* Dummy code for user space testing */ + +static inline void kernel_fpu_begin(void) +{ +} + +static inline void kernel_fpu_end(void) +{ +} + +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions + * (fast save and restore) */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ + +/* Should work well enough on modern CPUs for testing */ +static inline int boot_cpu_has(int flag) +{ + u32 eax = (flag >> 5) ? 0x80000001 : 1; + u32 edx; + + asm volatile("cpuid" + : "+a" (eax), "=d" (edx) + : : "ecx", "ebx"); + + return (edx >> (flag & 31)) & 1; +} + +#endif /* ndef __KERNEL__ */ + +#endif +#endif diff --git a/lib/random32.c b/lib/random32.c index 217d5c4b666d..fc3545a32771 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,13 +39,16 @@ #include <linux/jiffies.h> #include <linux/random.h> -struct rnd_state { - u32 s1, s2, s3; -}; - static DEFINE_PER_CPU(struct rnd_state, net_rand_state); -static u32 __random32(struct rnd_state *state) +/** + * prandom32 - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use random32(). + */ +u32 prandom32(struct rnd_state *state) { #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) @@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state) return (state->s1 ^ state->s2 ^ state->s3); } - -/* - * Handle minimum values for seeds - */ -static inline u32 __seed(u32 x, u32 m) -{ - return (x < m) ? x + m : x; -} +EXPORT_SYMBOL(prandom32); /** * random32 - pseudo random number generator @@ -75,7 +71,7 @@ u32 random32(void) { unsigned long r; struct rnd_state *state = &get_cpu_var(net_rand_state); - r = __random32(state); + r = prandom32(state); put_cpu_var(state); return r; } @@ -118,12 +114,12 @@ static int __init random32_init(void) state->s3 = __seed(LCG(state->s2), 15); /* "warm it up" */ - __random32(state); - __random32(state); - __random32(state); - __random32(state); - __random32(state); - __random32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); } return 0; } @@ -131,7 +127,7 @@ core_initcall(random32_init); /* * Generate better values after random number generator - * is fully initalized. + * is fully initialized. */ static int __init random32_reseed(void) { @@ -147,7 +143,7 @@ static int __init random32_reseed(void) state->s3 = __seed(seeds[2], 15); /* mix it in */ - __random32(state); + prandom32(state); } return 0; } diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 26187edcc7ea..027a03f4c56d 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -7,51 +7,61 @@ * parameter. Now every user can use their own standalone ratelimit_state. * * This file is released under the GPLv2. - * */ -#include <linux/kernel.h> +#include <linux/ratelimit.h> #include <linux/jiffies.h> #include <linux/module.h> -static DEFINE_SPINLOCK(ratelimit_lock); - /* * __ratelimit - rate limiting * @rs: ratelimit_state data + * @func: name of calling function + * + * This enforces a rate limit: not more than @rs->burst callbacks + * in every @rs->interval * - * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks - * in every @rs->ratelimit_jiffies + * RETURNS: + * 0 means callbacks will be suppressed. + * 1 means go ahead and do it. */ -int __ratelimit(struct ratelimit_state *rs) +int ___ratelimit(struct ratelimit_state *rs, const char *func) { unsigned long flags; + int ret; if (!rs->interval) return 1; - spin_lock_irqsave(&ratelimit_lock, flags); + /* + * If we contend on this state's lock then almost + * by definition we are too busy to print a message, + * in addition to the one that will be printed by + * the entity that is holding the lock already: + */ + if (!spin_trylock_irqsave(&rs->lock, flags)) + return 0; + if (!rs->begin) rs->begin = jiffies; if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", - __func__, rs->missed); - rs->begin = 0; + func, rs->missed); + rs->begin = 0; rs->printed = 0; - rs->missed = 0; + rs->missed = 0; } - if (rs->burst && rs->burst > rs->printed) - goto print; - - rs->missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + if (rs->burst && rs->burst > rs->printed) { + rs->printed++; + ret = 1; + } else { + rs->missed++; + ret = 0; + } + spin_unlock_irqrestore(&rs->lock, flags); -print: - rs->printed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 1; + return ret; } -EXPORT_SYMBOL(__ratelimit); +EXPORT_SYMBOL(___ratelimit); diff --git a/lib/rational.c b/lib/rational.c index b3c099b5478e..3ed247b80662 100644 --- a/lib/rational.c +++ b/lib/rational.c @@ -7,6 +7,7 @@ */ #include <linux/rational.h> +#include <linux/module.h> /* * calculate best rational approximation for a given fraction diff --git a/lib/rbtree.c b/lib/rbtree.c index e2aa3be29858..4693f79195d3 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -283,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root) } EXPORT_SYMBOL(rb_erase); +static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data) +{ + struct rb_node *parent; + +up: + func(node, data); + parent = rb_parent(node); + if (!parent) + return; + + if (node == parent->rb_left && parent->rb_right) + func(parent->rb_right, data); + else if (parent->rb_left) + func(parent->rb_left, data); + + node = parent; + goto up; +} + +/* + * after inserting @node into the tree, update the tree to account for + * both the new entry and any damage done by rebalance + */ +void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + + rb_augment_path(node, func, data); +} + +/* + * before removing the node, find the deepest node on the rebalance path + * that will still be there after @node gets removed + */ +struct rb_node *rb_augment_erase_begin(struct rb_node *node) +{ + struct rb_node *deepest; + + if (!node->rb_right && !node->rb_left) + deepest = rb_parent(node); + else if (!node->rb_right) + deepest = node->rb_left; + else if (!node->rb_left) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/* + * after removal, update the tree to account for the removed entry + * and any rebalance damage. + */ +void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node) + rb_augment_path(node, func, data); +} + /* * This function returns the first node (in sort order) of the tree. */ diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 9df3ca56db11..ffc9fc7f3b05 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -17,6 +17,19 @@ struct rwsem_waiter { #define RWSEM_WAITING_FOR_WRITE 0x00000002 }; +int rwsem_is_locked(struct rw_semaphore *sem) +{ + int ret = 1; + unsigned long flags; + + if (spin_trylock_irqsave(&sem->wait_lock, flags)) { + ret = (sem->activity != 0); + spin_unlock_irqrestore(&sem->wait_lock, flags); + } + return ret; +} +EXPORT_SYMBOL(rwsem_is_locked); + /* * initialise the semaphore */ @@ -34,6 +47,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } +EXPORT_SYMBOL(__init_rwsem); /* * handle the lock release when processes blocked on it that can now run @@ -129,13 +143,14 @@ void __sched __down_read(struct rw_semaphore *sem) { struct rwsem_waiter waiter; struct task_struct *tsk; + unsigned long flags; - spin_lock_irq(&sem->wait_lock); + spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity >= 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity++; - spin_unlock_irq(&sem->wait_lock); + spin_unlock_irqrestore(&sem->wait_lock, flags); goto out; } @@ -150,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem) list_add_tail(&waiter.list, &sem->wait_list); /* we don't need to touch the semaphore struct anymore */ - spin_unlock_irq(&sem->wait_lock); + spin_unlock_irqrestore(&sem->wait_lock, flags); /* wait to be given the lock */ for (;;) { @@ -195,13 +210,14 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) { struct rwsem_waiter waiter; struct task_struct *tsk; + unsigned long flags; - spin_lock_irq(&sem->wait_lock); + spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity == 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity = -1; - spin_unlock_irq(&sem->wait_lock); + spin_unlock_irqrestore(&sem->wait_lock, flags); goto out; } @@ -216,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) list_add_tail(&waiter.list, &sem->wait_list); /* we don't need to touch the semaphore struct anymore */ - spin_unlock_irq(&sem->wait_lock); + spin_unlock_irqrestore(&sem->wait_lock, flags); /* wait to be given the lock */ for (;;) { @@ -305,12 +321,3 @@ void __downgrade_write(struct rw_semaphore *sem) spin_unlock_irqrestore(&sem->wait_lock, flags); } -EXPORT_SYMBOL(__init_rwsem); -EXPORT_SYMBOL(__down_read); -EXPORT_SYMBOL(__down_read_trylock); -EXPORT_SYMBOL(__down_write_nested); -EXPORT_SYMBOL(__down_write); -EXPORT_SYMBOL(__down_write_trylock); -EXPORT_SYMBOL(__up_read); -EXPORT_SYMBOL(__up_write); -EXPORT_SYMBOL(__downgrade_write); diff --git a/lib/rwsem.c b/lib/rwsem.c index 3e3365e5665e..f236d7cd5cf3 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -36,45 +36,56 @@ struct rwsem_waiter { #define RWSEM_WAITING_FOR_WRITE 0x00000002 }; +/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and + * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held + * since the rwsem value was observed. + */ +#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ +#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ +#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ + /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then: * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) - * - there must be someone on the queue + * - there must be someone on the queue * - the spinlock must be held by the caller * - woken process blocks are discarded from the list after having task zeroed * - writers are only woken if downgrading is false */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) +static struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) { struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long oldcount, woken, loop; - - if (downgrading) - goto dont_wake_writers; - - /* if we came through an up_xxxx() call, we only only wake someone up - * if we can transition the active part of the count from 0 -> 1 - */ - try_again: - oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) - - RWSEM_ACTIVE_BIAS; - if (oldcount & RWSEM_ACTIVE_MASK) - goto undo; + signed long oldcount, woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - - /* try to grant a single write lock if there's a writer at the front - * of the queue - note we leave the 'active part' of the count - * incremented by 1 and the waiting part incremented by 0x00010000 - */ if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) goto readers_only; + if (wake_type == RWSEM_WAKE_READ_OWNED) + /* Another active reader was observed, so wakeup is not + * likely to succeed. Save the atomic op. + */ + goto out; + + /* There's a writer at the front of the queue - try to grant it the + * write lock. However, we only wake this writer if we can transition + * the active part of the count from 0 -> 1 + */ + adjustment = RWSEM_ACTIVE_WRITE_BIAS; + if (waiter->list.next == &sem->wait_list) + adjustment -= RWSEM_WAITING_BIAS; + + try_again_write: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (oldcount & RWSEM_ACTIVE_MASK) + /* Someone grabbed the sem already */ + goto undo_write; + /* We must be careful not to touch 'waiter' after we set ->task = NULL. * It is an allocated on the waiter's stack and may become invalid at * any time after that point (due to a wakeup from another source). @@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) put_task_struct(tsk); goto out; - /* don't want to wake any writers */ - dont_wake_writers: - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) + readers_only: + /* If we come here from up_xxxx(), another thread might have reached + * rwsem_down_failed_common() before we acquired the spinlock and + * woken up a waiter, making it now active. We prefer to check for + * this first in order to not spend too much time with the spinlock + * held if we're not going to be able to wake up readers in the end. + * + * Note that we do not need to update the rwsem count: any writer + * trying to acquire rwsem will run rwsem_down_write_failed() due + * to the waiting threads and block trying to acquire the spinlock. + * + * We use a dummy atomic update in order to acquire the cache line + * exclusively since we expect to succeed and run the final rwsem + * count adjustment pretty soon. + */ + if (wake_type == RWSEM_WAKE_ANY && + rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) + /* Someone grabbed the sem for write already */ goto out; - /* grant an infinite number of read locks to the readers at the front - * of the queue - * - note we increment the 'active part' of the count by the number of - * readers before waking any processes up + /* Grant an infinite number of read locks to the readers at the front + * of the queue. Note we increment the 'active part' of the count by + * the number of readers before waking any processes up. */ - readers_only: woken = 0; do { woken++; @@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) } while (waiter->flags & RWSEM_WAITING_FOR_READ); - loop = woken; - woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; - if (!downgrading) - /* we'd already done one increment earlier */ - woken -= RWSEM_ACTIVE_BIAS; + adjustment = woken * RWSEM_ACTIVE_READ_BIAS; + if (waiter->flags & RWSEM_WAITING_FOR_READ) + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; - rwsem_atomic_add(woken, sem); + rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; - for (; loop > 0; loop--) { + for (loop = woken; loop > 0; loop--) { waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; @@ -136,11 +158,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) out: return sem; - /* undo the change to count, but check for a transition 1->0 */ - undo: - if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) + /* undo the change to the active count, but check for a transition + * 1->0 */ + undo_write: + if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) goto out; - goto try_again; + goto try_again_write; } /* @@ -148,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) */ static struct rw_semaphore __sched * rwsem_down_failed_common(struct rw_semaphore *sem, - struct rwsem_waiter *waiter, signed long adjustment) + unsigned int flags, signed long adjustment) { + struct rwsem_waiter waiter; struct task_struct *tsk = current; signed long count; @@ -157,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* set up my own style of waitqueue */ spin_lock_irq(&sem->wait_lock); - waiter->task = tsk; + waiter.task = tsk; + waiter.flags = flags; get_task_struct(tsk); - list_add_tail(&waiter->list, &sem->wait_list); + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); - /* we're now waiting on the lock, but no longer actively read-locking */ + /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* if there are no active locks, wake the front queued process(es) up */ - if (!(count & RWSEM_ACTIVE_MASK)) - sem = __rwsem_do_wake(sem, 0); + /* If there are no active locks, wake the front queued process(es) up. + * + * Alternatively, if we're called from a failed down_write(), there + * were already threads queued before us and there are no active + * writers, the lock must be read owned; so we try to wake any read + * locks that were queued ahead of us. */ + if (count == RWSEM_WAITING_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + else if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ for (;;) { - if (!waiter->task) + if (!waiter.task) break; schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); @@ -190,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem, asmregparm struct rw_semaphore __sched * rwsem_down_read_failed(struct rw_semaphore *sem) { - struct rwsem_waiter waiter; - - waiter.flags = RWSEM_WAITING_FOR_READ; - rwsem_down_failed_common(sem, &waiter, - RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); - return sem; + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, + -RWSEM_ACTIVE_READ_BIAS); } /* @@ -204,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem) asmregparm struct rw_semaphore __sched * rwsem_down_write_failed(struct rw_semaphore *sem) { - struct rwsem_waiter waiter; - - waiter.flags = RWSEM_WAITING_FOR_WRITE; - rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); - - return sem; + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, + -RWSEM_ACTIVE_WRITE_BIAS); } /* @@ -224,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 0); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -244,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 1); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); spin_unlock_irqrestore(&sem->wait_lock, flags); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a295e404e908..4ceb05d772ae 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -7,8 +7,10 @@ * Version 2. See the file COPYING for more details. */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/scatterlist.h> #include <linux/highmem.h> +#include <linux/kmemleak.h> /** * sg_next - return the next scatterlist entry in a list @@ -114,17 +116,29 @@ EXPORT_SYMBOL(sg_init_one); */ static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask) { - if (nents == SG_MAX_SINGLE_ALLOC) - return (struct scatterlist *) __get_free_page(gfp_mask); - else + if (nents == SG_MAX_SINGLE_ALLOC) { + /* + * Kmemleak doesn't track page allocations as they are not + * commonly used (in a raw form) for kernel data structures. + * As we chain together a list of pages and then a normal + * kmalloc (tracked by kmemleak), in order to for that last + * allocation not to become decoupled (and thus a + * false-positive) we need to inform kmemleak of all the + * intermediate allocations. + */ + void *ptr = (void *) __get_free_page(gfp_mask); + kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask); + return ptr; + } else return kmalloc(nents * sizeof(struct scatterlist), gfp_mask); } static void sg_kfree(struct scatterlist *sg, unsigned int nents) { - if (nents == SG_MAX_SINGLE_ALLOC) + if (nents == SG_MAX_SINGLE_ALLOC) { + kmemleak_free(sg); free_page((unsigned long) sg); - else + } else kfree(sg); } @@ -234,8 +248,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, left -= sg_size; sg = alloc_fn(alloc_size, gfp_mask); - if (unlikely(!sg)) - return -ENOMEM; + if (unlikely(!sg)) { + /* + * Adjust entry count to reflect that the last + * entry of the previous table won't be used for + * linkage. Without this, sg_kfree() may get + * confused. + */ + if (prv) + table->nents = ++table->orig_nents; + + return -ENOMEM; + } sg_init_table(sg, alloc_size); table->nents = table->orig_nents += sg_size; @@ -314,6 +338,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, miter->__sg = sgl; miter->__nents = nents; miter->__offset = 0; + WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG))); miter->__flags = flags; } EXPORT_SYMBOL(sg_miter_start); @@ -394,6 +419,9 @@ void sg_miter_stop(struct sg_mapping_iter *miter) if (miter->addr) { miter->__offset += miter->consumed; + if (miter->__flags & SG_MITER_TO_SG) + flush_kernel_dcache_page(miter->page); + if (miter->__flags & SG_MITER_ATOMIC) { WARN_ON(!irqs_disabled()); kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ); @@ -426,8 +454,14 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, unsigned int offset = 0; struct sg_mapping_iter miter; unsigned long flags; + unsigned int sg_flags = SG_MITER_ATOMIC; + + if (to_buffer) + sg_flags |= SG_MITER_FROM_SG; + else + sg_flags |= SG_MITER_TO_SG; - sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC); + sg_miter_start(&miter, sgl, nents, sg_flags); local_irq_save(flags); @@ -438,10 +472,8 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, if (to_buffer) memcpy(buf + offset, miter.addr, len); - else { + else memcpy(miter.addr, buf + offset, len); - flush_kernel_dcache_page(miter.page); - } offset += len; } diff --git a/lib/show_mem.c b/lib/show_mem.c index 238e72a18ce1..fdc77c82f922 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -15,7 +15,7 @@ void show_mem(void) unsigned long total = 0, reserved = 0, shared = 0, nonshared = 0, highmem = 0; - printk(KERN_INFO "Mem-Info:\n"); + printk("Mem-Info:\n"); show_free_areas(); for_each_online_pgdat(pgdat) { @@ -49,15 +49,15 @@ void show_mem(void) pgdat_resize_unlock(pgdat, &flags); } - printk(KERN_INFO "%lu pages RAM\n", total); + printk("%lu pages RAM\n", total); #ifdef CONFIG_HIGHMEM - printk(KERN_INFO "%lu pages HighMem\n", highmem); + printk("%lu pages HighMem\n", highmem); #endif - printk(KERN_INFO "%lu pages reserved\n", reserved); - printk(KERN_INFO "%lu pages shared\n", shared); - printk(KERN_INFO "%lu pages non-shared\n", nonshared); + printk("%lu pages reserved\n", reserved); + printk("%lu pages shared\n", shared); + printk("%lu pages non-shared\n", nonshared); #ifdef CONFIG_QUICKLIST - printk(KERN_INFO "%lu pages in pagetable cache\n", + printk("%lu pages in pagetable cache\n", quicklist_total_size()); #endif } diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 9c4b0256490b..4755b98b6dfb 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -13,8 +13,8 @@ #include <linux/delay.h> #include <linux/module.h> -void __spin_lock_init(spinlock_t *lock, const char *name, - struct lock_class_key *key) +void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, + struct lock_class_key *key) { #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -23,13 +23,13 @@ void __spin_lock_init(spinlock_t *lock, const char *name, debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map(&lock->dep_map, name, key, 0); #endif - lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; lock->magic = SPINLOCK_MAGIC; lock->owner = SPINLOCK_OWNER_INIT; lock->owner_cpu = -1; } -EXPORT_SYMBOL(__spin_lock_init); +EXPORT_SYMBOL(__raw_spin_lock_init); void __rwlock_init(rwlock_t *lock, const char *name, struct lock_class_key *key) @@ -41,7 +41,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map(&lock->dep_map, name, key, 0); #endif - lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED; + lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED; lock->magic = RWLOCK_MAGIC; lock->owner = SPINLOCK_OWNER_INIT; lock->owner_cpu = -1; @@ -49,7 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, EXPORT_SYMBOL(__rwlock_init); -static void spin_bug(spinlock_t *lock, const char *msg) +static void spin_bug(raw_spinlock_t *lock, const char *msg) { struct task_struct *owner = NULL; @@ -73,7 +73,7 @@ static void spin_bug(spinlock_t *lock, const char *msg) #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) static inline void -debug_spin_lock_before(spinlock_t *lock) +debug_spin_lock_before(raw_spinlock_t *lock) { SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); SPIN_BUG_ON(lock->owner == current, lock, "recursion"); @@ -81,16 +81,16 @@ debug_spin_lock_before(spinlock_t *lock) lock, "cpu recursion"); } -static inline void debug_spin_lock_after(spinlock_t *lock) +static inline void debug_spin_lock_after(raw_spinlock_t *lock) { lock->owner_cpu = raw_smp_processor_id(); lock->owner = current; } -static inline void debug_spin_unlock(spinlock_t *lock) +static inline void debug_spin_unlock(raw_spinlock_t *lock) { SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked"); + SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked"); SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); @@ -98,7 +98,7 @@ static inline void debug_spin_unlock(spinlock_t *lock) lock->owner_cpu = -1; } -static void __spin_lock_debug(spinlock_t *lock) +static void __spin_lock_debug(raw_spinlock_t *lock) { u64 i; u64 loops = loops_per_jiffy * HZ; @@ -106,7 +106,7 @@ static void __spin_lock_debug(spinlock_t *lock) for (;;) { for (i = 0; i < loops; i++) { - if (__raw_spin_trylock(&lock->raw_lock)) + if (arch_spin_trylock(&lock->raw_lock)) return; __delay(1); } @@ -125,17 +125,17 @@ static void __spin_lock_debug(spinlock_t *lock) } } -void _raw_spin_lock(spinlock_t *lock) +void do_raw_spin_lock(raw_spinlock_t *lock) { debug_spin_lock_before(lock); - if (unlikely(!__raw_spin_trylock(&lock->raw_lock))) + if (unlikely(!arch_spin_trylock(&lock->raw_lock))) __spin_lock_debug(lock); debug_spin_lock_after(lock); } -int _raw_spin_trylock(spinlock_t *lock) +int do_raw_spin_trylock(raw_spinlock_t *lock) { - int ret = __raw_spin_trylock(&lock->raw_lock); + int ret = arch_spin_trylock(&lock->raw_lock); if (ret) debug_spin_lock_after(lock); @@ -148,10 +148,10 @@ int _raw_spin_trylock(spinlock_t *lock) return ret; } -void _raw_spin_unlock(spinlock_t *lock) +void do_raw_spin_unlock(raw_spinlock_t *lock) { debug_spin_unlock(lock); - __raw_spin_unlock(&lock->raw_lock); + arch_spin_unlock(&lock->raw_lock); } static void rwlock_bug(rwlock_t *lock, const char *msg) @@ -176,7 +176,7 @@ static void __read_lock_debug(rwlock_t *lock) for (;;) { for (i = 0; i < loops; i++) { - if (__raw_read_trylock(&lock->raw_lock)) + if (arch_read_trylock(&lock->raw_lock)) return; __delay(1); } @@ -193,15 +193,15 @@ static void __read_lock_debug(rwlock_t *lock) } #endif -void _raw_read_lock(rwlock_t *lock) +void do_raw_read_lock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - __raw_read_lock(&lock->raw_lock); + arch_read_lock(&lock->raw_lock); } -int _raw_read_trylock(rwlock_t *lock) +int do_raw_read_trylock(rwlock_t *lock) { - int ret = __raw_read_trylock(&lock->raw_lock); + int ret = arch_read_trylock(&lock->raw_lock); #ifndef CONFIG_SMP /* @@ -212,10 +212,10 @@ int _raw_read_trylock(rwlock_t *lock) return ret; } -void _raw_read_unlock(rwlock_t *lock) +void do_raw_read_unlock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - __raw_read_unlock(&lock->raw_lock); + arch_read_unlock(&lock->raw_lock); } static inline void debug_write_lock_before(rwlock_t *lock) @@ -251,7 +251,7 @@ static void __write_lock_debug(rwlock_t *lock) for (;;) { for (i = 0; i < loops; i++) { - if (__raw_write_trylock(&lock->raw_lock)) + if (arch_write_trylock(&lock->raw_lock)) return; __delay(1); } @@ -268,16 +268,16 @@ static void __write_lock_debug(rwlock_t *lock) } #endif -void _raw_write_lock(rwlock_t *lock) +void do_raw_write_lock(rwlock_t *lock) { debug_write_lock_before(lock); - __raw_write_lock(&lock->raw_lock); + arch_write_lock(&lock->raw_lock); debug_write_lock_after(lock); } -int _raw_write_trylock(rwlock_t *lock) +int do_raw_write_trylock(rwlock_t *lock) { - int ret = __raw_write_trylock(&lock->raw_lock); + int ret = arch_write_trylock(&lock->raw_lock); if (ret) debug_write_lock_after(lock); @@ -290,8 +290,8 @@ int _raw_write_trylock(rwlock_t *lock) return ret; } -void _raw_write_unlock(rwlock_t *lock) +void do_raw_write_unlock(rwlock_t *lock) { debug_write_unlock(lock); - __raw_write_unlock(&lock->raw_lock); + arch_write_unlock(&lock->raw_lock); } diff --git a/lib/string.c b/lib/string.c index b19b87af65a3..f71bead1be3e 100644 --- a/lib/string.c +++ b/lib/string.c @@ -36,25 +36,21 @@ int strnicmp(const char *s1, const char *s2, size_t len) /* Yes, Virginia, it had better be unsigned */ unsigned char c1, c2; - c1 = c2 = 0; - if (len) { - do { - c1 = *s1; - c2 = *s2; - s1++; - s2++; - if (!c1) - break; - if (!c2) - break; - if (c1 == c2) - continue; - c1 = tolower(c1); - c2 = tolower(c2); - if (c1 != c2) - break; - } while (--len); - } + if (!len) + return 0; + + do { + c1 = *s1++; + c2 = *s2++; + if (!c1 || !c2) + break; + if (c1 == c2) + continue; + c1 = tolower(c1); + c2 = tolower(c2); + if (c1 != c2) + break; + } while (--len); return (int)c1 - (int)c2; } EXPORT_SYMBOL(strnicmp); @@ -246,13 +242,17 @@ EXPORT_SYMBOL(strlcat); #undef strcmp int strcmp(const char *cs, const char *ct) { - signed char __res; + unsigned char c1, c2; while (1) { - if ((__res = *cs - *ct++) != 0 || !*cs++) + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) break; } - return __res; + return 0; } EXPORT_SYMBOL(strcmp); #endif @@ -266,14 +266,18 @@ EXPORT_SYMBOL(strcmp); */ int strncmp(const char *cs, const char *ct, size_t count) { - signed char __res = 0; + unsigned char c1, c2; while (count) { - if ((__res = *cs - *ct++) != 0 || !*cs++) + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) break; count--; } - return __res; + return 0; } EXPORT_SYMBOL(strncmp); #endif @@ -330,20 +334,34 @@ EXPORT_SYMBOL(strnchr); #endif /** - * strstrip - Removes leading and trailing whitespace from @s. + * skip_spaces - Removes leading whitespace from @str. + * @str: The string to be stripped. + * + * Returns a pointer to the first non-whitespace character in @str. + */ +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} +EXPORT_SYMBOL(skip_spaces); + +/** + * strim - Removes leading and trailing whitespace from @s. * @s: The string to be stripped. * * Note that the first trailing whitespace is replaced with a %NUL-terminator * in the given string @s. Returns a pointer to the first non-whitespace * character in @s. */ -char *strstrip(char *s) +char *strim(char *s) { size_t size; char *end; + s = skip_spaces(s); size = strlen(s); - if (!size) return s; @@ -352,12 +370,9 @@ char *strstrip(char *s) end--; *(end + 1) = '\0'; - while (*s && isspace(*s)) - s++; - return s; } -EXPORT_SYMBOL(strstrip); +EXPORT_SYMBOL(strim); #ifndef __HAVE_ARCH_STRLEN /** @@ -648,7 +663,7 @@ EXPORT_SYMBOL(memscan); */ char *strstr(const char *s1, const char *s2) { - int l1, l2; + size_t l1, l2; l2 = strlen(s2); if (!l2) @@ -665,6 +680,31 @@ char *strstr(const char *s1, const char *s2) EXPORT_SYMBOL(strstr); #endif +#ifndef __HAVE_ARCH_STRNSTR +/** + * strnstr - Find the first substring in a length-limited string + * @s1: The string to be searched + * @s2: The string to search for + * @len: the maximum number of characters to search + */ +char *strnstr(const char *s1, const char *s2, size_t len) +{ + size_t l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + while (len >= l2) { + len--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +EXPORT_SYMBOL(strnstr); +#endif + #ifndef __HAVE_ARCH_MEMCHR /** * memchr - Find a character in an area of memory. diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bffe6d7ef9d9..7c06ee51a29a 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -28,6 +28,7 @@ #include <linux/types.h> #include <linux/ctype.h> #include <linux/highmem.h> +#include <linux/gfp.h> #include <asm/io.h> #include <asm/dma.h> @@ -49,19 +50,11 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -/* - * Enumeration for sync targets - */ -enum dma_sync_target { - SYNC_FOR_CPU = 0, - SYNC_FOR_DEVICE = 1, -}; - int swiotlb_force; /* - * Used to do a quick range check in unmap_single and - * sync_single_*, to see if the memory was in fact allocated by this + * Used to do a quick range check in swiotlb_tbl_unmap_single and + * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this * API. */ static char *io_tlb_start, *io_tlb_end; @@ -77,7 +70,7 @@ static unsigned long io_tlb_nslabs; */ static unsigned long io_tlb_overflow = 32*1024; -void *io_tlb_overflow_buffer; +static void *io_tlb_overflow_buffer; /* * This is a free list describing the number of free entries available from @@ -97,6 +90,8 @@ static phys_addr_t *io_tlb_orig_addr; */ static DEFINE_SPINLOCK(io_tlb_lock); +static int late_alloc; + static int __init setup_io_tlb_npages(char *str) { @@ -109,55 +104,22 @@ setup_io_tlb_npages(char *str) ++str; if (!strcmp(str, "force")) swiotlb_force = 1; + return 1; } __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ -void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - -void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - -dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr; -} - -phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) -{ - return baddr; -} - +/* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) { - return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); -} - -void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address) -{ - return phys_to_virt(swiotlb_bus_to_phys(hwdev, address)); -} - -int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev, - dma_addr_t addr, size_t size) -{ - return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); + return phys_to_dma(hwdev, virt_to_phys(address)); } -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - -static void swiotlb_print_info(unsigned long bytes) +void swiotlb_print_info(void) { + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; phys_addr_t pstart, pend; pstart = virt_to_phys(io_tlb_start); @@ -170,28 +132,14 @@ static void swiotlb_print_info(unsigned long bytes) (unsigned long long)pend); } -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init -swiotlb_init_with_default_size(size_t default_size) +void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { unsigned long i, bytes; - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; + bytes = nslabs << IO_TLB_SHIFT; - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs); - if (!io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); + io_tlb_nslabs = nslabs; + io_tlb_start = tlb; io_tlb_end = io_tlb_start + bytes; /* @@ -199,26 +147,52 @@ swiotlb_init_with_default_size(size_t default_size) * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE * between io_tlb_start and io_tlb_end. */ - io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); + io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); for (i = 0; i < io_tlb_nslabs; i++) io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); + io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); /* * Get the overflow emergency buffer */ - io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow)); if (!io_tlb_overflow_buffer) panic("Cannot allocate SWIOTLB overflow buffer!\n"); + if (verbose) + swiotlb_print_info(); +} + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void __init +swiotlb_init_with_default_size(size_t default_size, int verbose) +{ + unsigned long bytes; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes)); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); - swiotlb_print_info(bytes); + swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); } void __init -swiotlb_init(void) +swiotlb_init(int verbose) { - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ + swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ } /* @@ -245,7 +219,8 @@ swiotlb_late_init_with_default_size(size_t default_size) bytes = io_tlb_nslabs << IO_TLB_SHIFT; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs); + io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + order); if (io_tlb_start) break; order--; @@ -294,7 +269,9 @@ swiotlb_late_init_with_default_size(size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - swiotlb_print_info(bytes); + swiotlb_print_info(); + + late_alloc = 1; return 0; @@ -315,27 +292,43 @@ cleanup1: return -ENOMEM; } -static inline int -address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) +void __init swiotlb_free(void) { - return swiotlb_arch_address_needs_mapping(hwdev, addr, size); -} + if (!io_tlb_overflow_buffer) + return; -static inline int range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size); + if (late_alloc) { + free_pages((unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow)); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + free_pages((unsigned long)io_tlb_start, + get_order(io_tlb_nslabs << IO_TLB_SHIFT)); + } else { + free_bootmem_late(__pa(io_tlb_overflow_buffer), + PAGE_ALIGN(io_tlb_overflow)); + free_bootmem_late(__pa(io_tlb_orig_addr), + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); + free_bootmem_late(__pa(io_tlb_list), + PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); + free_bootmem_late(__pa(io_tlb_start), + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + } } -static int is_swiotlb_buffer(char *addr) +static int is_swiotlb_buffer(phys_addr_t paddr) { - return addr >= io_tlb_start && addr < io_tlb_end; + return paddr >= virt_to_phys(io_tlb_start) && + paddr < virt_to_phys(io_tlb_end); } /* * Bounce: copy the swiotlb buffer back to the original dma location */ -static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, - enum dma_data_direction dir) +void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long pfn = PFN_DOWN(phys); @@ -371,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, memcpy(phys_to_virt(phys), dma_addr, size); } } +EXPORT_SYMBOL_GPL(swiotlb_bounce); -/* - * Allocates bounce buffer and returns its kernel virtual address. - */ -static void * -map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir) +void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, + phys_addr_t phys, size_t size, + enum dma_data_direction dir) { unsigned long flags; char *dma_addr; unsigned int nslots, stride, index, wrap; int i; - unsigned long start_dma_addr; unsigned long mask; unsigned long offset_slots; unsigned long max_slots; mask = dma_get_seg_boundary(hwdev); - start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask; - offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + tbl_dma_addr &= mask; + + offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; /* * Carefully handle integer overflow which can occur when mask == ~0UL. @@ -477,12 +469,27 @@ found: return dma_addr; } +EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ + +static void * +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) +{ + dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); + + return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); +} /* * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ -static void -do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +void +swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -497,7 +504,7 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) /* * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contigous entries available. + * entries to indicate the number of contiguous entries available. * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. */ @@ -520,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) } spin_unlock_irqrestore(&io_tlb_lock, flags); } +EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); -static void -sync_single(struct device *hwdev, char *dma_addr, size_t size, - int dir, int target) +void +swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir, + enum dma_sync_target target) { int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t phys = io_tlb_orig_addr[index]; @@ -547,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, BUG(); } } +EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -561,9 +571,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_mask = hwdev->coherent_dma_mask; ret = (void *)__get_free_pages(flags, order); - if (ret && - !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev, ret), - size)) { + if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) { /* * The allocated memory isn't reachable by the device. */ @@ -572,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, } if (!ret) { /* - * We are either out of memory or the device can't DMA - * to GFP_DMA memory; fall back on map_single(), which + * We are either out of memory or the device can't DMA to + * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. */ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); @@ -585,13 +593,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dev_addr = swiotlb_virt_to_bus(hwdev, ret); /* Confirm address can be DMA'd by device */ - if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) { + if (dev_addr + size - 1 > dma_mask) { printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", (unsigned long long)dma_mask, (unsigned long long)dev_addr); /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); + swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); return NULL; } *dma_handle = dev_addr; @@ -601,19 +609,22 @@ EXPORT_SYMBOL(swiotlb_alloc_coherent); void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle) + dma_addr_t dev_addr) { + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); + WARN_ON(irqs_disabled()); - if (!is_swiotlb_buffer(vaddr)) - free_pages((unsigned long) vaddr, get_order(size)); + if (!is_swiotlb_buffer(paddr)) + free_pages((unsigned long)vaddr, get_order(size)); else - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); + /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ + swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); } EXPORT_SYMBOL(swiotlb_free_coherent); static void -swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, + int do_panic) { /* * Ran out of IOMMU space for this operation. This is very bad. @@ -625,12 +636,15 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at " "device %s\n", size, dev ? dev_name(dev) : "?"); - if (size > io_tlb_overflow && do_panic) { - if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) - panic("DMA: Memory would be corrupted\n"); - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - panic("DMA: Random memory would be DMAed\n"); - } + if (size <= io_tlb_overflow || !do_panic) + return; + + if (dir == DMA_BIDIRECTIONAL) + panic("DMA: Random memory could be DMA accessed\n"); + if (dir == DMA_FROM_DEVICE) + panic("DMA: Random memory could be DMA written\n"); + if (dir == DMA_TO_DEVICE) + panic("DMA: Random memory could be DMA read\n"); } /* @@ -646,7 +660,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); + dma_addr_t dev_addr = phys_to_dma(dev, phys); void *map; BUG_ON(dir == DMA_NONE); @@ -655,8 +669,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (!address_needs_mapping(dev, dev_addr, size) && - !range_needs_mapping(phys, size)) + if (dma_capable(dev, dev_addr, size) && !swiotlb_force) return dev_addr; /* @@ -673,7 +686,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, /* * Ensure that the address returned is DMA'ble */ - if (address_needs_mapping(dev, dev_addr, size)) + if (!dma_capable(dev, dev_addr, size)) panic("map_single: bounce buffer is not DMA'ble"); return dev_addr; @@ -689,21 +702,27 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); * whatever the device wrote there. */ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { - char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) { - do_unmap_single(hwdev, dma_addr, size, dir); + if (is_swiotlb_buffer(paddr)) { + swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); return; } if (dir != DMA_FROM_DEVICE) return; - dma_mark_clean(dma_addr, size); + /* + * phys_to_virt doesn't work with hihgmem page but we could + * call dma_mark_clean() with hihgmem page here. However, we + * are fine since dma_mark_clean() is null on POWERPC. We can + * make dma_mark_clean() take a physical address if necessary. + */ + dma_mark_clean(phys_to_virt(paddr), size); } void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, @@ -726,21 +745,23 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page); */ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, int target) + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) { - char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) { - sync_single(hwdev, dma_addr, size, dir, target); + if (is_swiotlb_buffer(paddr)) { + swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, + target); return; } if (dir != DMA_FROM_DEVICE) return; - dma_mark_clean(dma_addr, size); + dma_mark_clean(phys_to_virt(paddr), size); } void @@ -760,37 +781,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, EXPORT_SYMBOL(swiotlb_sync_single_for_device); /* - * Same as above, but for a sub-range of the mapping. - */ -static void -swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - int dir, int target) -{ - swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target); -} - -void -swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_CPU); -} -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); - -void -swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_DEVICE); -} -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); - -/* * Map a set of buffers described by scatterlist in streaming mode for DMA. * This is the scatter-gather version of the above swiotlb_map_page * interface. Here the scatter gather list elements are each tagged with the @@ -817,10 +807,10 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, for_each_sg(sgl, sg, nelems, i) { phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); + dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - if (range_needs_mapping(paddr, sg->length) || - address_needs_mapping(hwdev, dev_addr, sg->length)) { + if (swiotlb_force || + !dma_capable(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, sg_phys(sg), sg->length, dir); if (!map) { @@ -843,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs); int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -870,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -885,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg); */ static void swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, int target) + int nelems, enum dma_data_direction dir, + enum dma_sync_target target) { struct scatterlist *sg; int i; diff --git a/lib/textsearch.c b/lib/textsearch.c index 9fbcb44c554f..d608331b3e47 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -103,6 +103,7 @@ #include <linux/rcupdate.h> #include <linux/err.h> #include <linux/textsearch.h> +#include <linux/slab.h> static LIST_HEAD(ts_ops); static DEFINE_SPINLOCK(ts_mod_lock); diff --git a/lib/timerqueue.c b/lib/timerqueue.c new file mode 100644 index 000000000000..e3a1050e6820 --- /dev/null +++ b/lib/timerqueue.c @@ -0,0 +1,107 @@ +/* + * Generic Timer-queue + * + * Manages a simple queue of timers, ordered by expiration time. + * Uses rbtrees for quick list adds and expiration. + * + * NOTE: All of the following functions need to be serialized + * to avoid races. No locking is done by this libary code. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/timerqueue.h> +#include <linux/rbtree.h> +#include <linux/module.h> + +/** + * timerqueue_add - Adds timer to timerqueue. + * + * @head: head of timerqueue + * @node: timer node to be added + * + * Adds the timer node to the timerqueue, sorted by the + * node's expires value. + */ +void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) +{ + struct rb_node **p = &head->head.rb_node; + struct rb_node *parent = NULL; + struct timerqueue_node *ptr; + + /* Make sure we don't add nodes that are already added */ + WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); + + while (*p) { + parent = *p; + ptr = rb_entry(parent, struct timerqueue_node, node); + if (node->expires.tv64 < ptr->expires.tv64) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&node->node, parent, p); + rb_insert_color(&node->node, &head->head); + + if (!head->next || node->expires.tv64 < head->next->expires.tv64) + head->next = node; +} +EXPORT_SYMBOL_GPL(timerqueue_add); + +/** + * timerqueue_del - Removes a timer from the timerqueue. + * + * @head: head of timerqueue + * @node: timer node to be removed + * + * Removes the timer node from the timerqueue. + */ +void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) +{ + WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); + + /* update next pointer */ + if (head->next == node) { + struct rb_node *rbn = rb_next(&node->node); + + head->next = rbn ? + rb_entry(rbn, struct timerqueue_node, node) : NULL; + } + rb_erase(&node->node, &head->head); + RB_CLEAR_NODE(&node->node); +} +EXPORT_SYMBOL_GPL(timerqueue_del); + +/** + * timerqueue_iterate_next - Returns the timer after the provided timer + * + * @node: Pointer to a timer. + * + * Provides the timer that is after the given node. This is used, when + * necessary, to iterate through the list of timers in a timer list + * without modifying the list. + */ +struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node) +{ + struct rb_node *next; + + if (!node) + return NULL; + next = rb_next(&node->node); + if (!next) + return NULL; + return container_of(next, struct timerqueue_node, node); +} +EXPORT_SYMBOL_GPL(timerqueue_iterate_next); diff --git a/lib/uuid.c b/lib/uuid.c new file mode 100644 index 000000000000..8fadd7cef46c --- /dev/null +++ b/lib/uuid.c @@ -0,0 +1,53 @@ +/* + * Unified UUID/GUID definition + * + * Copyright (C) 2009, Intel Corp. + * Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/uuid.h> +#include <linux/random.h> + +static void __uuid_gen_common(__u8 b[16]) +{ + int i; + u32 r; + + for (i = 0; i < 4; i++) { + r = random32(); + memcpy(b + i * 4, &r, 4); + } + /* reversion 0b10 */ + b[8] = (b[8] & 0x3F) | 0x80; +} + +void uuid_le_gen(uuid_le *lu) +{ + __uuid_gen_common(lu->b); + /* version 4 : random generation */ + lu->b[7] = (lu->b[7] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_le_gen); + +void uuid_be_gen(uuid_be *bu) +{ + __uuid_gen_common(bu->b); + /* version 4 : random generation */ + bu->b[6] = (bu->b[6] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_be_gen); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 756ccafa9cec..c150d3dafff4 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -9,7 +9,7 @@ * Wirzenius wrote this portably, Torvalds fucked it up :-) */ -/* +/* * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com> * - changed to provide snprintf and vsnprintf functions * So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de> @@ -25,6 +25,7 @@ #include <linux/kallsyms.h> #include <linux/uaccess.h> #include <linux/ioport.h> +#include <net/addrconf.h> #include <asm/page.h> /* for PAGE_SIZE */ #include <asm/div64.h> @@ -46,14 +47,14 @@ static unsigned int simple_guess_base(const char *cp) } /** - * simple_strtoul - convert a string to an unsigned long + * simple_strtoull - convert a string to an unsigned long long * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use */ -unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) +unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { - unsigned long result = 0; + unsigned long long result = 0; if (!base) base = simple_guess_base(cp); @@ -70,58 +71,39 @@ unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) result = result * base + value; cp++; } - if (endp) *endp = (char *)cp; + return result; } -EXPORT_SYMBOL(simple_strtoul); +EXPORT_SYMBOL(simple_strtoull); /** - * simple_strtol - convert a string to a signed long + * simple_strtoul - convert a string to an unsigned long * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use */ -long simple_strtol(const char *cp, char **endp, unsigned int base) +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) { - if(*cp == '-') - return -simple_strtoul(cp + 1, endp, base); - return simple_strtoul(cp, endp, base); + return simple_strtoull(cp, endp, base); } -EXPORT_SYMBOL(simple_strtol); +EXPORT_SYMBOL(simple_strtoul); /** - * simple_strtoull - convert a string to an unsigned long long + * simple_strtol - convert a string to a signed long * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use */ -unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) +long simple_strtol(const char *cp, char **endp, unsigned int base) { - unsigned long long result = 0; - - if (!base) - base = simple_guess_base(cp); - - if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') - cp += 2; - - while (isxdigit(*cp)) { - unsigned int value; - - value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; - if (value >= base) - break; - result = result * base + value; - cp++; - } + if (*cp == '-') + return -simple_strtoul(cp + 1, endp, base); - if (endp) - *endp = (char *)cp; - return result; + return simple_strtoul(cp, endp, base); } -EXPORT_SYMBOL(simple_strtoull); +EXPORT_SYMBOL(simple_strtol); /** * simple_strtoll - convert a string to a signed long long @@ -131,10 +113,12 @@ EXPORT_SYMBOL(simple_strtoull); */ long long simple_strtoll(const char *cp, char **endp, unsigned int base) { - if(*cp=='-') + if (*cp == '-') return -simple_strtoull(cp + 1, endp, base); + return simple_strtoull(cp, endp, base); } +EXPORT_SYMBOL(simple_strtoll); /** * strict_strtoul - convert a string to an unsigned long strictly @@ -162,18 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res) { char *tail; unsigned long val; - size_t len; *res = 0; - len = strlen(cp); - if (len == 0) + if (!*cp) return -EINVAL; val = simple_strtoul(cp, &tail, base); if (tail == cp) return -EINVAL; - if ((*tail == '\0') || - ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { + + if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { *res = val; return 0; } @@ -235,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res) { char *tail; unsigned long long val; - size_t len; *res = 0; - len = strlen(cp); - if (len == 0) + if (!*cp) return -EINVAL; val = simple_strtoull(cp, &tail, base); if (tail == cp) return -EINVAL; - if ((*tail == '\0') || - ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { + if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { *res = val; return 0; } @@ -282,12 +261,14 @@ int strict_strtoll(const char *cp, unsigned int base, long long *res) } EXPORT_SYMBOL(strict_strtoll); -static int skip_atoi(const char **s) +static noinline_for_stack +int skip_atoi(const char **s) { - int i=0; + int i = 0; while (isdigit(**s)) i = i*10 + *((*s)++) - '0'; + return i; } @@ -301,7 +282,8 @@ static int skip_atoi(const char **s) /* Formats correctly any integer in [0,99999]. * Outputs from one to five digits depending on input. * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ -static char* put_dec_trunc(char *buf, unsigned q) +static noinline_for_stack +char *put_dec_trunc(char *buf, unsigned q) { unsigned d3, d2, d1, d0; d1 = (q>>4) & 0xf; @@ -330,14 +312,16 @@ static char* put_dec_trunc(char *buf, unsigned q) d3 = d3 - 10*q; *buf++ = d3 + '0'; /* next digit */ if (q != 0) - *buf++ = q + '0'; /* most sign. digit */ + *buf++ = q + '0'; /* most sign. digit */ } } } + return buf; } /* Same with if's removed. Always emits five digits */ -static char* put_dec_full(char *buf, unsigned q) +static noinline_for_stack +char *put_dec_full(char *buf, unsigned q) { /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ /* but anyway, gcc produces better code with full-sized ints */ @@ -346,14 +330,15 @@ static char* put_dec_full(char *buf, unsigned q) d2 = (q>>8) & 0xf; d3 = (q>>12); - /* Possible ways to approx. divide by 10 */ - /* gcc -O2 replaces multiply with shifts and adds */ - // (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386) - // (x * 0x67) >> 10: 1100111 - // (x * 0x34) >> 9: 110100 - same - // (x * 0x1a) >> 8: 11010 - same - // (x * 0x0d) >> 7: 1101 - same, shortest code (on i386) - + /* + * Possible ways to approx. divide by 10 + * gcc -O2 replaces multiply with shifts and adds + * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386) + * (x * 0x67) >> 10: 1100111 + * (x * 0x34) >> 9: 110100 - same + * (x * 0x1a) >> 8: 11010 - same + * (x * 0x0d) >> 7: 1101 - same, shortest code (on i386) + */ d0 = 6*(d3 + d2 + d1) + (q & 0xf); q = (d0 * 0xcd) >> 11; d0 = d0 - 10*q; @@ -374,10 +359,12 @@ static char* put_dec_full(char *buf, unsigned q) d3 = d3 - 10*q; *buf++ = d3 + '0'; *buf++ = q + '0'; + return buf; } /* No inlining helps gcc to use registers better */ -static noinline char* put_dec(char *buf, unsigned long long num) +static noinline_for_stack +char *put_dec(char *buf, unsigned long long num) { while (1) { unsigned rem; @@ -393,8 +380,8 @@ static noinline char* put_dec(char *buf, unsigned long long num) #define PLUS 4 /* show plus */ #define SPACE 8 /* space if plus */ #define LEFT 16 /* left justified */ -#define SMALL 32 /* Must be 32 == 0x20 */ -#define SPECIAL 64 /* 0x */ +#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ +#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ enum format_type { FORMAT_TYPE_NONE, /* Just a string part */ @@ -420,16 +407,17 @@ enum format_type { }; struct printf_spec { - enum format_type type; - int flags; /* flags to number() */ - int field_width; /* width of output field */ - int base; - int precision; /* # of digits/chars */ - int qualifier; + u8 type; /* format_type enum */ + u8 flags; /* flags to number() */ + u8 base; /* number base, 8, 10 or 16 only */ + u8 qualifier; /* number qualifier, one of 'hHlLtzZ' */ + s16 field_width; /* width of output field */ + s16 precision; /* # of digits/chars */ }; -static char *number(char *buf, char *end, unsigned long long num, - struct printf_spec spec) +static noinline_for_stack +char *number(char *buf, char *end, unsigned long long num, + struct printf_spec spec) { /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ @@ -447,9 +435,9 @@ static char *number(char *buf, char *end, unsigned long long num, spec.flags &= ~ZEROPAD; sign = 0; if (spec.flags & SIGN) { - if ((signed long long) num < 0) { + if ((signed long long)num < 0) { sign = '-'; - num = - (signed long long) num; + num = -(signed long long)num; spec.field_width--; } else if (spec.flags & PLUS) { sign = '+'; @@ -477,7 +465,9 @@ static char *number(char *buf, char *end, unsigned long long num, else if (spec.base != 10) { /* 8 or 16 */ int mask = spec.base - 1; int shift = 3; - if (spec.base == 16) shift = 4; + + if (spec.base == 16) + shift = 4; do { tmp[i++] = (digits[((unsigned char)num) & mask] | locase); num >>= shift; @@ -492,7 +482,7 @@ static char *number(char *buf, char *end, unsigned long long num, /* leading space padding */ spec.field_width -= spec.precision; if (!(spec.flags & (ZEROPAD+LEFT))) { - while(--spec.field_width >= 0) { + while (--spec.field_width >= 0) { if (buf < end) *buf = ' '; ++buf; @@ -542,15 +532,17 @@ static char *number(char *buf, char *end, unsigned long long num, *buf = ' '; ++buf; } + return buf; } -static char *string(char *buf, char *end, char *s, struct printf_spec spec) +static noinline_for_stack +char *string(char *buf, char *end, const char *s, struct printf_spec spec) { int len, i; if ((unsigned long)s < PAGE_SIZE) - s = "<NULL>"; + s = "(null)"; len = strnlen(s, spec.precision); @@ -571,123 +563,379 @@ static char *string(char *buf, char *end, char *s, struct printf_spec spec) *buf = ' '; ++buf; } + return buf; } -static char *symbol_string(char *buf, char *end, void *ptr, - struct printf_spec spec, char ext) +static noinline_for_stack +char *symbol_string(char *buf, char *end, void *ptr, + struct printf_spec spec, char ext) { unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; - if (ext != 'f') + if (ext != 'f' && ext != 's') sprint_symbol(sym, value); else kallsyms_lookup(value, NULL, NULL, NULL, sym); + return string(buf, end, sym, spec); #else - spec.field_width = 2*sizeof(void *); + spec.field_width = 2 * sizeof(void *); spec.flags |= SPECIAL | SMALL | ZEROPAD; spec.base = 16; + return number(buf, end, value, spec); #endif } -static char *resource_string(char *buf, char *end, struct resource *res, - struct printf_spec spec) +static noinline_for_stack +char *resource_string(char *buf, char *end, struct resource *res, + struct printf_spec spec, const char *fmt) { #ifndef IO_RSRC_PRINTK_SIZE -#define IO_RSRC_PRINTK_SIZE 4 +#define IO_RSRC_PRINTK_SIZE 6 #endif #ifndef MEM_RSRC_PRINTK_SIZE -#define MEM_RSRC_PRINTK_SIZE 8 +#define MEM_RSRC_PRINTK_SIZE 10 #endif - struct printf_spec num_spec = { + static const struct printf_spec io_spec = { .base = 16, + .field_width = IO_RSRC_PRINTK_SIZE, .precision = -1, .flags = SPECIAL | SMALL | ZEROPAD, }; - /* room for the actual numbers, the two "0x", -, [, ] and the final zero */ - char sym[4*sizeof(resource_size_t) + 8]; - char *p = sym, *pend = sym + sizeof(sym); - int size = -1; + static const struct printf_spec mem_spec = { + .base = 16, + .field_width = MEM_RSRC_PRINTK_SIZE, + .precision = -1, + .flags = SPECIAL | SMALL | ZEROPAD, + }; + static const struct printf_spec bus_spec = { + .base = 16, + .field_width = 2, + .precision = -1, + .flags = SMALL | ZEROPAD, + }; + static const struct printf_spec dec_spec = { + .base = 10, + .precision = -1, + .flags = 0, + }; + static const struct printf_spec str_spec = { + .field_width = -1, + .precision = 10, + .flags = LEFT, + }; + static const struct printf_spec flag_spec = { + .base = 16, + .precision = -1, + .flags = SPECIAL | SMALL, + }; - if (res->flags & IORESOURCE_IO) - size = IO_RSRC_PRINTK_SIZE; - else if (res->flags & IORESOURCE_MEM) - size = MEM_RSRC_PRINTK_SIZE; + /* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8) + * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */ +#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4) +#define FLAG_BUF_SIZE (2 * sizeof(res->flags)) +#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]") +#define RAW_BUF_SIZE sizeof("[mem - flags 0x]") + char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; + + char *p = sym, *pend = sym + sizeof(sym); + int decode = (fmt[0] == 'R') ? 1 : 0; + const struct printf_spec *specp; *p++ = '['; - num_spec.field_width = size; - p = number(p, pend, res->start, num_spec); - *p++ = '-'; - p = number(p, pend, res->end, num_spec); + if (res->flags & IORESOURCE_IO) { + p = string(p, pend, "io ", str_spec); + specp = &io_spec; + } else if (res->flags & IORESOURCE_MEM) { + p = string(p, pend, "mem ", str_spec); + specp = &mem_spec; + } else if (res->flags & IORESOURCE_IRQ) { + p = string(p, pend, "irq ", str_spec); + specp = &dec_spec; + } else if (res->flags & IORESOURCE_DMA) { + p = string(p, pend, "dma ", str_spec); + specp = &dec_spec; + } else if (res->flags & IORESOURCE_BUS) { + p = string(p, pend, "bus ", str_spec); + specp = &bus_spec; + } else { + p = string(p, pend, "??? ", str_spec); + specp = &mem_spec; + decode = 0; + } + p = number(p, pend, res->start, *specp); + if (res->start != res->end) { + *p++ = '-'; + p = number(p, pend, res->end, *specp); + } + if (decode) { + if (res->flags & IORESOURCE_MEM_64) + p = string(p, pend, " 64bit", str_spec); + if (res->flags & IORESOURCE_PREFETCH) + p = string(p, pend, " pref", str_spec); + if (res->flags & IORESOURCE_WINDOW) + p = string(p, pend, " window", str_spec); + if (res->flags & IORESOURCE_DISABLED) + p = string(p, pend, " disabled", str_spec); + } else { + p = string(p, pend, " flags ", str_spec); + p = number(p, pend, res->flags, flag_spec); + } *p++ = ']'; - *p = 0; + *p = '\0'; return string(buf, end, sym, spec); } -static char *mac_address_string(char *buf, char *end, u8 *addr, - struct printf_spec spec) +static noinline_for_stack +char *mac_address_string(char *buf, char *end, u8 *addr, + struct printf_spec spec, const char *fmt) { - char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */ + char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")]; char *p = mac_addr; int i; + char separator; + + if (fmt[1] == 'F') { /* FDDI canonical format */ + separator = '-'; + } else { + separator = ':'; + } for (i = 0; i < 6; i++) { p = pack_hex_byte(p, addr[i]); - if (!(spec.flags & SPECIAL) && i != 5) - *p++ = ':'; + if (fmt[0] == 'M' && i != 5) + *p++ = separator; } *p = '\0'; - spec.flags &= ~SPECIAL; return string(buf, end, mac_addr, spec); } -static char *ip6_addr_string(char *buf, char *end, u8 *addr, - struct printf_spec spec) +static noinline_for_stack +char *ip4_string(char *p, const u8 *addr, const char *fmt) { - char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */ - char *p = ip6_addr; int i; + bool leading_zeros = (fmt[0] == 'i'); + int index; + int step; + + switch (fmt[2]) { + case 'h': +#ifdef __BIG_ENDIAN + index = 0; + step = 1; +#else + index = 3; + step = -1; +#endif + break; + case 'l': + index = 3; + step = -1; + break; + case 'n': + case 'b': + default: + index = 0; + step = 1; + break; + } + for (i = 0; i < 4; i++) { + char temp[3]; /* hold each IP quad in reverse order */ + int digits = put_dec_trunc(temp, addr[index]) - temp; + if (leading_zeros) { + if (digits < 3) + *p++ = '0'; + if (digits < 2) + *p++ = '0'; + } + /* reverse the digits in the quad */ + while (digits--) + *p++ = temp[digits]; + if (i < 3) + *p++ = '.'; + index += step; + } + *p = '\0'; - for (i = 0; i < 8; i++) { - p = pack_hex_byte(p, addr[2 * i]); - p = pack_hex_byte(p, addr[2 * i + 1]); - if (!(spec.flags & SPECIAL) && i != 7) + return p; +} + +static noinline_for_stack +char *ip6_compressed_string(char *p, const char *addr) +{ + int i, j, range; + unsigned char zerolength[8]; + int longest = 1; + int colonpos = -1; + u16 word; + u8 hi, lo; + bool needcolon = false; + bool useIPv4; + struct in6_addr in6; + + memcpy(&in6, addr, sizeof(struct in6_addr)); + + useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); + + memset(zerolength, 0, sizeof(zerolength)); + + if (useIPv4) + range = 6; + else + range = 8; + + /* find position of longest 0 run */ + for (i = 0; i < range; i++) { + for (j = i; j < range; j++) { + if (in6.s6_addr16[j] != 0) + break; + zerolength[i]++; + } + } + for (i = 0; i < range; i++) { + if (zerolength[i] > longest) { + longest = zerolength[i]; + colonpos = i; + } + } + + /* emit address */ + for (i = 0; i < range; i++) { + if (i == colonpos) { + if (needcolon || i == 0) + *p++ = ':'; *p++ = ':'; + needcolon = false; + i += longest - 1; + continue; + } + if (needcolon) { + *p++ = ':'; + needcolon = false; + } + /* hex u16 without leading 0s */ + word = ntohs(in6.s6_addr16[i]); + hi = word >> 8; + lo = word & 0xff; + if (hi) { + if (hi > 0x0f) + p = pack_hex_byte(p, hi); + else + *p++ = hex_asc_lo(hi); + p = pack_hex_byte(p, lo); + } + else if (lo > 0x0f) + p = pack_hex_byte(p, lo); + else + *p++ = hex_asc_lo(lo); + needcolon = true; + } + + if (useIPv4) { + if (needcolon) + *p++ = ':'; + p = ip4_string(p, &in6.s6_addr[12], "I4"); } *p = '\0'; - spec.flags &= ~SPECIAL; - return string(buf, end, ip6_addr, spec); + return p; } -static char *ip4_addr_string(char *buf, char *end, u8 *addr, - struct printf_spec spec) +static noinline_for_stack +char *ip6_string(char *p, const char *addr, const char *fmt) { - char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */ - char temp[3]; /* hold each IP quad in reverse order */ - char *p = ip4_addr; - int i, digits; + int i; - for (i = 0; i < 4; i++) { - digits = put_dec_trunc(temp, addr[i]) - temp; - /* reverse the digits in the quad */ - while (digits--) - *p++ = temp[digits]; - if (i != 3) - *p++ = '.'; + for (i = 0; i < 8; i++) { + p = pack_hex_byte(p, *addr++); + p = pack_hex_byte(p, *addr++); + if (fmt[0] == 'I' && i != 7) + *p++ = ':'; } *p = '\0'; - spec.flags &= ~SPECIAL; + + return p; +} + +static noinline_for_stack +char *ip6_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) +{ + char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; + + if (fmt[0] == 'I' && fmt[2] == 'c') + ip6_compressed_string(ip6_addr, addr); + else + ip6_string(ip6_addr, addr, fmt); + + return string(buf, end, ip6_addr, spec); +} + +static noinline_for_stack +char *ip4_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) +{ + char ip4_addr[sizeof("255.255.255.255")]; + + ip4_string(ip4_addr, addr, fmt); return string(buf, end, ip4_addr, spec); } +static noinline_for_stack +char *uuid_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) +{ + char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; + char *p = uuid; + int i; + static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; + const u8 *index = be; + bool uc = false; + + switch (*(++fmt)) { + case 'L': + uc = true; /* fall-through */ + case 'l': + index = le; + break; + case 'B': + uc = true; + break; + } + + for (i = 0; i < 16; i++) { + p = pack_hex_byte(p, addr[index[i]]); + switch (i) { + case 3: + case 5: + case 7: + case 9: + *p++ = '-'; + break; + } + } + + *p = 0; + + if (uc) { + p = uuid; + do { + *p = toupper(*p); + } while (*(++p)); + } + + return string(buf, end, uuid, spec); +} + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -697,25 +945,58 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, * * - 'F' For symbolic function descriptor pointers with offset * - 'f' For simple symbolic function names without offset - * - 'S' For symbolic direct pointers - * - 'R' For a struct resource pointer, it prints the range of - * addresses (not the name nor the flags) + * - 'S' For symbolic direct pointers with offset + * - 's' For symbolic direct pointers without offset + * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] + * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation - * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way (dot-separated - * decimal for v4 and colon separated network-order 16 bit hex for v6) - * - 'i' [46] for 'raw' IPv4/IPv6 addresses, IPv6 omits the colons, IPv4 is - * currently the same + * - 'm' For a 6-byte MAC address, it prints the hex address without colons + * - 'MF' For a 6-byte MAC FDDI address, it prints the address + * with a dash-separated hex notation + * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way + * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4) + * IPv6 uses colon separated network-order 16 bit hex with leading 0's + * - 'i' [46] for 'raw' IPv4/IPv6 addresses + * IPv6 omits the colons (01020304...0f) + * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006) + * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order + * - 'I6c' for IPv6 addresses printed as specified by + * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00 + * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form + * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + * Options for %pU are: + * b big endian lower case hex (default) + * B big endian UPPER case hex + * l little endian lower case hex + * L little endian UPPER case hex + * big endian output byte order is: + * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] + * little endian output byte order is: + * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] + * - 'V' For a struct va_format which contains a format string * and va_list *, + * call vsnprintf(->format, *->va_list). + * Implements a "recursive vsnprintf". + * Do not use this feature without some mechanism to verify the + * correctness of the format string and va_list arguments. * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a * pointer to the real address. */ -static char *pointer(const char *fmt, char *buf, char *end, void *ptr, - struct printf_spec spec) +static noinline_for_stack +char *pointer(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) { - if (!ptr) + if (!ptr) { + /* + * Print (null) with the same width as a pointer so it makes + * tabular output look nice. + */ + if (spec.field_width == -1) + spec.field_width = 2 * sizeof(void *); return string(buf, end, "(null)", spec); + } switch (*fmt) { case 'F': @@ -723,28 +1004,41 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, ptr = dereference_function_descriptor(ptr); /* Fallthrough */ case 'S': + case 's': return symbol_string(buf, end, ptr, spec, *fmt); case 'R': - return resource_string(buf, end, ptr, spec); - case 'm': - spec.flags |= SPECIAL; - /* Fallthrough */ - case 'M': - return mac_address_string(buf, end, ptr, spec); - case 'i': - spec.flags |= SPECIAL; - /* Fallthrough */ - case 'I': - if (fmt[1] == '6') - return ip6_addr_string(buf, end, ptr, spec); - if (fmt[1] == '4') - return ip4_addr_string(buf, end, ptr, spec); - spec.flags &= ~SPECIAL; + case 'r': + return resource_string(buf, end, ptr, spec, fmt); + case 'M': /* Colon separated: 00:01:02:03:04:05 */ + case 'm': /* Contiguous: 000102030405 */ + /* [mM]F (FDDI, bit reversed) */ + return mac_address_string(buf, end, ptr, spec, fmt); + case 'I': /* Formatted IP supported + * 4: 1.2.3.4 + * 6: 0001:0203:...:0708 + * 6c: 1::708 or 1::1.2.3.4 + */ + case 'i': /* Contiguous: + * 4: 001.002.003.004 + * 6: 000102...0f + */ + switch (fmt[1]) { + case '6': + return ip6_addr_string(buf, end, ptr, spec, fmt); + case '4': + return ip4_addr_string(buf, end, ptr, spec, fmt); + } break; + case 'U': + return uuid_string(buf, end, ptr, spec, fmt); + case 'V': + return buf + vsnprintf(buf, end - buf, + ((struct va_format *)ptr)->fmt, + *(((struct va_format *)ptr)->va)); } spec.flags |= SMALL; if (spec.field_width == -1) { - spec.field_width = 2*sizeof(void *); + spec.field_width = 2 * sizeof(void *); spec.flags |= ZEROPAD; } spec.base = 16; @@ -772,7 +1066,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, * @precision: precision of a number * @qualifier: qualifier of a number (long, size_t, ...) */ -static int format_decode(const char *fmt, struct printf_spec *spec) +static noinline_for_stack +int format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; @@ -858,8 +1153,8 @@ precision: qualifier: /* get the conversion qualifier */ spec->qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || - *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { + if (*fmt == 'h' || TOLOWER(*fmt) == 'l' || + TOLOWER(*fmt) == 'z' || *fmt == 't') { spec->qualifier = *fmt++; if (unlikely(spec->qualifier == *fmt)) { if (spec->qualifier == 'l') { @@ -926,7 +1221,7 @@ qualifier: spec->type = FORMAT_TYPE_LONG; else spec->type = FORMAT_TYPE_ULONG; - } else if (spec->qualifier == 'Z' || spec->qualifier == 'z') { + } else if (TOLOWER(spec->qualifier) == 'z') { spec->type = FORMAT_TYPE_SIZE_T; } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; @@ -958,10 +1253,23 @@ qualifier: * @args: Arguments for the format string * * This function follows C99 vsnprintf, but has some extensions: - * %pS output the name of a text symbol + * %pS output the name of a text symbol with offset + * %ps output the name of a text symbol without offset * %pF output the name of a function pointer with its offset * %pf output the name of a function pointer without its offset - * %pR output the address range in a struct resource + * %pR output the address range in a struct resource with decoded flags + * %pr output the address range in a struct resource with raw flags + * %pM output a 6-byte MAC address with colons + * %pm output a 6-byte MAC address without colons + * %pI4 print an IPv4 address without leading zeros + * %pi4 print an IPv4 address with leading zeros + * %pI6 print an IPv6 address with colons + * %pi6 print an IPv6 address without colons + * %pI6c print an IPv6 address as specified by + * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00 + * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper + * case. + * %n is ignored * * The return value is the number of characters which would * be generated for the given input, excluding the trailing @@ -977,19 +1285,13 @@ qualifier: int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) { unsigned long long num; - char *str, *end, c; - int read; + char *str, *end; struct printf_spec spec = {0}; /* Reject out-of-range values early. Large positive sizes are used for unknown buffer sizes. */ - if (unlikely((int) size < 0)) { - /* There can be only one.. */ - static char warn = 1; - WARN_ON(warn); - warn = 0; + if (WARN_ON_ONCE((int) size < 0)) return 0; - } str = buf; end = buf + size; @@ -1002,8 +1304,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) while (*fmt) { const char *old_fmt = fmt; - - read = format_decode(fmt, &spec); + int read = format_decode(fmt, &spec); fmt += read; @@ -1027,7 +1328,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) spec.precision = va_arg(args, int); break; - case FORMAT_TYPE_CHAR: + case FORMAT_TYPE_CHAR: { + char c; + if (!(spec.flags & LEFT)) { while (--spec.field_width > 0) { if (str < end) @@ -1046,6 +1349,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) ++str; } break; + } case FORMAT_TYPE_STR: str = string(str, end, va_arg(args, char *), spec); @@ -1071,13 +1375,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) break; case FORMAT_TYPE_NRCHARS: { - int qualifier = spec.qualifier; + u8 qualifier = spec.qualifier; if (qualifier == 'l') { long *ip = va_arg(args, long *); *ip = (str - buf); - } else if (qualifier == 'Z' || - qualifier == 'z') { + } else if (TOLOWER(qualifier) == 'z') { size_t *ip = va_arg(args, size_t *); *ip = (str - buf); } else { @@ -1160,7 +1463,8 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) { int i; - i=vsnprintf(buf,size,fmt,args); + i = vsnprintf(buf, size, fmt, args); + return (i >= size) ? (size - 1) : i; } EXPORT_SYMBOL(vscnprintf); @@ -1179,14 +1483,15 @@ EXPORT_SYMBOL(vscnprintf); * * See the vsnprintf() documentation for format string extensions over C99. */ -int snprintf(char * buf, size_t size, const char *fmt, ...) +int snprintf(char *buf, size_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); - i=vsnprintf(buf,size,fmt,args); + i = vsnprintf(buf, size, fmt, args); va_end(args); + return i; } EXPORT_SYMBOL(snprintf); @@ -1199,10 +1504,10 @@ EXPORT_SYMBOL(snprintf); * @...: Arguments for the format string * * The return value is the number of characters written into @buf not including - * the trailing '\0'. If @size is <= 0 the function returns 0. + * the trailing '\0'. If @size is == 0 the function returns 0. */ -int scnprintf(char * buf, size_t size, const char *fmt, ...) +int scnprintf(char *buf, size_t size, const char *fmt, ...) { va_list args; int i; @@ -1210,7 +1515,12 @@ int scnprintf(char * buf, size_t size, const char *fmt, ...) va_start(args, fmt); i = vsnprintf(buf, size, fmt, args); va_end(args); - return (i >= size) ? (size - 1) : i; + + if (likely(i < size)) + return i; + if (size != 0) + return size - 1; + return 0; } EXPORT_SYMBOL(scnprintf); @@ -1247,14 +1557,15 @@ EXPORT_SYMBOL(vsprintf); * * See the vsnprintf() documentation for format string extensions over C99. */ -int sprintf(char * buf, const char *fmt, ...) +int sprintf(char *buf, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); - i=vsnprintf(buf, INT_MAX, fmt, args); + i = vsnprintf(buf, INT_MAX, fmt, args); va_end(args); + return i; } EXPORT_SYMBOL(sprintf); @@ -1287,7 +1598,6 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) { struct printf_spec spec = {0}; char *str, *end; - int read; str = (char *)bin_buf; end = (char *)(bin_buf + size); @@ -1312,14 +1622,15 @@ do { \ str += sizeof(type); \ } while (0) - while (*fmt) { - read = format_decode(fmt, &spec); + int read = format_decode(fmt, &spec); fmt += read; switch (spec.type) { case FORMAT_TYPE_NONE: + case FORMAT_TYPE_INVALID: + case FORMAT_TYPE_PERCENT_CHAR: break; case FORMAT_TYPE_WIDTH: @@ -1334,13 +1645,14 @@ do { \ case FORMAT_TYPE_STR: { const char *save_str = va_arg(args, char *); size_t len; + if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE || (unsigned long)save_str < PAGE_SIZE) - save_str = "<NULL>"; - len = strlen(save_str); - if (str + len + 1 < end) - memcpy(str, save_str, len + 1); - str += len + 1; + save_str = "(null)"; + len = strlen(save_str) + 1; + if (str + len < end) + memcpy(str, save_str, len); + str += len; break; } @@ -1351,19 +1663,13 @@ do { \ fmt++; break; - case FORMAT_TYPE_PERCENT_CHAR: - break; - - case FORMAT_TYPE_INVALID: - break; - case FORMAT_TYPE_NRCHARS: { /* skip %n 's argument */ - int qualifier = spec.qualifier; + u8 qualifier = spec.qualifier; void *skip_arg; if (qualifier == 'l') skip_arg = va_arg(args, long *); - else if (qualifier == 'Z' || qualifier == 'z') + else if (TOLOWER(qualifier) == 'z') skip_arg = va_arg(args, size_t *); else skip_arg = va_arg(args, int *); @@ -1399,8 +1705,8 @@ do { \ } } } - return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; + return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; #undef save_arg } EXPORT_SYMBOL_GPL(vbin_printf); @@ -1417,11 +1723,7 @@ EXPORT_SYMBOL_GPL(vbin_printf); * a binary buffer that generated by vbin_printf. * * The format follows C99 vsnprintf, but has some extensions: - * %pS output the name of a text symbol - * %pF output the name of a function pointer with its offset - * %pf output the name of a function pointer without its offset - * %pR output the address range in a struct resource - * %n is ignored + * see vsnprintf comment for details. * * The return value is the number of characters which would * be generated for the given input, excluding the trailing @@ -1433,19 +1735,12 @@ EXPORT_SYMBOL_GPL(vbin_printf); */ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) { - unsigned long long num; - char *str, *end, c; - const char *args = (const char *)bin_buf; - struct printf_spec spec = {0}; + char *str, *end; + const char *args = (const char *)bin_buf; - if (unlikely((int) size < 0)) { - /* There can be only one.. */ - static char warn = 1; - WARN_ON(warn); - warn = 0; + if (WARN_ON_ONCE((int) size < 0)) return 0; - } str = buf; end = buf + size; @@ -1472,10 +1767,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) } while (*fmt) { - int read; const char *old_fmt = fmt; - - read = format_decode(fmt, &spec); + int read = format_decode(fmt, &spec); fmt += read; @@ -1499,7 +1792,9 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) spec.precision = get_arg(int); break; - case FORMAT_TYPE_CHAR: + case FORMAT_TYPE_CHAR: { + char c; + if (!(spec.flags & LEFT)) { while (--spec.field_width > 0) { if (str < end) @@ -1517,11 +1812,11 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) ++str; } break; + } case FORMAT_TYPE_STR: { const char *str_arg = args; - size_t len = strlen(str_arg); - args += len + 1; + args += strlen(str_arg) + 1; str = string(str, end, (char *)str_arg, spec); break; } @@ -1533,11 +1828,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) break; case FORMAT_TYPE_PERCENT_CHAR: - if (str < end) - *str = '%'; - ++str; - break; - case FORMAT_TYPE_INVALID: if (str < end) *str = '%'; @@ -1548,15 +1838,15 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) /* skip */ break; - default: + default: { + unsigned long long num; + switch (spec.type) { case FORMAT_TYPE_LONG_LONG: num = get_arg(long long); break; case FORMAT_TYPE_ULONG: - num = get_arg(unsigned long); - break; case FORMAT_TYPE_LONG: num = get_arg(unsigned long); break; @@ -1586,8 +1876,9 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) } str = number(str, end, num, spec); - } - } + } /* default: */ + } /* switch(spec.type) */ + } /* while(*fmt) */ if (size > 0) { if (str < end) @@ -1621,6 +1912,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) va_start(args, fmt); ret = vbin_printf(bin_buf, size, fmt, args); va_end(args); + return ret; } EXPORT_SYMBOL_GPL(bprintf); @@ -1633,27 +1925,25 @@ EXPORT_SYMBOL_GPL(bprintf); * @fmt: format of buffer * @args: arguments */ -int vsscanf(const char * buf, const char * fmt, va_list args) +int vsscanf(const char *buf, const char *fmt, va_list args) { const char *str = buf; char *next; char digit; int num = 0; - int qualifier; - int base; - int field_width; - int is_sign = 0; + u8 qualifier; + u8 base; + s16 field_width; + bool is_sign; - while(*fmt && *str) { + while (*fmt && *str) { /* skip any white space in format */ /* white space in format matchs any amount of * white space, including none, in the input. */ if (isspace(*fmt)) { - while (isspace(*fmt)) - ++fmt; - while (isspace(*str)) - ++str; + fmt = skip_spaces(++fmt); + str = skip_spaces(str); } /* anything that is not a conversion must match exactly */ @@ -1666,12 +1956,12 @@ int vsscanf(const char * buf, const char * fmt, va_list args) if (!*fmt) break; ++fmt; - + /* skip this conversion. * advance both strings to next white space */ if (*fmt == '*') { - while (!isspace(*fmt) && *fmt) + while (!isspace(*fmt) && *fmt != '%' && *fmt) fmt++; while (!isspace(*str) && *str) str++; @@ -1685,8 +1975,8 @@ int vsscanf(const char * buf, const char * fmt, va_list args) /* get conversion qualifier */ qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || - *fmt == 'Z' || *fmt == 'z') { + if (*fmt == 'h' || TOLOWER(*fmt) == 'l' || + TOLOWER(*fmt) == 'z') { qualifier = *fmt++; if (unlikely(qualifier == *fmt)) { if (qualifier == 'h') { @@ -1698,16 +1988,17 @@ int vsscanf(const char * buf, const char * fmt, va_list args) } } } - base = 10; - is_sign = 0; if (!*fmt || !*str) break; - switch(*fmt++) { + base = 10; + is_sign = 0; + + switch (*fmt++) { case 'c': { - char *s = (char *) va_arg(args,char*); + char *s = (char *)va_arg(args, char*); if (field_width == -1) field_width = 1; do { @@ -1718,17 +2009,15 @@ int vsscanf(const char * buf, const char * fmt, va_list args) continue; case 's': { - char *s = (char *) va_arg(args, char *); - if(field_width == -1) - field_width = INT_MAX; + char *s = (char *)va_arg(args, char *); + if (field_width == -1) + field_width = SHRT_MAX; /* first, skip leading white space in buffer */ - while (isspace(*str)) - str++; + str = skip_spaces(str); /* now copy until next white space */ - while (*str && !isspace(*str) && field_width--) { + while (*str && !isspace(*str) && field_width--) *s++ = *str++; - } *s = '\0'; num++; } @@ -1736,7 +2025,7 @@ int vsscanf(const char * buf, const char * fmt, va_list args) case 'n': /* return number of characters read so far */ { - int *i = (int *)va_arg(args,int*); + int *i = (int *)va_arg(args, int*); *i = str - buf; } continue; @@ -1748,14 +2037,14 @@ int vsscanf(const char * buf, const char * fmt, va_list args) base = 16; break; case 'i': - base = 0; + base = 0; case 'd': is_sign = 1; case 'u': break; case '%': /* looking for '%' in str */ - if (*str++ != '%') + if (*str++ != '%') return num; continue; default: @@ -1766,71 +2055,70 @@ int vsscanf(const char * buf, const char * fmt, va_list args) /* have some sort of integer conversion. * first, skip white space in buffer. */ - while (isspace(*str)) - str++; + str = skip_spaces(str); digit = *str; if (is_sign && digit == '-') digit = *(str + 1); if (!digit - || (base == 16 && !isxdigit(digit)) - || (base == 10 && !isdigit(digit)) - || (base == 8 && (!isdigit(digit) || digit > '7')) - || (base == 0 && !isdigit(digit))) - break; + || (base == 16 && !isxdigit(digit)) + || (base == 10 && !isdigit(digit)) + || (base == 8 && (!isdigit(digit) || digit > '7')) + || (base == 0 && !isdigit(digit))) + break; - switch(qualifier) { + switch (qualifier) { case 'H': /* that's 'hh' in format */ if (is_sign) { - signed char *s = (signed char *) va_arg(args,signed char *); - *s = (signed char) simple_strtol(str,&next,base); + signed char *s = (signed char *)va_arg(args, signed char *); + *s = (signed char)simple_strtol(str, &next, base); } else { - unsigned char *s = (unsigned char *) va_arg(args, unsigned char *); - *s = (unsigned char) simple_strtoul(str, &next, base); + unsigned char *s = (unsigned char *)va_arg(args, unsigned char *); + *s = (unsigned char)simple_strtoul(str, &next, base); } break; case 'h': if (is_sign) { - short *s = (short *) va_arg(args,short *); - *s = (short) simple_strtol(str,&next,base); + short *s = (short *)va_arg(args, short *); + *s = (short)simple_strtol(str, &next, base); } else { - unsigned short *s = (unsigned short *) va_arg(args, unsigned short *); - *s = (unsigned short) simple_strtoul(str, &next, base); + unsigned short *s = (unsigned short *)va_arg(args, unsigned short *); + *s = (unsigned short)simple_strtoul(str, &next, base); } break; case 'l': if (is_sign) { - long *l = (long *) va_arg(args,long *); - *l = simple_strtol(str,&next,base); + long *l = (long *)va_arg(args, long *); + *l = simple_strtol(str, &next, base); } else { - unsigned long *l = (unsigned long*) va_arg(args,unsigned long*); - *l = simple_strtoul(str,&next,base); + unsigned long *l = (unsigned long *)va_arg(args, unsigned long *); + *l = simple_strtoul(str, &next, base); } break; case 'L': if (is_sign) { - long long *l = (long long*) va_arg(args,long long *); - *l = simple_strtoll(str,&next,base); + long long *l = (long long *)va_arg(args, long long *); + *l = simple_strtoll(str, &next, base); } else { - unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*); - *l = simple_strtoull(str,&next,base); + unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *); + *l = simple_strtoull(str, &next, base); } break; case 'Z': case 'z': { - size_t *s = (size_t*) va_arg(args,size_t*); - *s = (size_t) simple_strtoul(str,&next,base); + size_t *s = (size_t *)va_arg(args, size_t *); + *s = (size_t)simple_strtoul(str, &next, base); } break; default: if (is_sign) { - int *i = (int *) va_arg(args, int*); - *i = (int) simple_strtol(str,&next,base); + int *i = (int *)va_arg(args, int *); + *i = (int)simple_strtol(str, &next, base); } else { - unsigned int *i = (unsigned int*) va_arg(args, unsigned int*); - *i = (unsigned int) simple_strtoul(str,&next,base); + unsigned int *i = (unsigned int *)va_arg(args, unsigned int*); + *i = (unsigned int)simple_strtoul(str, &next, base); } break; } @@ -1861,14 +2149,15 @@ EXPORT_SYMBOL(vsscanf); * @fmt: formatting of buffer * @...: resulting arguments */ -int sscanf(const char * buf, const char * fmt, ...) +int sscanf(const char *buf, const char *fmt, ...) { va_list args; int i; - va_start(args,fmt); - i = vsscanf(buf,fmt,args); + va_start(args, fmt); + i = vsscanf(buf, fmt, args); va_end(args); + return i; } EXPORT_SYMBOL(sscanf); diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index c3e4a2baf835..46a31e5f49c3 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -135,7 +135,7 @@ static const config configuration_table[10] = { /* =========================================================================== * Update a hash value with the given input byte - * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * IN assertion: all calls to UPDATE_HASH are made with consecutive * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */ @@ -146,7 +146,7 @@ static const config configuration_table[10] = { * Insert string str in the dictionary and set match_head to the previous head * of the hash chain (the most recent string with same hash key). Return * the previous length of the hash chain. - * IN assertion: all calls to to INSERT_STRING are made with consecutive + * IN assertion: all calls to INSERT_STRING are made with consecutive * input characters and the first MIN_MATCH bytes of str are valid * (except for the last MIN_MATCH-1 bytes of the input file). */ diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c index 8550b0c05d00..2c13ecc5bb2c 100644 --- a/lib/zlib_inflate/inffast.c +++ b/lib/zlib_inflate/inffast.c @@ -21,12 +21,31 @@ - Pentium III (Anderson) - M68060 (Nikl) */ +union uu { + unsigned short us; + unsigned char b[2]; +}; + +/* Endian independed version */ +static inline unsigned short +get_unaligned16(const unsigned short *p) +{ + union uu mm; + unsigned char *b = (unsigned char *)p; + + mm.b[0] = b[0]; + mm.b[1] = b[1]; + return mm.us; +} + #ifdef POSTINC # define OFF 0 # define PUP(a) *(a)++ +# define UP_UNALIGNED(a) get_unaligned16((a)++) #else # define OFF 1 # define PUP(a) *++(a) +# define UP_UNALIGNED(a) get_unaligned16(++(a)) #endif /* @@ -239,18 +258,50 @@ void inflate_fast(z_streamp strm, unsigned start) } } else { + unsigned short *sout; + unsigned long loops; + from = out - dist; /* copy direct from output */ - do { /* minimum length is three */ - PUP(out) = PUP(from); - PUP(out) = PUP(from); - PUP(out) = PUP(from); - len -= 3; - } while (len > 2); - if (len) { - PUP(out) = PUP(from); - if (len > 1) - PUP(out) = PUP(from); - } + /* minimum length is three */ + /* Align out addr */ + if (!((long)(out - 1 + OFF) & 1)) { + PUP(out) = PUP(from); + len--; + } + sout = (unsigned short *)(out - OFF); + if (dist > 2) { + unsigned short *sfrom; + + sfrom = (unsigned short *)(from - OFF); + loops = len >> 1; + do +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + PUP(sout) = PUP(sfrom); +#else + PUP(sout) = UP_UNALIGNED(sfrom); +#endif + while (--loops); + out = (unsigned char *)sout + OFF; + from = (unsigned char *)sfrom + OFF; + } else { /* dist == 1 or dist == 2 */ + unsigned short pat16; + + pat16 = *(sout-1+OFF); + if (dist == 1) { + union uu mm; + /* copy one char pattern to both bytes */ + mm.us = pat16; + mm.b[0] = mm.b[1]; + pat16 = mm.us; + } + loops = len >> 1; + do + PUP(sout) = pat16; + while (--loops); + out = (unsigned char *)sout + OFF; + } + if (len & 1) + PUP(out) = PUP(from); } } else if ((op & 64) == 0) { /* 2nd level distance code */ |
